nat: fix ICMP error translation
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed.c
1 /*
2  * snat.c - simple nat plugin
3  *
4  * Copyright (c) 2016 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vpp/app/version.h>
19
20 #include <vnet/vnet.h>
21 #include <vnet/ip/ip.h>
22 #include <vnet/ip/ip4.h>
23 #include <vnet/ip/ip_table.h>
24 #include <vnet/ip/reass/ip4_sv_reass.h>
25 #include <vnet/fib/fib_table.h>
26 #include <vnet/fib/ip4_fib.h>
27 #include <vnet/plugin/plugin.h>
28 #include <vppinfra/bihash_16_8.h>
29
30 #include <nat/lib/log.h>
31 #include <nat/lib/nat_inlines.h>
32 #include <nat/lib/ipfix_logging.h>
33 #include <vnet/syslog/syslog.h>
34 #include <nat/lib/nat_syslog_constants.h>
35 #include <nat/lib/nat_syslog.h>
36
37 #include <nat/nat44-ed/nat44_ed.h>
38 #include <nat/nat44-ed/nat44_ed_affinity.h>
39 #include <nat/nat44-ed/nat44_ed_inlines.h>
40
41 #include <vlib/stats/stats.h>
42
/* Global NAT44-ED plugin state. Functions in this file reach it through a
 * local `snat_main_t *sm = &snat_main;` pointer. */
snat_main_t snat_main;

/* Forward declaration only; the definition is not in this part of the
 * file. */
static_always_inline void nat_validate_interface_counters (snat_main_t *sm,
                                                           u32 sw_if_index);
47
/* Guard for functions returning void: executes a bare `return;` in the
 * enclosing function when snat_main.enabled is zero. The `sm` declared here
 * is local to the do/while block. */
#define skip_if_disabled()                                                    \
  do                                                                          \
    {                                                                         \
      snat_main_t *sm = &snat_main;                                           \
      if (PREDICT_FALSE (!sm->enabled))                                       \
        return;                                                               \
    }                                                                         \
  while (0)
56
/* Guard for functions returning an integer status: when snat_main.enabled
 * is non-zero, logs "plugin enabled" and makes the enclosing function
 * return 1. */
#define fail_if_enabled()                                                     \
  do                                                                          \
    {                                                                         \
      snat_main_t *sm = &snat_main;                                           \
      if (PREDICT_FALSE (sm->enabled))                                        \
        {                                                                     \
          nat_log_err ("plugin enabled");                                     \
          return 1;                                                           \
        }                                                                     \
    }                                                                         \
  while (0)
68
/* Guard for functions returning an integer status: when snat_main.enabled
 * is zero, logs "plugin disabled" and makes the enclosing function
 * return 1. */
#define fail_if_disabled()                                                    \
  do                                                                          \
    {                                                                         \
      snat_main_t *sm = &snat_main;                                           \
      if (PREDICT_FALSE (!sm->enabled))                                       \
        {                                                                     \
          nat_log_err ("plugin disabled");                                    \
          return 1;                                                           \
        }                                                                     \
    }                                                                         \
  while (0)
80
/*
 * Hook up input features.
 *
 * Every registration below attaches a NAT graph node to the "ip4-unicast"
 * feature arc. Each entry lists, in .runs_after, the features it must be
 * ordered behind; entries that deal with out2in traffic additionally list
 * "ip4-dhcp-client-detect".
 */
VNET_FEATURE_INIT (nat_pre_in2out, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat-pre-in2out",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (nat_pre_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat-pre-out2in",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-dhcp-client-detect",
                               "ip4-sv-reassembly-feature"),
};
/* Worker handoff nodes: ordered only relative to the ACL plugin (and the
 * DHCP client detection feature for out2in). */
VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-in2out-worker-handoff",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
};
VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-out2in-worker-handoff",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-dhcp-client-detect"),
};
VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-in2out",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-out2in",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
                               "ip4-dhcp-client-detect"),
};
/* Endpoint-dependent (ED) variants. */
VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-in2out",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-out2in",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
                               "ip4-dhcp-client-detect"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-classify",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-handoff-classify",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
/* "fast" variants of the in2out / out2in nodes. */
VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-in2out-fast",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-out2in-fast",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
                               "ip4-dhcp-client-detect"),
};
149
/*
 * Hook up output features.
 *
 * Every registration below attaches a NAT graph node to the "ip4-output"
 * feature arc, ordered after "ip4-sv-reassembly-output-feature" and before
 * "acl-plugin-out-ip4-fa".
 */
VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-in2out-output",
  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
  .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-in2out-output-worker-handoff",
  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
  .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
VNET_FEATURE_INIT (nat_pre_in2out_output, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat-pre-in2out-output",
  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
  .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-ed-in2out-output",
  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
  .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
175
/* Plugin registration record: advertises the VPP build version this plugin
 * was compiled against and a human-readable description. */
VLIB_PLUGIN_REGISTER () = {
    .version = VPP_BUILD_VER,
    .description = "Network Address Translation (NAT)",
};
180
/* Forward declarations for functions defined later in this file. */
static void nat44_ed_db_init (u32 translations, u32 translation_buckets);
static void nat44_ed_worker_db_free (snat_main_per_thread_data_t *tsm);

/* Workers behind the public nat44_ed_add_static_mapping () and
 * nat44_ed_del_static_mapping () entry points; they receive the already
 * resolved external address. */
static int nat44_ed_add_static_mapping_internal (
  ip4_address_t l_addr, ip4_address_t e_addr, u16 l_port, u16 e_port,
  ip_protocol_t proto, u32 vrf_id, u32 sw_if_index, u32 flags,
  ip4_address_t pool_addr, u8 *tag);
static int nat44_ed_del_static_mapping_internal (ip4_address_t l_addr,
                                                 ip4_address_t e_addr,
                                                 u16 l_port, u16 e_port,
                                                 ip_protocol_t proto,
                                                 u32 vrf_id, u32 flags);

/* Non-static prototype; the definition is not in this part of the file. */
u32 nat_calc_bihash_buckets (u32 n_elts);
195
196 u8 *
197 format_ed_session_kvp (u8 * s, va_list * args)
198 {
199   clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
200
201   u8 proto;
202   u16 r_port, l_port;
203   ip4_address_t l_addr, r_addr;
204   u32 fib_index;
205
206   split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port);
207   s = format (s,
208               "local %U:%d remote %U:%d proto %U fib %d thread-index %u "
209               "session-index %u",
210               format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port),
211               format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port),
212               format_ip_protocol, proto, fib_index,
213               ed_value_get_thread_index (v), ed_value_get_session_index (v));
214
215   return s;
216 }
217
218 static_always_inline int
219 nat44_ed_sm_i2o_add (snat_main_t *sm, snat_static_mapping_t *m,
220                      ip4_address_t addr, u16 port, u32 fib_index, u8 proto)
221 {
222   ASSERT (!pool_is_free (sm->static_mappings, m));
223   clib_bihash_kv_16_8_t kv;
224   nat44_ed_sm_init_i2o_kv (&kv, addr.as_u32, port, fib_index, proto,
225                            m - sm->static_mappings);
226   return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, 1 /*is_add*/);
227 }
228
229 static_always_inline int
230 nat44_ed_sm_i2o_del (snat_main_t *sm, ip4_address_t addr, u16 port,
231                      u32 fib_index, u8 proto)
232 {
233   clib_bihash_kv_16_8_t kv;
234   nat44_ed_sm_init_i2o_k (&kv, addr.as_u32, port, fib_index, proto);
235   return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, 0 /*is_add*/);
236 }
237
238 static_always_inline int
239 nat44_ed_sm_o2i_add (snat_main_t *sm, snat_static_mapping_t *m,
240                      ip4_address_t addr, u16 port, u32 fib_index, u8 proto)
241 {
242   ASSERT (!pool_is_free (sm->static_mappings, m));
243   clib_bihash_kv_16_8_t kv;
244   nat44_ed_sm_init_o2i_kv (&kv, addr.as_u32, port, fib_index, proto,
245                            m - sm->static_mappings);
246   return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, 1 /*is_add*/);
247 }
248
249 static_always_inline int
250 nat44_ed_sm_o2i_del (snat_main_t *sm, ip4_address_t addr, u16 port,
251                      u32 fib_index, u8 proto)
252 {
253   clib_bihash_kv_16_8_t kv;
254   nat44_ed_sm_init_o2i_k (&kv, addr.as_u32, port, fib_index, proto);
255   return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, 0 /*is_add*/);
256 }
257
258 void
259 nat44_ed_free_session_data (snat_main_t *sm, snat_session_t *s,
260                             u32 thread_index, u8 is_ha)
261 {
262   per_vrf_sessions_unregister_session (s, thread_index);
263
264   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 0))
265     nat_elog_warn (sm, "flow hash del failed");
266
267   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0))
268     nat_elog_warn (sm, "flow hash del failed");
269
270   if (na44_ed_is_fwd_bypass_session (s))
271     {
272       return;
273     }
274
275   if (nat44_ed_is_affinity_session (s))
276     nat_affinity_unlock (s->ext_host_addr, s->out2in.addr, s->proto,
277                          s->out2in.port);
278
279   if (!is_ha)
280     nat_syslog_nat44_sdel (0, s->in2out.fib_index, &s->in2out.addr,
281                            s->in2out.port, &s->ext_host_nat_addr,
282                            s->ext_host_nat_port, &s->out2in.addr,
283                            s->out2in.port, &s->ext_host_addr, s->ext_host_port,
284                            s->proto, nat44_ed_is_twice_nat_session (s));
285
286   if (!is_ha)
287     {
288       /* log NAT event */
289       nat_ipfix_logging_nat44_ses_delete (
290         thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
291         s->in2out.port, s->out2in.port, s->in2out.fib_index);
292     }
293 }
294
295 static ip_interface_address_t *
296 nat44_ed_get_ip_interface_address (u32 sw_if_index, ip4_address_t addr)
297 {
298   snat_main_t *sm = &snat_main;
299
300   ip_lookup_main_t *lm = &sm->ip4_main->lookup_main;
301   ip_interface_address_t *ia;
302   ip4_address_t *ip4a;
303
304   foreach_ip_interface_address (
305     lm, ia, sw_if_index, 1, ({
306       ip4a = ip_interface_address_get_address (lm, ia);
307       nat_log_debug ("sw_if_idx: %u addr: %U ? %U", sw_if_index,
308                      format_ip4_address, ip4a, format_ip4_address, &addr);
309       if (ip4a->as_u32 == addr.as_u32)
310         {
311           return ia;
312         }
313     }));
314   return NULL;
315 }
316
317 static int
318 nat44_ed_resolve_nat_addr_len (snat_address_t *ap,
319                                snat_interface_t *interfaces)
320 {
321   ip_interface_address_t *ia;
322   snat_interface_t *i;
323   u32 fib_index;
324
325   pool_foreach (i, interfaces)
326     {
327       if (!nat44_ed_is_interface_outside (i))
328         {
329           continue;
330         }
331
332       fib_index = ip4_fib_table_get_index_for_sw_if_index (i->sw_if_index);
333       if (fib_index != ap->fib_index)
334         {
335           continue;
336         }
337
338       if ((ia = nat44_ed_get_ip_interface_address (i->sw_if_index, ap->addr)))
339         {
340           ap->addr_len = ia->address_length;
341           ap->sw_if_index = i->sw_if_index;
342           ap->net.as_u32 = (ap->addr.as_u32 >> (32 - ap->addr_len))
343                            << (32 - ap->addr_len);
344
345           nat_log_debug ("pool addr %U binds to -> sw_if_idx: %u net: %U/%u",
346                          format_ip4_address, &ap->addr, ap->sw_if_index,
347                          format_ip4_address, &ap->net, ap->addr_len);
348           return 0;
349         }
350     }
351   return 1;
352 }
353
354 static void
355 nat44_ed_update_outside_if_addresses (snat_address_t *ap)
356 {
357   snat_main_t *sm = &snat_main;
358
359   if (!nat44_ed_resolve_nat_addr_len (ap, sm->interfaces))
360     {
361       return;
362     }
363
364   if (!nat44_ed_resolve_nat_addr_len (ap, sm->output_feature_interfaces))
365     {
366       return;
367     }
368 }
369
370 static void
371 nat44_ed_bind_if_addr_to_nat_addr (u32 sw_if_index)
372 {
373   snat_main_t *sm = &snat_main;
374   ip_interface_address_t *ia;
375   snat_address_t *ap;
376
377   u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
378
379   vec_foreach (ap, sm->addresses)
380     {
381       if (fib_index != ap->fib_index)
382         {
383           continue;
384         }
385
386       if ((ia = nat44_ed_get_ip_interface_address (sw_if_index, ap->addr)))
387         {
388           ap->addr_len = ia->address_length;
389           ap->sw_if_index = sw_if_index;
390           ap->net.as_u32 = (ap->addr.as_u32 >> (32 - ap->addr_len))
391                            << (32 - ap->addr_len);
392
393           nat_log_debug ("pool addr %U binds to -> sw_if_idx: %u net: %U/%u",
394                          format_ip4_address, &ap->addr, ap->sw_if_index,
395                          format_ip4_address, &ap->net, ap->addr_len);
396           return;
397         }
398     }
399 }
400
401 static_always_inline snat_fib_entry_reg_t *
402 nat44_ed_get_fib_entry_reg (ip4_address_t addr, u32 sw_if_index, int *out_idx)
403 {
404   snat_main_t *sm = &snat_main;
405   snat_fib_entry_reg_t *fe;
406   int i;
407
408   for (i = 0; i < vec_len (sm->fib_entry_reg); i++)
409     {
410       fe = sm->fib_entry_reg + i;
411       if ((addr.as_u32 == fe->addr.as_u32) && (sw_if_index == fe->sw_if_index))
412         {
413           if (out_idx)
414             {
415               *out_idx = i;
416             }
417           return fe;
418         }
419     }
420   return NULL;
421 }
422
423 static void
424 nat44_ed_add_fib_entry_reg (ip4_address_t addr, u32 sw_if_index)
425 {
426   // Add the external NAT address to the FIB as receive entries. This ensures
427   // that VPP will reply to ARP for this address and we don't need to enable
428   // proxy ARP on the outside interface.
429   snat_main_t *sm = &snat_main;
430   snat_fib_entry_reg_t *fe;
431
432   if (!(fe = nat44_ed_get_fib_entry_reg (addr, sw_if_index, 0)))
433     {
434       fib_prefix_t prefix = {
435         .fp_len = 32,
436         .fp_proto = FIB_PROTOCOL_IP4,
437         .fp_addr = {
438                   .ip4.as_u32 = addr.as_u32,
439                 },
440       };
441       u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
442       fib_table_entry_update_one_path (fib_index, &prefix, sm->fib_src_low,
443                                        (FIB_ENTRY_FLAG_CONNECTED |
444                                         FIB_ENTRY_FLAG_LOCAL |
445                                         FIB_ENTRY_FLAG_EXCLUSIVE),
446                                        DPO_PROTO_IP4, NULL, sw_if_index, ~0, 1,
447                                        NULL, FIB_ROUTE_PATH_FLAG_NONE);
448
449       vec_add2 (sm->fib_entry_reg, fe, 1);
450       clib_memset (fe, 0, sizeof (*fe));
451       fe->addr.as_u32 = addr.as_u32;
452       fe->sw_if_index = sw_if_index;
453     }
454   fe->count++;
455 }
456
457 static void
458 nat44_ed_del_fib_entry_reg (ip4_address_t addr, u32 sw_if_index)
459 {
460   snat_main_t *sm = &snat_main;
461   snat_fib_entry_reg_t *fe;
462   int i;
463
464   if ((fe = nat44_ed_get_fib_entry_reg (addr, sw_if_index, &i)))
465     {
466       fe->count--;
467       if (0 == fe->count)
468         {
469           fib_prefix_t prefix = {
470             .fp_len = 32,
471             .fp_proto = FIB_PROTOCOL_IP4,
472             .fp_addr = {
473               .ip4.as_u32 = addr.as_u32,
474                     },
475           };
476           u32 fib_index =
477             ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
478           fib_table_entry_delete (fib_index, &prefix, sm->fib_src_low);
479           vec_del1 (sm->fib_entry_reg, i);
480         }
481     }
482 }
483
484 static void
485 nat44_ed_add_del_interface_fib_reg_entries (ip4_address_t addr, u8 is_add)
486 {
487   snat_main_t *sm = &snat_main;
488   snat_interface_t *i;
489
490   pool_foreach (i, sm->interfaces)
491     {
492       if (nat44_ed_is_interface_outside (i))
493         {
494           if (is_add)
495             {
496               nat44_ed_add_fib_entry_reg (addr, i->sw_if_index);
497             }
498           else
499             {
500               nat44_ed_del_fib_entry_reg (addr, i->sw_if_index);
501             }
502         }
503     }
504   pool_foreach (i, sm->output_feature_interfaces)
505     {
506       if (nat44_ed_is_interface_outside (i))
507         {
508           if (is_add)
509             {
510               nat44_ed_add_fib_entry_reg (addr, i->sw_if_index);
511             }
512           else
513             {
514               nat44_ed_del_fib_entry_reg (addr, i->sw_if_index);
515             }
516         }
517     }
518 }
519
520 static_always_inline void
521 nat44_ed_add_del_nat_addr_fib_reg_entries (u32 sw_if_index, u8 is_add)
522 {
523   snat_main_t *sm = &snat_main;
524   snat_address_t *ap;
525
526   vec_foreach (ap, sm->addresses)
527     {
528       if (is_add)
529         {
530           nat44_ed_add_fib_entry_reg (ap->addr, sw_if_index);
531         }
532       else
533         {
534           nat44_ed_del_fib_entry_reg (ap->addr, sw_if_index);
535         }
536     }
537 }
538
539 static_always_inline void
540 nat44_ed_add_del_sm_fib_reg_entries (u32 sw_if_index, u8 is_add)
541 {
542   snat_main_t *sm = &snat_main;
543   snat_static_mapping_t *m;
544
545   pool_foreach (m, sm->static_mappings)
546     {
547       if (is_add)
548         {
549           nat44_ed_add_fib_entry_reg (m->external_addr, sw_if_index);
550         }
551       else
552         {
553           nat44_ed_del_fib_entry_reg (m->external_addr, sw_if_index);
554         }
555     }
556 }
557
558 int
559 nat44_ed_add_address (ip4_address_t *addr, u32 vrf_id, u8 twice_nat)
560 {
561   snat_main_t *sm = &snat_main;
562   snat_address_t *ap, *addresses;
563
564   addresses = twice_nat ? sm->twice_nat_addresses : sm->addresses;
565
566   if (!sm->enabled)
567     {
568       return VNET_API_ERROR_UNSUPPORTED;
569     }
570
571   // check if address already exists
572   vec_foreach (ap, addresses)
573     {
574       if (ap->addr.as_u32 == addr->as_u32)
575         {
576           nat_log_err ("address exist");
577           return VNET_API_ERROR_VALUE_EXIST;
578         }
579     }
580
581   if (twice_nat)
582     {
583       vec_add2 (sm->twice_nat_addresses, ap, 1);
584     }
585   else
586     {
587       vec_add2 (sm->addresses, ap, 1);
588     }
589
590   ap->addr_len = ~0;
591   ap->fib_index = ~0;
592   ap->addr = *addr;
593
594   if (vrf_id != ~0)
595     {
596       ap->fib_index = fib_table_find_or_create_and_lock (
597         FIB_PROTOCOL_IP4, vrf_id, sm->fib_src_low);
598     }
599
600   if (!twice_nat)
601     {
602       // if we don't have enabled interface we don't add address
603       // to fib
604       nat44_ed_add_del_interface_fib_reg_entries (*addr, 1);
605       nat44_ed_update_outside_if_addresses (ap);
606     }
607   return 0;
608 }
609
610 int
611 nat44_ed_del_address (ip4_address_t addr, u8 twice_nat)
612 {
613   snat_main_t *sm = &snat_main;
614   snat_address_t *a = 0, *addresses;
615   snat_session_t *ses;
616   u32 *ses_to_be_removed = 0, *ses_index;
617   snat_main_per_thread_data_t *tsm;
618   int j;
619
620   addresses = twice_nat ? sm->twice_nat_addresses : sm->addresses;
621
622   for (j = 0; j < vec_len (addresses); j++)
623     {
624       if (addresses[j].addr.as_u32 == addr.as_u32)
625         {
626           a = addresses + j;
627           break;
628         }
629     }
630   if (!a)
631     {
632       nat_log_err ("no such address");
633       return VNET_API_ERROR_NO_SUCH_ENTRY;
634     }
635
636   // delete dynamic sessions only
637   vec_foreach (tsm, sm->per_thread_data)
638     {
639       pool_foreach (ses, tsm->sessions)
640         {
641           if (ses->flags & SNAT_SESSION_FLAG_STATIC_MAPPING)
642             {
643               continue;
644             }
645           if (ses->out2in.addr.as_u32 == addr.as_u32)
646             {
647               nat44_ed_free_session_data (sm, ses, tsm - sm->per_thread_data,
648                                           0);
649               vec_add1 (ses_to_be_removed, ses - tsm->sessions);
650             }
651         }
652       vec_foreach (ses_index, ses_to_be_removed)
653         {
654           ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
655           nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1);
656         }
657       vec_free (ses_to_be_removed);
658     }
659
660   if (!twice_nat)
661     {
662       nat44_ed_add_del_interface_fib_reg_entries (addr, 0);
663     }
664
665   if (a->fib_index != ~0)
666     {
667       fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
668     }
669
670   if (!twice_nat)
671     {
672       vec_del1 (sm->addresses, j);
673     }
674   else
675     {
676       vec_del1 (sm->twice_nat_addresses, j);
677     }
678
679   return 0;
680 }
681
682 u32
683 get_thread_idx_by_port (u16 e_port)
684 {
685   snat_main_t *sm = &snat_main;
686   u32 thread_idx = sm->num_workers;
687   if (sm->num_workers > 1)
688     {
689       thread_idx =
690         sm->first_worker_index +
691         sm->workers[(e_port - 1024) / sm->port_per_thread];
692     }
693   return thread_idx;
694 }
695
696 void
697 nat_ed_static_mapping_del_sessions (snat_main_t * sm,
698                                     snat_main_per_thread_data_t * tsm,
699                                     ip4_address_t l_addr,
700                                     u16 l_port,
701                                     u8 protocol,
702                                     u32 fib_index, int addr_only,
703                                     ip4_address_t e_addr, u16 e_port)
704 {
705   snat_session_t *s;
706   u32 *indexes_to_free = NULL;
707   pool_foreach (s, tsm->sessions) {
708     if (s->in2out.fib_index != fib_index ||
709         s->in2out.addr.as_u32 != l_addr.as_u32)
710       {
711         continue;
712       }
713     if (!addr_only)
714       {
715         if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
716             s->out2in.port != e_port || s->in2out.port != l_port ||
717             s->proto != protocol)
718           continue;
719       }
720
721     if (nat44_ed_is_lb_session (s))
722       continue;
723     if (!nat44_ed_is_session_static (s))
724       continue;
725     nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
726     vec_add1 (indexes_to_free, s - tsm->sessions);
727     if (!addr_only)
728       break;
729   }
730   u32 *ses_index;
731   vec_foreach (ses_index, indexes_to_free)
732   {
733     s = pool_elt_at_index (tsm->sessions, *ses_index);
734     nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
735   }
736   vec_free (indexes_to_free);
737 }
738
739 static_always_inline snat_static_mapping_t *
740 nat44_ed_sm_lookup (snat_main_t *sm, clib_bihash_kv_16_8_t *kv)
741 {
742   clib_bihash_kv_16_8_t v;
743   int rc = clib_bihash_search_16_8 (&sm->flow_hash, kv, &v);
744   if (!rc)
745     {
746       ASSERT (0 == ed_value_get_thread_index (&v));
747       return pool_elt_at_index (sm->static_mappings,
748                                 ed_value_get_session_index (&v));
749     }
750   return NULL;
751 }
752
753 snat_static_mapping_t *
754 nat44_ed_sm_o2i_lookup (snat_main_t *sm, ip4_address_t addr, u16 port,
755                         u32 fib_index, u8 proto)
756 {
757   clib_bihash_kv_16_8_t kv;
758   nat44_ed_sm_init_o2i_k (&kv, addr.as_u32, port, fib_index, proto);
759   return nat44_ed_sm_lookup (sm, &kv);
760 }
761
762 snat_static_mapping_t *
763 nat44_ed_sm_i2o_lookup (snat_main_t *sm, ip4_address_t addr, u16 port,
764                         u32 fib_index, u8 proto)
765 {
766   clib_bihash_kv_16_8_t kv;
767   nat44_ed_sm_init_i2o_k (&kv, addr.as_u32, port, fib_index, proto);
768   return nat44_ed_sm_lookup (sm, &kv);
769 }
770
771 static snat_static_mapping_resolve_t *
772 nat44_ed_get_resolve_record (ip4_address_t l_addr, u16 l_port, u16 e_port,
773                              ip_protocol_t proto, u32 vrf_id, u32 sw_if_index,
774                              u32 flags, int *out_idx)
775 {
776   snat_static_mapping_resolve_t *rp;
777   snat_main_t *sm = &snat_main;
778   int i;
779
780   for (i = 0; i < vec_len (sm->sm_to_resolve); i++)
781     {
782       rp = sm->sm_to_resolve + i;
783
784       if (rp->sw_if_index == sw_if_index && rp->vrf_id == vrf_id)
785         {
786           if (is_sm_identity_nat (rp->flags) && is_sm_identity_nat (flags))
787             {
788               if (!(is_sm_addr_only (rp->flags) && is_sm_addr_only (flags)))
789                 {
790                   if (rp->e_port != e_port || rp->proto != proto)
791                     {
792                       continue;
793                     }
794                 }
795             }
796           else if (rp->l_addr.as_u32 == l_addr.as_u32)
797             {
798               if (!(is_sm_addr_only (rp->flags) && is_sm_addr_only (flags)))
799                 {
800                   if (rp->l_port != l_port || rp->e_port != e_port ||
801                       rp->proto != proto)
802                     {
803                       continue;
804                     }
805                 }
806             }
807           else
808             {
809               continue;
810             }
811           if (out_idx)
812             {
813               *out_idx = i;
814             }
815           return rp;
816         }
817     }
818   return NULL;
819 }
820
821 static int
822 nat44_ed_del_resolve_record (ip4_address_t l_addr, u16 l_port, u16 e_port,
823                              ip_protocol_t proto, u32 vrf_id, u32 sw_if_index,
824                              u32 flags)
825 {
826   snat_main_t *sm = &snat_main;
827   int i;
828   if (nat44_ed_get_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
829                                    sw_if_index, flags, &i))
830     {
831       vec_del1 (sm->sm_to_resolve, i);
832       return 0;
833     }
834   return 1;
835 }
836
837 static_always_inline int
838 nat44_ed_validate_sm_input (u32 flags)
839 {
840   // identity nat can be initiated only from inside interface
841   if (is_sm_identity_nat (flags) && is_sm_out2in_only (flags))
842     {
843       return VNET_API_ERROR_UNSUPPORTED;
844     }
845
846   if (is_sm_twice_nat (flags) || is_sm_self_twice_nat (flags))
847     {
848       if (is_sm_addr_only (flags) || is_sm_identity_nat (flags))
849         {
850           return VNET_API_ERROR_UNSUPPORTED;
851         }
852     }
853   return 0;
854 }
855
856 int
857 nat44_ed_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
858                              u16 l_port, u16 e_port, ip_protocol_t proto,
859                              u32 vrf_id, u32 sw_if_index, u32 flags,
860                              ip4_address_t pool_addr, u8 *tag)
861 {
862   snat_static_mapping_resolve_t *rp;
863   snat_main_t *sm = &snat_main;
864   int rv;
865
866   if (!sm->enabled)
867     {
868       return VNET_API_ERROR_UNSUPPORTED;
869     }
870
871   rv = nat44_ed_validate_sm_input (flags);
872   if (rv != 0)
873     {
874       return rv;
875     }
876
877   // interface bound mapping
878   if (is_sm_switch_address (flags))
879     {
880       if (nat44_ed_get_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
881                                        sw_if_index, flags, 0))
882         {
883           return VNET_API_ERROR_VALUE_EXIST;
884         }
885
886       vec_add2 (sm->sm_to_resolve, rp, 1);
887       rp->l_addr.as_u32 = l_addr.as_u32;
888       rp->l_port = l_port;
889       rp->e_port = e_port;
890       rp->sw_if_index = sw_if_index;
891       rp->vrf_id = vrf_id;
892       rp->proto = proto;
893       rp->flags = flags;
894       rp->pool_addr = pool_addr;
895       rp->tag = vec_dup (tag);
896       rp->is_resolved = 0;
897
898       ip4_address_t *first_int_addr =
899         ip4_interface_first_address (sm->ip4_main, sw_if_index, 0);
900       if (!first_int_addr)
901         {
902           return 0;
903         }
904
905       e_addr.as_u32 = first_int_addr->as_u32;
906       rp->is_resolved = 1;
907     }
908
909   rv = nat44_ed_add_static_mapping_internal (l_addr, e_addr, l_port, e_port,
910                                              proto, vrf_id, sw_if_index, flags,
911                                              pool_addr, tag);
912   if ((0 != rv) && is_sm_switch_address (flags))
913     {
914       nat44_ed_del_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
915                                    sw_if_index, flags);
916     }
917
918   return rv;
919 }
920
921 int
922 nat44_ed_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
923                              u16 l_port, u16 e_port, ip_protocol_t proto,
924                              u32 vrf_id, u32 sw_if_index, u32 flags)
925 {
926   snat_main_t *sm = &snat_main;
927   int rv;
928
929   if (!sm->enabled)
930     {
931       return VNET_API_ERROR_UNSUPPORTED;
932     }
933
934   rv = nat44_ed_validate_sm_input (flags);
935   if (rv != 0)
936     {
937       return rv;
938     }
939
940   // interface bound mapping
941   if (is_sm_switch_address (flags))
942     {
943       if (nat44_ed_del_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
944                                        sw_if_index, flags))
945         {
946           return VNET_API_ERROR_NO_SUCH_ENTRY;
947         }
948
949       ip4_address_t *first_int_addr =
950         ip4_interface_first_address (sm->ip4_main, sw_if_index, 0);
951       if (!first_int_addr)
952         {
953           // dhcp resolution required
954           return 0;
955         }
956
957       e_addr.as_u32 = first_int_addr->as_u32;
958     }
959
960   return nat44_ed_del_static_mapping_internal (l_addr, e_addr, l_port, e_port,
961                                                proto, vrf_id, flags);
962 }
963
964 static int
965 nat44_ed_add_static_mapping_internal (ip4_address_t l_addr,
966                                       ip4_address_t e_addr, u16 l_port,
967                                       u16 e_port, ip_protocol_t proto,
968                                       u32 vrf_id, u32 sw_if_index, u32 flags,
969                                       ip4_address_t pool_addr, u8 *tag)
970 {
971   snat_main_t *sm = &snat_main;
972   nat44_lb_addr_port_t *local;
973   snat_static_mapping_t *m;
974   u32 fib_index = ~0;
975
976   if (is_sm_addr_only (flags))
977     {
978       e_port = l_port = proto = 0;
979     }
980
981   if (is_sm_identity_nat (flags))
982     {
983       l_port = e_port;
984       l_addr.as_u32 = e_addr.as_u32;
985     }
986
987   m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
988   if (m)
989     {
990       // case:
991       // adding local identity nat record for different vrf table
992
993       if (!is_sm_identity_nat (m->flags))
994         {
995           return VNET_API_ERROR_VALUE_EXIST;
996         }
997
998       pool_foreach (local, m->locals)
999         {
1000           if (local->vrf_id == vrf_id)
1001             {
1002               return VNET_API_ERROR_VALUE_EXIST;
1003             }
1004         }
1005
1006       pool_get (m->locals, local);
1007
1008       local->vrf_id = vrf_id;
1009       local->fib_index = fib_table_find_or_create_and_lock (
1010         FIB_PROTOCOL_IP4, vrf_id, sm->fib_src_low);
1011
1012       nat44_ed_sm_i2o_add (sm, m, m->local_addr, m->local_port,
1013                            local->fib_index, m->proto);
1014
1015       return 0;
1016     }
1017
1018   if (vrf_id != ~0)
1019     {
1020       fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1021                                                      sm->fib_src_low);
1022     }
1023   else
1024     {
1025       // fallback to default vrf
1026       vrf_id = sm->inside_vrf_id;
1027       fib_index = sm->inside_fib_index;
1028       fib_table_lock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
1029     }
1030
1031   // test if local mapping record doesn't exist
1032   // identity nat supports multiple records in local mapping
1033   if (!(is_sm_out2in_only (flags) || is_sm_identity_nat (flags)))
1034     {
1035       if (nat44_ed_sm_i2o_lookup (sm, l_addr, l_port, fib_index, proto))
1036         {
1037           return VNET_API_ERROR_VALUE_EXIST;
1038         }
1039     }
1040
1041   pool_get (sm->static_mappings, m);
1042   clib_memset (m, 0, sizeof (*m));
1043
1044   m->flags = flags;
1045   m->local_addr = l_addr;
1046   m->external_addr = e_addr;
1047
1048   m->pool_addr = pool_addr;
1049   m->tag = vec_dup (tag);
1050
1051   if (!is_sm_addr_only (flags))
1052     {
1053       m->local_port = l_port;
1054       m->external_port = e_port;
1055       m->proto = proto;
1056     }
1057
1058   if (is_sm_identity_nat (flags))
1059     {
1060       pool_get (m->locals, local);
1061
1062       local->vrf_id = vrf_id;
1063       local->fib_index = fib_index;
1064     }
1065   else
1066     {
1067       m->vrf_id = vrf_id;
1068       m->fib_index = fib_index;
1069     }
1070
1071   if (!is_sm_out2in_only (flags))
1072     {
1073       nat44_ed_sm_i2o_add (sm, m, m->local_addr, m->local_port, fib_index,
1074                            m->proto);
1075     }
1076
1077   nat44_ed_sm_o2i_add (sm, m, m->external_addr, m->external_port, 0, m->proto);
1078
1079   if (sm->num_workers > 1)
1080     {
1081       // store worker index for this record
1082       ip4_header_t ip = {
1083         .src_address = m->local_addr,
1084       };
1085       u32 worker_index;
1086       worker_index =
1087         nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index, 0);
1088       vec_add1 (m->workers, worker_index);
1089     }
1090
1091   nat44_ed_add_del_interface_fib_reg_entries (e_addr, 1);
1092
1093   return 0;
1094 }
1095
1096 static int
1097 nat44_ed_del_static_mapping_internal (ip4_address_t l_addr,
1098                                       ip4_address_t e_addr, u16 l_port,
1099                                       u16 e_port, ip_protocol_t proto,
1100                                       u32 vrf_id, u32 flags)
1101 {
1102   snat_main_per_thread_data_t *tsm;
1103   snat_main_t *sm = &snat_main;
1104
1105   nat44_lb_addr_port_t *local;
1106   snat_static_mapping_t *m;
1107   u32 fib_index = ~0;
1108
1109   if (is_sm_addr_only (flags))
1110     {
1111       e_port = l_port = proto = 0;
1112     }
1113
1114   if (is_sm_identity_nat (flags))
1115     {
1116       l_port = e_port;
1117       l_addr.as_u32 = e_addr.as_u32;
1118     }
1119
1120   // fib index 0
1121   m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
1122   if (!m)
1123     {
1124       return VNET_API_ERROR_NO_SUCH_ENTRY;
1125     }
1126
1127   if (is_sm_identity_nat (flags))
1128     {
1129       u8 found = 0;
1130
1131       if (vrf_id == ~0)
1132         {
1133           vrf_id = sm->inside_vrf_id;
1134         }
1135
1136       pool_foreach (local, m->locals)
1137         {
1138           if (local->vrf_id == vrf_id)
1139             {
1140               local = pool_elt_at_index (m->locals, local - m->locals);
1141               fib_index = local->fib_index;
1142               pool_put (m->locals, local);
1143               found = 1;
1144             }
1145         }
1146
1147       if (!found)
1148         {
1149           return VNET_API_ERROR_NO_SUCH_ENTRY;
1150         }
1151     }
1152   else
1153     {
1154       fib_index = m->fib_index;
1155     }
1156
1157   if (!is_sm_out2in_only (flags))
1158     {
1159       nat44_ed_sm_i2o_del (sm, l_addr, l_port, fib_index, proto);
1160     }
1161
1162   // delete sessions for static mapping
1163   if (sm->num_workers > 1)
1164     {
1165       tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
1166     }
1167   else
1168     {
1169       tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1170     }
1171
1172   nat_ed_static_mapping_del_sessions (sm, tsm, m->local_addr, m->local_port,
1173                                       m->proto, fib_index,
1174                                       is_sm_addr_only (flags), e_addr, e_port);
1175
1176   fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
1177
1178   if (!pool_elts (m->locals))
1179     {
1180       // this is last record remove all required stuff
1181       // fib_index 0
1182       nat44_ed_sm_o2i_del (sm, e_addr, e_port, 0, proto);
1183
1184       vec_free (m->tag);
1185       vec_free (m->workers);
1186       pool_put (sm->static_mappings, m);
1187
1188       nat44_ed_add_del_interface_fib_reg_entries (e_addr, 0);
1189     }
1190
1191   return 0;
1192 }
1193
1194 int
1195 nat44_ed_add_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
1196                                 ip_protocol_t proto,
1197                                 nat44_lb_addr_port_t *locals, u32 flags,
1198                                 u8 *tag, u32 affinity)
1199 {
1200   snat_main_t *sm = &snat_main;
1201   snat_static_mapping_t *m;
1202   snat_address_t *a = 0;
1203
1204   nat44_lb_addr_port_t *local;
1205   uword *bitmap = 0;
1206   int rc = 0;
1207
1208   int i;
1209
1210   if (!sm->enabled)
1211     {
1212       return VNET_API_ERROR_UNSUPPORTED;
1213     }
1214
1215   m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
1216
1217   if (m)
1218     {
1219       return VNET_API_ERROR_VALUE_EXIST;
1220     }
1221
1222   if (vec_len (locals) < 2)
1223     {
1224       return VNET_API_ERROR_INVALID_VALUE;
1225     }
1226
1227   if (!is_sm_out2in_only (flags))
1228     {
1229       /* Find external address in allocated addresses and reserve port for
1230          address and port pair mapping when dynamic translations enabled */
1231       for (i = 0; i < vec_len (sm->addresses); i++)
1232         {
1233           if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1234             {
1235               /* External port must be unused */
1236               a = sm->addresses + i;
1237               if (nat44_ed_sm_o2i_lookup (sm, a->addr, e_port, 0, proto))
1238                 {
1239                   return VNET_API_ERROR_VALUE_EXIST;
1240                 }
1241               break;
1242             }
1243         }
1244       // external address must be allocated
1245       if (!a)
1246         {
1247           return VNET_API_ERROR_NO_SUCH_ENTRY;
1248         }
1249     }
1250
1251   pool_get (sm->static_mappings, m);
1252   clib_memset (m, 0, sizeof (*m));
1253   m->tag = vec_dup (tag);
1254   m->external_addr = e_addr;
1255   m->external_port = e_port;
1256   m->affinity = affinity;
1257   m->proto = proto;
1258
1259   m->flags = flags;
1260   m->flags |= NAT_SM_FLAG_LB;
1261
1262   if (affinity)
1263     m->affinity_per_service_list_head_index =
1264       nat_affinity_get_per_service_list_head_index ();
1265   else
1266     m->affinity_per_service_list_head_index = ~0;
1267
1268   if (nat44_ed_sm_o2i_add (sm, m, m->external_addr, m->external_port, 0,
1269                            m->proto))
1270     {
1271       nat_log_err ("sm o2i key add failed");
1272       return VNET_API_ERROR_UNSPECIFIED;
1273     }
1274
1275   for (i = 0; i < vec_len (locals); i++)
1276     {
1277       locals[i].fib_index = fib_table_find_or_create_and_lock (
1278         FIB_PROTOCOL_IP4, locals[i].vrf_id, sm->fib_src_low);
1279       if (!is_sm_out2in_only (flags))
1280         {
1281           if (nat44_ed_sm_o2i_add (sm, m, e_addr, e_port, 0, proto))
1282             {
1283               nat_log_err ("sm o2i key add failed");
1284               rc = VNET_API_ERROR_UNSPECIFIED;
1285               // here we continue with add operation so that it can be safely
1286               // reversed in delete path - otherwise we'd have to track what
1287               // we've done and deal with partial cleanups and since bihash
1288               // adds are (extremely improbable) the only points of failure,
1289               // it's easier to just do it this way
1290             }
1291         }
1292       locals[i].prefix = (i == 0) ?
1293                            locals[i].probability :
1294                            (locals[i - 1].prefix + locals[i].probability);
1295       pool_get (m->locals, local);
1296       *local = locals[i];
1297       if (sm->num_workers > 1)
1298         {
1299           ip4_header_t ip = {
1300             .src_address = locals[i].addr,
1301           };
1302           bitmap = clib_bitmap_set (
1303             bitmap, nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index, 0),
1304             1);
1305         }
1306     }
1307
1308   /* Assign workers */
1309   if (sm->num_workers > 1)
1310     {
1311       clib_bitmap_foreach (i, bitmap)
1312         {
1313           vec_add1 (m->workers, i);
1314         }
1315     }
1316
1317   return rc;
1318 }
1319
1320 int
1321 nat44_ed_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
1322                                 ip_protocol_t proto, u32 flags)
1323 {
1324   snat_main_t *sm = &snat_main;
1325   snat_static_mapping_t *m;
1326
1327   nat44_lb_addr_port_t *local;
1328   snat_main_per_thread_data_t *tsm;
1329   snat_session_t *s;
1330
1331   if (!sm->enabled)
1332     {
1333       return VNET_API_ERROR_UNSUPPORTED;
1334     }
1335
1336   m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
1337   if (!m)
1338     return VNET_API_ERROR_NO_SUCH_ENTRY;
1339
1340   if (!is_sm_lb (m->flags))
1341     return VNET_API_ERROR_INVALID_VALUE;
1342
1343   if (nat44_ed_sm_o2i_del (sm, m->external_addr, m->external_port, 0,
1344                            m->proto))
1345     {
1346       nat_log_err ("sm o2i key del failed");
1347       return VNET_API_ERROR_UNSPECIFIED;
1348     }
1349
1350   pool_foreach (local, m->locals)
1351     {
1352       fib_table_unlock (local->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
1353       if (!is_sm_out2in_only (flags))
1354         {
1355           if (nat44_ed_sm_i2o_del (sm, local->addr, local->port,
1356                                    local->fib_index, m->proto))
1357             {
1358               nat_log_err ("sm i2o key del failed");
1359               return VNET_API_ERROR_UNSPECIFIED;
1360             }
1361         }
1362
1363       if (sm->num_workers > 1)
1364         {
1365           ip4_header_t ip = {
1366             .src_address = local->addr,
1367           };
1368           tsm = vec_elt_at_index (
1369             sm->per_thread_data,
1370             nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index, 0));
1371         }
1372       else
1373         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1374
1375       /* Delete sessions */
1376       pool_foreach (s, tsm->sessions)
1377         {
1378           if (!(nat44_ed_is_lb_session (s)))
1379             continue;
1380
1381           if ((s->in2out.addr.as_u32 != local->addr.as_u32) ||
1382               s->in2out.port != local->port)
1383             continue;
1384
1385           nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1386           nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1387         }
1388     }
1389
1390   if (m->affinity)
1391     {
1392       nat_affinity_flush_service (m->affinity_per_service_list_head_index);
1393     }
1394
1395   pool_free (m->locals);
1396   vec_free (m->tag);
1397   vec_free (m->workers);
1398   pool_put (sm->static_mappings, m);
1399
1400   return 0;
1401 }
1402
1403 int
1404 nat44_ed_add_del_lb_static_mapping_local (ip4_address_t e_addr, u16 e_port,
1405                                           ip4_address_t l_addr, u16 l_port,
1406                                           ip_protocol_t proto, u32 vrf_id,
1407                                           u8 probability, u8 is_add)
1408 {
1409   snat_main_t *sm = &snat_main;
1410   snat_static_mapping_t *m = 0;
1411   nat44_lb_addr_port_t *local, *prev_local, *match_local = 0;
1412   snat_main_per_thread_data_t *tsm;
1413   snat_session_t *s;
1414   u32 *locals = 0;
1415   uword *bitmap = 0;
1416   int i;
1417
1418   if (!sm->enabled)
1419     {
1420       return VNET_API_ERROR_UNSUPPORTED;
1421     }
1422
1423   m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
1424
1425   if (!m)
1426     {
1427       return VNET_API_ERROR_NO_SUCH_ENTRY;
1428     }
1429
1430   if (!is_sm_lb (m->flags))
1431     {
1432       return VNET_API_ERROR_INVALID_VALUE;
1433     }
1434
1435   pool_foreach (local, m->locals)
1436    {
1437     if ((local->addr.as_u32 == l_addr.as_u32) && (local->port == l_port) &&
1438         (local->vrf_id == vrf_id))
1439       {
1440         match_local = local;
1441         break;
1442       }
1443   }
1444
1445   if (is_add)
1446     {
1447       if (match_local)
1448         {
1449           return VNET_API_ERROR_VALUE_EXIST;
1450         }
1451
1452       pool_get (m->locals, local);
1453       clib_memset (local, 0, sizeof (*local));
1454       local->addr.as_u32 = l_addr.as_u32;
1455       local->port = l_port;
1456       local->probability = probability;
1457       local->vrf_id = vrf_id;
1458       local->fib_index =
1459         fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1460                                            sm->fib_src_low);
1461
1462       if (!is_sm_out2in_only (m->flags))
1463         {
1464           if (nat44_ed_sm_i2o_add (sm, m, l_addr, l_port, local->fib_index,
1465                                    proto))
1466             {
1467               nat_log_err ("sm i2o key add failed");
1468               pool_put (m->locals, local);
1469               return VNET_API_ERROR_UNSPECIFIED;
1470             }
1471         }
1472     }
1473   else
1474     {
1475       if (!match_local)
1476         return VNET_API_ERROR_NO_SUCH_ENTRY;
1477
1478       if (pool_elts (m->locals) < 3)
1479         return VNET_API_ERROR_UNSPECIFIED;
1480
1481       fib_table_unlock (match_local->fib_index, FIB_PROTOCOL_IP4,
1482                         sm->fib_src_low);
1483
1484       if (!is_sm_out2in_only (m->flags))
1485         {
1486           if (nat44_ed_sm_i2o_del (sm, l_addr, l_port, match_local->fib_index,
1487                                    proto))
1488             nat_log_err ("sm i2o key del failed");
1489         }
1490
1491       if (sm->num_workers > 1)
1492         {
1493           ip4_header_t ip = {
1494             .src_address = local->addr,
1495           };
1496           tsm = vec_elt_at_index (
1497             sm->per_thread_data,
1498             nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index, 0));
1499         }
1500       else
1501         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1502
1503       /* Delete sessions */
1504       pool_foreach (s, tsm->sessions) {
1505           if (!(nat44_ed_is_lb_session (s)))
1506             continue;
1507
1508           if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) ||
1509               s->in2out.port != match_local->port)
1510             continue;
1511
1512           nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1513           nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1514       }
1515
1516       pool_put (m->locals, match_local);
1517     }
1518
1519   vec_free (m->workers);
1520
1521   pool_foreach (local, m->locals)
1522    {
1523     vec_add1 (locals, local - m->locals);
1524     if (sm->num_workers > 1)
1525       {
1526         ip4_header_t ip;
1527         ip.src_address.as_u32 = local->addr.as_u32,
1528         bitmap = clib_bitmap_set (
1529           bitmap,
1530           nat44_ed_get_in2out_worker_index (0, &ip, local->fib_index, 0), 1);
1531       }
1532   }
1533
1534   ASSERT (vec_len (locals) > 1);
1535
1536   local = pool_elt_at_index (m->locals, locals[0]);
1537   local->prefix = local->probability;
1538   for (i = 1; i < vec_len (locals); i++)
1539     {
1540       local = pool_elt_at_index (m->locals, locals[i]);
1541       prev_local = pool_elt_at_index (m->locals, locals[i - 1]);
1542       local->prefix = local->probability + prev_local->prefix;
1543     }
1544
1545   /* Assign workers */
1546   if (sm->num_workers > 1)
1547     {
1548       clib_bitmap_foreach (i, bitmap)  { vec_add1(m->workers, i); }
1549     }
1550
1551   return 0;
1552 }
1553
1554 void
1555 expire_per_vrf_sessions (u32 fib_index)
1556 {
1557   per_vrf_sessions_t *per_vrf_sessions;
1558   snat_main_per_thread_data_t *tsm;
1559   snat_main_t *sm = &snat_main;
1560
1561   vec_foreach (tsm, sm->per_thread_data)
1562     {
1563       vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
1564         {
1565           if ((per_vrf_sessions->rx_fib_index == fib_index) ||
1566               (per_vrf_sessions->tx_fib_index == fib_index))
1567             {
1568               per_vrf_sessions->expired = 1;
1569             }
1570         }
1571     }
1572 }
1573
1574 void
1575 update_per_vrf_sessions_vec (u32 fib_index, int is_del)
1576 {
1577   snat_main_t *sm = &snat_main;
1578   nat_fib_t *fib;
1579
1580   // we don't care if it is outside/inside fib
1581   // we just care about their ref_count
1582   // if it reaches 0 sessions should expire
1583   // because the fib isn't valid for NAT anymore
1584
1585   vec_foreach (fib, sm->fibs)
1586   {
1587     if (fib->fib_index == fib_index)
1588       {
1589         if (is_del)
1590           {
1591             fib->ref_count--;
1592             if (!fib->ref_count)
1593               {
1594                 vec_del1 (sm->fibs, fib - sm->fibs);
1595                 expire_per_vrf_sessions (fib_index);
1596               }
1597             return;
1598           }
1599         else
1600           fib->ref_count++;
1601       }
1602   }
1603   if (!is_del)
1604     {
1605       vec_add2 (sm->fibs, fib, 1);
1606       fib->ref_count = 1;
1607       fib->fib_index = fib_index;
1608     }
1609 }
1610
1611 static_always_inline nat_outside_fib_t *
1612 nat44_ed_get_outside_fib (nat_outside_fib_t *outside_fibs, u32 fib_index)
1613 {
1614   nat_outside_fib_t *f;
1615   vec_foreach (f, outside_fibs)
1616     {
1617       if (f->fib_index == fib_index)
1618         {
1619           return f;
1620         }
1621     }
1622   return 0;
1623 }
1624
1625 static_always_inline snat_interface_t *
1626 nat44_ed_get_interface (snat_interface_t *interfaces, u32 sw_if_index)
1627 {
1628   snat_interface_t *i;
1629   pool_foreach (i, interfaces)
1630     {
1631       if (i->sw_if_index == sw_if_index)
1632         {
1633           return i;
1634         }
1635     }
1636   return 0;
1637 }
1638
1639 int
1640 nat44_ed_add_interface (u32 sw_if_index, u8 is_inside)
1641 {
1642   const char *del_feature_name, *feature_name;
1643   snat_main_t *sm = &snat_main;
1644
1645   nat_outside_fib_t *outside_fib;
1646   snat_interface_t *i;
1647   u32 fib_index;
1648   int rv;
1649
1650   if (!sm->enabled)
1651     {
1652       nat_log_err ("nat44 is disabled");
1653       return VNET_API_ERROR_UNSUPPORTED;
1654     }
1655
1656   if (nat44_ed_get_interface (sm->output_feature_interfaces, sw_if_index))
1657     {
1658       nat_log_err ("error interface already configured");
1659       return VNET_API_ERROR_VALUE_EXIST;
1660     }
1661
1662   i = nat44_ed_get_interface (sm->interfaces, sw_if_index);
1663   if (i)
1664     {
1665       if ((nat44_ed_is_interface_inside (i) && is_inside) ||
1666           (nat44_ed_is_interface_outside (i) && !is_inside))
1667         {
1668           return 0;
1669         }
1670       if (sm->num_workers > 1)
1671         {
1672           del_feature_name = !is_inside ? "nat44-in2out-worker-handoff" :
1673                                           "nat44-out2in-worker-handoff";
1674           feature_name = "nat44-handoff-classify";
1675         }
1676       else
1677         {
1678           del_feature_name = !is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1679
1680           feature_name = "nat44-ed-classify";
1681         }
1682
1683       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1684       if (rv)
1685         return rv;
1686       vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1687                                    sw_if_index, 0, 0, 0);
1688       vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1,
1689                                    0, 0);
1690     }
1691   else
1692     {
1693       if (sm->num_workers > 1)
1694         {
1695           feature_name = is_inside ? "nat44-in2out-worker-handoff" :
1696                                      "nat44-out2in-worker-handoff";
1697         }
1698       else
1699         {
1700           feature_name = is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1701         }
1702
1703       nat_validate_interface_counters (sm, sw_if_index);
1704       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1705       if (rv)
1706         return rv;
1707       vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1,
1708                                    0, 0);
1709
1710       pool_get (sm->interfaces, i);
1711       i->sw_if_index = sw_if_index;
1712       i->flags = 0;
1713     }
1714
1715   fib_index =
1716     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
1717
1718   update_per_vrf_sessions_vec (fib_index, 0 /*is_del*/);
1719
1720   if (!is_inside)
1721     {
1722       i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
1723
1724       outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
1725       if (outside_fib)
1726         {
1727           outside_fib->refcount++;
1728         }
1729       else
1730         {
1731           vec_add2 (sm->outside_fibs, outside_fib, 1);
1732           outside_fib->fib_index = fib_index;
1733           outside_fib->refcount = 1;
1734         }
1735
1736       nat44_ed_add_del_nat_addr_fib_reg_entries (sw_if_index, 1);
1737       nat44_ed_add_del_sm_fib_reg_entries (sw_if_index, 1);
1738
1739       nat44_ed_bind_if_addr_to_nat_addr (sw_if_index);
1740     }
1741   else
1742     {
1743       i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
1744     }
1745
1746   return 0;
1747 }
1748
1749 int
1750 nat44_ed_del_interface (u32 sw_if_index, u8 is_inside)
1751 {
1752   const char *del_feature_name, *feature_name;
1753   snat_main_t *sm = &snat_main;
1754
1755   nat_outside_fib_t *outside_fib;
1756   snat_interface_t *i;
1757   u32 fib_index;
1758   int rv;
1759
1760   if (!sm->enabled)
1761     {
1762       nat_log_err ("nat44 is disabled");
1763       return VNET_API_ERROR_UNSUPPORTED;
1764     }
1765
1766   i = nat44_ed_get_interface (sm->interfaces, sw_if_index);
1767   if (i == 0)
1768     {
1769       nat_log_err ("error interface couldn't be found");
1770       return VNET_API_ERROR_NO_SUCH_ENTRY;
1771     }
1772
1773   if (nat44_ed_is_interface_inside (i) && nat44_ed_is_interface_outside (i))
1774     {
1775       if (sm->num_workers > 1)
1776         {
1777           del_feature_name = "nat44-handoff-classify";
1778           feature_name = !is_inside ? "nat44-in2out-worker-handoff" :
1779                                       "nat44-out2in-worker-handoff";
1780         }
1781       else
1782         {
1783           del_feature_name = "nat44-ed-classify";
1784           feature_name = !is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1785         }
1786
1787       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1788       if (rv)
1789         {
1790           return rv;
1791         }
1792       vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1793                                    sw_if_index, 0, 0, 0);
1794       vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1,
1795                                    0, 0);
1796
1797       if (is_inside)
1798         {
1799           i->flags &= ~NAT_INTERFACE_FLAG_IS_INSIDE;
1800         }
1801       else
1802         {
1803           i->flags &= ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
1804         }
1805     }
1806   else
1807     {
1808       if (sm->num_workers > 1)
1809         {
1810           feature_name = is_inside ? "nat44-in2out-worker-handoff" :
1811                                      "nat44-out2in-worker-handoff";
1812         }
1813       else
1814         {
1815           feature_name = is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1816         }
1817
1818       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1819       if (rv)
1820         {
1821           return rv;
1822         }
1823       vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 0,
1824                                    0, 0);
1825
1826       // remove interface
1827       pool_put (sm->interfaces, i);
1828     }
1829
1830   fib_index =
1831     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
1832
1833   update_per_vrf_sessions_vec (fib_index, 1 /*is_del*/);
1834
1835   if (!is_inside)
1836     {
1837       outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
1838       if (outside_fib)
1839         {
1840           outside_fib->refcount--;
1841           if (!outside_fib->refcount)
1842             {
1843               vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1844             }
1845         }
1846
1847       nat44_ed_add_del_nat_addr_fib_reg_entries (sw_if_index, 0);
1848       nat44_ed_add_del_sm_fib_reg_entries (sw_if_index, 0);
1849     }
1850
1851   return 0;
1852 }
1853
1854 int
1855 nat44_ed_add_output_interface (u32 sw_if_index)
1856 {
1857   snat_main_t *sm = &snat_main;
1858
1859   nat_outside_fib_t *outside_fib;
1860   snat_interface_t *i;
1861   u32 fib_index;
1862   int rv;
1863
1864   if (!sm->enabled)
1865     {
1866       nat_log_err ("nat44 is disabled");
1867       return VNET_API_ERROR_UNSUPPORTED;
1868     }
1869
1870   if (nat44_ed_get_interface (sm->interfaces, sw_if_index))
1871     {
1872       nat_log_err ("error interface already configured");
1873       return VNET_API_ERROR_VALUE_EXIST;
1874     }
1875
1876   if (nat44_ed_get_interface (sm->output_feature_interfaces, sw_if_index))
1877     {
1878       nat_log_err ("error interface already configured");
1879       return VNET_API_ERROR_VALUE_EXIST;
1880     }
1881
1882   if (sm->num_workers > 1)
1883     {
1884       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1885       if (rv)
1886         {
1887           return rv;
1888         }
1889
1890       rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 1);
1891       if (rv)
1892         {
1893           return rv;
1894         }
1895
1896       vnet_feature_enable_disable (
1897         "ip4-unicast", "nat44-out2in-worker-handoff", sw_if_index, 1, 0, 0);
1898       vnet_feature_enable_disable ("ip4-output",
1899                                    "nat44-in2out-output-worker-handoff",
1900                                    sw_if_index, 1, 0, 0);
1901     }
1902   else
1903     {
1904       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1905       if (rv)
1906         {
1907           return rv;
1908         }
1909
1910       rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 1);
1911       if (rv)
1912         {
1913           return rv;
1914         }
1915
1916       vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in",
1917                                    sw_if_index, 1, 0, 0);
1918       vnet_feature_enable_disable ("ip4-output", "nat-pre-in2out-output",
1919                                    sw_if_index, 1, 0, 0);
1920     }
1921
1922   nat_validate_interface_counters (sm, sw_if_index);
1923
1924   pool_get (sm->output_feature_interfaces, i);
1925   i->sw_if_index = sw_if_index;
1926   i->flags = 0;
1927   i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
1928   i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
1929
1930   fib_index =
1931     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
1932   update_per_vrf_sessions_vec (fib_index, 0 /*is_del*/);
1933
1934   outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
1935   if (outside_fib)
1936     {
1937       outside_fib->refcount++;
1938     }
1939   else
1940     {
1941       vec_add2 (sm->outside_fibs, outside_fib, 1);
1942       outside_fib->fib_index = fib_index;
1943       outside_fib->refcount = 1;
1944     }
1945
1946   nat44_ed_add_del_nat_addr_fib_reg_entries (sw_if_index, 1);
1947   nat44_ed_add_del_sm_fib_reg_entries (sw_if_index, 1);
1948
1949   nat44_ed_bind_if_addr_to_nat_addr (sw_if_index);
1950
1951   return 0;
1952 }
1953
1954 int
1955 nat44_ed_del_output_interface (u32 sw_if_index)
1956 {
1957   snat_main_t *sm = &snat_main;
1958
1959   nat_outside_fib_t *outside_fib;
1960   snat_interface_t *i;
1961   u32 fib_index;
1962   int rv;
1963
1964   if (!sm->enabled)
1965     {
1966       nat_log_err ("nat44 is disabled");
1967       return VNET_API_ERROR_UNSUPPORTED;
1968     }
1969
1970   i = nat44_ed_get_interface (sm->output_feature_interfaces, sw_if_index);
1971   if (!i)
1972     {
1973       nat_log_err ("error interface couldn't be found");
1974       return VNET_API_ERROR_NO_SUCH_ENTRY;
1975     }
1976
1977   if (sm->num_workers > 1)
1978     {
1979       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1980       if (rv)
1981         {
1982           return rv;
1983         }
1984
1985       rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 0);
1986       if (rv)
1987         {
1988           return rv;
1989         }
1990
1991       vnet_feature_enable_disable (
1992         "ip4-unicast", "nat44-out2in-worker-handoff", sw_if_index, 0, 0, 0);
1993       vnet_feature_enable_disable ("ip4-output",
1994                                    "nat44-in2out-output-worker-handoff",
1995                                    sw_if_index, 0, 0, 0);
1996     }
1997   else
1998     {
1999       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
2000       if (rv)
2001         {
2002           return rv;
2003         }
2004
2005       rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 0);
2006       if (rv)
2007         {
2008           return rv;
2009         }
2010
2011       vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in",
2012                                    sw_if_index, 0, 0, 0);
2013       vnet_feature_enable_disable ("ip4-output", "nat-pre-in2out-output",
2014                                    sw_if_index, 0, 0, 0);
2015     }
2016
2017   // remove interface
2018   pool_put (sm->output_feature_interfaces, i);
2019
2020   fib_index =
2021     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
2022   update_per_vrf_sessions_vec (fib_index, 1 /*is_del*/);
2023
2024   outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
2025   if (outside_fib)
2026     {
2027       outside_fib->refcount--;
2028       if (!outside_fib->refcount)
2029         {
2030           vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
2031         }
2032     }
2033
2034   nat44_ed_add_del_nat_addr_fib_reg_entries (sw_if_index, 0);
2035   nat44_ed_add_del_sm_fib_reg_entries (sw_if_index, 0);
2036
2037   return 0;
2038 }
2039
2040 int
2041 snat_set_workers (uword * bitmap)
2042 {
2043   snat_main_t *sm = &snat_main;
2044   int i, j = 0;
2045
2046   if (sm->num_workers < 2)
2047     return VNET_API_ERROR_FEATURE_DISABLED;
2048
2049   if (clib_bitmap_last_set (bitmap) >= sm->num_workers)
2050     return VNET_API_ERROR_INVALID_WORKER;
2051
2052   vec_free (sm->workers);
2053   clib_bitmap_foreach (i, bitmap)
2054     {
2055       vec_add1(sm->workers, i);
2056       sm->per_thread_data[sm->first_worker_index + i].snat_thread_index = j;
2057       sm->per_thread_data[sm->first_worker_index + i].thread_index = i;
2058       j++;
2059     }
2060
2061   sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
2062
2063   return 0;
2064 }
2065
2066 int
2067 nat44_ed_set_frame_queue_nelts (u32 frame_queue_nelts)
2068 {
2069   fail_if_enabled ();
2070   snat_main_t *sm = &snat_main;
2071   sm->frame_queue_nelts = frame_queue_nelts;
2072   return 0;
2073 }
2074
2075 static void
2076 nat44_ed_update_outside_fib_cb (ip4_main_t *im, uword opaque, u32 sw_if_index,
2077                                 u32 new_fib_index, u32 old_fib_index)
2078 {
2079   snat_main_t *sm = &snat_main;
2080   nat_outside_fib_t *outside_fib;
2081   snat_interface_t *i;
2082   u8 is_add = 1;
2083   u8 match = 0;
2084
2085   if (!sm->enabled || (new_fib_index == old_fib_index)
2086       || (!vec_len (sm->outside_fibs)))
2087     {
2088       return;
2089     }
2090
2091   pool_foreach (i, sm->interfaces)
2092     {
2093       if (i->sw_if_index == sw_if_index)
2094         {
2095           if (!(nat44_ed_is_interface_outside (i)))
2096             return;
2097           match = 1;
2098         }
2099     }
2100
2101   pool_foreach (i, sm->output_feature_interfaces)
2102     {
2103       if (i->sw_if_index == sw_if_index)
2104         {
2105           if (!(nat44_ed_is_interface_outside (i)))
2106             return;
2107           match = 1;
2108         }
2109     }
2110
2111   if (!match)
2112     return;
2113
2114   vec_foreach (outside_fib, sm->outside_fibs)
2115     {
2116       if (outside_fib->fib_index == old_fib_index)
2117         {
2118           outside_fib->refcount--;
2119           if (!outside_fib->refcount)
2120             vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
2121           break;
2122         }
2123     }
2124
2125   vec_foreach (outside_fib, sm->outside_fibs)
2126     {
2127       if (outside_fib->fib_index == new_fib_index)
2128         {
2129           outside_fib->refcount++;
2130           is_add = 0;
2131           break;
2132         }
2133     }
2134
2135   if (is_add)
2136     {
2137       vec_add2 (sm->outside_fibs, outside_fib, 1);
2138       outside_fib->refcount = 1;
2139       outside_fib->fib_index = new_fib_index;
2140     }
2141 }
2142
2143 static void nat44_ed_update_outside_fib_cb (ip4_main_t *im, uword opaque,
2144                                             u32 sw_if_index, u32 new_fib_index,
2145                                             u32 old_fib_index);
2146
2147 static void nat44_ed_add_del_interface_address_cb (
2148   ip4_main_t *im, uword opaque, u32 sw_if_index, ip4_address_t *address,
2149   u32 address_length, u32 if_address_index, u32 is_delete);
2150
2151 static void nat44_ed_add_del_static_mapping_cb (
2152   ip4_main_t *im, uword opaque, u32 sw_if_index, ip4_address_t *address,
2153   u32 address_length, u32 if_address_index, u32 is_delete);
2154
2155 void
2156 test_key_calc_split ()
2157 {
2158   ip4_address_t l_addr;
2159   l_addr.as_u8[0] = 1;
2160   l_addr.as_u8[1] = 1;
2161   l_addr.as_u8[2] = 1;
2162   l_addr.as_u8[3] = 1;
2163   ip4_address_t r_addr;
2164   r_addr.as_u8[0] = 2;
2165   r_addr.as_u8[1] = 2;
2166   r_addr.as_u8[2] = 2;
2167   r_addr.as_u8[3] = 2;
2168   u16 l_port = 40001;
2169   u16 r_port = 40301;
2170   u8 proto = 9;
2171   u32 fib_index = 9000001;
2172   u32 thread_index = 3000000001;
2173   u32 session_index = 3000000221;
2174   clib_bihash_kv_16_8_t kv;
2175   init_ed_kv (&kv, l_addr.as_u32, l_port, r_addr.as_u32, r_port, fib_index,
2176               proto, thread_index, session_index);
2177   ip4_address_t l_addr2;
2178   ip4_address_t r_addr2;
2179   clib_memset (&l_addr2, 0, sizeof (l_addr2));
2180   clib_memset (&r_addr2, 0, sizeof (r_addr2));
2181   u16 l_port2 = 0;
2182   u16 r_port2 = 0;
2183   u8 proto2 = 0;
2184   u32 fib_index2 = 0;
2185   split_ed_kv (&kv, &l_addr2, &r_addr2, &proto2, &fib_index2, &l_port2,
2186                &r_port2);
2187   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
2188   ASSERT (r_addr.as_u32 == r_addr2.as_u32);
2189   ASSERT (l_port == l_port2);
2190   ASSERT (r_port == r_port2);
2191   ASSERT (proto == proto2);
2192   ASSERT (fib_index == fib_index2);
2193   ASSERT (thread_index == ed_value_get_thread_index (&kv));
2194   ASSERT (session_index == ed_value_get_session_index (&kv));
2195 }
2196
2197 static clib_error_t *
2198 nat_ip_table_add_del (vnet_main_t * vnm, u32 table_id, u32 is_add)
2199 {
2200   u32 fib_index;
2201   if (!is_add)
2202     {
2203       fib_index = ip4_fib_index_from_table_id (table_id);
2204       if (fib_index != ~0)
2205         {
2206           expire_per_vrf_sessions (fib_index);
2207         }
2208     }
2209   return 0;
2210 }
2211
2212 VNET_IP_TABLE_ADD_DEL_FUNCTION (nat_ip_table_add_del);
2213
2214 void
2215 nat44_set_node_indexes (snat_main_t * sm, vlib_main_t * vm)
2216 {
2217   vlib_node_t *node;
2218
2219   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
2220   sm->out2in_node_index = node->index;
2221
2222   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
2223   sm->in2out_node_index = node->index;
2224
2225   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-output");
2226   sm->in2out_output_node_index = node->index;
2227 }
2228
/*
 * Make sure simple counter c has a slot for index i and reset that slot.
 *
 * The macro expands to a single statement: there is deliberately no
 * semicolon after `while (0)`, the caller supplies it. This keeps
 * `if (x) nat_validate_simple_counter (c, i); else ...` well-formed.
 */
#define nat_validate_simple_counter(c, i)                                     \
  do                                                                          \
    {                                                                         \
      vlib_validate_simple_counter (&(c), (i));                               \
      vlib_zero_simple_counter (&(c), (i));                                   \
    }                                                                         \
  while (0)

/*
 * Set the name (n) and stat segment name (sn) of simple counter c, then
 * validate and zero its slot 0. Expands to a single statement; the caller
 * supplies the terminating semicolon.
 */
#define nat_init_simple_counter(c, n, sn)                                     \
  do                                                                          \
    {                                                                         \
      (c).name = (n);                                                         \
      (c).stat_segment_name = (sn);                                           \
      nat_validate_simple_counter (c, 0);                                     \
    }                                                                         \
  while (0)
2245
2246 static_always_inline void
2247 nat_validate_interface_counters (snat_main_t *sm, u32 sw_if_index)
2248 {
2249 #define _(x)                                                                  \
2250   nat_validate_simple_counter (sm->counters.fastpath.in2out.x, sw_if_index);  \
2251   nat_validate_simple_counter (sm->counters.fastpath.out2in.x, sw_if_index);  \
2252   nat_validate_simple_counter (sm->counters.slowpath.in2out.x, sw_if_index);  \
2253   nat_validate_simple_counter (sm->counters.slowpath.out2in.x, sw_if_index);
2254   foreach_nat_counter;
2255 #undef _
2256   nat_validate_simple_counter (sm->counters.hairpinning, sw_if_index);
2257 }
2258
2259 static clib_error_t *
2260 nat_init (vlib_main_t * vm)
2261 {
2262   snat_main_t *sm = &snat_main;
2263   vlib_thread_main_t *tm = vlib_get_thread_main ();
2264   vlib_thread_registration_t *tr;
2265   ip4_add_del_interface_address_callback_t cbi = { 0 };
2266   ip4_table_bind_callback_t cbt = { 0 };
2267   u32 i, num_threads = 0;
2268   uword *p, *bitmap = 0;
2269
2270   clib_memset (sm, 0, sizeof (*sm));
2271
2272   // required
2273   sm->vnet_main = vnet_get_main ();
2274   // convenience
2275   sm->ip4_main = &ip4_main;
2276   sm->api_main = vlibapi_get_main ();
2277   sm->ip4_lookup_main = &ip4_main.lookup_main;
2278
2279   // frame queue indices used for handoff
2280   sm->fq_out2in_index = ~0;
2281   sm->fq_in2out_index = ~0;
2282   sm->fq_in2out_output_index = ~0;
2283
2284   sm->log_level = NAT_LOG_ERROR;
2285
2286   nat44_set_node_indexes (sm, vm);
2287
2288   sm->log_class = vlib_log_register_class ("nat", 0);
2289   nat_ipfix_logging_init (vm);
2290
2291   nat_init_simple_counter (sm->total_sessions, "total-sessions",
2292                            "/nat44-ed/total-sessions");
2293   sm->max_cfg_sessions_gauge =
2294     vlib_stats_add_gauge ("/nat44-ed/max-cfg-sessions");
2295
2296 #define _(x)                                                                  \
2297   nat_init_simple_counter (sm->counters.fastpath.in2out.x, #x,                \
2298                            "/nat44-ed/in2out/fastpath/" #x);                  \
2299   nat_init_simple_counter (sm->counters.fastpath.out2in.x, #x,                \
2300                            "/nat44-ed/out2in/fastpath/" #x);                  \
2301   nat_init_simple_counter (sm->counters.slowpath.in2out.x, #x,                \
2302                            "/nat44-ed/in2out/slowpath/" #x);                  \
2303   nat_init_simple_counter (sm->counters.slowpath.out2in.x, #x,                \
2304                            "/nat44-ed/out2in/slowpath/" #x);
2305   foreach_nat_counter;
2306 #undef _
2307   nat_init_simple_counter (sm->counters.hairpinning, "hairpinning",
2308                            "/nat44-ed/hairpinning");
2309
2310   p = hash_get_mem (tm->thread_registrations_by_name, "workers");
2311   if (p)
2312     {
2313       tr = (vlib_thread_registration_t *) p[0];
2314       if (tr)
2315         {
2316           sm->num_workers = tr->count;
2317           sm->first_worker_index = tr->first_index;
2318         }
2319     }
2320   num_threads = tm->n_vlib_mains - 1;
2321   sm->port_per_thread = 0xffff - 1024;
2322   vec_validate (sm->per_thread_data, num_threads);
2323
2324   /* Use all available workers by default */
2325   if (sm->num_workers > 1)
2326     {
2327       for (i = 0; i < sm->num_workers; i++)
2328         bitmap = clib_bitmap_set (bitmap, i, 1);
2329       snat_set_workers (bitmap);
2330       clib_bitmap_free (bitmap);
2331     }
2332   else
2333     {
2334       sm->per_thread_data[0].snat_thread_index = 0;
2335     }
2336
2337   /* callbacks to call when interface address changes. */
2338   cbi.function = nat44_ed_add_del_interface_address_cb;
2339   vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
2340   cbi.function = nat44_ed_add_del_static_mapping_cb;
2341   vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
2342
2343   /* callbacks to call when interface to table biding changes */
2344   cbt.function = nat44_ed_update_outside_fib_cb;
2345   vec_add1 (sm->ip4_main->table_bind_callbacks, cbt);
2346
2347   sm->fib_src_low =
2348     fib_source_allocate ("nat-low", FIB_SOURCE_PRIORITY_LOW,
2349                          FIB_SOURCE_BH_SIMPLE);
2350   sm->fib_src_hi =
2351     fib_source_allocate ("nat-hi", FIB_SOURCE_PRIORITY_HI,
2352                          FIB_SOURCE_BH_SIMPLE);
2353
2354   nat_affinity_init (vm);
2355   test_key_calc_split ();
2356
2357   return nat44_api_hookup (vm);
2358 }
2359
2360 VLIB_INIT_FUNCTION (nat_init);
2361
2362 int
2363 nat44_plugin_enable (nat44_config_t c)
2364 {
2365   snat_main_t *sm = &snat_main;
2366
2367   fail_if_enabled ();
2368
2369   sm->forwarding_enabled = 0;
2370   sm->mss_clamping = 0;
2371
2372   if (!c.sessions)
2373     c.sessions = 63 * 1024;
2374
2375   sm->max_translations_per_thread = c.sessions;
2376   vlib_stats_set_gauge (sm->max_cfg_sessions_gauge,
2377                         sm->max_translations_per_thread);
2378   sm->translation_buckets = nat_calc_bihash_buckets (c.sessions);
2379
2380   vec_add1 (sm->max_translations_per_fib, sm->max_translations_per_thread);
2381
2382   sm->inside_vrf_id = c.inside_vrf;
2383   sm->inside_fib_index =
2384     fib_table_find_or_create_and_lock
2385     (FIB_PROTOCOL_IP4, c.inside_vrf, sm->fib_src_hi);
2386
2387   sm->outside_vrf_id = c.outside_vrf;
2388   sm->outside_fib_index = fib_table_find_or_create_and_lock (
2389     FIB_PROTOCOL_IP4, c.outside_vrf, sm->fib_src_hi);
2390
2391   nat44_ed_db_init (sm->max_translations_per_thread, sm->translation_buckets);
2392
2393   nat44_ed_init_tcp_state_stable (sm);
2394
2395   nat_affinity_enable ();
2396
2397   nat_reset_timeouts (&sm->timeouts);
2398
2399   vlib_zero_simple_counter (&sm->total_sessions, 0);
2400
2401   if (!sm->frame_queue_nelts)
2402     {
2403       sm->frame_queue_nelts = NAT_FQ_NELTS_DEFAULT;
2404     }
2405
2406   if (sm->num_workers > 1)
2407     {
2408       if (sm->fq_in2out_index == ~0)
2409         {
2410           sm->fq_in2out_index = vlib_frame_queue_main_init (
2411             sm->in2out_node_index, sm->frame_queue_nelts);
2412         }
2413       if (sm->fq_out2in_index == ~0)
2414         {
2415           sm->fq_out2in_index = vlib_frame_queue_main_init (
2416             sm->out2in_node_index, sm->frame_queue_nelts);
2417         }
2418       if (sm->fq_in2out_output_index == ~0)
2419         {
2420           sm->fq_in2out_output_index = vlib_frame_queue_main_init (
2421             sm->in2out_output_node_index, sm->frame_queue_nelts);
2422         }
2423     }
2424
2425   sm->enabled = 1;
2426   sm->rconfig = c;
2427
2428   return 0;
2429 }
2430
2431 int
2432 nat44_ed_del_addresses ()
2433 {
2434   snat_main_t *sm = &snat_main;
2435   snat_address_t *a, *vec;
2436   int error = 0;
2437
2438   vec = vec_dup (sm->addresses);
2439   vec_foreach (a, vec)
2440     {
2441       error = nat44_ed_del_address (a->addr, 0);
2442       if (error)
2443         {
2444           nat_log_err ("error occurred while removing adderess");
2445         }
2446     }
2447   vec_free (vec);
2448   vec_free (sm->addresses);
2449   sm->addresses = 0;
2450
2451   vec = vec_dup (sm->twice_nat_addresses);
2452   vec_foreach (a, vec)
2453     {
2454       error = nat44_ed_del_address (a->addr, 1);
2455       if (error)
2456         {
2457           nat_log_err ("error occurred while removing adderess");
2458         }
2459     }
2460   vec_free (vec);
2461   vec_free (sm->twice_nat_addresses);
2462   sm->twice_nat_addresses = 0;
2463
2464   vec_free (sm->addr_to_resolve);
2465   sm->addr_to_resolve = 0;
2466
2467   return error;
2468 }
2469
2470 int
2471 nat44_ed_del_interfaces ()
2472 {
2473   snat_main_t *sm = &snat_main;
2474   snat_interface_t *i, *pool;
2475   int error = 0;
2476
2477   pool = pool_dup (sm->interfaces);
2478   pool_foreach (i, pool)
2479     {
2480       if (nat44_ed_is_interface_inside (i))
2481         {
2482           error = nat44_ed_del_interface (i->sw_if_index, 1);
2483         }
2484       if (nat44_ed_is_interface_outside (i))
2485         {
2486           error = nat44_ed_del_interface (i->sw_if_index, 0);
2487         }
2488
2489       if (error)
2490         {
2491           nat_log_err ("error occurred while removing interface");
2492         }
2493     }
2494   pool_free (pool);
2495   pool_free (sm->interfaces);
2496   sm->interfaces = 0;
2497   return error;
2498 }
2499
2500 int
2501 nat44_ed_del_output_interfaces ()
2502 {
2503   snat_main_t *sm = &snat_main;
2504   snat_interface_t *i, *pool;
2505   int error = 0;
2506
2507   pool = pool_dup (sm->output_feature_interfaces);
2508   pool_foreach (i, pool)
2509     {
2510       error = nat44_ed_del_output_interface (i->sw_if_index);
2511       if (error)
2512         {
2513           nat_log_err ("error occurred while removing output interface");
2514         }
2515     }
2516   pool_free (pool);
2517   pool_free (sm->output_feature_interfaces);
2518   sm->output_feature_interfaces = 0;
2519   return error;
2520 }
2521
2522 int
2523 nat44_ed_del_static_mappings ()
2524 {
2525   snat_main_t *sm = &snat_main;
2526   snat_static_mapping_t *m, *pool;
2527   int error = 0;
2528
2529   pool = pool_dup (sm->static_mappings);
2530   pool_foreach (m, pool)
2531     {
2532       error = nat44_ed_del_static_mapping_internal (
2533         m->local_addr, m->external_addr, m->local_port, m->external_port,
2534         m->proto, m->vrf_id, m->flags);
2535       if (error)
2536         {
2537           nat_log_err ("error occurred while removing mapping");
2538         }
2539     }
2540   pool_free (pool);
2541   pool_free (sm->static_mappings);
2542   sm->static_mappings = 0;
2543
2544   vec_free (sm->sm_to_resolve);
2545   sm->sm_to_resolve = 0;
2546
2547   return error;
2548 }
2549
2550 int
2551 nat44_plugin_disable ()
2552 {
2553   snat_main_per_thread_data_t *tsm;
2554   snat_main_t *sm = &snat_main;
2555   int rc, error = 0;
2556
2557   fail_if_disabled ();
2558
2559   rc = nat44_ed_del_static_mappings ();
2560   if (rc)
2561     error = 1;
2562
2563   rc = nat44_ed_del_addresses ();
2564   if (rc)
2565     error = 1;
2566
2567   rc = nat44_ed_del_interfaces ();
2568   if (rc)
2569     error = 1;
2570
2571   rc = nat44_ed_del_output_interfaces ();
2572   if (rc)
2573     error = 1;
2574
2575   vec_free (sm->max_translations_per_fib);
2576   sm->max_translations_per_fib = 0;
2577
2578   clib_bihash_free_16_8 (&sm->flow_hash);
2579
2580   vec_foreach (tsm, sm->per_thread_data)
2581     {
2582       nat44_ed_worker_db_free (tsm);
2583     }
2584
2585   clib_memset (&sm->rconfig, 0, sizeof (sm->rconfig));
2586
2587   nat_affinity_disable ();
2588
2589   sm->forwarding_enabled = 0;
2590   sm->enabled = 0;
2591
2592   return error;
2593 }
2594
2595 void
2596 nat44_ed_forwarding_enable_disable (u8 is_enable)
2597 {
2598   snat_main_per_thread_data_t *tsm;
2599   snat_main_t *sm = &snat_main;
2600   snat_session_t *s;
2601
2602   u32 *ses_to_be_removed = 0, *ses_index;
2603
2604   sm->forwarding_enabled = is_enable != 0;
2605
2606   if (!sm->enabled || is_enable)
2607     {
2608       return;
2609     }
2610
2611   vec_foreach (tsm, sm->per_thread_data)
2612     {
2613       pool_foreach (s, tsm->sessions)
2614         {
2615           if (na44_ed_is_fwd_bypass_session (s))
2616             {
2617               vec_add1 (ses_to_be_removed, s - tsm->sessions);
2618             }
2619         }
2620       vec_foreach (ses_index, ses_to_be_removed)
2621         {
2622           s = pool_elt_at_index (tsm->sessions, ses_index[0]);
2623           nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
2624           nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
2625         }
2626
2627       vec_free (ses_to_be_removed);
2628     }
2629 }
2630
2631 static_always_inline snat_static_mapping_t *
2632 nat44_ed_sm_match (snat_main_t *sm, ip4_address_t match_addr, u16 match_port,
2633                    u32 match_fib_index, ip_protocol_t match_protocol,
2634                    int by_external)
2635 {
2636   snat_static_mapping_t *m;
2637   if (!by_external)
2638     {
2639       m = nat44_ed_sm_i2o_lookup (sm, match_addr, match_port, match_fib_index,
2640                                   match_protocol);
2641       if (m)
2642         return m;
2643
2644       /* Try address only mapping */
2645       m = nat44_ed_sm_i2o_lookup (sm, match_addr, 0, match_fib_index, 0);
2646       if (m)
2647         return m;
2648
2649       if (sm->inside_fib_index != match_fib_index)
2650         {
2651           m = nat44_ed_sm_i2o_lookup (sm, match_addr, match_port,
2652                                       sm->inside_fib_index, match_protocol);
2653           if (m)
2654             return m;
2655
2656           /* Try address only mapping */
2657           m = nat44_ed_sm_i2o_lookup (sm, match_addr, 0, sm->inside_fib_index,
2658                                       0);
2659           if (m)
2660             return m;
2661         }
2662       if (sm->outside_fib_index != match_fib_index)
2663         {
2664           m = nat44_ed_sm_i2o_lookup (sm, match_addr, match_port,
2665                                       sm->outside_fib_index, match_protocol);
2666           if (m)
2667             return m;
2668
2669           /* Try address only mapping */
2670           m = nat44_ed_sm_i2o_lookup (sm, match_addr, 0, sm->outside_fib_index,
2671                                       0);
2672           if (m)
2673             return m;
2674         }
2675     }
2676   else
2677     {
2678       m =
2679         nat44_ed_sm_o2i_lookup (sm, match_addr, match_port, 0, match_protocol);
2680       if (m)
2681         return m;
2682
2683       /* Try address only mapping */
2684       m = nat44_ed_sm_o2i_lookup (sm, match_addr, 0, 0, 0);
2685       if (m)
2686         return m;
2687     }
2688   return 0;
2689 }
2690
2691 int
2692 snat_static_mapping_match (vlib_main_t *vm, snat_main_t *sm,
2693                            ip4_address_t match_addr, u16 match_port,
2694                            u32 match_fib_index, ip_protocol_t match_protocol,
2695                            ip4_address_t *mapping_addr, u16 *mapping_port,
2696                            u32 *mapping_fib_index, int by_external,
2697                            u8 *is_addr_only, twice_nat_type_t *twice_nat,
2698                            lb_nat_type_t *lb, ip4_address_t *ext_host_addr,
2699                            u8 *is_identity_nat, snat_static_mapping_t **out)
2700 {
2701   snat_static_mapping_t *m;
2702   u32 rand, lo = 0, hi, mid, *tmp = 0, i;
2703   nat44_lb_addr_port_t *local;
2704   u8 backend_index;
2705
2706   m = nat44_ed_sm_match (sm, match_addr, match_port, match_fib_index,
2707                          match_protocol, by_external);
2708   if (!m)
2709     {
2710       return 1;
2711     }
2712
2713   if (by_external)
2714     {
2715       if (is_sm_lb (m->flags))
2716         {
2717           if (PREDICT_FALSE (lb != 0))
2718             *lb = m->affinity ? AFFINITY_LB_NAT : LB_NAT;
2719           if (m->affinity && !nat_affinity_find_and_lock (
2720                                vm, ext_host_addr[0], match_addr,
2721                                match_protocol, match_port, &backend_index))
2722             {
2723               local = pool_elt_at_index (m->locals, backend_index);
2724               *mapping_addr = local->addr;
2725               *mapping_port = local->port;
2726               *mapping_fib_index = local->fib_index;
2727               goto end;
2728             }
2729           // pick locals matching this worker
2730           if (PREDICT_FALSE (sm->num_workers > 1))
2731             {
2732               u32 thread_index = vlib_get_thread_index ();
2733               pool_foreach_index (i, m->locals)
2734                {
2735                 local = pool_elt_at_index (m->locals, i);
2736
2737                 ip4_header_t ip = {
2738                   .src_address = local->addr,
2739                 };
2740
2741                 if (nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index,
2742                                                       0) == thread_index)
2743                   {
2744                     vec_add1 (tmp, i);
2745                   }
2746                }
2747               ASSERT (vec_len (tmp) != 0);
2748             }
2749           else
2750             {
2751               pool_foreach_index (i, m->locals)
2752                {
2753                 vec_add1 (tmp, i);
2754               }
2755             }
2756           hi = vec_len (tmp) - 1;
2757           local = pool_elt_at_index (m->locals, tmp[hi]);
2758           rand = 1 + (random_u32 (&sm->random_seed) % local->prefix);
2759           while (lo < hi)
2760             {
2761               mid = ((hi - lo) >> 1) + lo;
2762               local = pool_elt_at_index (m->locals, tmp[mid]);
2763               (rand > local->prefix) ? (lo = mid + 1) : (hi = mid);
2764             }
2765           local = pool_elt_at_index (m->locals, tmp[lo]);
2766           if (!(local->prefix >= rand))
2767             return 1;
2768           *mapping_addr = local->addr;
2769           *mapping_port = local->port;
2770           *mapping_fib_index = local->fib_index;
2771           if (m->affinity)
2772             {
2773               if (nat_affinity_create_and_lock (ext_host_addr[0], match_addr,
2774                                                 match_protocol, match_port,
2775                                                 tmp[lo], m->affinity,
2776                                                 m->affinity_per_service_list_head_index))
2777                 nat_elog_info (sm, "create affinity record failed");
2778             }
2779           vec_free (tmp);
2780         }
2781       else
2782         {
2783           if (PREDICT_FALSE (lb != 0))
2784             *lb = NO_LB_NAT;
2785           *mapping_fib_index = m->fib_index;
2786           *mapping_addr = m->local_addr;
2787           /* Address only mapping doesn't change port */
2788           *mapping_port =
2789             is_sm_addr_only (m->flags) ? match_port : m->local_port;
2790         }
2791     }
2792   else
2793     {
2794       *mapping_addr = m->external_addr;
2795       /* Address only mapping doesn't change port */
2796       *mapping_port =
2797         is_sm_addr_only (m->flags) ? match_port : m->external_port;
2798       *mapping_fib_index = sm->outside_fib_index;
2799     }
2800
2801 end:
2802   if (PREDICT_FALSE (is_addr_only != 0))
2803     *is_addr_only = is_sm_addr_only (m->flags);
2804
2805   if (PREDICT_FALSE (twice_nat != 0))
2806     {
2807       *twice_nat = TWICE_NAT_DISABLED;
2808
2809       if (is_sm_twice_nat (m->flags))
2810         {
2811           *twice_nat = TWICE_NAT;
2812         }
2813       else if (is_sm_self_twice_nat (m->flags))
2814         {
2815           *twice_nat = TWICE_NAT_SELF;
2816         }
2817     }
2818
2819   if (PREDICT_FALSE (is_identity_nat != 0))
2820     *is_identity_nat = is_sm_identity_nat (m->flags);
2821
2822   if (out != 0)
2823     *out = m;
2824
2825   return 0;
2826 }
2827
2828 u32
2829 nat44_ed_get_in2out_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
2830                                   u32 rx_fib_index, u8 is_output)
2831 {
2832   snat_main_t *sm = &snat_main;
2833   u32 next_worker_index = sm->first_worker_index;
2834   u32 hash;
2835
2836   clib_bihash_kv_16_8_t kv16, value16;
2837
2838   u32 fib_index = rx_fib_index;
2839   if (b)
2840     {
2841       if (PREDICT_FALSE (is_output))
2842         {
2843           fib_index = sm->outside_fib_index;
2844           nat_outside_fib_t *outside_fib;
2845           fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
2846           fib_prefix_t pfx = {
2847                   .fp_proto = FIB_PROTOCOL_IP4,
2848                   .fp_len = 32,
2849                   .fp_addr = {
2850                           .ip4.as_u32 = ip->dst_address.as_u32,
2851                   } ,
2852           };
2853
2854           switch (vec_len (sm->outside_fibs))
2855             {
2856             case 0:
2857               fib_index = sm->outside_fib_index;
2858               break;
2859             case 1:
2860               fib_index = sm->outside_fibs[0].fib_index;
2861               break;
2862             default:
2863               vec_foreach (outside_fib, sm->outside_fibs)
2864                 {
2865                   fei = fib_table_lookup (outside_fib->fib_index, &pfx);
2866                   if (FIB_NODE_INDEX_INVALID != fei)
2867                     {
2868                       if (fib_entry_get_resolving_interface (fei) != ~0)
2869                         {
2870                           fib_index = outside_fib->fib_index;
2871                           break;
2872                         }
2873                     }
2874                 }
2875               break;
2876             }
2877         }
2878
2879       if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
2880         {
2881           ip4_address_t lookup_saddr, lookup_daddr;
2882           u16 lookup_sport, lookup_dport;
2883           u8 lookup_protocol;
2884
2885           if (!nat_get_icmp_session_lookup_values (
2886                 b, ip, &lookup_saddr, &lookup_sport, &lookup_daddr,
2887                 &lookup_dport, &lookup_protocol))
2888             {
2889               init_ed_k (&kv16, lookup_saddr.as_u32, lookup_sport,
2890                          lookup_daddr.as_u32, lookup_dport, rx_fib_index,
2891                          lookup_protocol);
2892               if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
2893                 {
2894                   next_worker_index = ed_value_get_thread_index (&value16);
2895                   vnet_buffer2 (b)->nat.cached_session_index =
2896                     ed_value_get_session_index (&value16);
2897                   goto out;
2898                 }
2899             }
2900         }
2901
2902       init_ed_k (&kv16, ip->src_address.as_u32,
2903                  vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
2904                  vnet_buffer (b)->ip.reass.l4_dst_port, fib_index,
2905                  ip->protocol);
2906
2907       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
2908         {
2909           next_worker_index = ed_value_get_thread_index (&value16);
2910           vnet_buffer2 (b)->nat.cached_session_index =
2911             ed_value_get_session_index (&value16);
2912           goto out;
2913         }
2914
2915       // dst NAT
2916       init_ed_k (&kv16, ip->dst_address.as_u32,
2917                  vnet_buffer (b)->ip.reass.l4_dst_port, ip->src_address.as_u32,
2918                  vnet_buffer (b)->ip.reass.l4_src_port, rx_fib_index,
2919                  ip->protocol);
2920       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
2921         {
2922           next_worker_index = ed_value_get_thread_index (&value16);
2923           vnet_buffer2 (b)->nat.cached_dst_nat_session_index =
2924             ed_value_get_session_index (&value16);
2925           goto out;
2926         }
2927     }
2928
2929   hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
2930     (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
2931
2932   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
2933     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
2934   else
2935     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
2936
2937 out:
2938   if (PREDICT_TRUE (!is_output))
2939     {
2940       nat_elog_debug_handoff (sm, "HANDOFF IN2OUT", next_worker_index,
2941                               rx_fib_index,
2942                               clib_net_to_host_u32 (ip->src_address.as_u32),
2943                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2944     }
2945   else
2946     {
2947       nat_elog_debug_handoff (sm, "HANDOFF IN2OUT-OUTPUT-FEATURE",
2948                               next_worker_index, rx_fib_index,
2949                               clib_net_to_host_u32 (ip->src_address.as_u32),
2950                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2951     }
2952
2953   return next_worker_index;
2954 }
2955
2956 u32
2957 nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
2958                                   u32 rx_fib_index, u8 is_output)
2959 {
2960   snat_main_t *sm = &snat_main;
2961   clib_bihash_kv_16_8_t kv16, value16;
2962
2963   u8 proto, next_worker_index = 0;
2964   u16 port;
2965   snat_static_mapping_t *m;
2966   u32 hash;
2967
2968   proto = ip->protocol;
2969
2970   if (PREDICT_FALSE (IP_PROTOCOL_ICMP == proto))
2971     {
2972       ip4_address_t lookup_saddr, lookup_daddr;
2973       u16 lookup_sport, lookup_dport;
2974       u8 lookup_protocol;
2975       if (!nat_get_icmp_session_lookup_values (
2976             b, ip, &lookup_saddr, &lookup_sport, &lookup_daddr, &lookup_dport,
2977             &lookup_protocol))
2978         {
2979           init_ed_k (&kv16, lookup_saddr.as_u32, lookup_sport,
2980                      lookup_daddr.as_u32, lookup_dport, rx_fib_index,
2981                      lookup_protocol);
2982           if (PREDICT_TRUE (
2983                 !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
2984             {
2985               next_worker_index = ed_value_get_thread_index (&value16);
2986               nat_elog_debug_handoff (
2987                 sm, "HANDOFF OUT2IN (session)", next_worker_index,
2988                 rx_fib_index, clib_net_to_host_u32 (ip->src_address.as_u32),
2989                 clib_net_to_host_u32 (ip->dst_address.as_u32));
2990               return next_worker_index;
2991             }
2992         }
2993     }
2994
2995   init_ed_k (&kv16, ip->src_address.as_u32,
2996              vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
2997              vnet_buffer (b)->ip.reass.l4_dst_port, rx_fib_index,
2998              ip->protocol);
2999
3000   if (PREDICT_TRUE (
3001         !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
3002     {
3003       vnet_buffer2 (b)->nat.cached_session_index =
3004         ed_value_get_session_index (&value16);
3005       next_worker_index = ed_value_get_thread_index (&value16);
3006       nat_elog_debug_handoff (sm, "HANDOFF OUT2IN (session)",
3007                               next_worker_index, rx_fib_index,
3008                               clib_net_to_host_u32 (ip->src_address.as_u32),
3009                               clib_net_to_host_u32 (ip->dst_address.as_u32));
3010       return next_worker_index;
3011     }
3012
3013   /* first try static mappings without port */
3014   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3015     {
3016       m = nat44_ed_sm_o2i_lookup (sm, ip->dst_address, 0, 0, proto);
3017       if (m)
3018         {
3019           {
3020             next_worker_index = m->workers[0];
3021             goto done;
3022           }
3023         }
3024     }
3025
3026   /* unknown protocol */
3027   if (PREDICT_FALSE (nat44_ed_is_unk_proto (proto)))
3028     {
3029       /* use current thread */
3030       next_worker_index = vlib_get_thread_index ();
3031       goto done;
3032     }
3033
3034   port = vnet_buffer (b)->ip.reass.l4_dst_port;
3035
3036   if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
3037     {
3038       udp_header_t *udp = ip4_next_header (ip);
3039       icmp46_header_t *icmp = (icmp46_header_t *) udp;
3040       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3041       if (!icmp_type_is_error_message
3042           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
3043         port = vnet_buffer (b)->ip.reass.l4_src_port;
3044       else
3045         {
3046           /* if error message, then it's not fragmented and we can access it */
3047           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3048           proto = inner_ip->protocol;
3049           void *l4_header = ip4_next_header (inner_ip);
3050           switch (proto)
3051             {
3052             case IP_PROTOCOL_ICMP:
3053               icmp = (icmp46_header_t *) l4_header;
3054               echo = (icmp_echo_header_t *) (icmp + 1);
3055               port = echo->identifier;
3056               break;
3057             case IP_PROTOCOL_UDP:
3058               /* breakthrough */
3059             case IP_PROTOCOL_TCP:
3060               port = ((tcp_udp_header_t *) l4_header)->src_port;
3061               break;
3062             default:
3063               next_worker_index = vlib_get_thread_index ();
3064               goto done;
3065             }
3066         }
3067     }
3068
3069   /* try static mappings with port */
3070   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3071     {
3072       m = nat44_ed_sm_o2i_lookup (sm, ip->dst_address, port, 0, proto);
3073       if (m)
3074         {
3075           if (!is_sm_lb (m->flags))
3076             {
3077               next_worker_index = m->workers[0];
3078               goto done;
3079             }
3080
3081           hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
3082             (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
3083
3084           if (PREDICT_TRUE (is_pow2 (_vec_len (m->workers))))
3085             next_worker_index =
3086               m->workers[hash & (_vec_len (m->workers) - 1)];
3087           else
3088             next_worker_index = m->workers[hash % _vec_len (m->workers)];
3089           goto done;
3090         }
3091     }
3092
3093   /* worker by outside port */
3094   next_worker_index = sm->first_worker_index;
3095   next_worker_index +=
3096     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
3097
3098 done:
3099   nat_elog_debug_handoff (sm, "HANDOFF OUT2IN", next_worker_index,
3100                           rx_fib_index,
3101                           clib_net_to_host_u32 (ip->src_address.as_u32),
3102                           clib_net_to_host_u32 (ip->dst_address.as_u32));
3103   return next_worker_index;
3104 }
3105
3106 u32
3107 nat44_get_max_session_limit ()
3108 {
3109   snat_main_t *sm = &snat_main;
3110   u32 max_limit = 0, len = 0;
3111
3112   for (; len < vec_len (sm->max_translations_per_fib); len++)
3113     {
3114       if (max_limit < sm->max_translations_per_fib[len])
3115         max_limit = sm->max_translations_per_fib[len];
3116     }
3117   return max_limit;
3118 }
3119
3120 int
3121 nat44_set_session_limit (u32 session_limit, u32 vrf_id)
3122 {
3123   snat_main_t *sm = &snat_main;
3124   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
3125   u32 len = vec_len (sm->max_translations_per_fib);
3126
3127   if (len <= fib_index)
3128     {
3129       vec_validate (sm->max_translations_per_fib, fib_index + 1);
3130
3131       for (; len < vec_len (sm->max_translations_per_fib); len++)
3132         sm->max_translations_per_fib[len] = sm->max_translations_per_thread;
3133     }
3134
3135   sm->max_translations_per_fib[fib_index] = session_limit;
3136   return 0;
3137 }
3138
3139 int
3140 nat44_update_session_limit (u32 session_limit, u32 vrf_id)
3141 {
3142   snat_main_t *sm = &snat_main;
3143
3144   if (nat44_set_session_limit (session_limit, vrf_id))
3145     return 1;
3146   sm->max_translations_per_thread = nat44_get_max_session_limit ();
3147
3148   vlib_stats_set_gauge (sm->max_cfg_sessions_gauge,
3149                         sm->max_translations_per_thread);
3150
3151   sm->translation_buckets =
3152     nat_calc_bihash_buckets (sm->max_translations_per_thread);
3153
3154   nat44_ed_sessions_clear ();
3155   return 0;
3156 }
3157
3158 static void
3159 nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations,
3160                          u32 translation_buckets)
3161 {
3162   dlist_elt_t *head;
3163
3164   pool_alloc (tsm->sessions, translations);
3165   pool_alloc (tsm->lru_pool, translations);
3166
3167   pool_get (tsm->lru_pool, head);
3168   tsm->tcp_trans_lru_head_index = head - tsm->lru_pool;
3169   clib_dlist_init (tsm->lru_pool, tsm->tcp_trans_lru_head_index);
3170
3171   pool_get (tsm->lru_pool, head);
3172   tsm->tcp_estab_lru_head_index = head - tsm->lru_pool;
3173   clib_dlist_init (tsm->lru_pool, tsm->tcp_estab_lru_head_index);
3174
3175   pool_get (tsm->lru_pool, head);
3176   tsm->udp_lru_head_index = head - tsm->lru_pool;
3177   clib_dlist_init (tsm->lru_pool, tsm->udp_lru_head_index);
3178
3179   pool_get (tsm->lru_pool, head);
3180   tsm->icmp_lru_head_index = head - tsm->lru_pool;
3181   clib_dlist_init (tsm->lru_pool, tsm->icmp_lru_head_index);
3182
3183   pool_get (tsm->lru_pool, head);
3184   tsm->unk_proto_lru_head_index = head - tsm->lru_pool;
3185   clib_dlist_init (tsm->lru_pool, tsm->unk_proto_lru_head_index);
3186 }
3187
3188 static void
3189 reinit_ed_flow_hash ()
3190 {
3191   snat_main_t *sm = &snat_main;
3192   // we expect 2 flows per session, so multiply translation_buckets by 2
3193   clib_bihash_init_16_8 (
3194     &sm->flow_hash, "ed-flow-hash",
3195     clib_max (1, sm->num_workers) * 2 * sm->translation_buckets, 0);
3196   clib_bihash_set_kvp_format_fn_16_8 (&sm->flow_hash, format_ed_session_kvp);
3197 }
3198
3199 static void
3200 nat44_ed_db_init (u32 translations, u32 translation_buckets)
3201 {
3202   snat_main_t *sm = &snat_main;
3203   snat_main_per_thread_data_t *tsm;
3204
3205   reinit_ed_flow_hash ();
3206
3207   vec_foreach (tsm, sm->per_thread_data)
3208     {
3209       nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
3210                                sm->translation_buckets);
3211     }
3212 }
3213
3214 static void
3215 nat44_ed_worker_db_free (snat_main_per_thread_data_t *tsm)
3216 {
3217   pool_free (tsm->lru_pool);
3218   pool_free (tsm->sessions);
3219   vec_free (tsm->per_vrf_sessions_vec);
3220 }
3221
3222 void
3223 nat44_ed_sessions_clear ()
3224 {
3225   snat_main_t *sm = &snat_main;
3226   snat_main_per_thread_data_t *tsm;
3227
3228   reinit_ed_flow_hash ();
3229
3230   vec_foreach (tsm, sm->per_thread_data)
3231     {
3232       nat44_ed_worker_db_free (tsm);
3233       nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
3234                                sm->translation_buckets);
3235     }
3236   vlib_zero_simple_counter (&sm->total_sessions, 0);
3237 }
3238
3239 static void
3240 nat44_ed_add_del_static_mapping_cb (ip4_main_t *im, uword opaque,
3241                                     u32 sw_if_index, ip4_address_t *address,
3242                                     u32 address_length, u32 if_address_index,
3243                                     u32 is_delete)
3244 {
3245   snat_static_mapping_resolve_t *rp;
3246   snat_main_t *sm = &snat_main;
3247   int rv = 0;
3248
3249   if (!sm->enabled)
3250     {
3251       return;
3252     }
3253
3254   vec_foreach (rp, sm->sm_to_resolve)
3255     {
3256       if (sw_if_index == rp->sw_if_index)
3257         {
3258           if (is_delete)
3259             {
3260               if (rp->is_resolved)
3261                 {
3262                   rv = nat44_ed_del_static_mapping_internal (
3263                     rp->l_addr, address[0], rp->l_port, rp->e_port, rp->proto,
3264                     rp->vrf_id, rp->flags);
3265                   if (rv)
3266                     {
3267                       nat_log_err ("ed del static mapping failed");
3268                     }
3269                   else
3270                     {
3271                       rp->is_resolved = 0;
3272                     }
3273                 }
3274             }
3275           else
3276             {
3277               if (!rp->is_resolved)
3278                 {
3279                   rv = nat44_ed_add_static_mapping_internal (
3280                     rp->l_addr, address[0], rp->l_port, rp->e_port, rp->proto,
3281                     rp->vrf_id, ~0, rp->flags, rp->pool_addr, rp->tag);
3282                   if (rv)
3283                     {
3284                       nat_log_err ("ed add static mapping failed");
3285                     }
3286                   else
3287                     {
3288                       rp->is_resolved = 1;
3289                     }
3290                 }
3291             }
3292         }
3293     }
3294 }
3295
3296 static int
3297 nat44_ed_get_addr_resolve_record (u32 sw_if_index, u8 twice_nat, int *out)
3298 {
3299   snat_main_t *sm = &snat_main;
3300   snat_address_resolve_t *rp;
3301   int i;
3302
3303   for (i = 0; i < vec_len (sm->addr_to_resolve); i++)
3304     {
3305       rp = sm->addr_to_resolve + i;
3306
3307       if ((rp->sw_if_index == sw_if_index) && (rp->is_twice_nat == twice_nat))
3308         {
3309           if (out)
3310             {
3311               *out = i;
3312             }
3313           return 0;
3314         }
3315     }
3316   return 1;
3317 }
3318 static int
3319 nat44_ed_del_addr_resolve_record (u32 sw_if_index, u8 twice_nat)
3320 {
3321   snat_main_t *sm = &snat_main;
3322   int i;
3323   if (!nat44_ed_get_addr_resolve_record (sw_if_index, twice_nat, &i))
3324     {
3325       vec_del1 (sm->addr_to_resolve, i);
3326       return 0;
3327     }
3328   return 1;
3329 }
3330
3331 static void
3332 nat44_ed_add_del_interface_address_cb (ip4_main_t *im, uword opaque,
3333                                        u32 sw_if_index, ip4_address_t *address,
3334                                        u32 address_length,
3335                                        u32 if_address_index, u32 is_delete)
3336 {
3337   snat_main_t *sm = &snat_main;
3338   snat_address_resolve_t *arp;
3339   snat_address_t *ap;
3340   u8 twice_nat = 0;
3341   int i, rv;
3342
3343   if (!sm->enabled)
3344     {
3345       return;
3346     }
3347
3348   if (nat44_ed_get_addr_resolve_record (sw_if_index, twice_nat, &i))
3349     {
3350       twice_nat = 1;
3351       if (nat44_ed_get_addr_resolve_record (sw_if_index, twice_nat, &i))
3352         {
3353           u32 fib_index =
3354             ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
3355           vec_foreach (ap, sm->addresses)
3356             {
3357               if ((fib_index == ap->fib_index) &&
3358                   (address->as_u32 == ap->addr.as_u32))
3359                 {
3360                   if (!is_delete)
3361                     {
3362                       ap->addr_len = address_length;
3363                       ap->sw_if_index = sw_if_index;
3364                       ap->net.as_u32 = (ap->addr.as_u32 >> (32 - ap->addr_len))
3365                                        << (32 - ap->addr_len);
3366
3367                       nat_log_debug (
3368                         "pool addr %U binds to -> sw_if_idx: %u net: %U/%u",
3369                         format_ip4_address, &ap->addr, ap->sw_if_index,
3370                         format_ip4_address, &ap->net, ap->addr_len);
3371                     }
3372                   else
3373                     {
3374                       ap->addr_len = ~0;
3375                     }
3376                   break;
3377                 }
3378             }
3379           return;
3380         }
3381     }
3382
3383   arp = sm->addr_to_resolve + i;
3384
3385   if (!is_delete)
3386     {
3387       if (arp->is_resolved)
3388         {
3389           return;
3390         }
3391
3392       rv = nat44_ed_add_address (address, ~0, arp->is_twice_nat);
3393       if (0 == rv)
3394         {
3395           arp->is_resolved = 1;
3396         }
3397     }
3398   else
3399     {
3400       if (!arp->is_resolved)
3401         {
3402           return;
3403         }
3404
3405       rv = nat44_ed_del_address (address[0], arp->is_twice_nat);
3406       if (0 == rv)
3407         {
3408           arp->is_resolved = 0;
3409         }
3410     }
3411 }
3412
3413 int
3414 nat44_ed_add_interface_address (u32 sw_if_index, u8 twice_nat)
3415 {
3416   snat_main_t *sm = &snat_main;
3417   ip4_main_t *ip4_main = sm->ip4_main;
3418   ip4_address_t *first_int_addr;
3419   snat_address_resolve_t *ap;
3420   int rv;
3421
3422   if (!sm->enabled)
3423     {
3424       return VNET_API_ERROR_UNSUPPORTED;
3425     }
3426
3427   if (!nat44_ed_get_addr_resolve_record (sw_if_index, twice_nat, 0))
3428     {
3429       return VNET_API_ERROR_VALUE_EXIST;
3430     }
3431
3432   vec_add2 (sm->addr_to_resolve, ap, 1);
3433   ap->sw_if_index = sw_if_index;
3434   ap->is_twice_nat = twice_nat;
3435   ap->is_resolved = 0;
3436
3437   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0);
3438   if (first_int_addr)
3439     {
3440       rv = nat44_ed_add_address (first_int_addr, ~0, twice_nat);
3441       if (0 != rv)
3442         {
3443           nat44_ed_del_addr_resolve_record (sw_if_index, twice_nat);
3444           return rv;
3445         }
3446       ap->is_resolved = 1;
3447     }
3448
3449   return 0;
3450 }
3451
3452 int
3453 nat44_ed_del_interface_address (u32 sw_if_index, u8 twice_nat)
3454 {
3455   snat_main_t *sm = &snat_main;
3456   ip4_main_t *ip4_main = sm->ip4_main;
3457   ip4_address_t *first_int_addr;
3458
3459   if (!sm->enabled)
3460     {
3461       return VNET_API_ERROR_UNSUPPORTED;
3462     }
3463
3464   if (nat44_ed_del_addr_resolve_record (sw_if_index, twice_nat))
3465     {
3466       return VNET_API_ERROR_NO_SUCH_ENTRY;
3467     }
3468
3469   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0);
3470   if (first_int_addr)
3471     {
3472       return nat44_ed_del_address (first_int_addr[0], twice_nat);
3473     }
3474
3475   return 0;
3476 }
3477
3478 int
3479 nat44_ed_del_session (snat_main_t *sm, ip4_address_t *addr, u16 port,
3480                       ip4_address_t *eh_addr, u16 eh_port, u8 proto,
3481                       u32 vrf_id, int is_in)
3482 {
3483   ip4_header_t ip;
3484   clib_bihash_kv_16_8_t kv, value;
3485   u32 fib_index;
3486   snat_session_t *s;
3487   snat_main_per_thread_data_t *tsm;
3488
3489   if (!sm->enabled)
3490     {
3491       return VNET_API_ERROR_UNSUPPORTED;
3492     }
3493
3494   fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
3495   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
3496   if (sm->num_workers > 1)
3497     tsm = vec_elt_at_index (
3498       sm->per_thread_data,
3499       nat44_ed_get_in2out_worker_index (0, &ip, fib_index, 0));
3500   else
3501     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
3502
3503   init_ed_k (&kv, addr->as_u32, port, eh_addr->as_u32, eh_port, fib_index,
3504              proto);
3505   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
3506     {
3507       return VNET_API_ERROR_NO_SUCH_ENTRY;
3508     }
3509
3510   if (pool_is_free_index (tsm->sessions, ed_value_get_session_index (&value)))
3511     return VNET_API_ERROR_UNSPECIFIED;
3512   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
3513   nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
3514   nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
3515   return 0;
3516 }
3517
3518 VLIB_NODE_FN (nat_default_node) (vlib_main_t * vm,
3519                                  vlib_node_runtime_t * node,
3520                                  vlib_frame_t * frame)
3521 {
3522   return 0;
3523 }
3524
3525 VLIB_REGISTER_NODE (nat_default_node) = {
3526   .name = "nat-default",
3527   .vector_size = sizeof (u32),
3528   .format_trace = 0,
3529   .type = VLIB_NODE_TYPE_INTERNAL,
3530   .n_errors = 0,
3531   .n_next_nodes = NAT_N_NEXT,
3532   .next_nodes = {
3533     [NAT_NEXT_DROP] = "error-drop",
3534     [NAT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3535     [NAT_NEXT_IN2OUT_ED_FAST_PATH] = "nat44-ed-in2out",
3536     [NAT_NEXT_IN2OUT_ED_SLOW_PATH] = "nat44-ed-in2out-slowpath",
3537     [NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH] = "nat44-ed-in2out-output",
3538     [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
3539     [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in",
3540     [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath",
3541     [NAT_NEXT_IN2OUT_CLASSIFY] = "nat44-in2out-worker-handoff",
3542     [NAT_NEXT_OUT2IN_CLASSIFY] = "nat44-out2in-worker-handoff",
3543   },
3544 };
3545
3546 void
3547 nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f)
3548 {
3549   f->l3_csum_delta = 0;
3550   f->l4_csum_delta = 0;
3551   if (f->ops & NAT_FLOW_OP_SADDR_REWRITE &&
3552       f->rewrite.saddr.as_u32 != f->match.saddr.as_u32)
3553     {
3554       f->l3_csum_delta =
3555         ip_csum_add_even (f->l3_csum_delta, f->rewrite.saddr.as_u32);
3556       f->l3_csum_delta =
3557         ip_csum_sub_even (f->l3_csum_delta, f->match.saddr.as_u32);
3558     }
3559   else
3560     {
3561       f->rewrite.saddr.as_u32 = f->match.saddr.as_u32;
3562     }
3563   if (f->ops & NAT_FLOW_OP_DADDR_REWRITE &&
3564       f->rewrite.daddr.as_u32 != f->match.daddr.as_u32)
3565     {
3566       f->l3_csum_delta =
3567         ip_csum_add_even (f->l3_csum_delta, f->rewrite.daddr.as_u32);
3568       f->l3_csum_delta =
3569         ip_csum_sub_even (f->l3_csum_delta, f->match.daddr.as_u32);
3570     }
3571   else
3572     {
3573       f->rewrite.daddr.as_u32 = f->match.daddr.as_u32;
3574     }
3575   if (f->ops & NAT_FLOW_OP_SPORT_REWRITE && f->rewrite.sport != f->match.sport)
3576     {
3577       f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.sport);
3578       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport);
3579     }
3580   else
3581     {
3582       f->rewrite.sport = f->match.sport;
3583     }
3584   if (f->ops & NAT_FLOW_OP_DPORT_REWRITE && f->rewrite.dport != f->match.dport)
3585     {
3586       f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.dport);
3587       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.dport);
3588     }
3589   else
3590     {
3591       f->rewrite.dport = f->match.dport;
3592     }
3593   if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE &&
3594       f->rewrite.icmp_id != f->match.sport)
3595     {
3596       f->l4_csum_delta =
3597         ip_csum_add_even (f->l4_csum_delta, f->rewrite.icmp_id);
3598       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport);
3599     }
3600   else
3601     {
3602       f->rewrite.icmp_id = f->match.sport;
3603     }
3604   if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3605     {
3606     }
3607   else
3608     {
3609       f->rewrite.fib_index = f->match.fib_index;
3610     }
3611 }
3612
3613 static_always_inline int
3614 nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
3615                             ip4_header_t *ip, nat_6t_flow_t *f);
3616
3617 static_always_inline void
3618 nat_6t_flow_ip4_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
3619                            nat_6t_flow_t *f, ip_protocol_t proto,
3620                            int is_icmp_inner_ip4, int skip_saddr_rewrite)
3621 {
3622   udp_header_t *udp = ip4_next_header (ip);
3623   tcp_header_t *tcp = (tcp_header_t *) udp;
3624
3625   if ((IP_PROTOCOL_TCP == proto || IP_PROTOCOL_UDP == proto) &&
3626       !vnet_buffer (b)->ip.reass.is_non_first_fragment)
3627     {
3628       if (!is_icmp_inner_ip4)
3629         { // regular case
3630           ip->src_address = f->rewrite.saddr;
3631           ip->dst_address = f->rewrite.daddr;
3632           udp->src_port = f->rewrite.sport;
3633           udp->dst_port = f->rewrite.dport;
3634         }
3635       else
3636         { // icmp inner ip4 - reversed saddr/daddr
3637           ip->src_address = f->rewrite.daddr;
3638           ip->dst_address = f->rewrite.saddr;
3639           udp->src_port = f->rewrite.dport;
3640           udp->dst_port = f->rewrite.sport;
3641         }
3642
3643       if (IP_PROTOCOL_TCP == proto)
3644         {
3645           ip_csum_t tcp_sum = tcp->checksum;
3646           tcp_sum = ip_csum_sub_even (tcp_sum, f->l3_csum_delta);
3647           tcp_sum = ip_csum_sub_even (tcp_sum, f->l4_csum_delta);
3648           mss_clamping (sm->mss_clamping, tcp, &tcp_sum);
3649           tcp->checksum = ip_csum_fold (tcp_sum);
3650         }
3651       else if (IP_PROTOCOL_UDP == proto && udp->checksum)
3652         {
3653           ip_csum_t udp_sum = udp->checksum;
3654           udp_sum = ip_csum_sub_even (udp_sum, f->l3_csum_delta);
3655           udp_sum = ip_csum_sub_even (udp_sum, f->l4_csum_delta);
3656           udp->checksum = ip_csum_fold (udp_sum);
3657         }
3658     }
3659   else
3660     {
3661       if (!is_icmp_inner_ip4)
3662         { // regular case
3663           if (!skip_saddr_rewrite)
3664             {
3665               ip->src_address = f->rewrite.saddr;
3666             }
3667           ip->dst_address = f->rewrite.daddr;
3668         }
3669       else
3670         { // icmp inner ip4 - reversed saddr/daddr
3671           ip->src_address = f->rewrite.daddr;
3672           ip->dst_address = f->rewrite.saddr;
3673         }
3674     }
3675
3676   if (skip_saddr_rewrite)
3677     {
3678       ip->checksum = ip4_header_checksum (ip);
3679     }
3680   else
3681     {
3682       ip_csum_t ip_sum = ip->checksum;
3683       ip_sum = ip_csum_sub_even (ip_sum, f->l3_csum_delta);
3684       ip->checksum = ip_csum_fold (ip_sum);
3685     }
3686   if (0xffff == ip->checksum)
3687     ip->checksum = 0;
3688   ASSERT (ip4_header_checksum_is_valid (ip));
3689 }
3690
3691 static_always_inline int
3692 it_fits (vlib_main_t *vm, vlib_buffer_t *b, void *object, size_t size)
3693 {
3694   int result = ((u8 *) object + size <=
3695                 (u8 *) vlib_buffer_get_current (b) + b->current_length) &&
3696                vlib_object_within_buffer_data (vm, b, object, size);
3697   return result;
3698 }
3699
3700 static_always_inline int
3701 nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
3702                             ip4_header_t *ip, nat_6t_flow_t *f)
3703 {
3704   if (IP_PROTOCOL_ICMP != ip->protocol)
3705     return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3706
3707   icmp46_header_t *icmp = ip4_next_header (ip);
3708   icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3709
3710   if ((!vnet_buffer (b)->ip.reass.is_non_first_fragment))
3711     {
3712       if (!it_fits (vm, b, icmp, sizeof (*icmp)))
3713         {
3714           return NAT_ED_TRNSL_ERR_PACKET_TRUNCATED;
3715         }
3716
3717       if (!icmp_type_is_error_message (icmp->type))
3718         {
3719           if ((f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE) &&
3720               (f->rewrite.icmp_id != echo->identifier))
3721             {
3722               ip_csum_t sum = icmp->checksum;
3723               sum = ip_csum_update (sum, echo->identifier, f->rewrite.icmp_id,
3724                                     icmp_echo_header_t,
3725                                     identifier /* changed member */);
3726               echo->identifier = f->rewrite.icmp_id;
3727               icmp->checksum = ip_csum_fold (sum);
3728             }
3729         }
3730       else
3731         {
3732           ip_csum_t sum = ip_incremental_checksum (
3733             0, icmp,
3734             clib_net_to_host_u16 (ip->length) - ip4_header_bytes (ip));
3735           sum = (u16) ~ip_csum_fold (sum);
3736           if (sum != 0)
3737             {
3738               return NAT_ED_TRNSL_ERR_INVALID_CSUM;
3739             }
3740
3741           // errors are not fragmented
3742           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3743
3744           if (!ip4_header_checksum_is_valid (inner_ip))
3745             {
3746               return NAT_ED_TRNSL_ERR_INNER_IP_CORRUPT;
3747             }
3748
3749           ip_protocol_t inner_proto = inner_ip->protocol;
3750
3751           ip_csum_t old_icmp_sum = icmp->checksum;
3752           ip_csum_t old_inner_ip_sum = inner_ip->checksum;
3753           ip_csum_t old_udp_sum;
3754           ip_csum_t old_tcp_sum;
3755           ip_csum_t new_icmp_sum;
3756           udp_header_t *udp;
3757           tcp_header_t *tcp;
3758
3759           switch (inner_proto)
3760             {
3761             case IP_PROTOCOL_UDP:
3762               udp = (udp_header_t *) (inner_ip + 1);
3763               if (!it_fits (vm, b, udp, sizeof (*udp)))
3764                 {
3765                   return NAT_ED_TRNSL_ERR_PACKET_TRUNCATED;
3766                 }
3767               old_udp_sum = udp->checksum;
3768               nat_6t_flow_ip4_translate (sm, b, inner_ip, f, inner_proto,
3769                                          1 /* is_icmp_inner_ip4 */,
3770                                          0 /* skip_saddr_rewrite */);
3771               new_icmp_sum = ip_csum_sub_even (old_icmp_sum, f->l3_csum_delta);
3772               new_icmp_sum = ip_csum_sub_even (new_icmp_sum, f->l4_csum_delta);
3773               new_icmp_sum =
3774                 ip_csum_update (new_icmp_sum, old_inner_ip_sum,
3775                                 inner_ip->checksum, ip4_header_t, checksum);
3776               new_icmp_sum =
3777                 ip_csum_update (new_icmp_sum, old_udp_sum, udp->checksum,
3778                                 udp_header_t, checksum);
3779               new_icmp_sum = ip_csum_fold (new_icmp_sum);
3780               icmp->checksum = new_icmp_sum;
3781               break;
3782             case IP_PROTOCOL_TCP:
3783               tcp = (tcp_header_t *) (inner_ip + 1);
3784               if (!it_fits (vm, b, tcp, sizeof (*tcp)))
3785                 {
3786                   return NAT_ED_TRNSL_ERR_PACKET_TRUNCATED;
3787                 }
3788               old_tcp_sum = tcp->checksum;
3789               nat_6t_flow_ip4_translate (sm, b, inner_ip, f, inner_proto,
3790                                          1 /* is_icmp_inner_ip4 */,
3791                                          0 /* skip_saddr_rewrite */);
3792               new_icmp_sum = ip_csum_sub_even (old_icmp_sum, f->l3_csum_delta);
3793               new_icmp_sum = ip_csum_sub_even (new_icmp_sum, f->l4_csum_delta);
3794               new_icmp_sum =
3795                 ip_csum_update (new_icmp_sum, old_inner_ip_sum,
3796                                 inner_ip->checksum, ip4_header_t, checksum);
3797               new_icmp_sum =
3798                 ip_csum_update (new_icmp_sum, old_tcp_sum, tcp->checksum,
3799                                 tcp_header_t, checksum);
3800               new_icmp_sum = ip_csum_fold (new_icmp_sum);
3801               icmp->checksum = new_icmp_sum;
3802               break;
3803             case IP_PROTOCOL_ICMP:
3804               nat_6t_flow_ip4_translate (sm, b, inner_ip, f, inner_proto,
3805                                          1 /* is_icmp_inner_ip4 */,
3806                                          0 /* skip_saddr_rewrite */);
3807               if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
3808                 {
3809                   icmp46_header_t *inner_icmp = ip4_next_header (inner_ip);
3810                   if (!it_fits (vm, b, inner_icmp, sizeof (*inner_icmp)))
3811                     {
3812                       return NAT_ED_TRNSL_ERR_PACKET_TRUNCATED;
3813                     }
3814                   icmp_echo_header_t *inner_echo =
3815                     (icmp_echo_header_t *) (inner_icmp + 1);
3816                   if (f->rewrite.icmp_id != inner_echo->identifier)
3817                     {
3818                       ip_csum_t sum = icmp->checksum;
3819                       sum = ip_csum_update (
3820                         sum, inner_echo->identifier, f->rewrite.icmp_id,
3821                         icmp_echo_header_t, identifier /* changed member */);
3822                       icmp->checksum = ip_csum_fold (sum);
3823                       ip_csum_t inner_sum = inner_icmp->checksum;
3824                       inner_sum = ip_csum_update (
3825                         sum, inner_echo->identifier, f->rewrite.icmp_id,
3826                         icmp_echo_header_t, identifier /* changed member */);
3827                       inner_icmp->checksum = ip_csum_fold (inner_sum);
3828                       inner_echo->identifier = f->rewrite.icmp_id;
3829                     }
3830                 }
3831               break;
3832             default:
3833               clib_warning ("unexpected NAT protocol value `%d'", inner_proto);
3834               return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3835             }
3836         }
3837     }
3838
3839   return NAT_ED_TRNSL_ERR_SUCCESS;
3840 }
3841
3842 static_always_inline nat_translation_error_e
3843 nat_6t_flow_buf_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
3844                            ip4_header_t *ip, nat_6t_flow_t *f,
3845                            ip_protocol_t proto, int is_output_feature,
3846                            int is_i2o)
3847 {
3848   if (!is_output_feature && f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3849     {
3850       vnet_buffer (b)->sw_if_index[VLIB_TX] = f->rewrite.fib_index;
3851     }
3852
3853   if (IP_PROTOCOL_ICMP == proto)
3854     {
3855       if (ip->src_address.as_u32 != f->rewrite.saddr.as_u32)
3856         {
3857           // packet is returned from a router, not from destination
3858           // skip source address rewrite if in o2i path
3859           nat_6t_flow_ip4_translate (sm, b, ip, f, proto,
3860                                      0 /* is_icmp_inner_ip4 */,
3861                                      !is_i2o /* skip_saddr_rewrite */);
3862         }
3863       else
3864         {
3865           nat_6t_flow_ip4_translate (sm, b, ip, f, proto,
3866                                      0 /* is_icmp_inner_ip4 */,
3867                                      0 /* skip_saddr_rewrite */);
3868         }
3869       return nat_6t_flow_icmp_translate (vm, sm, b, ip, f);
3870     }
3871
3872   nat_6t_flow_ip4_translate (sm, b, ip, f, proto, 0 /* is_icmp_inner_ip4 */,
3873                              0 /* skip_saddr_rewrite */);
3874
3875   return NAT_ED_TRNSL_ERR_SUCCESS;
3876 }
3877
3878 nat_translation_error_e
3879 nat_6t_flow_buf_translate_i2o (vlib_main_t *vm, snat_main_t *sm,
3880                                vlib_buffer_t *b, ip4_header_t *ip,
3881                                nat_6t_flow_t *f, ip_protocol_t proto,
3882                                int is_output_feature)
3883 {
3884   return nat_6t_flow_buf_translate (vm, sm, b, ip, f, proto, is_output_feature,
3885                                     1 /* is_i2o */);
3886 }
3887
3888 nat_translation_error_e
3889 nat_6t_flow_buf_translate_o2i (vlib_main_t *vm, snat_main_t *sm,
3890                                vlib_buffer_t *b, ip4_header_t *ip,
3891                                nat_6t_flow_t *f, ip_protocol_t proto,
3892                                int is_output_feature)
3893 {
3894   return nat_6t_flow_buf_translate (vm, sm, b, ip, f, proto, is_output_feature,
3895                                     0 /* is_i2o */);
3896 }
3897
3898 u8 *
3899 format_nat_6t (u8 *s, va_list *args)
3900 {
3901   nat_6t_t *t = va_arg (*args, nat_6t_t *);
3902
3903   s = format (s, "saddr %U sport %u daddr %U dport %u proto %U fib_idx %u",
3904               format_ip4_address, t->saddr.as_u8,
3905               clib_net_to_host_u16 (t->sport), format_ip4_address,
3906               t->daddr.as_u8, clib_net_to_host_u16 (t->dport),
3907               format_ip_protocol, t->proto, t->fib_index);
3908   return s;
3909 }
3910
3911 u8 *
3912 format_nat_ed_translation_error (u8 *s, va_list *args)
3913 {
3914   nat_translation_error_e e = va_arg (*args, nat_translation_error_e);
3915
3916   switch (e)
3917     {
3918     case NAT_ED_TRNSL_ERR_SUCCESS:
3919       s = format (s, "success");
3920       break;
3921     case NAT_ED_TRNSL_ERR_TRANSLATION_FAILED:
3922       s = format (s, "translation-failed");
3923       break;
3924     case NAT_ED_TRNSL_ERR_FLOW_MISMATCH:
3925       s = format (s, "flow-mismatch");
3926       break;
3927     case NAT_ED_TRNSL_ERR_PACKET_TRUNCATED:
3928       s = format (s, "packet-truncated");
3929       break;
3930     case NAT_ED_TRNSL_ERR_INNER_IP_CORRUPT:
3931       s = format (s, "inner-ip-corrupted");
3932       break;
3933     case NAT_ED_TRNSL_ERR_INVALID_CSUM:
3934       s = format (s, "invalid-checksum");
3935       break;
3936     }
3937   return s;
3938 }
3939
3940 u8 *
3941 format_nat_6t_flow (u8 *s, va_list *args)
3942 {
3943   nat_6t_flow_t *f = va_arg (*args, nat_6t_flow_t *);
3944
3945   s = format (s, "match: %U ", format_nat_6t, &f->match);
3946   int r = 0;
3947   if (f->ops & NAT_FLOW_OP_SADDR_REWRITE)
3948     {
3949       s = format (s, "rewrite: saddr %U ", format_ip4_address,
3950                   f->rewrite.saddr.as_u8);
3951       r = 1;
3952     }
3953   if (f->ops & NAT_FLOW_OP_SPORT_REWRITE)
3954     {
3955       if (!r)
3956         {
3957           s = format (s, "rewrite: ");
3958           r = 1;
3959         }
3960       s = format (s, "sport %u ", clib_net_to_host_u16 (f->rewrite.sport));
3961     }
3962   if (f->ops & NAT_FLOW_OP_DADDR_REWRITE)
3963     {
3964       if (!r)
3965         {
3966           s = format (s, "rewrite: ");
3967           r = 1;
3968         }
3969       s = format (s, "daddr %U ", format_ip4_address, f->rewrite.daddr.as_u8);
3970     }
3971   if (f->ops & NAT_FLOW_OP_DPORT_REWRITE)
3972     {
3973       if (!r)
3974         {
3975           s = format (s, "rewrite: ");
3976           r = 1;
3977         }
3978       s = format (s, "dport %u ", clib_net_to_host_u16 (f->rewrite.dport));
3979     }
3980   if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
3981     {
3982       if (!r)
3983         {
3984           s = format (s, "rewrite: ");
3985           r = 1;
3986         }
3987       s = format (s, "icmp-id %u ", clib_net_to_host_u16 (f->rewrite.icmp_id));
3988     }
3989   if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3990     {
3991       if (!r)
3992         {
3993           s = format (s, "rewrite: ");
3994           r = 1;
3995         }
3996       s = format (s, "txfib %u ", f->rewrite.fib_index);
3997     }
3998   return s;
3999 }
4000
4001 static inline void
4002 nat_syslog_nat44_sess (u32 ssubix, u32 sfibix, ip4_address_t *isaddr,
4003                        u16 isport, ip4_address_t *xsaddr, u16 xsport,
4004                        ip4_address_t *idaddr, u16 idport,
4005                        ip4_address_t *xdaddr, u16 xdport, u8 proto, u8 is_add,
4006                        u8 is_twicenat)
4007 {
4008   syslog_msg_t syslog_msg;
4009   fib_table_t *fib;
4010
4011   if (!syslog_is_enabled ())
4012     return;
4013
4014   if (syslog_severity_filter_block (SADD_SDEL_SEVERITY))
4015     return;
4016
4017   fib = fib_table_get (sfibix, FIB_PROTOCOL_IP4);
4018
4019   syslog_msg_init (&syslog_msg, NAT_FACILITY, SADD_SDEL_SEVERITY, NAT_APPNAME,
4020                    is_add ? SADD_MSGID : SDEL_MSGID);
4021
4022   syslog_msg_sd_init (&syslog_msg, NSESS_SDID);
4023   syslog_msg_add_sd_param (&syslog_msg, SSUBIX_SDPARAM_NAME, "%d", ssubix);
4024   syslog_msg_add_sd_param (&syslog_msg, SVLAN_SDPARAM_NAME, "%d",
4025                            fib->ft_table_id);
4026   syslog_msg_add_sd_param (&syslog_msg, IATYP_SDPARAM_NAME, IATYP_IPV4);
4027   syslog_msg_add_sd_param (&syslog_msg, ISADDR_SDPARAM_NAME, "%U",
4028                            format_ip4_address, isaddr);
4029   syslog_msg_add_sd_param (&syslog_msg, ISPORT_SDPARAM_NAME, "%d",
4030                            clib_net_to_host_u16 (isport));
4031   syslog_msg_add_sd_param (&syslog_msg, XATYP_SDPARAM_NAME, IATYP_IPV4);
4032   syslog_msg_add_sd_param (&syslog_msg, XSADDR_SDPARAM_NAME, "%U",
4033                            format_ip4_address, xsaddr);
4034   syslog_msg_add_sd_param (&syslog_msg, XSPORT_SDPARAM_NAME, "%d",
4035                            clib_net_to_host_u16 (xsport));
4036   syslog_msg_add_sd_param (&syslog_msg, PROTO_SDPARAM_NAME, "%d", proto);
4037   syslog_msg_add_sd_param (&syslog_msg, XDADDR_SDPARAM_NAME, "%U",
4038                            format_ip4_address, xdaddr);
4039   syslog_msg_add_sd_param (&syslog_msg, XDPORT_SDPARAM_NAME, "%d",
4040                            clib_net_to_host_u16 (xdport));
4041   if (is_twicenat)
4042     {
4043       syslog_msg_add_sd_param (&syslog_msg, IDADDR_SDPARAM_NAME, "%U",
4044                                format_ip4_address, idaddr);
4045       syslog_msg_add_sd_param (&syslog_msg, IDPORT_SDPARAM_NAME, "%d",
4046                                clib_net_to_host_u16 (idport));
4047     }
4048
4049   syslog_msg_send (&syslog_msg);
4050 }
4051
4052 void
4053 nat_syslog_nat44_sadd (u32 ssubix, u32 sfibix, ip4_address_t *isaddr,
4054                        u16 isport, ip4_address_t *idaddr, u16 idport,
4055                        ip4_address_t *xsaddr, u16 xsport,
4056                        ip4_address_t *xdaddr, u16 xdport, u8 proto,
4057                        u8 is_twicenat)
4058 {
4059   nat_syslog_nat44_sess (ssubix, sfibix, isaddr, isport, xsaddr, xsport,
4060                          idaddr, idport, xdaddr, xdport, proto, 1,
4061                          is_twicenat);
4062 }
4063
4064 void
4065 nat_syslog_nat44_sdel (u32 ssubix, u32 sfibix, ip4_address_t *isaddr,
4066                        u16 isport, ip4_address_t *idaddr, u16 idport,
4067                        ip4_address_t *xsaddr, u16 xsport,
4068                        ip4_address_t *xdaddr, u16 xdport, u8 proto,
4069                        u8 is_twicenat)
4070 {
4071   nat_syslog_nat44_sess (ssubix, sfibix, isaddr, isport, xsaddr, xsport,
4072                          idaddr, idport, xdaddr, xdport, proto, 0,
4073                          is_twicenat);
4074 }
4075
4076 u8 *
4077 format_nat44_ed_tcp_state (u8 *s, va_list *args)
4078 {
4079   nat44_ed_tcp_state_e e = va_arg (*args, nat44_ed_tcp_state_e);
4080   switch (e)
4081     {
4082     case NAT44_ED_TCP_STATE_CLOSED:
4083       s = format (s, "closed");
4084       break;
4085     case NAT44_ED_TCP_STATE_SYN_I2O:
4086       s = format (s, "SYN seen in in2out direction");
4087       break;
4088     case NAT44_ED_TCP_STATE_SYN_O2I:
4089       s = format (s, "SYN seen in out2in direction");
4090       break;
4091     case NAT44_ED_TCP_STATE_ESTABLISHED:
4092       s = format (s, "SYN seen in both directions/established");
4093       break;
4094     case NAT44_ED_TCP_STATE_FIN_I2O:
4095       s = format (s, "FIN seen in in2out direction");
4096       break;
4097     case NAT44_ED_TCP_STATE_FIN_O2I:
4098       s = format (s, "FIN seen in out2in direction");
4099       break;
4100     case NAT44_ED_TCP_STATE_RST_TRANS:
4101       s = format (s, "RST seen/transitory timeout");
4102       break;
4103     case NAT44_ED_TCP_STATE_FIN_TRANS:
4104       s = format (s, "FIN seen in both directions/transitory timeout");
4105       break;
4106     case NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I:
4107       s = format (s, "FIN seen in both directions/transitory timeout/session "
4108                      "reopening in out2in direction");
4109       break;
4110     case NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O:
4111       s = format (s, "FIN seen in both directions/transitory timeout/session "
4112                      "reopening in in2out direction");
4113       break;
4114     case NAT44_ED_TCP_N_STATE:
4115       s = format (s, "BUG! unexpected N_STATE! BUG!");
4116       break;
4117     }
4118   return s;
4119 }
4120
4121 /*
4122  * fd.io coding-style-patch-verification: ON
4123  *
4124  * Local Variables:
4125  * eval: (c-set-style "gnu")
4126  * End:
4127  */