nat: nat44-ed cleanup & fixes
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vpp/app/version.h>
17
18 #include <vnet/vnet.h>
19 #include <vnet/ip/ip.h>
20 #include <vnet/ip/ip4.h>
21 #include <vnet/ip/ip_table.h>
22 #include <vnet/ip/reass/ip4_sv_reass.h>
23 #include <vnet/fib/fib_table.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/plugin/plugin.h>
26 #include <vppinfra/bihash_16_8.h>
27
28 #include <nat/lib/log.h>
29 #include <nat/lib/nat_inlines.h>
30 #include <nat/lib/ipfix_logging.h>
31 #include <vnet/syslog/syslog.h>
32 #include <nat/lib/nat_syslog_constants.h>
33 #include <nat/lib/nat_syslog.h>
34
35 #include <nat/nat44-ed/nat44_ed.h>
36 #include <nat/nat44-ed/nat44_ed_affinity.h>
37 #include <nat/nat44-ed/nat44_ed_inlines.h>
38
39 #include <vlib/stats/stats.h>
40
/* Plugin-wide NAT44-ED state. Functions in this file access it through
 * a local `snat_main_t *sm = &snat_main;`. */
snat_main_t snat_main;

/* Forward declaration of the per-interface counter validation helper. */
static_always_inline void nat_validate_interface_counters (snat_main_t *sm,
                                                           u32 sw_if_index);
45
/* Guard for functions returning void: returns immediately from the
 * enclosing function when snat_main.enabled is not set. */
#define skip_if_disabled()                                                    \
  do                                                                          \
    {                                                                         \
      snat_main_t *sm = &snat_main;                                           \
      if (PREDICT_FALSE (!sm->enabled))                                       \
        return;                                                               \
    }                                                                         \
  while (0)
54
/* Guard for functions returning an integer status: when snat_main.enabled
 * is set, logs "plugin enabled" and makes the enclosing function return 1. */
#define fail_if_enabled()                                                     \
  do                                                                          \
    {                                                                         \
      snat_main_t *sm = &snat_main;                                           \
      if (PREDICT_FALSE (sm->enabled))                                        \
        {                                                                     \
          nat_log_err ("plugin enabled");                                     \
          return 1;                                                           \
        }                                                                     \
    }                                                                         \
  while (0)
66
/* Guard for functions returning an integer status: when snat_main.enabled
 * is not set, logs "plugin disabled" and makes the enclosing function
 * return 1. */
#define fail_if_disabled()                                                    \
  do                                                                          \
    {                                                                         \
      snat_main_t *sm = &snat_main;                                           \
      if (PREDICT_FALSE (!sm->enabled))                                       \
        {                                                                     \
          nat_log_err ("plugin disabled");                                    \
          return 1;                                                           \
        }                                                                     \
    }                                                                         \
  while (0)
78
/*
 * Feature registrations for the NAT44-ED graph nodes.
 *
 * Each entry names the feature arc the node is attached to (.arc_name),
 * the node itself (.node_name) and its ordering constraints relative to
 * other features on the same arc (.runs_after / .runs_before).
 */

/* Features on the "ip4-unicast" arc. */
VNET_FEATURE_INIT (nat_pre_in2out, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat-pre-in2out",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (nat_pre_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat-pre-out2in",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-dhcp-client-detect",
                               "ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-classify",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-handoff-classify",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-in2out-worker-handoff",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
};
VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-out2in-worker-handoff",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-dhcp-client-detect"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-in2out",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-out2in",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
                               "ip4-dhcp-client-detect"),
};

/* Features on the "ip4-output" arc; all of them are ordered after
 * "ip4-sv-reassembly-output-feature" and before "acl-plugin-out-ip4-fa". */
VNET_FEATURE_INIT (nat_pre_in2out_output, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat-pre-in2out-output",
  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
  .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-in2out-output-worker-handoff",
  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
  .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-ed-in2out-output",
  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
  .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
144
/* Plugin registration record: build version and human-readable
 * description of this plugin. */
VLIB_PLUGIN_REGISTER () = {
    .version = VPP_BUILD_VER,
    .description = "Network Address Translation (NAT)",
};
149
/* Forward declarations of file-local helpers used before their
 * definitions. */
static void nat44_ed_db_init (u32 translations, u32 translation_buckets);
static void nat44_ed_worker_db_free (snat_main_per_thread_data_t *tsm);

/* Workers behind nat44_ed_add_static_mapping() and
 * nat44_ed_del_static_mapping(); those wrappers validate the request and
 * handle interface-bound mappings before delegating here. */
static int nat44_ed_add_static_mapping_internal (
  ip4_address_t l_addr, ip4_address_t e_addr, u16 l_port, u16 e_port,
  ip_protocol_t proto, u32 vrf_id, u32 sw_if_index, u32 flags,
  ip4_address_t pool_addr, u8 *tag);
static int nat44_ed_del_static_mapping_internal (ip4_address_t l_addr,
                                                 ip4_address_t e_addr,
                                                 u16 l_port, u16 e_port,
                                                 ip_protocol_t proto,
                                                 u32 vrf_id, u32 flags);

/* Non-static prototype; the definition is not part of this section. */
u32 nat_calc_bihash_buckets (u32 n_elts);
164
165 static_always_inline int
166 nat44_ed_sm_i2o_add (snat_main_t *sm, snat_static_mapping_t *m,
167                      ip4_address_t addr, u16 port, u32 fib_index, u8 proto)
168 {
169   ASSERT (!pool_is_free (sm->static_mappings, m));
170   clib_bihash_kv_16_8_t kv;
171   nat44_ed_sm_init_i2o_kv (&kv, addr.as_u32, port, fib_index, proto,
172                            m - sm->static_mappings);
173   return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, 1 /*is_add*/);
174 }
175
176 static_always_inline int
177 nat44_ed_sm_i2o_del (snat_main_t *sm, ip4_address_t addr, u16 port,
178                      u32 fib_index, u8 proto)
179 {
180   clib_bihash_kv_16_8_t kv;
181   nat44_ed_sm_init_i2o_k (&kv, addr.as_u32, port, fib_index, proto);
182   return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, 0 /*is_add*/);
183 }
184
185 static_always_inline int
186 nat44_ed_sm_o2i_add (snat_main_t *sm, snat_static_mapping_t *m,
187                      ip4_address_t addr, u16 port, u32 fib_index, u8 proto)
188 {
189   ASSERT (!pool_is_free (sm->static_mappings, m));
190   clib_bihash_kv_16_8_t kv;
191   nat44_ed_sm_init_o2i_kv (&kv, addr.as_u32, port, fib_index, proto,
192                            m - sm->static_mappings);
193   return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, 1 /*is_add*/);
194 }
195
196 static_always_inline int
197 nat44_ed_sm_o2i_del (snat_main_t *sm, ip4_address_t addr, u16 port,
198                      u32 fib_index, u8 proto)
199 {
200   clib_bihash_kv_16_8_t kv;
201   nat44_ed_sm_init_o2i_k (&kv, addr.as_u32, port, fib_index, proto);
202   return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, 0 /*is_add*/);
203 }
204
205 void
206 nat44_ed_free_session_data (snat_main_t *sm, snat_session_t *s,
207                             u32 thread_index, u8 is_ha)
208 {
209   per_vrf_sessions_unregister_session (s, thread_index);
210
211   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 0))
212     nat_elog_warn (sm, "flow hash del failed");
213
214   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0))
215     nat_elog_warn (sm, "flow hash del failed");
216
217   if (na44_ed_is_fwd_bypass_session (s))
218     {
219       return;
220     }
221
222   if (nat44_ed_is_affinity_session (s))
223     nat_affinity_unlock (s->ext_host_addr, s->out2in.addr, s->proto,
224                          s->out2in.port);
225
226   if (!is_ha)
227     nat_syslog_nat44_sdel (0, s->in2out.fib_index, &s->in2out.addr,
228                            s->in2out.port, &s->ext_host_nat_addr,
229                            s->ext_host_nat_port, &s->out2in.addr,
230                            s->out2in.port, &s->ext_host_addr, s->ext_host_port,
231                            s->proto, nat44_ed_is_twice_nat_session (s));
232
233   if (!is_ha)
234     {
235       /* log NAT event */
236       nat_ipfix_logging_nat44_ses_delete (
237         thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
238         s->in2out.port, s->out2in.port, s->in2out.fib_index);
239     }
240 }
241
/*
 * Find the interface address record on `sw_if_index` whose IPv4 address
 * equals `addr`.
 *
 * Returns the matching ip_interface_address_t, or NULL when none of the
 * addresses visited by foreach_ip_interface_address() matches.
 */
static ip_interface_address_t *
nat44_ed_get_ip_interface_address (u32 sw_if_index, ip4_address_t addr)
{
  snat_main_t *sm = &snat_main;

  ip_lookup_main_t *lm = &sm->ip4_main->lookup_main;
  ip_interface_address_t *ia;
  ip4_address_t *ip4a;

  /* NOTE(review): the literal 1 is presumably the "honor unnumbered"
   * argument of foreach_ip_interface_address - confirm against the macro
   * definition. */
  foreach_ip_interface_address (
    lm, ia, sw_if_index, 1, ({
      ip4a = ip_interface_address_get_address (lm, ia);
      nat_log_debug ("sw_if_idx: %u addr: %U ? %U", sw_if_index,
                     format_ip4_address, ip4a, format_ip4_address, &addr);
      /* exact match on the 32-bit address value */
      if (ip4a->as_u32 == addr.as_u32)
        {
          return ia;
        }
    }));
  return NULL;
}
263
264 static int
265 nat44_ed_resolve_nat_addr_len (snat_address_t *ap,
266                                snat_interface_t *interfaces)
267 {
268   ip_interface_address_t *ia;
269   snat_interface_t *i;
270   u32 fib_index;
271
272   pool_foreach (i, interfaces)
273     {
274       if (!nat44_ed_is_interface_outside (i))
275         {
276           continue;
277         }
278
279       fib_index = ip4_fib_table_get_index_for_sw_if_index (i->sw_if_index);
280       if (fib_index != ap->fib_index)
281         {
282           continue;
283         }
284
285       if ((ia = nat44_ed_get_ip_interface_address (i->sw_if_index, ap->addr)))
286         {
287           ap->addr_len = ia->address_length;
288           ap->sw_if_index = i->sw_if_index;
289           ap->net.as_u32 = (ap->addr.as_u32 >> (32 - ap->addr_len))
290                            << (32 - ap->addr_len);
291
292           nat_log_debug ("pool addr %U binds to -> sw_if_idx: %u net: %U/%u",
293                          format_ip4_address, &ap->addr, ap->sw_if_index,
294                          format_ip4_address, &ap->net, ap->addr_len);
295           return 0;
296         }
297     }
298   return 1;
299 }
300
301 static void
302 nat44_ed_update_outside_if_addresses (snat_address_t *ap)
303 {
304   snat_main_t *sm = &snat_main;
305
306   if (!nat44_ed_resolve_nat_addr_len (ap, sm->interfaces))
307     {
308       return;
309     }
310
311   if (!nat44_ed_resolve_nat_addr_len (ap, sm->output_feature_interfaces))
312     {
313       return;
314     }
315 }
316
317 static void
318 nat44_ed_bind_if_addr_to_nat_addr (u32 sw_if_index)
319 {
320   snat_main_t *sm = &snat_main;
321   ip_interface_address_t *ia;
322   snat_address_t *ap;
323
324   u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
325
326   vec_foreach (ap, sm->addresses)
327     {
328       if (fib_index != ap->fib_index)
329         {
330           continue;
331         }
332
333       if ((ia = nat44_ed_get_ip_interface_address (sw_if_index, ap->addr)))
334         {
335           ap->addr_len = ia->address_length;
336           ap->sw_if_index = sw_if_index;
337           ap->net.as_u32 = (ap->addr.as_u32 >> (32 - ap->addr_len))
338                            << (32 - ap->addr_len);
339
340           nat_log_debug ("pool addr %U binds to -> sw_if_idx: %u net: %U/%u",
341                          format_ip4_address, &ap->addr, ap->sw_if_index,
342                          format_ip4_address, &ap->net, ap->addr_len);
343           return;
344         }
345     }
346 }
347
348 static_always_inline snat_fib_entry_reg_t *
349 nat44_ed_get_fib_entry_reg (ip4_address_t addr, u32 sw_if_index, int *out_idx)
350 {
351   snat_main_t *sm = &snat_main;
352   snat_fib_entry_reg_t *fe;
353   int i;
354
355   for (i = 0; i < vec_len (sm->fib_entry_reg); i++)
356     {
357       fe = sm->fib_entry_reg + i;
358       if ((addr.as_u32 == fe->addr.as_u32) && (sw_if_index == fe->sw_if_index))
359         {
360           if (out_idx)
361             {
362               *out_idx = i;
363             }
364           return fe;
365         }
366     }
367   return NULL;
368 }
369
370 static void
371 nat44_ed_add_fib_entry_reg (ip4_address_t addr, u32 sw_if_index)
372 {
373   // Add the external NAT address to the FIB as receive entries. This ensures
374   // that VPP will reply to ARP for this address and we don't need to enable
375   // proxy ARP on the outside interface.
376   snat_main_t *sm = &snat_main;
377   snat_fib_entry_reg_t *fe;
378
379   if (!(fe = nat44_ed_get_fib_entry_reg (addr, sw_if_index, 0)))
380     {
381       fib_prefix_t prefix = {
382         .fp_len = 32,
383         .fp_proto = FIB_PROTOCOL_IP4,
384         .fp_addr = {
385                   .ip4.as_u32 = addr.as_u32,
386                 },
387       };
388       u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
389       fib_table_entry_update_one_path (fib_index, &prefix, sm->fib_src_low,
390                                        (FIB_ENTRY_FLAG_CONNECTED |
391                                         FIB_ENTRY_FLAG_LOCAL |
392                                         FIB_ENTRY_FLAG_EXCLUSIVE),
393                                        DPO_PROTO_IP4, NULL, sw_if_index, ~0, 1,
394                                        NULL, FIB_ROUTE_PATH_FLAG_NONE);
395
396       vec_add2 (sm->fib_entry_reg, fe, 1);
397       clib_memset (fe, 0, sizeof (*fe));
398       fe->addr.as_u32 = addr.as_u32;
399       fe->sw_if_index = sw_if_index;
400     }
401   fe->count++;
402 }
403
404 static void
405 nat44_ed_del_fib_entry_reg (ip4_address_t addr, u32 sw_if_index)
406 {
407   snat_main_t *sm = &snat_main;
408   snat_fib_entry_reg_t *fe;
409   int i;
410
411   if ((fe = nat44_ed_get_fib_entry_reg (addr, sw_if_index, &i)))
412     {
413       fe->count--;
414       if (0 == fe->count)
415         {
416           fib_prefix_t prefix = {
417             .fp_len = 32,
418             .fp_proto = FIB_PROTOCOL_IP4,
419             .fp_addr = {
420               .ip4.as_u32 = addr.as_u32,
421                     },
422           };
423           u32 fib_index =
424             ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
425           fib_table_entry_delete (fib_index, &prefix, sm->fib_src_low);
426           vec_del1 (sm->fib_entry_reg, i);
427         }
428     }
429 }
430
431 static void
432 nat44_ed_add_del_interface_fib_reg_entries (ip4_address_t addr, u8 is_add)
433 {
434   snat_main_t *sm = &snat_main;
435   snat_interface_t *i;
436
437   pool_foreach (i, sm->interfaces)
438     {
439       if (nat44_ed_is_interface_outside (i))
440         {
441           if (is_add)
442             {
443               nat44_ed_add_fib_entry_reg (addr, i->sw_if_index);
444             }
445           else
446             {
447               nat44_ed_del_fib_entry_reg (addr, i->sw_if_index);
448             }
449         }
450     }
451   pool_foreach (i, sm->output_feature_interfaces)
452     {
453       if (nat44_ed_is_interface_outside (i))
454         {
455           if (is_add)
456             {
457               nat44_ed_add_fib_entry_reg (addr, i->sw_if_index);
458             }
459           else
460             {
461               nat44_ed_del_fib_entry_reg (addr, i->sw_if_index);
462             }
463         }
464     }
465 }
466
467 static_always_inline void
468 nat44_ed_add_del_nat_addr_fib_reg_entries (u32 sw_if_index, u8 is_add)
469 {
470   snat_main_t *sm = &snat_main;
471   snat_address_t *ap;
472
473   vec_foreach (ap, sm->addresses)
474     {
475       if (is_add)
476         {
477           nat44_ed_add_fib_entry_reg (ap->addr, sw_if_index);
478         }
479       else
480         {
481           nat44_ed_del_fib_entry_reg (ap->addr, sw_if_index);
482         }
483     }
484 }
485
486 static_always_inline void
487 nat44_ed_add_del_sm_fib_reg_entries (u32 sw_if_index, u8 is_add)
488 {
489   snat_main_t *sm = &snat_main;
490   snat_static_mapping_t *m;
491
492   pool_foreach (m, sm->static_mappings)
493     {
494       if (is_add)
495         {
496           nat44_ed_add_fib_entry_reg (m->external_addr, sw_if_index);
497         }
498       else
499         {
500           nat44_ed_del_fib_entry_reg (m->external_addr, sw_if_index);
501         }
502     }
503 }
504
505 int
506 nat44_ed_add_address (ip4_address_t *addr, u32 vrf_id, u8 twice_nat)
507 {
508   snat_main_t *sm = &snat_main;
509   snat_address_t *ap, *addresses;
510
511   addresses = twice_nat ? sm->twice_nat_addresses : sm->addresses;
512
513   if (!sm->enabled)
514     {
515       return VNET_API_ERROR_UNSUPPORTED;
516     }
517
518   // check if address already exists
519   vec_foreach (ap, addresses)
520     {
521       if (ap->addr.as_u32 == addr->as_u32)
522         {
523           nat_log_err ("address exist");
524           return VNET_API_ERROR_VALUE_EXIST;
525         }
526     }
527
528   if (twice_nat)
529     {
530       vec_add2 (sm->twice_nat_addresses, ap, 1);
531     }
532   else
533     {
534       vec_add2 (sm->addresses, ap, 1);
535     }
536
537   ap->addr_len = ~0;
538   ap->fib_index = ~0;
539   ap->addr = *addr;
540
541   if (vrf_id != ~0)
542     {
543       ap->fib_index = fib_table_find_or_create_and_lock (
544         FIB_PROTOCOL_IP4, vrf_id, sm->fib_src_low);
545     }
546
547   if (!twice_nat)
548     {
549       // if we don't have enabled interface we don't add address
550       // to fib
551       nat44_ed_add_del_interface_fib_reg_entries (*addr, 1);
552       nat44_ed_update_outside_if_addresses (ap);
553     }
554   return 0;
555 }
556
557 int
558 nat44_ed_del_address (ip4_address_t addr, u8 twice_nat)
559 {
560   snat_main_t *sm = &snat_main;
561   snat_address_t *a = 0, *addresses;
562   snat_session_t *ses;
563   u32 *ses_to_be_removed = 0, *ses_index;
564   snat_main_per_thread_data_t *tsm;
565   int j;
566
567   addresses = twice_nat ? sm->twice_nat_addresses : sm->addresses;
568
569   for (j = 0; j < vec_len (addresses); j++)
570     {
571       if (addresses[j].addr.as_u32 == addr.as_u32)
572         {
573           a = addresses + j;
574           break;
575         }
576     }
577   if (!a)
578     {
579       nat_log_err ("no such address");
580       return VNET_API_ERROR_NO_SUCH_ENTRY;
581     }
582
583   // delete dynamic sessions only
584   vec_foreach (tsm, sm->per_thread_data)
585     {
586       pool_foreach (ses, tsm->sessions)
587         {
588           if (ses->flags & SNAT_SESSION_FLAG_STATIC_MAPPING)
589             {
590               continue;
591             }
592           if (ses->out2in.addr.as_u32 == addr.as_u32)
593             {
594               nat44_ed_free_session_data (sm, ses, tsm - sm->per_thread_data,
595                                           0);
596               vec_add1 (ses_to_be_removed, ses - tsm->sessions);
597             }
598         }
599       vec_foreach (ses_index, ses_to_be_removed)
600         {
601           ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
602           nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1);
603         }
604       vec_free (ses_to_be_removed);
605     }
606
607   if (!twice_nat)
608     {
609       nat44_ed_add_del_interface_fib_reg_entries (addr, 0);
610     }
611
612   if (a->fib_index != ~0)
613     {
614       fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
615     }
616
617   if (!twice_nat)
618     {
619       vec_del1 (sm->addresses, j);
620     }
621   else
622     {
623       vec_del1 (sm->twice_nat_addresses, j);
624     }
625
626   return 0;
627 }
628
629 u32
630 get_thread_idx_by_port (u16 e_port)
631 {
632   snat_main_t *sm = &snat_main;
633   u32 thread_idx = sm->num_workers;
634   if (sm->num_workers > 1)
635     {
636       thread_idx =
637         sm->first_worker_index +
638         sm->workers[(e_port - 1024) / sm->port_per_thread];
639     }
640   return thread_idx;
641 }
642
643 void
644 nat_ed_static_mapping_del_sessions (snat_main_t * sm,
645                                     snat_main_per_thread_data_t * tsm,
646                                     ip4_address_t l_addr,
647                                     u16 l_port,
648                                     u8 protocol,
649                                     u32 fib_index, int addr_only,
650                                     ip4_address_t e_addr, u16 e_port)
651 {
652   snat_session_t *s;
653   u32 *indexes_to_free = NULL;
654   pool_foreach (s, tsm->sessions) {
655     if (s->in2out.fib_index != fib_index ||
656         s->in2out.addr.as_u32 != l_addr.as_u32)
657       {
658         continue;
659       }
660     if (!addr_only)
661       {
662         if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
663             s->out2in.port != e_port || s->in2out.port != l_port ||
664             s->proto != protocol)
665           continue;
666       }
667
668     if (nat44_ed_is_lb_session (s))
669       continue;
670     if (!nat44_ed_is_session_static (s))
671       continue;
672     nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
673     vec_add1 (indexes_to_free, s - tsm->sessions);
674     if (!addr_only)
675       break;
676   }
677   u32 *ses_index;
678   vec_foreach (ses_index, indexes_to_free)
679   {
680     s = pool_elt_at_index (tsm->sessions, *ses_index);
681     nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
682   }
683   vec_free (indexes_to_free);
684 }
685
686 static_always_inline snat_static_mapping_t *
687 nat44_ed_sm_lookup (snat_main_t *sm, clib_bihash_kv_16_8_t *kv)
688 {
689   clib_bihash_kv_16_8_t v;
690   int rc = clib_bihash_search_16_8 (&sm->flow_hash, kv, &v);
691   if (!rc)
692     {
693       ASSERT (0 == ed_value_get_thread_index (&v));
694       return pool_elt_at_index (sm->static_mappings,
695                                 ed_value_get_session_index (&v));
696     }
697   return NULL;
698 }
699
700 snat_static_mapping_t *
701 nat44_ed_sm_o2i_lookup (snat_main_t *sm, ip4_address_t addr, u16 port,
702                         u32 fib_index, u8 proto)
703 {
704   clib_bihash_kv_16_8_t kv;
705   nat44_ed_sm_init_o2i_k (&kv, addr.as_u32, port, fib_index, proto);
706   return nat44_ed_sm_lookup (sm, &kv);
707 }
708
709 snat_static_mapping_t *
710 nat44_ed_sm_i2o_lookup (snat_main_t *sm, ip4_address_t addr, u16 port,
711                         u32 fib_index, u8 proto)
712 {
713   clib_bihash_kv_16_8_t kv;
714   nat44_ed_sm_init_i2o_k (&kv, addr.as_u32, port, fib_index, proto);
715   return nat44_ed_sm_lookup (sm, &kv);
716 }
717
/*
 * Find the pending interface-bound static mapping record in
 * sm->sm_to_resolve that corresponds to the given parameters.
 *
 * A record is only considered when its sw_if_index and vrf_id equal the
 * arguments. Beyond that:
 *  - if both the record and `flags` denote identity NAT, the external port
 *    and protocol must match, unless both are address-only;
 *  - otherwise the local address must match, and the local port, external
 *    port and protocol must match too, unless both are address-only.
 *
 * Returns the first matching record or NULL. When a record is found and
 * `out_idx` is non-NULL, the record's vector index is stored there.
 */
static snat_static_mapping_resolve_t *
nat44_ed_get_resolve_record (ip4_address_t l_addr, u16 l_port, u16 e_port,
                             ip_protocol_t proto, u32 vrf_id, u32 sw_if_index,
                             u32 flags, int *out_idx)
{
  snat_static_mapping_resolve_t *rp;
  snat_main_t *sm = &snat_main;
  int i;

  for (i = 0; i < vec_len (sm->sm_to_resolve); i++)
    {
      rp = sm->sm_to_resolve + i;

      if (rp->sw_if_index == sw_if_index && rp->vrf_id == vrf_id)
        {
          if (is_sm_identity_nat (rp->flags) && is_sm_identity_nat (flags))
            {
              /* identity NAT: the local side is not compared */
              if (!(is_sm_addr_only (rp->flags) && is_sm_addr_only (flags)))
                {
                  if (rp->e_port != e_port || rp->proto != proto)
                    {
                      continue;
                    }
                }
            }
          else if (rp->l_addr.as_u32 == l_addr.as_u32)
            {
              /* ports and protocol only matter when at least one side is
               * not address-only */
              if (!(is_sm_addr_only (rp->flags) && is_sm_addr_only (flags)))
                {
                  if (rp->l_port != l_port || rp->e_port != e_port ||
                      rp->proto != proto)
                    {
                      continue;
                    }
                }
            }
          else
            {
              /* different local address: not the record we look for */
              continue;
            }
          if (out_idx)
            {
              *out_idx = i;
            }
          return rp;
        }
    }
  return NULL;
}
767
768 static int
769 nat44_ed_del_resolve_record (ip4_address_t l_addr, u16 l_port, u16 e_port,
770                              ip_protocol_t proto, u32 vrf_id, u32 sw_if_index,
771                              u32 flags)
772 {
773   snat_main_t *sm = &snat_main;
774   int i;
775   if (nat44_ed_get_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
776                                    sw_if_index, flags, &i))
777     {
778       vec_del1 (sm->sm_to_resolve, i);
779       return 0;
780     }
781   return 1;
782 }
783
784 static_always_inline int
785 nat44_ed_validate_sm_input (u32 flags)
786 {
787   // identity nat can be initiated only from inside interface
788   if (is_sm_identity_nat (flags) && is_sm_out2in_only (flags))
789     {
790       return VNET_API_ERROR_UNSUPPORTED;
791     }
792
793   if (is_sm_twice_nat (flags) || is_sm_self_twice_nat (flags))
794     {
795       if (is_sm_addr_only (flags) || is_sm_identity_nat (flags))
796         {
797           return VNET_API_ERROR_UNSUPPORTED;
798         }
799     }
800   return 0;
801 }
802
/*
 * Add a static mapping.
 *
 * For an interface-bound mapping (is_sm_switch_address (flags)) a resolve
 * record is stored in sm->sm_to_resolve first. If the interface has no
 * address yet, the call succeeds with the record left unresolved;
 * otherwise the interface's first address is used as the external address
 * and the record is marked resolved.
 *
 * Returns 0 on success, VNET_API_ERROR_UNSUPPORTED when the plugin is not
 * enabled, the result of nat44_ed_validate_sm_input() when the flags are
 * rejected, VNET_API_ERROR_VALUE_EXIST when an equivalent resolve record
 * already exists, or the result of nat44_ed_add_static_mapping_internal().
 */
int
nat44_ed_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
                             u16 l_port, u16 e_port, ip_protocol_t proto,
                             u32 vrf_id, u32 sw_if_index, u32 flags,
                             ip4_address_t pool_addr, u8 *tag)
{
  snat_static_mapping_resolve_t *rp;
  snat_main_t *sm = &snat_main;
  int rv;

  if (!sm->enabled)
    {
      return VNET_API_ERROR_UNSUPPORTED;
    }

  rv = nat44_ed_validate_sm_input (flags);
  if (rv != 0)
    {
      return rv;
    }

  // interface bound mapping
  if (is_sm_switch_address (flags))
    {
      if (nat44_ed_get_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
                                       sw_if_index, flags, 0))
        {
          return VNET_API_ERROR_VALUE_EXIST;
        }

      /* remember the request; the record keeps its own copy of the tag */
      vec_add2 (sm->sm_to_resolve, rp, 1);
      rp->l_addr.as_u32 = l_addr.as_u32;
      rp->l_port = l_port;
      rp->e_port = e_port;
      rp->sw_if_index = sw_if_index;
      rp->vrf_id = vrf_id;
      rp->proto = proto;
      rp->flags = flags;
      rp->pool_addr = pool_addr;
      rp->tag = vec_dup (tag);
      rp->is_resolved = 0;

      ip4_address_t *first_int_addr =
        ip4_interface_first_address (sm->ip4_main, sw_if_index, 0);
      if (!first_int_addr)
        {
          /* no interface address yet: keep the record unresolved */
          return 0;
        }

      e_addr.as_u32 = first_int_addr->as_u32;
      rp->is_resolved = 1;
    }

  rv = nat44_ed_add_static_mapping_internal (l_addr, e_addr, l_port, e_port,
                                             proto, vrf_id, sw_if_index, flags,
                                             pool_addr, tag);
  /* roll back the resolve record when the mapping could not be created */
  if ((0 != rv) && is_sm_switch_address (flags))
    {
      nat44_ed_del_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
                                   sw_if_index, flags);
    }

  return rv;
}
867
868 int
869 nat44_ed_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
870                              u16 l_port, u16 e_port, ip_protocol_t proto,
871                              u32 vrf_id, u32 sw_if_index, u32 flags)
872 {
873   snat_main_t *sm = &snat_main;
874   int rv;
875
876   if (!sm->enabled)
877     {
878       return VNET_API_ERROR_UNSUPPORTED;
879     }
880
881   rv = nat44_ed_validate_sm_input (flags);
882   if (rv != 0)
883     {
884       return rv;
885     }
886
887   // interface bound mapping
888   if (is_sm_switch_address (flags))
889     {
890       if (nat44_ed_del_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
891                                        sw_if_index, flags))
892         {
893           return VNET_API_ERROR_NO_SUCH_ENTRY;
894         }
895
896       ip4_address_t *first_int_addr =
897         ip4_interface_first_address (sm->ip4_main, sw_if_index, 0);
898       if (!first_int_addr)
899         {
900           // dhcp resolution required
901           return 0;
902         }
903
904       e_addr.as_u32 = first_int_addr->as_u32;
905     }
906
907   return nat44_ed_del_static_mapping_internal (l_addr, e_addr, l_port, e_port,
908                                                proto, vrf_id, flags);
909 }
910
911 static int
912 nat44_ed_add_static_mapping_internal (ip4_address_t l_addr,
913                                       ip4_address_t e_addr, u16 l_port,
914                                       u16 e_port, ip_protocol_t proto,
915                                       u32 vrf_id, u32 sw_if_index, u32 flags,
916                                       ip4_address_t pool_addr, u8 *tag)
917 {
918   snat_main_t *sm = &snat_main;
919   nat44_lb_addr_port_t *local;
920   snat_static_mapping_t *m;
921   u32 fib_index = ~0;
922
923   if (is_sm_addr_only (flags))
924     {
925       e_port = l_port = proto = 0;
926     }
927
928   if (is_sm_identity_nat (flags))
929     {
930       l_port = e_port;
931       l_addr.as_u32 = e_addr.as_u32;
932     }
933
934   m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
935   if (m)
936     {
937       // case:
938       // adding local identity nat record for different vrf table
939
940       if (!is_sm_identity_nat (m->flags))
941         {
942           return VNET_API_ERROR_VALUE_EXIST;
943         }
944
945       pool_foreach (local, m->locals)
946         {
947           if (local->vrf_id == vrf_id)
948             {
949               return VNET_API_ERROR_VALUE_EXIST;
950             }
951         }
952
953       pool_get (m->locals, local);
954
955       local->vrf_id = vrf_id;
956       local->fib_index = fib_table_find_or_create_and_lock (
957         FIB_PROTOCOL_IP4, vrf_id, sm->fib_src_low);
958
959       nat44_ed_sm_i2o_add (sm, m, m->local_addr, m->local_port,
960                            local->fib_index, m->proto);
961
962       return 0;
963     }
964
965   if (vrf_id != ~0)
966     {
967       fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
968                                                      sm->fib_src_low);
969     }
970   else
971     {
972       // fallback to default vrf
973       vrf_id = sm->inside_vrf_id;
974       fib_index = sm->inside_fib_index;
975       fib_table_lock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
976     }
977
978   // test if local mapping record doesn't exist
979   // identity nat supports multiple records in local mapping
980   if (!(is_sm_out2in_only (flags) || is_sm_identity_nat (flags)))
981     {
982       if (nat44_ed_sm_i2o_lookup (sm, l_addr, l_port, fib_index, proto))
983         {
984           return VNET_API_ERROR_VALUE_EXIST;
985         }
986     }
987
988   pool_get (sm->static_mappings, m);
989   clib_memset (m, 0, sizeof (*m));
990
991   m->flags = flags;
992   m->local_addr = l_addr;
993   m->external_addr = e_addr;
994
995   m->pool_addr = pool_addr;
996   m->tag = vec_dup (tag);
997
998   if (!is_sm_addr_only (flags))
999     {
1000       m->local_port = l_port;
1001       m->external_port = e_port;
1002       m->proto = proto;
1003     }
1004
1005   if (is_sm_identity_nat (flags))
1006     {
1007       pool_get (m->locals, local);
1008
1009       local->vrf_id = vrf_id;
1010       local->fib_index = fib_index;
1011     }
1012   else
1013     {
1014       m->vrf_id = vrf_id;
1015       m->fib_index = fib_index;
1016     }
1017
1018   if (!is_sm_out2in_only (flags))
1019     {
1020       nat44_ed_sm_i2o_add (sm, m, m->local_addr, m->local_port, fib_index,
1021                            m->proto);
1022     }
1023
1024   nat44_ed_sm_o2i_add (sm, m, m->external_addr, m->external_port, 0, m->proto);
1025
1026   if (sm->num_workers > 1)
1027     {
1028       // store worker index for this record
1029       ip4_header_t ip = {
1030         .src_address = m->local_addr,
1031       };
1032       u32 worker_index;
1033       worker_index =
1034         nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index, 0);
1035       vec_add1 (m->workers, worker_index);
1036     }
1037
1038   nat44_ed_add_del_interface_fib_reg_entries (e_addr, 1);
1039
1040   return 0;
1041 }
1042
1043 static int
1044 nat44_ed_del_static_mapping_internal (ip4_address_t l_addr,
1045                                       ip4_address_t e_addr, u16 l_port,
1046                                       u16 e_port, ip_protocol_t proto,
1047                                       u32 vrf_id, u32 flags)
1048 {
1049   snat_main_per_thread_data_t *tsm;
1050   snat_main_t *sm = &snat_main;
1051
1052   nat44_lb_addr_port_t *local;
1053   snat_static_mapping_t *m;
1054   u32 fib_index = ~0;
1055
1056   if (is_sm_addr_only (flags))
1057     {
1058       e_port = l_port = proto = 0;
1059     }
1060
1061   if (is_sm_identity_nat (flags))
1062     {
1063       l_port = e_port;
1064       l_addr.as_u32 = e_addr.as_u32;
1065     }
1066
1067   // fib index 0
1068   m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
1069   if (!m)
1070     {
1071       return VNET_API_ERROR_NO_SUCH_ENTRY;
1072     }
1073
1074   if (is_sm_identity_nat (flags))
1075     {
1076       u8 found = 0;
1077
1078       if (vrf_id == ~0)
1079         {
1080           vrf_id = sm->inside_vrf_id;
1081         }
1082
1083       pool_foreach (local, m->locals)
1084         {
1085           if (local->vrf_id == vrf_id)
1086             {
1087               local = pool_elt_at_index (m->locals, local - m->locals);
1088               fib_index = local->fib_index;
1089               pool_put (m->locals, local);
1090               found = 1;
1091             }
1092         }
1093
1094       if (!found)
1095         {
1096           return VNET_API_ERROR_NO_SUCH_ENTRY;
1097         }
1098     }
1099   else
1100     {
1101       fib_index = m->fib_index;
1102     }
1103
1104   if (!is_sm_out2in_only (flags))
1105     {
1106       nat44_ed_sm_i2o_del (sm, l_addr, l_port, fib_index, proto);
1107     }
1108
1109   // delete sessions for static mapping
1110   if (sm->num_workers > 1)
1111     {
1112       tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
1113     }
1114   else
1115     {
1116       tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1117     }
1118
1119   nat_ed_static_mapping_del_sessions (sm, tsm, m->local_addr, m->local_port,
1120                                       m->proto, fib_index,
1121                                       is_sm_addr_only (flags), e_addr, e_port);
1122
1123   fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
1124
1125   if (!pool_elts (m->locals))
1126     {
1127       // this is last record remove all required stuff
1128       // fib_index 0
1129       nat44_ed_sm_o2i_del (sm, e_addr, e_port, 0, proto);
1130
1131       vec_free (m->tag);
1132       vec_free (m->workers);
1133       pool_put (sm->static_mappings, m);
1134
1135       nat44_ed_add_del_interface_fib_reg_entries (e_addr, 0);
1136     }
1137
1138   return 0;
1139 }
1140
1141 int
1142 nat44_ed_add_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
1143                                 ip_protocol_t proto,
1144                                 nat44_lb_addr_port_t *locals, u32 flags,
1145                                 u8 *tag, u32 affinity)
1146 {
1147   snat_main_t *sm = &snat_main;
1148   snat_static_mapping_t *m;
1149   snat_address_t *a = 0;
1150
1151   nat44_lb_addr_port_t *local;
1152   uword *bitmap = 0;
1153   int rc = 0;
1154
1155   int i;
1156
1157   if (!sm->enabled)
1158     {
1159       return VNET_API_ERROR_UNSUPPORTED;
1160     }
1161
1162   m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
1163
1164   if (m)
1165     {
1166       return VNET_API_ERROR_VALUE_EXIST;
1167     }
1168
1169   if (vec_len (locals) < 2)
1170     {
1171       return VNET_API_ERROR_INVALID_VALUE;
1172     }
1173
1174   if (!is_sm_out2in_only (flags))
1175     {
1176       /* Find external address in allocated addresses and reserve port for
1177          address and port pair mapping when dynamic translations enabled */
1178       for (i = 0; i < vec_len (sm->addresses); i++)
1179         {
1180           if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1181             {
1182               /* External port must be unused */
1183               a = sm->addresses + i;
1184               if (nat44_ed_sm_o2i_lookup (sm, a->addr, e_port, 0, proto))
1185                 {
1186                   return VNET_API_ERROR_VALUE_EXIST;
1187                 }
1188               break;
1189             }
1190         }
1191       // external address must be allocated
1192       if (!a)
1193         {
1194           return VNET_API_ERROR_NO_SUCH_ENTRY;
1195         }
1196     }
1197
1198   pool_get (sm->static_mappings, m);
1199   clib_memset (m, 0, sizeof (*m));
1200   m->tag = vec_dup (tag);
1201   m->external_addr = e_addr;
1202   m->external_port = e_port;
1203   m->affinity = affinity;
1204   m->proto = proto;
1205
1206   m->flags = flags;
1207   m->flags |= NAT_SM_FLAG_LB;
1208
1209   if (affinity)
1210     m->affinity_per_service_list_head_index =
1211       nat_affinity_get_per_service_list_head_index ();
1212   else
1213     m->affinity_per_service_list_head_index = ~0;
1214
1215   if (nat44_ed_sm_o2i_add (sm, m, m->external_addr, m->external_port, 0,
1216                            m->proto))
1217     {
1218       nat_log_err ("sm o2i key add failed");
1219       return VNET_API_ERROR_UNSPECIFIED;
1220     }
1221
1222   for (i = 0; i < vec_len (locals); i++)
1223     {
1224       locals[i].fib_index = fib_table_find_or_create_and_lock (
1225         FIB_PROTOCOL_IP4, locals[i].vrf_id, sm->fib_src_low);
1226       if (!is_sm_out2in_only (flags))
1227         {
1228           if (nat44_ed_sm_o2i_add (sm, m, e_addr, e_port, 0, proto))
1229             {
1230               nat_log_err ("sm o2i key add failed");
1231               rc = VNET_API_ERROR_UNSPECIFIED;
1232               // here we continue with add operation so that it can be safely
1233               // reversed in delete path - otherwise we'd have to track what
1234               // we've done and deal with partial cleanups and since bihash
1235               // adds are (extremely improbable) the only points of failure,
1236               // it's easier to just do it this way
1237             }
1238         }
1239       locals[i].prefix = (i == 0) ?
1240                            locals[i].probability :
1241                            (locals[i - 1].prefix + locals[i].probability);
1242       pool_get (m->locals, local);
1243       *local = locals[i];
1244       if (sm->num_workers > 1)
1245         {
1246           ip4_header_t ip = {
1247             .src_address = locals[i].addr,
1248           };
1249           bitmap = clib_bitmap_set (
1250             bitmap, nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index, 0),
1251             1);
1252         }
1253     }
1254
1255   /* Assign workers */
1256   if (sm->num_workers > 1)
1257     {
1258       clib_bitmap_foreach (i, bitmap)
1259         {
1260           vec_add1 (m->workers, i);
1261         }
1262     }
1263
1264   return rc;
1265 }
1266
1267 int
1268 nat44_ed_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
1269                                 ip_protocol_t proto, u32 flags)
1270 {
1271   snat_main_t *sm = &snat_main;
1272   snat_static_mapping_t *m;
1273
1274   nat44_lb_addr_port_t *local;
1275   snat_main_per_thread_data_t *tsm;
1276   snat_session_t *s;
1277
1278   if (!sm->enabled)
1279     {
1280       return VNET_API_ERROR_UNSUPPORTED;
1281     }
1282
1283   m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
1284   if (!m)
1285     return VNET_API_ERROR_NO_SUCH_ENTRY;
1286
1287   if (!is_sm_lb (m->flags))
1288     return VNET_API_ERROR_INVALID_VALUE;
1289
1290   if (nat44_ed_sm_o2i_del (sm, m->external_addr, m->external_port, 0,
1291                            m->proto))
1292     {
1293       nat_log_err ("sm o2i key del failed");
1294       return VNET_API_ERROR_UNSPECIFIED;
1295     }
1296
1297   pool_foreach (local, m->locals)
1298     {
1299       fib_table_unlock (local->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
1300       if (!is_sm_out2in_only (flags))
1301         {
1302           if (nat44_ed_sm_i2o_del (sm, local->addr, local->port,
1303                                    local->fib_index, m->proto))
1304             {
1305               nat_log_err ("sm i2o key del failed");
1306               return VNET_API_ERROR_UNSPECIFIED;
1307             }
1308         }
1309
1310       if (sm->num_workers > 1)
1311         {
1312           ip4_header_t ip = {
1313             .src_address = local->addr,
1314           };
1315           tsm = vec_elt_at_index (
1316             sm->per_thread_data,
1317             nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index, 0));
1318         }
1319       else
1320         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1321
1322       /* Delete sessions */
1323       pool_foreach (s, tsm->sessions)
1324         {
1325           if (!(nat44_ed_is_lb_session (s)))
1326             continue;
1327
1328           if ((s->in2out.addr.as_u32 != local->addr.as_u32) ||
1329               s->in2out.port != local->port)
1330             continue;
1331
1332           nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1333           nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1334         }
1335     }
1336
1337   if (m->affinity)
1338     {
1339       nat_affinity_flush_service (m->affinity_per_service_list_head_index);
1340     }
1341
1342   pool_free (m->locals);
1343   vec_free (m->tag);
1344   vec_free (m->workers);
1345   pool_put (sm->static_mappings, m);
1346
1347   return 0;
1348 }
1349
1350 int
1351 nat44_ed_add_del_lb_static_mapping_local (ip4_address_t e_addr, u16 e_port,
1352                                           ip4_address_t l_addr, u16 l_port,
1353                                           ip_protocol_t proto, u32 vrf_id,
1354                                           u8 probability, u8 is_add)
1355 {
1356   snat_main_t *sm = &snat_main;
1357   snat_static_mapping_t *m = 0;
1358   nat44_lb_addr_port_t *local, *prev_local, *match_local = 0;
1359   snat_main_per_thread_data_t *tsm;
1360   snat_session_t *s;
1361   u32 *locals = 0;
1362   uword *bitmap = 0;
1363   int i;
1364
1365   if (!sm->enabled)
1366     {
1367       return VNET_API_ERROR_UNSUPPORTED;
1368     }
1369
1370   m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
1371
1372   if (!m)
1373     {
1374       return VNET_API_ERROR_NO_SUCH_ENTRY;
1375     }
1376
1377   if (!is_sm_lb (m->flags))
1378     {
1379       return VNET_API_ERROR_INVALID_VALUE;
1380     }
1381
1382   pool_foreach (local, m->locals)
1383    {
1384     if ((local->addr.as_u32 == l_addr.as_u32) && (local->port == l_port) &&
1385         (local->vrf_id == vrf_id))
1386       {
1387         match_local = local;
1388         break;
1389       }
1390   }
1391
1392   if (is_add)
1393     {
1394       if (match_local)
1395         {
1396           return VNET_API_ERROR_VALUE_EXIST;
1397         }
1398
1399       pool_get (m->locals, local);
1400       clib_memset (local, 0, sizeof (*local));
1401       local->addr.as_u32 = l_addr.as_u32;
1402       local->port = l_port;
1403       local->probability = probability;
1404       local->vrf_id = vrf_id;
1405       local->fib_index =
1406         fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1407                                            sm->fib_src_low);
1408
1409       if (!is_sm_out2in_only (m->flags))
1410         {
1411           if (nat44_ed_sm_i2o_add (sm, m, l_addr, l_port, local->fib_index,
1412                                    proto))
1413             {
1414               nat_log_err ("sm i2o key add failed");
1415               pool_put (m->locals, local);
1416               return VNET_API_ERROR_UNSPECIFIED;
1417             }
1418         }
1419     }
1420   else
1421     {
1422       if (!match_local)
1423         return VNET_API_ERROR_NO_SUCH_ENTRY;
1424
1425       if (pool_elts (m->locals) < 3)
1426         return VNET_API_ERROR_UNSPECIFIED;
1427
1428       fib_table_unlock (match_local->fib_index, FIB_PROTOCOL_IP4,
1429                         sm->fib_src_low);
1430
1431       if (!is_sm_out2in_only (m->flags))
1432         {
1433           if (nat44_ed_sm_i2o_del (sm, l_addr, l_port, match_local->fib_index,
1434                                    proto))
1435             nat_log_err ("sm i2o key del failed");
1436         }
1437
1438       if (sm->num_workers > 1)
1439         {
1440           ip4_header_t ip = {
1441             .src_address = local->addr,
1442           };
1443           tsm = vec_elt_at_index (
1444             sm->per_thread_data,
1445             nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index, 0));
1446         }
1447       else
1448         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1449
1450       /* Delete sessions */
1451       pool_foreach (s, tsm->sessions) {
1452           if (!(nat44_ed_is_lb_session (s)))
1453             continue;
1454
1455           if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) ||
1456               s->in2out.port != match_local->port)
1457             continue;
1458
1459           nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1460           nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1461       }
1462
1463       pool_put (m->locals, match_local);
1464     }
1465
1466   vec_free (m->workers);
1467
1468   pool_foreach (local, m->locals)
1469    {
1470     vec_add1 (locals, local - m->locals);
1471     if (sm->num_workers > 1)
1472       {
1473         ip4_header_t ip;
1474         ip.src_address.as_u32 = local->addr.as_u32,
1475         bitmap = clib_bitmap_set (
1476           bitmap,
1477           nat44_ed_get_in2out_worker_index (0, &ip, local->fib_index, 0), 1);
1478       }
1479   }
1480
1481   ASSERT (vec_len (locals) > 1);
1482
1483   local = pool_elt_at_index (m->locals, locals[0]);
1484   local->prefix = local->probability;
1485   for (i = 1; i < vec_len (locals); i++)
1486     {
1487       local = pool_elt_at_index (m->locals, locals[i]);
1488       prev_local = pool_elt_at_index (m->locals, locals[i - 1]);
1489       local->prefix = local->probability + prev_local->prefix;
1490     }
1491
1492   /* Assign workers */
1493   if (sm->num_workers > 1)
1494     {
1495       clib_bitmap_foreach (i, bitmap)  { vec_add1(m->workers, i); }
1496     }
1497
1498   return 0;
1499 }
1500
1501 void
1502 expire_per_vrf_sessions (u32 fib_index)
1503 {
1504   per_vrf_sessions_t *per_vrf_sessions;
1505   snat_main_per_thread_data_t *tsm;
1506   snat_main_t *sm = &snat_main;
1507
1508   vec_foreach (tsm, sm->per_thread_data)
1509     {
1510       vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
1511         {
1512           if ((per_vrf_sessions->rx_fib_index == fib_index) ||
1513               (per_vrf_sessions->tx_fib_index == fib_index))
1514             {
1515               per_vrf_sessions->expired = 1;
1516             }
1517         }
1518     }
1519 }
1520
1521 void
1522 update_per_vrf_sessions_vec (u32 fib_index, int is_del)
1523 {
1524   snat_main_t *sm = &snat_main;
1525   nat_fib_t *fib;
1526
1527   // we don't care if it is outside/inside fib
1528   // we just care about their ref_count
1529   // if it reaches 0 sessions should expire
1530   // because the fib isn't valid for NAT anymore
1531
1532   vec_foreach (fib, sm->fibs)
1533   {
1534     if (fib->fib_index == fib_index)
1535       {
1536         if (is_del)
1537           {
1538             fib->ref_count--;
1539             if (!fib->ref_count)
1540               {
1541                 vec_del1 (sm->fibs, fib - sm->fibs);
1542                 expire_per_vrf_sessions (fib_index);
1543               }
1544             return;
1545           }
1546         else
1547           fib->ref_count++;
1548       }
1549   }
1550   if (!is_del)
1551     {
1552       vec_add2 (sm->fibs, fib, 1);
1553       fib->ref_count = 1;
1554       fib->fib_index = fib_index;
1555     }
1556 }
1557
1558 static_always_inline nat_outside_fib_t *
1559 nat44_ed_get_outside_fib (nat_outside_fib_t *outside_fibs, u32 fib_index)
1560 {
1561   nat_outside_fib_t *f;
1562   vec_foreach (f, outside_fibs)
1563     {
1564       if (f->fib_index == fib_index)
1565         {
1566           return f;
1567         }
1568     }
1569   return 0;
1570 }
1571
1572 static_always_inline snat_interface_t *
1573 nat44_ed_get_interface (snat_interface_t *interfaces, u32 sw_if_index)
1574 {
1575   snat_interface_t *i;
1576   pool_foreach (i, interfaces)
1577     {
1578       if (i->sw_if_index == sw_if_index)
1579         {
1580           return i;
1581         }
1582     }
1583   return 0;
1584 }
1585
1586 int
1587 nat44_ed_add_interface (u32 sw_if_index, u8 is_inside)
1588 {
1589   const char *del_feature_name, *feature_name;
1590   snat_main_t *sm = &snat_main;
1591
1592   nat_outside_fib_t *outside_fib;
1593   snat_interface_t *i;
1594   u32 fib_index;
1595   int rv;
1596
1597   if (!sm->enabled)
1598     {
1599       nat_log_err ("nat44 is disabled");
1600       return VNET_API_ERROR_UNSUPPORTED;
1601     }
1602
1603   if (nat44_ed_get_interface (sm->output_feature_interfaces, sw_if_index))
1604     {
1605       nat_log_err ("error interface already configured");
1606       return VNET_API_ERROR_VALUE_EXIST;
1607     }
1608
1609   i = nat44_ed_get_interface (sm->interfaces, sw_if_index);
1610   if (i)
1611     {
1612       if ((nat44_ed_is_interface_inside (i) && is_inside) ||
1613           (nat44_ed_is_interface_outside (i) && !is_inside))
1614         {
1615           return 0;
1616         }
1617       if (sm->num_workers > 1)
1618         {
1619           del_feature_name = !is_inside ? "nat44-in2out-worker-handoff" :
1620                                           "nat44-out2in-worker-handoff";
1621           feature_name = "nat44-handoff-classify";
1622         }
1623       else
1624         {
1625           del_feature_name = !is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1626
1627           feature_name = "nat44-ed-classify";
1628         }
1629
1630       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1631       if (rv)
1632         return rv;
1633       vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1634                                    sw_if_index, 0, 0, 0);
1635       vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1,
1636                                    0, 0);
1637     }
1638   else
1639     {
1640       if (sm->num_workers > 1)
1641         {
1642           feature_name = is_inside ? "nat44-in2out-worker-handoff" :
1643                                      "nat44-out2in-worker-handoff";
1644         }
1645       else
1646         {
1647           feature_name = is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1648         }
1649
1650       nat_validate_interface_counters (sm, sw_if_index);
1651       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1652       if (rv)
1653         return rv;
1654       vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1,
1655                                    0, 0);
1656
1657       pool_get (sm->interfaces, i);
1658       i->sw_if_index = sw_if_index;
1659       i->flags = 0;
1660     }
1661
1662   fib_index =
1663     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
1664
1665   update_per_vrf_sessions_vec (fib_index, 0 /*is_del*/);
1666
1667   if (!is_inside)
1668     {
1669       i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
1670
1671       outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
1672       if (outside_fib)
1673         {
1674           outside_fib->refcount++;
1675         }
1676       else
1677         {
1678           vec_add2 (sm->outside_fibs, outside_fib, 1);
1679           outside_fib->fib_index = fib_index;
1680           outside_fib->refcount = 1;
1681         }
1682
1683       nat44_ed_add_del_nat_addr_fib_reg_entries (sw_if_index, 1);
1684       nat44_ed_add_del_sm_fib_reg_entries (sw_if_index, 1);
1685
1686       nat44_ed_bind_if_addr_to_nat_addr (sw_if_index);
1687     }
1688   else
1689     {
1690       i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
1691     }
1692
1693   return 0;
1694 }
1695
1696 int
1697 nat44_ed_del_interface (u32 sw_if_index, u8 is_inside)
1698 {
1699   const char *del_feature_name, *feature_name;
1700   snat_main_t *sm = &snat_main;
1701
1702   nat_outside_fib_t *outside_fib;
1703   snat_interface_t *i;
1704   u32 fib_index;
1705   int rv;
1706
1707   if (!sm->enabled)
1708     {
1709       nat_log_err ("nat44 is disabled");
1710       return VNET_API_ERROR_UNSUPPORTED;
1711     }
1712
1713   i = nat44_ed_get_interface (sm->interfaces, sw_if_index);
1714   if (i == 0)
1715     {
1716       nat_log_err ("error interface couldn't be found");
1717       return VNET_API_ERROR_NO_SUCH_ENTRY;
1718     }
1719
1720   if (nat44_ed_is_interface_inside (i) && nat44_ed_is_interface_outside (i))
1721     {
1722       if (sm->num_workers > 1)
1723         {
1724           del_feature_name = "nat44-handoff-classify";
1725           feature_name = !is_inside ? "nat44-in2out-worker-handoff" :
1726                                       "nat44-out2in-worker-handoff";
1727         }
1728       else
1729         {
1730           del_feature_name = "nat44-ed-classify";
1731           feature_name = !is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1732         }
1733
1734       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1735       if (rv)
1736         {
1737           return rv;
1738         }
1739       vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1740                                    sw_if_index, 0, 0, 0);
1741       vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1,
1742                                    0, 0);
1743
1744       if (is_inside)
1745         {
1746           i->flags &= ~NAT_INTERFACE_FLAG_IS_INSIDE;
1747         }
1748       else
1749         {
1750           i->flags &= ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
1751         }
1752     }
1753   else
1754     {
1755       if (sm->num_workers > 1)
1756         {
1757           feature_name = is_inside ? "nat44-in2out-worker-handoff" :
1758                                      "nat44-out2in-worker-handoff";
1759         }
1760       else
1761         {
1762           feature_name = is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1763         }
1764
1765       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1766       if (rv)
1767         {
1768           return rv;
1769         }
1770       vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 0,
1771                                    0, 0);
1772
1773       // remove interface
1774       pool_put (sm->interfaces, i);
1775     }
1776
1777   fib_index =
1778     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
1779
1780   update_per_vrf_sessions_vec (fib_index, 1 /*is_del*/);
1781
1782   if (!is_inside)
1783     {
1784       outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
1785       if (outside_fib)
1786         {
1787           outside_fib->refcount--;
1788           if (!outside_fib->refcount)
1789             {
1790               vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1791             }
1792         }
1793
1794       nat44_ed_add_del_nat_addr_fib_reg_entries (sw_if_index, 0);
1795       nat44_ed_add_del_sm_fib_reg_entries (sw_if_index, 0);
1796     }
1797
1798   return 0;
1799 }
1800
1801 int
1802 nat44_ed_add_output_interface (u32 sw_if_index)
1803 {
1804   snat_main_t *sm = &snat_main;
1805
1806   nat_outside_fib_t *outside_fib;
1807   snat_interface_t *i;
1808   u32 fib_index;
1809   int rv;
1810
1811   if (!sm->enabled)
1812     {
1813       nat_log_err ("nat44 is disabled");
1814       return VNET_API_ERROR_UNSUPPORTED;
1815     }
1816
1817   if (nat44_ed_get_interface (sm->interfaces, sw_if_index))
1818     {
1819       nat_log_err ("error interface already configured");
1820       return VNET_API_ERROR_VALUE_EXIST;
1821     }
1822
1823   if (nat44_ed_get_interface (sm->output_feature_interfaces, sw_if_index))
1824     {
1825       nat_log_err ("error interface already configured");
1826       return VNET_API_ERROR_VALUE_EXIST;
1827     }
1828
1829   if (sm->num_workers > 1)
1830     {
1831       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1832       if (rv)
1833         {
1834           return rv;
1835         }
1836
1837       rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 1);
1838       if (rv)
1839         {
1840           return rv;
1841         }
1842
1843       vnet_feature_enable_disable (
1844         "ip4-unicast", "nat44-out2in-worker-handoff", sw_if_index, 1, 0, 0);
1845       vnet_feature_enable_disable ("ip4-output",
1846                                    "nat44-in2out-output-worker-handoff",
1847                                    sw_if_index, 1, 0, 0);
1848     }
1849   else
1850     {
1851       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1852       if (rv)
1853         {
1854           return rv;
1855         }
1856
1857       rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 1);
1858       if (rv)
1859         {
1860           return rv;
1861         }
1862
1863       vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in",
1864                                    sw_if_index, 1, 0, 0);
1865       vnet_feature_enable_disable ("ip4-output", "nat-pre-in2out-output",
1866                                    sw_if_index, 1, 0, 0);
1867     }
1868
1869   nat_validate_interface_counters (sm, sw_if_index);
1870
1871   pool_get (sm->output_feature_interfaces, i);
1872   i->sw_if_index = sw_if_index;
1873   i->flags = 0;
1874   i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
1875   i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
1876
1877   fib_index =
1878     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
1879   update_per_vrf_sessions_vec (fib_index, 0 /*is_del*/);
1880
1881   outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
1882   if (outside_fib)
1883     {
1884       outside_fib->refcount++;
1885     }
1886   else
1887     {
1888       vec_add2 (sm->outside_fibs, outside_fib, 1);
1889       outside_fib->fib_index = fib_index;
1890       outside_fib->refcount = 1;
1891     }
1892
1893   nat44_ed_add_del_nat_addr_fib_reg_entries (sw_if_index, 1);
1894   nat44_ed_add_del_sm_fib_reg_entries (sw_if_index, 1);
1895
1896   nat44_ed_bind_if_addr_to_nat_addr (sw_if_index);
1897
1898   return 0;
1899 }
1900
1901 int
1902 nat44_ed_del_output_interface (u32 sw_if_index)
1903 {
1904   snat_main_t *sm = &snat_main;
1905
1906   nat_outside_fib_t *outside_fib;
1907   snat_interface_t *i;
1908   u32 fib_index;
1909   int rv;
1910
1911   if (!sm->enabled)
1912     {
1913       nat_log_err ("nat44 is disabled");
1914       return VNET_API_ERROR_UNSUPPORTED;
1915     }
1916
1917   i = nat44_ed_get_interface (sm->output_feature_interfaces, sw_if_index);
1918   if (!i)
1919     {
1920       nat_log_err ("error interface couldn't be found");
1921       return VNET_API_ERROR_NO_SUCH_ENTRY;
1922     }
1923
1924   if (sm->num_workers > 1)
1925     {
1926       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1927       if (rv)
1928         {
1929           return rv;
1930         }
1931
1932       rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 0);
1933       if (rv)
1934         {
1935           return rv;
1936         }
1937
1938       vnet_feature_enable_disable (
1939         "ip4-unicast", "nat44-out2in-worker-handoff", sw_if_index, 0, 0, 0);
1940       vnet_feature_enable_disable ("ip4-output",
1941                                    "nat44-in2out-output-worker-handoff",
1942                                    sw_if_index, 0, 0, 0);
1943     }
1944   else
1945     {
1946       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1947       if (rv)
1948         {
1949           return rv;
1950         }
1951
1952       rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 0);
1953       if (rv)
1954         {
1955           return rv;
1956         }
1957
1958       vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in",
1959                                    sw_if_index, 0, 0, 0);
1960       vnet_feature_enable_disable ("ip4-output", "nat-pre-in2out-output",
1961                                    sw_if_index, 0, 0, 0);
1962     }
1963
1964   // remove interface
1965   pool_put (sm->output_feature_interfaces, i);
1966
1967   fib_index =
1968     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
1969   update_per_vrf_sessions_vec (fib_index, 1 /*is_del*/);
1970
1971   outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
1972   if (outside_fib)
1973     {
1974       outside_fib->refcount--;
1975       if (!outside_fib->refcount)
1976         {
1977           vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1978         }
1979     }
1980
1981   nat44_ed_add_del_nat_addr_fib_reg_entries (sw_if_index, 0);
1982   nat44_ed_add_del_sm_fib_reg_entries (sw_if_index, 0);
1983
1984   return 0;
1985 }
1986
1987 int
1988 snat_set_workers (uword * bitmap)
1989 {
1990   snat_main_t *sm = &snat_main;
1991   int i, j = 0;
1992
1993   if (sm->num_workers < 2)
1994     return VNET_API_ERROR_FEATURE_DISABLED;
1995
1996   if (clib_bitmap_last_set (bitmap) >= sm->num_workers)
1997     return VNET_API_ERROR_INVALID_WORKER;
1998
1999   vec_free (sm->workers);
2000   clib_bitmap_foreach (i, bitmap)
2001     {
2002       vec_add1(sm->workers, i);
2003       sm->per_thread_data[sm->first_worker_index + i].snat_thread_index = j;
2004       sm->per_thread_data[sm->first_worker_index + i].thread_index = i;
2005       j++;
2006     }
2007
2008   sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
2009
2010   return 0;
2011 }
2012
2013 int
2014 nat44_ed_set_frame_queue_nelts (u32 frame_queue_nelts)
2015 {
2016   fail_if_enabled ();
2017   snat_main_t *sm = &snat_main;
2018
2019   if ((sm->fq_in2out_index != ~0) || (sm->fq_out2in_index != ~0) ||
2020       (sm->fq_in2out_output_index != ~0))
2021     {
2022       // frame queu nelts can be set only before first
2023       // call to nat44_plugin_enable after that it
2024       // doesn't make sense
2025       nat_log_err ("Frame queue was already initialized. "
2026                    "Change is not possible");
2027       return 1;
2028     }
2029
2030   sm->frame_queue_nelts = frame_queue_nelts;
2031   return 0;
2032 }
2033
2034 static void
2035 nat44_ed_update_outside_fib_cb (ip4_main_t *im, uword opaque, u32 sw_if_index,
2036                                 u32 new_fib_index, u32 old_fib_index)
2037 {
2038   snat_main_t *sm = &snat_main;
2039   nat_outside_fib_t *outside_fib;
2040   snat_interface_t *i;
2041   u8 is_add = 1;
2042   u8 match = 0;
2043
2044   if (!sm->enabled || (new_fib_index == old_fib_index)
2045       || (!vec_len (sm->outside_fibs)))
2046     {
2047       return;
2048     }
2049
2050   pool_foreach (i, sm->interfaces)
2051     {
2052       if (i->sw_if_index == sw_if_index)
2053         {
2054           if (!(nat44_ed_is_interface_outside (i)))
2055             return;
2056           match = 1;
2057         }
2058     }
2059
2060   pool_foreach (i, sm->output_feature_interfaces)
2061     {
2062       if (i->sw_if_index == sw_if_index)
2063         {
2064           if (!(nat44_ed_is_interface_outside (i)))
2065             return;
2066           match = 1;
2067         }
2068     }
2069
2070   if (!match)
2071     return;
2072
2073   vec_foreach (outside_fib, sm->outside_fibs)
2074     {
2075       if (outside_fib->fib_index == old_fib_index)
2076         {
2077           outside_fib->refcount--;
2078           if (!outside_fib->refcount)
2079             vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
2080           break;
2081         }
2082     }
2083
2084   vec_foreach (outside_fib, sm->outside_fibs)
2085     {
2086       if (outside_fib->fib_index == new_fib_index)
2087         {
2088           outside_fib->refcount++;
2089           is_add = 0;
2090           break;
2091         }
2092     }
2093
2094   if (is_add)
2095     {
2096       vec_add2 (sm->outside_fibs, outside_fib, 1);
2097       outside_fib->refcount = 1;
2098       outside_fib->fib_index = new_fib_index;
2099     }
2100 }
2101
2102 static void nat44_ed_update_outside_fib_cb (ip4_main_t *im, uword opaque,
2103                                             u32 sw_if_index, u32 new_fib_index,
2104                                             u32 old_fib_index);
2105
2106 static void nat44_ed_add_del_interface_address_cb (
2107   ip4_main_t *im, uword opaque, u32 sw_if_index, ip4_address_t *address,
2108   u32 address_length, u32 if_address_index, u32 is_delete);
2109
2110 static void nat44_ed_add_del_static_mapping_cb (
2111   ip4_main_t *im, uword opaque, u32 sw_if_index, ip4_address_t *address,
2112   u32 address_length, u32 if_address_index, u32 is_delete);
2113
2114 void
2115 test_key_calc_split ()
2116 {
2117   ip4_address_t l_addr;
2118   l_addr.as_u8[0] = 1;
2119   l_addr.as_u8[1] = 1;
2120   l_addr.as_u8[2] = 1;
2121   l_addr.as_u8[3] = 1;
2122   ip4_address_t r_addr;
2123   r_addr.as_u8[0] = 2;
2124   r_addr.as_u8[1] = 2;
2125   r_addr.as_u8[2] = 2;
2126   r_addr.as_u8[3] = 2;
2127   u16 l_port = 40001;
2128   u16 r_port = 40301;
2129   u8 proto = 9;
2130   u32 fib_index = 9000001;
2131   u32 thread_index = 3000000001;
2132   u32 session_index = 3000000221;
2133   clib_bihash_kv_16_8_t kv;
2134   init_ed_kv (&kv, l_addr.as_u32, l_port, r_addr.as_u32, r_port, fib_index,
2135               proto, thread_index, session_index);
2136   ip4_address_t l_addr2;
2137   ip4_address_t r_addr2;
2138   clib_memset (&l_addr2, 0, sizeof (l_addr2));
2139   clib_memset (&r_addr2, 0, sizeof (r_addr2));
2140   u16 l_port2 = 0;
2141   u16 r_port2 = 0;
2142   u8 proto2 = 0;
2143   u32 fib_index2 = 0;
2144   split_ed_kv (&kv, &l_addr2, &r_addr2, &proto2, &fib_index2, &l_port2,
2145                &r_port2);
2146   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
2147   ASSERT (r_addr.as_u32 == r_addr2.as_u32);
2148   ASSERT (l_port == l_port2);
2149   ASSERT (r_port == r_port2);
2150   ASSERT (proto == proto2);
2151   ASSERT (fib_index == fib_index2);
2152   ASSERT (thread_index == ed_value_get_thread_index (&kv));
2153   ASSERT (session_index == ed_value_get_session_index (&kv));
2154 }
2155
2156 static clib_error_t *
2157 nat_ip_table_add_del (vnet_main_t * vnm, u32 table_id, u32 is_add)
2158 {
2159   u32 fib_index;
2160   if (!is_add)
2161     {
2162       fib_index = ip4_fib_index_from_table_id (table_id);
2163       if (fib_index != ~0)
2164         {
2165           expire_per_vrf_sessions (fib_index);
2166         }
2167     }
2168   return 0;
2169 }
2170
2171 VNET_IP_TABLE_ADD_DEL_FUNCTION (nat_ip_table_add_del);
2172
/*
 * Run vlib_validate_simple_counter and then vlib_zero_simple_counter on
 * entry i of simple counter c.
 *
 * The expansion deliberately has no trailing semicolon: the caller supplies
 * it, so the macro behaves like a single statement (e.g. in an unbraced
 * if/else).
 */
#define nat_validate_simple_counter(c, i)                                     \
  do                                                                          \
    {                                                                         \
      vlib_validate_simple_counter (&(c), (i));                               \
      vlib_zero_simple_counter (&(c), (i));                                   \
    }                                                                         \
  while (0)
2180
/*
 * Set the name (n) and stat segment name (sn) of simple counter c, then
 * validate and zero its entry 0 through nat_validate_simple_counter.
 *
 * The expansion deliberately has no trailing semicolon: the caller supplies
 * it, so the macro behaves like a single statement (e.g. in an unbraced
 * if/else).
 */
#define nat_init_simple_counter(c, n, sn)                                     \
  do                                                                          \
    {                                                                         \
      (c).name = (n);                                                         \
      (c).stat_segment_name = (sn);                                           \
      nat_validate_simple_counter (c, 0);                                     \
    }                                                                         \
  while (0)
2189
2190 static_always_inline void
2191 nat_validate_interface_counters (snat_main_t *sm, u32 sw_if_index)
2192 {
2193 #define _(x)                                                                  \
2194   nat_validate_simple_counter (sm->counters.fastpath.in2out.x, sw_if_index);  \
2195   nat_validate_simple_counter (sm->counters.fastpath.out2in.x, sw_if_index);  \
2196   nat_validate_simple_counter (sm->counters.slowpath.in2out.x, sw_if_index);  \
2197   nat_validate_simple_counter (sm->counters.slowpath.out2in.x, sw_if_index);
2198   foreach_nat_counter;
2199 #undef _
2200   nat_validate_simple_counter (sm->counters.hairpinning, sw_if_index);
2201 }
2202
2203 static clib_error_t *
2204 nat_init (vlib_main_t * vm)
2205 {
2206   snat_main_t *sm = &snat_main;
2207   vlib_thread_main_t *tm = vlib_get_thread_main ();
2208   vlib_thread_registration_t *tr;
2209   ip4_add_del_interface_address_callback_t cbi = { 0 };
2210   ip4_table_bind_callback_t cbt = { 0 };
2211   u32 i, num_threads = 0;
2212   uword *p, *bitmap = 0;
2213
2214   clib_memset (sm, 0, sizeof (*sm));
2215
2216   // required
2217   sm->vnet_main = vnet_get_main ();
2218   // convenience
2219   sm->ip4_main = &ip4_main;
2220
2221   // frame queue indices used for handoff
2222   sm->fq_out2in_index = ~0;
2223   sm->fq_in2out_index = ~0;
2224   sm->fq_in2out_output_index = ~0;
2225
2226   sm->log_level = NAT_LOG_ERROR;
2227
2228   sm->log_class = vlib_log_register_class ("nat", 0);
2229   nat_ipfix_logging_init (vm);
2230
2231   nat_init_simple_counter (sm->total_sessions, "total-sessions",
2232                            "/nat44-ed/total-sessions");
2233   sm->max_cfg_sessions_gauge =
2234     vlib_stats_add_gauge ("/nat44-ed/max-cfg-sessions");
2235
2236 #define _(x)                                                                  \
2237   nat_init_simple_counter (sm->counters.fastpath.in2out.x, #x,                \
2238                            "/nat44-ed/in2out/fastpath/" #x);                  \
2239   nat_init_simple_counter (sm->counters.fastpath.out2in.x, #x,                \
2240                            "/nat44-ed/out2in/fastpath/" #x);                  \
2241   nat_init_simple_counter (sm->counters.slowpath.in2out.x, #x,                \
2242                            "/nat44-ed/in2out/slowpath/" #x);                  \
2243   nat_init_simple_counter (sm->counters.slowpath.out2in.x, #x,                \
2244                            "/nat44-ed/out2in/slowpath/" #x);
2245   foreach_nat_counter;
2246 #undef _
2247   nat_init_simple_counter (sm->counters.hairpinning, "hairpinning",
2248                            "/nat44-ed/hairpinning");
2249
2250   p = hash_get_mem (tm->thread_registrations_by_name, "workers");
2251   if (p)
2252     {
2253       tr = (vlib_thread_registration_t *) p[0];
2254       if (tr)
2255         {
2256           sm->num_workers = tr->count;
2257           sm->first_worker_index = tr->first_index;
2258         }
2259     }
2260   num_threads = tm->n_vlib_mains - 1;
2261   sm->port_per_thread = 0xffff - 1024;
2262   vec_validate (sm->per_thread_data, num_threads);
2263
2264   /* Use all available workers by default */
2265   if (sm->num_workers > 1)
2266     {
2267       for (i = 0; i < sm->num_workers; i++)
2268         bitmap = clib_bitmap_set (bitmap, i, 1);
2269       snat_set_workers (bitmap);
2270       clib_bitmap_free (bitmap);
2271     }
2272   else
2273     {
2274       sm->per_thread_data[0].snat_thread_index = 0;
2275     }
2276
2277   /* callbacks to call when interface address changes. */
2278   cbi.function = nat44_ed_add_del_interface_address_cb;
2279   vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
2280   cbi.function = nat44_ed_add_del_static_mapping_cb;
2281   vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
2282
2283   /* callbacks to call when interface to table biding changes */
2284   cbt.function = nat44_ed_update_outside_fib_cb;
2285   vec_add1 (sm->ip4_main->table_bind_callbacks, cbt);
2286
2287   sm->fib_src_low =
2288     fib_source_allocate ("nat-low", FIB_SOURCE_PRIORITY_LOW,
2289                          FIB_SOURCE_BH_SIMPLE);
2290   sm->fib_src_hi =
2291     fib_source_allocate ("nat-hi", FIB_SOURCE_PRIORITY_HI,
2292                          FIB_SOURCE_BH_SIMPLE);
2293
2294   nat_affinity_init (vm);
2295   test_key_calc_split ();
2296
2297   return nat44_api_hookup (vm);
2298 }
2299
2300 VLIB_INIT_FUNCTION (nat_init);
2301
2302 int
2303 nat44_plugin_enable (nat44_config_t c)
2304 {
2305   snat_main_t *sm = &snat_main;
2306
2307   fail_if_enabled ();
2308
2309   sm->forwarding_enabled = 0;
2310   sm->mss_clamping = 0;
2311
2312   if (!c.sessions)
2313     c.sessions = 63 * 1024;
2314
2315   sm->max_translations_per_thread = c.sessions;
2316   vlib_stats_set_gauge (sm->max_cfg_sessions_gauge,
2317                         sm->max_translations_per_thread);
2318   sm->translation_buckets = nat_calc_bihash_buckets (c.sessions);
2319
2320   vec_add1 (sm->max_translations_per_fib, sm->max_translations_per_thread);
2321
2322   sm->inside_vrf_id = c.inside_vrf;
2323   sm->inside_fib_index =
2324     fib_table_find_or_create_and_lock
2325     (FIB_PROTOCOL_IP4, c.inside_vrf, sm->fib_src_hi);
2326
2327   sm->outside_vrf_id = c.outside_vrf;
2328   sm->outside_fib_index = fib_table_find_or_create_and_lock (
2329     FIB_PROTOCOL_IP4, c.outside_vrf, sm->fib_src_hi);
2330
2331   nat44_ed_db_init (sm->max_translations_per_thread, sm->translation_buckets);
2332
2333   nat44_ed_init_tcp_state_stable (sm);
2334
2335   nat_affinity_enable ();
2336
2337   nat_reset_timeouts (&sm->timeouts);
2338
2339   vlib_zero_simple_counter (&sm->total_sessions, 0);
2340
2341   if (!sm->frame_queue_nelts)
2342     {
2343       sm->frame_queue_nelts = NAT_FQ_NELTS_DEFAULT;
2344     }
2345
2346   if (sm->num_workers > 1)
2347     {
2348       vlib_main_t *vm = vlib_get_main ();
2349       vlib_node_t *node;
2350
2351       if (sm->fq_in2out_index == ~0)
2352         {
2353           node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
2354           sm->fq_in2out_index =
2355             vlib_frame_queue_main_init (node->index, sm->frame_queue_nelts);
2356         }
2357       if (sm->fq_out2in_index == ~0)
2358         {
2359           node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
2360           sm->fq_out2in_index =
2361             vlib_frame_queue_main_init (node->index, sm->frame_queue_nelts);
2362         }
2363       if (sm->fq_in2out_output_index == ~0)
2364         {
2365           node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-output");
2366           sm->fq_in2out_output_index =
2367             vlib_frame_queue_main_init (node->index, sm->frame_queue_nelts);
2368         }
2369     }
2370
2371   sm->enabled = 1;
2372   sm->rconfig = c;
2373
2374   return 0;
2375 }
2376
2377 int
2378 nat44_ed_del_addresses ()
2379 {
2380   snat_main_t *sm = &snat_main;
2381   snat_address_t *a, *vec;
2382   int error = 0;
2383
2384   vec = vec_dup (sm->addresses);
2385   vec_foreach (a, vec)
2386     {
2387       error = nat44_ed_del_address (a->addr, 0);
2388       if (error)
2389         {
2390           nat_log_err ("error occurred while removing adderess");
2391         }
2392     }
2393   vec_free (vec);
2394   vec_free (sm->addresses);
2395   sm->addresses = 0;
2396
2397   vec = vec_dup (sm->twice_nat_addresses);
2398   vec_foreach (a, vec)
2399     {
2400       error = nat44_ed_del_address (a->addr, 1);
2401       if (error)
2402         {
2403           nat_log_err ("error occurred while removing adderess");
2404         }
2405     }
2406   vec_free (vec);
2407   vec_free (sm->twice_nat_addresses);
2408   sm->twice_nat_addresses = 0;
2409
2410   vec_free (sm->addr_to_resolve);
2411   sm->addr_to_resolve = 0;
2412
2413   return error;
2414 }
2415
2416 int
2417 nat44_ed_del_interfaces ()
2418 {
2419   snat_main_t *sm = &snat_main;
2420   snat_interface_t *i, *pool;
2421   int error = 0;
2422
2423   pool = pool_dup (sm->interfaces);
2424   pool_foreach (i, pool)
2425     {
2426       if (nat44_ed_is_interface_inside (i))
2427         {
2428           error = nat44_ed_del_interface (i->sw_if_index, 1);
2429         }
2430       if (nat44_ed_is_interface_outside (i))
2431         {
2432           error = nat44_ed_del_interface (i->sw_if_index, 0);
2433         }
2434
2435       if (error)
2436         {
2437           nat_log_err ("error occurred while removing interface");
2438         }
2439     }
2440   pool_free (pool);
2441   pool_free (sm->interfaces);
2442   sm->interfaces = 0;
2443   return error;
2444 }
2445
2446 int
2447 nat44_ed_del_output_interfaces ()
2448 {
2449   snat_main_t *sm = &snat_main;
2450   snat_interface_t *i, *pool;
2451   int error = 0;
2452
2453   pool = pool_dup (sm->output_feature_interfaces);
2454   pool_foreach (i, pool)
2455     {
2456       error = nat44_ed_del_output_interface (i->sw_if_index);
2457       if (error)
2458         {
2459           nat_log_err ("error occurred while removing output interface");
2460         }
2461     }
2462   pool_free (pool);
2463   pool_free (sm->output_feature_interfaces);
2464   sm->output_feature_interfaces = 0;
2465   return error;
2466 }
2467
2468 int
2469 nat44_ed_del_static_mappings ()
2470 {
2471   snat_main_t *sm = &snat_main;
2472   snat_static_mapping_t *m, *pool;
2473   int error = 0;
2474
2475   pool = pool_dup (sm->static_mappings);
2476   pool_foreach (m, pool)
2477     {
2478       error = nat44_ed_del_static_mapping_internal (
2479         m->local_addr, m->external_addr, m->local_port, m->external_port,
2480         m->proto, m->vrf_id, m->flags);
2481       if (error)
2482         {
2483           nat_log_err ("error occurred while removing mapping");
2484         }
2485     }
2486   pool_free (pool);
2487   pool_free (sm->static_mappings);
2488   sm->static_mappings = 0;
2489
2490   vec_free (sm->sm_to_resolve);
2491   sm->sm_to_resolve = 0;
2492
2493   return error;
2494 }
2495
2496 int
2497 nat44_plugin_disable ()
2498 {
2499   snat_main_per_thread_data_t *tsm;
2500   snat_main_t *sm = &snat_main;
2501   int rc, error = 0;
2502
2503   fail_if_disabled ();
2504
2505   rc = nat44_ed_del_static_mappings ();
2506   if (rc)
2507     error = 1;
2508
2509   rc = nat44_ed_del_addresses ();
2510   if (rc)
2511     error = 1;
2512
2513   rc = nat44_ed_del_interfaces ();
2514   if (rc)
2515     error = 1;
2516
2517   rc = nat44_ed_del_output_interfaces ();
2518   if (rc)
2519     error = 1;
2520
2521   vec_free (sm->max_translations_per_fib);
2522   sm->max_translations_per_fib = 0;
2523
2524   clib_bihash_free_16_8 (&sm->flow_hash);
2525
2526   vec_foreach (tsm, sm->per_thread_data)
2527     {
2528       nat44_ed_worker_db_free (tsm);
2529     }
2530
2531   clib_memset (&sm->rconfig, 0, sizeof (sm->rconfig));
2532
2533   nat_affinity_disable ();
2534
2535   sm->forwarding_enabled = 0;
2536   sm->enabled = 0;
2537
2538   return error;
2539 }
2540
2541 void
2542 nat44_ed_forwarding_enable_disable (u8 is_enable)
2543 {
2544   snat_main_per_thread_data_t *tsm;
2545   snat_main_t *sm = &snat_main;
2546   snat_session_t *s;
2547
2548   u32 *ses_to_be_removed = 0, *ses_index;
2549
2550   sm->forwarding_enabled = is_enable != 0;
2551
2552   if (!sm->enabled || is_enable)
2553     {
2554       return;
2555     }
2556
2557   vec_foreach (tsm, sm->per_thread_data)
2558     {
2559       pool_foreach (s, tsm->sessions)
2560         {
2561           if (na44_ed_is_fwd_bypass_session (s))
2562             {
2563               vec_add1 (ses_to_be_removed, s - tsm->sessions);
2564             }
2565         }
2566       vec_foreach (ses_index, ses_to_be_removed)
2567         {
2568           s = pool_elt_at_index (tsm->sessions, ses_index[0]);
2569           nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
2570           nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
2571         }
2572
2573       vec_free (ses_to_be_removed);
2574     }
2575 }
2576
2577 static_always_inline snat_static_mapping_t *
2578 nat44_ed_sm_match (snat_main_t *sm, ip4_address_t match_addr, u16 match_port,
2579                    u32 match_fib_index, ip_protocol_t match_protocol,
2580                    int by_external)
2581 {
2582   snat_static_mapping_t *m;
2583   if (!by_external)
2584     {
2585       m = nat44_ed_sm_i2o_lookup (sm, match_addr, match_port, match_fib_index,
2586                                   match_protocol);
2587       if (m)
2588         return m;
2589
2590       /* Try address only mapping */
2591       m = nat44_ed_sm_i2o_lookup (sm, match_addr, 0, match_fib_index, 0);
2592       if (m)
2593         return m;
2594
2595       if (sm->inside_fib_index != match_fib_index)
2596         {
2597           m = nat44_ed_sm_i2o_lookup (sm, match_addr, match_port,
2598                                       sm->inside_fib_index, match_protocol);
2599           if (m)
2600             return m;
2601
2602           /* Try address only mapping */
2603           m = nat44_ed_sm_i2o_lookup (sm, match_addr, 0, sm->inside_fib_index,
2604                                       0);
2605           if (m)
2606             return m;
2607         }
2608       if (sm->outside_fib_index != match_fib_index)
2609         {
2610           m = nat44_ed_sm_i2o_lookup (sm, match_addr, match_port,
2611                                       sm->outside_fib_index, match_protocol);
2612           if (m)
2613             return m;
2614
2615           /* Try address only mapping */
2616           m = nat44_ed_sm_i2o_lookup (sm, match_addr, 0, sm->outside_fib_index,
2617                                       0);
2618           if (m)
2619             return m;
2620         }
2621     }
2622   else
2623     {
2624       m =
2625         nat44_ed_sm_o2i_lookup (sm, match_addr, match_port, 0, match_protocol);
2626       if (m)
2627         return m;
2628
2629       /* Try address only mapping */
2630       m = nat44_ed_sm_o2i_lookup (sm, match_addr, 0, 0, 0);
2631       if (m)
2632         return m;
2633     }
2634   return 0;
2635 }
2636
2637 int
2638 snat_static_mapping_match (vlib_main_t *vm, snat_main_t *sm,
2639                            ip4_address_t match_addr, u16 match_port,
2640                            u32 match_fib_index, ip_protocol_t match_protocol,
2641                            ip4_address_t *mapping_addr, u16 *mapping_port,
2642                            u32 *mapping_fib_index, int by_external,
2643                            u8 *is_addr_only, twice_nat_type_t *twice_nat,
2644                            lb_nat_type_t *lb, ip4_address_t *ext_host_addr,
2645                            u8 *is_identity_nat, snat_static_mapping_t **out)
2646 {
2647   snat_static_mapping_t *m;
2648   u32 rand, lo = 0, hi, mid, *tmp = 0, i;
2649   nat44_lb_addr_port_t *local;
2650   u8 backend_index;
2651
2652   m = nat44_ed_sm_match (sm, match_addr, match_port, match_fib_index,
2653                          match_protocol, by_external);
2654   if (!m)
2655     {
2656       return 1;
2657     }
2658
2659   if (by_external)
2660     {
2661       if (is_sm_lb (m->flags))
2662         {
2663           if (PREDICT_FALSE (lb != 0))
2664             *lb = m->affinity ? AFFINITY_LB_NAT : LB_NAT;
2665           if (m->affinity && !nat_affinity_find_and_lock (
2666                                vm, ext_host_addr[0], match_addr,
2667                                match_protocol, match_port, &backend_index))
2668             {
2669               local = pool_elt_at_index (m->locals, backend_index);
2670               *mapping_addr = local->addr;
2671               *mapping_port = local->port;
2672               *mapping_fib_index = local->fib_index;
2673               goto end;
2674             }
2675           // pick locals matching this worker
2676           if (PREDICT_FALSE (sm->num_workers > 1))
2677             {
2678               u32 thread_index = vlib_get_thread_index ();
2679               pool_foreach_index (i, m->locals)
2680                {
2681                 local = pool_elt_at_index (m->locals, i);
2682
2683                 ip4_header_t ip = {
2684                   .src_address = local->addr,
2685                 };
2686
2687                 if (nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index,
2688                                                       0) == thread_index)
2689                   {
2690                     vec_add1 (tmp, i);
2691                   }
2692                }
2693               ASSERT (vec_len (tmp) != 0);
2694             }
2695           else
2696             {
2697               pool_foreach_index (i, m->locals)
2698                {
2699                 vec_add1 (tmp, i);
2700               }
2701             }
2702           hi = vec_len (tmp) - 1;
2703           local = pool_elt_at_index (m->locals, tmp[hi]);
2704           rand = 1 + (random_u32 (&sm->random_seed) % local->prefix);
2705           while (lo < hi)
2706             {
2707               mid = ((hi - lo) >> 1) + lo;
2708               local = pool_elt_at_index (m->locals, tmp[mid]);
2709               (rand > local->prefix) ? (lo = mid + 1) : (hi = mid);
2710             }
2711           local = pool_elt_at_index (m->locals, tmp[lo]);
2712           if (!(local->prefix >= rand))
2713             return 1;
2714           *mapping_addr = local->addr;
2715           *mapping_port = local->port;
2716           *mapping_fib_index = local->fib_index;
2717           if (m->affinity)
2718             {
2719               if (nat_affinity_create_and_lock (ext_host_addr[0], match_addr,
2720                                                 match_protocol, match_port,
2721                                                 tmp[lo], m->affinity,
2722                                                 m->affinity_per_service_list_head_index))
2723                 nat_elog_info (sm, "create affinity record failed");
2724             }
2725           vec_free (tmp);
2726         }
2727       else
2728         {
2729           if (PREDICT_FALSE (lb != 0))
2730             *lb = NO_LB_NAT;
2731           *mapping_fib_index = m->fib_index;
2732           *mapping_addr = m->local_addr;
2733           /* Address only mapping doesn't change port */
2734           *mapping_port =
2735             is_sm_addr_only (m->flags) ? match_port : m->local_port;
2736         }
2737     }
2738   else
2739     {
2740       *mapping_addr = m->external_addr;
2741       /* Address only mapping doesn't change port */
2742       *mapping_port =
2743         is_sm_addr_only (m->flags) ? match_port : m->external_port;
2744       *mapping_fib_index = sm->outside_fib_index;
2745     }
2746
2747 end:
2748   if (PREDICT_FALSE (is_addr_only != 0))
2749     *is_addr_only = is_sm_addr_only (m->flags);
2750
2751   if (PREDICT_FALSE (twice_nat != 0))
2752     {
2753       *twice_nat = TWICE_NAT_DISABLED;
2754
2755       if (is_sm_twice_nat (m->flags))
2756         {
2757           *twice_nat = TWICE_NAT;
2758         }
2759       else if (is_sm_self_twice_nat (m->flags))
2760         {
2761           *twice_nat = TWICE_NAT_SELF;
2762         }
2763     }
2764
2765   if (PREDICT_FALSE (is_identity_nat != 0))
2766     *is_identity_nat = is_sm_identity_nat (m->flags);
2767
2768   if (out != 0)
2769     *out = m;
2770
2771   return 0;
2772 }
2773
2774 u32
2775 nat44_ed_get_in2out_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
2776                                   u32 rx_fib_index, u8 is_output)
2777 {
2778   snat_main_t *sm = &snat_main;
2779   u32 next_worker_index = sm->first_worker_index;
2780   u32 hash;
2781
2782   clib_bihash_kv_16_8_t kv16, value16;
2783
2784   u32 fib_index = rx_fib_index;
2785   if (b)
2786     {
2787       if (PREDICT_FALSE (is_output))
2788         {
2789           fib_index = sm->outside_fib_index;
2790           nat_outside_fib_t *outside_fib;
2791           fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
2792           fib_prefix_t pfx = {
2793                   .fp_proto = FIB_PROTOCOL_IP4,
2794                   .fp_len = 32,
2795                   .fp_addr = {
2796                           .ip4.as_u32 = ip->dst_address.as_u32,
2797                   } ,
2798           };
2799
2800           switch (vec_len (sm->outside_fibs))
2801             {
2802             case 0:
2803               fib_index = sm->outside_fib_index;
2804               break;
2805             case 1:
2806               fib_index = sm->outside_fibs[0].fib_index;
2807               break;
2808             default:
2809               vec_foreach (outside_fib, sm->outside_fibs)
2810                 {
2811                   fei = fib_table_lookup (outside_fib->fib_index, &pfx);
2812                   if (FIB_NODE_INDEX_INVALID != fei)
2813                     {
2814                       if (fib_entry_get_resolving_interface (fei) != ~0)
2815                         {
2816                           fib_index = outside_fib->fib_index;
2817                           break;
2818                         }
2819                     }
2820                 }
2821               break;
2822             }
2823         }
2824
2825       if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
2826         {
2827           ip4_address_t lookup_saddr, lookup_daddr;
2828           u16 lookup_sport, lookup_dport;
2829           u8 lookup_protocol;
2830
2831           if (!nat_get_icmp_session_lookup_values (
2832                 b, ip, &lookup_saddr, &lookup_sport, &lookup_daddr,
2833                 &lookup_dport, &lookup_protocol))
2834             {
2835               init_ed_k (&kv16, lookup_saddr.as_u32, lookup_sport,
2836                          lookup_daddr.as_u32, lookup_dport, rx_fib_index,
2837                          lookup_protocol);
2838               if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
2839                 {
2840                   next_worker_index = ed_value_get_thread_index (&value16);
2841                   vnet_buffer2 (b)->nat.cached_session_index =
2842                     ed_value_get_session_index (&value16);
2843                   goto out;
2844                 }
2845             }
2846         }
2847
2848       init_ed_k (&kv16, ip->src_address.as_u32,
2849                  vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
2850                  vnet_buffer (b)->ip.reass.l4_dst_port, fib_index,
2851                  ip->protocol);
2852
2853       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
2854         {
2855           next_worker_index = ed_value_get_thread_index (&value16);
2856           vnet_buffer2 (b)->nat.cached_session_index =
2857             ed_value_get_session_index (&value16);
2858           goto out;
2859         }
2860
2861       // dst NAT
2862       init_ed_k (&kv16, ip->dst_address.as_u32,
2863                  vnet_buffer (b)->ip.reass.l4_dst_port, ip->src_address.as_u32,
2864                  vnet_buffer (b)->ip.reass.l4_src_port, rx_fib_index,
2865                  ip->protocol);
2866       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
2867         {
2868           next_worker_index = ed_value_get_thread_index (&value16);
2869           vnet_buffer2 (b)->nat.cached_dst_nat_session_index =
2870             ed_value_get_session_index (&value16);
2871           goto out;
2872         }
2873     }
2874
2875   hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
2876     (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
2877
2878   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
2879     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
2880   else
2881     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
2882
2883 out:
2884   if (PREDICT_TRUE (!is_output))
2885     {
2886       nat_elog_debug_handoff (sm, "HANDOFF IN2OUT", next_worker_index,
2887                               rx_fib_index,
2888                               clib_net_to_host_u32 (ip->src_address.as_u32),
2889                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2890     }
2891   else
2892     {
2893       nat_elog_debug_handoff (sm, "HANDOFF IN2OUT-OUTPUT-FEATURE",
2894                               next_worker_index, rx_fib_index,
2895                               clib_net_to_host_u32 (ip->src_address.as_u32),
2896                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2897     }
2898
2899   return next_worker_index;
2900 }
2901
2902 u32
2903 nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
2904                                   u32 rx_fib_index, u8 is_output)
2905 {
2906   snat_main_t *sm = &snat_main;
2907   clib_bihash_kv_16_8_t kv16, value16;
2908
2909   u8 proto, next_worker_index = 0;
2910   u16 port;
2911   snat_static_mapping_t *m;
2912   u32 hash;
2913
2914   proto = ip->protocol;
2915
2916   if (PREDICT_FALSE (IP_PROTOCOL_ICMP == proto))
2917     {
2918       ip4_address_t lookup_saddr, lookup_daddr;
2919       u16 lookup_sport, lookup_dport;
2920       u8 lookup_protocol;
2921       if (!nat_get_icmp_session_lookup_values (
2922             b, ip, &lookup_saddr, &lookup_sport, &lookup_daddr, &lookup_dport,
2923             &lookup_protocol))
2924         {
2925           init_ed_k (&kv16, lookup_saddr.as_u32, lookup_sport,
2926                      lookup_daddr.as_u32, lookup_dport, rx_fib_index,
2927                      lookup_protocol);
2928           if (PREDICT_TRUE (
2929                 !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
2930             {
2931               next_worker_index = ed_value_get_thread_index (&value16);
2932               nat_elog_debug_handoff (
2933                 sm, "HANDOFF OUT2IN (session)", next_worker_index,
2934                 rx_fib_index, clib_net_to_host_u32 (ip->src_address.as_u32),
2935                 clib_net_to_host_u32 (ip->dst_address.as_u32));
2936               return next_worker_index;
2937             }
2938         }
2939     }
2940
2941   init_ed_k (&kv16, ip->src_address.as_u32,
2942              vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
2943              vnet_buffer (b)->ip.reass.l4_dst_port, rx_fib_index,
2944              ip->protocol);
2945
2946   if (PREDICT_TRUE (
2947         !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
2948     {
2949       vnet_buffer2 (b)->nat.cached_session_index =
2950         ed_value_get_session_index (&value16);
2951       next_worker_index = ed_value_get_thread_index (&value16);
2952       nat_elog_debug_handoff (sm, "HANDOFF OUT2IN (session)",
2953                               next_worker_index, rx_fib_index,
2954                               clib_net_to_host_u32 (ip->src_address.as_u32),
2955                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2956       return next_worker_index;
2957     }
2958
2959   /* first try static mappings without port */
2960   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
2961     {
2962       m = nat44_ed_sm_o2i_lookup (sm, ip->dst_address, 0, 0, proto);
2963       if (m)
2964         {
2965           {
2966             next_worker_index = m->workers[0];
2967             goto done;
2968           }
2969         }
2970     }
2971
2972   /* unknown protocol */
2973   if (PREDICT_FALSE (nat44_ed_is_unk_proto (proto)))
2974     {
2975       /* use current thread */
2976       next_worker_index = vlib_get_thread_index ();
2977       goto done;
2978     }
2979
2980   port = vnet_buffer (b)->ip.reass.l4_dst_port;
2981
2982   if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
2983     {
2984       udp_header_t *udp = ip4_next_header (ip);
2985       icmp46_header_t *icmp = (icmp46_header_t *) udp;
2986       nat_icmp_echo_header_t *echo = (nat_icmp_echo_header_t *) (icmp + 1);
2987       if (!icmp_type_is_error_message
2988           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
2989         port = vnet_buffer (b)->ip.reass.l4_src_port;
2990       else
2991         {
2992           /* if error message, then it's not fragmented and we can access it */
2993           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
2994           proto = inner_ip->protocol;
2995           void *l4_header = ip4_next_header (inner_ip);
2996           switch (proto)
2997             {
2998             case IP_PROTOCOL_ICMP:
2999               icmp = (icmp46_header_t *) l4_header;
3000               echo = (nat_icmp_echo_header_t *) (icmp + 1);
3001               port = echo->identifier;
3002               break;
3003             case IP_PROTOCOL_UDP:
3004               /* breakthrough */
3005             case IP_PROTOCOL_TCP:
3006               port = ((nat_tcp_udp_header_t *) l4_header)->src_port;
3007               break;
3008             default:
3009               next_worker_index = vlib_get_thread_index ();
3010               goto done;
3011             }
3012         }
3013     }
3014
3015   /* try static mappings with port */
3016   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3017     {
3018       m = nat44_ed_sm_o2i_lookup (sm, ip->dst_address, port, 0, proto);
3019       if (m)
3020         {
3021           if (!is_sm_lb (m->flags))
3022             {
3023               next_worker_index = m->workers[0];
3024               goto done;
3025             }
3026
3027           hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
3028             (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
3029
3030           if (PREDICT_TRUE (is_pow2 (_vec_len (m->workers))))
3031             next_worker_index =
3032               m->workers[hash & (_vec_len (m->workers) - 1)];
3033           else
3034             next_worker_index = m->workers[hash % _vec_len (m->workers)];
3035           goto done;
3036         }
3037     }
3038
3039   /* worker by outside port */
3040   next_worker_index = sm->first_worker_index;
3041   next_worker_index +=
3042     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
3043
3044 done:
3045   nat_elog_debug_handoff (sm, "HANDOFF OUT2IN", next_worker_index,
3046                           rx_fib_index,
3047                           clib_net_to_host_u32 (ip->src_address.as_u32),
3048                           clib_net_to_host_u32 (ip->dst_address.as_u32));
3049   return next_worker_index;
3050 }
3051
3052 u32
3053 nat44_get_max_session_limit ()
3054 {
3055   snat_main_t *sm = &snat_main;
3056   u32 max_limit = 0, len = 0;
3057
3058   for (; len < vec_len (sm->max_translations_per_fib); len++)
3059     {
3060       if (max_limit < sm->max_translations_per_fib[len])
3061         max_limit = sm->max_translations_per_fib[len];
3062     }
3063   return max_limit;
3064 }
3065
3066 int
3067 nat44_set_session_limit (u32 session_limit, u32 vrf_id)
3068 {
3069   snat_main_t *sm = &snat_main;
3070   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
3071   u32 len = vec_len (sm->max_translations_per_fib);
3072
3073   if (len <= fib_index)
3074     {
3075       vec_validate (sm->max_translations_per_fib, fib_index + 1);
3076
3077       for (; len < vec_len (sm->max_translations_per_fib); len++)
3078         sm->max_translations_per_fib[len] = sm->max_translations_per_thread;
3079     }
3080
3081   sm->max_translations_per_fib[fib_index] = session_limit;
3082   return 0;
3083 }
3084
3085 int
3086 nat44_update_session_limit (u32 session_limit, u32 vrf_id)
3087 {
3088   snat_main_t *sm = &snat_main;
3089
3090   if (nat44_set_session_limit (session_limit, vrf_id))
3091     return 1;
3092   sm->max_translations_per_thread = nat44_get_max_session_limit ();
3093
3094   vlib_stats_set_gauge (sm->max_cfg_sessions_gauge,
3095                         sm->max_translations_per_thread);
3096
3097   sm->translation_buckets =
3098     nat_calc_bihash_buckets (sm->max_translations_per_thread);
3099
3100   nat44_ed_sessions_clear ();
3101   return 0;
3102 }
3103
3104 static void
3105 nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations,
3106                          u32 translation_buckets)
3107 {
3108   dlist_elt_t *head;
3109
3110   pool_alloc (tsm->sessions, translations);
3111   pool_alloc (tsm->lru_pool, translations);
3112
3113   pool_get (tsm->lru_pool, head);
3114   tsm->tcp_trans_lru_head_index = head - tsm->lru_pool;
3115   clib_dlist_init (tsm->lru_pool, tsm->tcp_trans_lru_head_index);
3116
3117   pool_get (tsm->lru_pool, head);
3118   tsm->tcp_estab_lru_head_index = head - tsm->lru_pool;
3119   clib_dlist_init (tsm->lru_pool, tsm->tcp_estab_lru_head_index);
3120
3121   pool_get (tsm->lru_pool, head);
3122   tsm->udp_lru_head_index = head - tsm->lru_pool;
3123   clib_dlist_init (tsm->lru_pool, tsm->udp_lru_head_index);
3124
3125   pool_get (tsm->lru_pool, head);
3126   tsm->icmp_lru_head_index = head - tsm->lru_pool;
3127   clib_dlist_init (tsm->lru_pool, tsm->icmp_lru_head_index);
3128
3129   pool_get (tsm->lru_pool, head);
3130   tsm->unk_proto_lru_head_index = head - tsm->lru_pool;
3131   clib_dlist_init (tsm->lru_pool, tsm->unk_proto_lru_head_index);
3132 }
3133
3134 static void
3135 reinit_ed_flow_hash ()
3136 {
3137   snat_main_t *sm = &snat_main;
3138   // we expect 2 flows per session, so multiply translation_buckets by 2
3139   clib_bihash_init_16_8 (
3140     &sm->flow_hash, "ed-flow-hash",
3141     clib_max (1, sm->num_workers) * 2 * sm->translation_buckets, 0);
3142   clib_bihash_set_kvp_format_fn_16_8 (&sm->flow_hash, format_ed_session_kvp);
3143 }
3144
3145 static void
3146 nat44_ed_db_init (u32 translations, u32 translation_buckets)
3147 {
3148   snat_main_t *sm = &snat_main;
3149   snat_main_per_thread_data_t *tsm;
3150
3151   reinit_ed_flow_hash ();
3152
3153   vec_foreach (tsm, sm->per_thread_data)
3154     {
3155       nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
3156                                sm->translation_buckets);
3157     }
3158 }
3159
3160 static void
3161 nat44_ed_worker_db_free (snat_main_per_thread_data_t *tsm)
3162 {
3163   pool_free (tsm->lru_pool);
3164   pool_free (tsm->sessions);
3165   vec_free (tsm->per_vrf_sessions_vec);
3166 }
3167
3168 void
3169 nat44_ed_sessions_clear ()
3170 {
3171   snat_main_t *sm = &snat_main;
3172   snat_main_per_thread_data_t *tsm;
3173
3174   reinit_ed_flow_hash ();
3175
3176   vec_foreach (tsm, sm->per_thread_data)
3177     {
3178       nat44_ed_worker_db_free (tsm);
3179       nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
3180                                sm->translation_buckets);
3181     }
3182   vlib_zero_simple_counter (&sm->total_sessions, 0);
3183 }
3184
3185 static void
3186 nat44_ed_add_del_static_mapping_cb (ip4_main_t *im, uword opaque,
3187                                     u32 sw_if_index, ip4_address_t *address,
3188                                     u32 address_length, u32 if_address_index,
3189                                     u32 is_delete)
3190 {
3191   snat_static_mapping_resolve_t *rp;
3192   snat_main_t *sm = &snat_main;
3193   int rv = 0;
3194
3195   if (!sm->enabled)
3196     {
3197       return;
3198     }
3199
3200   vec_foreach (rp, sm->sm_to_resolve)
3201     {
3202       if (sw_if_index == rp->sw_if_index)
3203         {
3204           if (is_delete)
3205             {
3206               if (rp->is_resolved)
3207                 {
3208                   rv = nat44_ed_del_static_mapping_internal (
3209                     rp->l_addr, address[0], rp->l_port, rp->e_port, rp->proto,
3210                     rp->vrf_id, rp->flags);
3211                   if (rv)
3212                     {
3213                       nat_log_err ("ed del static mapping failed");
3214                     }
3215                   else
3216                     {
3217                       rp->is_resolved = 0;
3218                     }
3219                 }
3220             }
3221           else
3222             {
3223               if (!rp->is_resolved)
3224                 {
3225                   rv = nat44_ed_add_static_mapping_internal (
3226                     rp->l_addr, address[0], rp->l_port, rp->e_port, rp->proto,
3227                     rp->vrf_id, ~0, rp->flags, rp->pool_addr, rp->tag);
3228                   if (rv)
3229                     {
3230                       nat_log_err ("ed add static mapping failed");
3231                     }
3232                   else
3233                     {
3234                       rp->is_resolved = 1;
3235                     }
3236                 }
3237             }
3238         }
3239     }
3240 }
3241
3242 static int
3243 nat44_ed_get_addr_resolve_record (u32 sw_if_index, u8 twice_nat, int *out)
3244 {
3245   snat_main_t *sm = &snat_main;
3246   snat_address_resolve_t *rp;
3247   int i;
3248
3249   for (i = 0; i < vec_len (sm->addr_to_resolve); i++)
3250     {
3251       rp = sm->addr_to_resolve + i;
3252
3253       if ((rp->sw_if_index == sw_if_index) && (rp->is_twice_nat == twice_nat))
3254         {
3255           if (out)
3256             {
3257               *out = i;
3258             }
3259           return 0;
3260         }
3261     }
3262   return 1;
3263 }
3264 static int
3265 nat44_ed_del_addr_resolve_record (u32 sw_if_index, u8 twice_nat)
3266 {
3267   snat_main_t *sm = &snat_main;
3268   int i;
3269   if (!nat44_ed_get_addr_resolve_record (sw_if_index, twice_nat, &i))
3270     {
3271       vec_del1 (sm->addr_to_resolve, i);
3272       return 0;
3273     }
3274   return 1;
3275 }
3276
3277 static void
3278 nat44_ed_add_del_interface_address_cb (ip4_main_t *im, uword opaque,
3279                                        u32 sw_if_index, ip4_address_t *address,
3280                                        u32 address_length,
3281                                        u32 if_address_index, u32 is_delete)
3282 {
3283   snat_main_t *sm = &snat_main;
3284   snat_address_resolve_t *arp;
3285   snat_address_t *ap;
3286   u8 twice_nat = 0;
3287   int i, rv;
3288
3289   if (!sm->enabled)
3290     {
3291       return;
3292     }
3293
3294   if (nat44_ed_get_addr_resolve_record (sw_if_index, twice_nat, &i))
3295     {
3296       twice_nat = 1;
3297       if (nat44_ed_get_addr_resolve_record (sw_if_index, twice_nat, &i))
3298         {
3299           u32 fib_index =
3300             ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
3301           vec_foreach (ap, sm->addresses)
3302             {
3303               if ((fib_index == ap->fib_index) &&
3304                   (address->as_u32 == ap->addr.as_u32))
3305                 {
3306                   if (!is_delete)
3307                     {
3308                       ap->addr_len = address_length;
3309                       ap->sw_if_index = sw_if_index;
3310                       ap->net.as_u32 = (ap->addr.as_u32 >> (32 - ap->addr_len))
3311                                        << (32 - ap->addr_len);
3312
3313                       nat_log_debug (
3314                         "pool addr %U binds to -> sw_if_idx: %u net: %U/%u",
3315                         format_ip4_address, &ap->addr, ap->sw_if_index,
3316                         format_ip4_address, &ap->net, ap->addr_len);
3317                     }
3318                   else
3319                     {
3320                       ap->addr_len = ~0;
3321                     }
3322                   break;
3323                 }
3324             }
3325           return;
3326         }
3327     }
3328
3329   arp = sm->addr_to_resolve + i;
3330
3331   if (!is_delete)
3332     {
3333       if (arp->is_resolved)
3334         {
3335           return;
3336         }
3337
3338       rv = nat44_ed_add_address (address, ~0, arp->is_twice_nat);
3339       if (0 == rv)
3340         {
3341           arp->is_resolved = 1;
3342         }
3343     }
3344   else
3345     {
3346       if (!arp->is_resolved)
3347         {
3348           return;
3349         }
3350
3351       rv = nat44_ed_del_address (address[0], arp->is_twice_nat);
3352       if (0 == rv)
3353         {
3354           arp->is_resolved = 0;
3355         }
3356     }
3357 }
3358
3359 int
3360 nat44_ed_add_interface_address (u32 sw_if_index, u8 twice_nat)
3361 {
3362   snat_main_t *sm = &snat_main;
3363   ip4_main_t *ip4_main = sm->ip4_main;
3364   ip4_address_t *first_int_addr;
3365   snat_address_resolve_t *ap;
3366   int rv;
3367
3368   if (!sm->enabled)
3369     {
3370       return VNET_API_ERROR_UNSUPPORTED;
3371     }
3372
3373   if (!nat44_ed_get_addr_resolve_record (sw_if_index, twice_nat, 0))
3374     {
3375       return VNET_API_ERROR_VALUE_EXIST;
3376     }
3377
3378   vec_add2 (sm->addr_to_resolve, ap, 1);
3379   ap->sw_if_index = sw_if_index;
3380   ap->is_twice_nat = twice_nat;
3381   ap->is_resolved = 0;
3382
3383   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0);
3384   if (first_int_addr)
3385     {
3386       rv = nat44_ed_add_address (first_int_addr, ~0, twice_nat);
3387       if (0 != rv)
3388         {
3389           nat44_ed_del_addr_resolve_record (sw_if_index, twice_nat);
3390           return rv;
3391         }
3392       ap->is_resolved = 1;
3393     }
3394
3395   return 0;
3396 }
3397
3398 int
3399 nat44_ed_del_interface_address (u32 sw_if_index, u8 twice_nat)
3400 {
3401   snat_main_t *sm = &snat_main;
3402   ip4_main_t *ip4_main = sm->ip4_main;
3403   ip4_address_t *first_int_addr;
3404
3405   if (!sm->enabled)
3406     {
3407       return VNET_API_ERROR_UNSUPPORTED;
3408     }
3409
3410   if (nat44_ed_del_addr_resolve_record (sw_if_index, twice_nat))
3411     {
3412       return VNET_API_ERROR_NO_SUCH_ENTRY;
3413     }
3414
3415   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0);
3416   if (first_int_addr)
3417     {
3418       return nat44_ed_del_address (first_int_addr[0], twice_nat);
3419     }
3420
3421   return 0;
3422 }
3423
3424 int
3425 nat44_ed_del_session (snat_main_t *sm, ip4_address_t *addr, u16 port,
3426                       ip4_address_t *eh_addr, u16 eh_port, u8 proto,
3427                       u32 vrf_id, int is_in)
3428 {
3429   ip4_header_t ip;
3430   clib_bihash_kv_16_8_t kv, value;
3431   u32 fib_index;
3432   snat_session_t *s;
3433   snat_main_per_thread_data_t *tsm;
3434
3435   if (!sm->enabled)
3436     {
3437       return VNET_API_ERROR_UNSUPPORTED;
3438     }
3439
3440   fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
3441   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
3442   if (sm->num_workers > 1)
3443     tsm = vec_elt_at_index (
3444       sm->per_thread_data,
3445       nat44_ed_get_in2out_worker_index (0, &ip, fib_index, 0));
3446   else
3447     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
3448
3449   init_ed_k (&kv, addr->as_u32, port, eh_addr->as_u32, eh_port, fib_index,
3450              proto);
3451   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
3452     {
3453       return VNET_API_ERROR_NO_SUCH_ENTRY;
3454     }
3455
3456   if (pool_is_free_index (tsm->sessions, ed_value_get_session_index (&value)))
3457     return VNET_API_ERROR_UNSPECIFIED;
3458   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
3459   nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
3460   nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
3461   return 0;
3462 }
3463
3464 VLIB_NODE_FN (nat_default_node) (vlib_main_t * vm,
3465                                  vlib_node_runtime_t * node,
3466                                  vlib_frame_t * frame)
3467 {
3468   return 0;
3469 }
3470
3471 VLIB_REGISTER_NODE (nat_default_node) = {
3472   .name = "nat-default",
3473   .vector_size = sizeof (u32),
3474   .format_trace = 0,
3475   .type = VLIB_NODE_TYPE_INTERNAL,
3476   .n_errors = 0,
3477   .n_next_nodes = NAT_N_NEXT,
3478   .next_nodes = {
3479     [NAT_NEXT_DROP] = "error-drop",
3480     [NAT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3481     [NAT_NEXT_IN2OUT_ED_FAST_PATH] = "nat44-ed-in2out",
3482     [NAT_NEXT_IN2OUT_ED_SLOW_PATH] = "nat44-ed-in2out-slowpath",
3483     [NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH] = "nat44-ed-in2out-output",
3484     [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
3485     [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in",
3486     [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath",
3487     [NAT_NEXT_IN2OUT_CLASSIFY] = "nat44-in2out-worker-handoff",
3488     [NAT_NEXT_OUT2IN_CLASSIFY] = "nat44-out2in-worker-handoff",
3489   },
3490 };
3491
3492 void
3493 nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f)
3494 {
3495   f->l3_csum_delta = 0;
3496   f->l4_csum_delta = 0;
3497   if (f->ops & NAT_FLOW_OP_SADDR_REWRITE &&
3498       f->rewrite.saddr.as_u32 != f->match.saddr.as_u32)
3499     {
3500       f->l3_csum_delta =
3501         ip_csum_add_even (f->l3_csum_delta, f->rewrite.saddr.as_u32);
3502       f->l3_csum_delta =
3503         ip_csum_sub_even (f->l3_csum_delta, f->match.saddr.as_u32);
3504     }
3505   else
3506     {
3507       f->rewrite.saddr.as_u32 = f->match.saddr.as_u32;
3508     }
3509   if (f->ops & NAT_FLOW_OP_DADDR_REWRITE &&
3510       f->rewrite.daddr.as_u32 != f->match.daddr.as_u32)
3511     {
3512       f->l3_csum_delta =
3513         ip_csum_add_even (f->l3_csum_delta, f->rewrite.daddr.as_u32);
3514       f->l3_csum_delta =
3515         ip_csum_sub_even (f->l3_csum_delta, f->match.daddr.as_u32);
3516     }
3517   else
3518     {
3519       f->rewrite.daddr.as_u32 = f->match.daddr.as_u32;
3520     }
3521   if (f->ops & NAT_FLOW_OP_SPORT_REWRITE && f->rewrite.sport != f->match.sport)
3522     {
3523       f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.sport);
3524       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport);
3525     }
3526   else
3527     {
3528       f->rewrite.sport = f->match.sport;
3529     }
3530   if (f->ops & NAT_FLOW_OP_DPORT_REWRITE && f->rewrite.dport != f->match.dport)
3531     {
3532       f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.dport);
3533       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.dport);
3534     }
3535   else
3536     {
3537       f->rewrite.dport = f->match.dport;
3538     }
3539   if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE &&
3540       f->rewrite.icmp_id != f->match.sport)
3541     {
3542       f->l4_csum_delta =
3543         ip_csum_add_even (f->l4_csum_delta, f->rewrite.icmp_id);
3544       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport);
3545     }
3546   else
3547     {
3548       f->rewrite.icmp_id = f->match.sport;
3549     }
3550   if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3551     {
3552     }
3553   else
3554     {
3555       f->rewrite.fib_index = f->match.fib_index;
3556     }
3557 }
3558
3559 static_always_inline int
3560 nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
3561                             ip4_header_t *ip, nat_6t_flow_t *f);
3562
3563 static_always_inline void
3564 nat_6t_flow_ip4_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
3565                            nat_6t_flow_t *f, ip_protocol_t proto,
3566                            int is_icmp_inner_ip4, int skip_saddr_rewrite)
3567 {
3568   udp_header_t *udp = ip4_next_header (ip);
3569   tcp_header_t *tcp = (tcp_header_t *) udp;
3570
3571   if ((IP_PROTOCOL_TCP == proto || IP_PROTOCOL_UDP == proto) &&
3572       !vnet_buffer (b)->ip.reass.is_non_first_fragment)
3573     {
3574       if (!is_icmp_inner_ip4)
3575         { // regular case
3576           ip->src_address = f->rewrite.saddr;
3577           ip->dst_address = f->rewrite.daddr;
3578           udp->src_port = f->rewrite.sport;
3579           udp->dst_port = f->rewrite.dport;
3580         }
3581       else
3582         { // icmp inner ip4 - reversed saddr/daddr
3583           ip->src_address = f->rewrite.daddr;
3584           ip->dst_address = f->rewrite.saddr;
3585           udp->src_port = f->rewrite.dport;
3586           udp->dst_port = f->rewrite.sport;
3587         }
3588
3589       if (IP_PROTOCOL_TCP == proto)
3590         {
3591           ip_csum_t tcp_sum = tcp->checksum;
3592           tcp_sum = ip_csum_sub_even (tcp_sum, f->l3_csum_delta);
3593           tcp_sum = ip_csum_sub_even (tcp_sum, f->l4_csum_delta);
3594           mss_clamping (sm->mss_clamping, tcp, &tcp_sum);
3595           tcp->checksum = ip_csum_fold (tcp_sum);
3596         }
3597       else if (IP_PROTOCOL_UDP == proto && udp->checksum)
3598         {
3599           ip_csum_t udp_sum = udp->checksum;
3600           udp_sum = ip_csum_sub_even (udp_sum, f->l3_csum_delta);
3601           udp_sum = ip_csum_sub_even (udp_sum, f->l4_csum_delta);
3602           udp->checksum = ip_csum_fold (udp_sum);
3603         }
3604     }
3605   else
3606     {
3607       if (!is_icmp_inner_ip4)
3608         { // regular case
3609           if (!skip_saddr_rewrite)
3610             {
3611               ip->src_address = f->rewrite.saddr;
3612             }
3613           ip->dst_address = f->rewrite.daddr;
3614         }
3615       else
3616         { // icmp inner ip4 - reversed saddr/daddr
3617           ip->src_address = f->rewrite.daddr;
3618           ip->dst_address = f->rewrite.saddr;
3619         }
3620     }
3621
3622   if (skip_saddr_rewrite)
3623     {
3624       ip->checksum = ip4_header_checksum (ip);
3625     }
3626   else
3627     {
3628       ip_csum_t ip_sum = ip->checksum;
3629       ip_sum = ip_csum_sub_even (ip_sum, f->l3_csum_delta);
3630       ip->checksum = ip_csum_fold (ip_sum);
3631     }
3632   if (0xffff == ip->checksum)
3633     ip->checksum = 0;
3634   ASSERT (ip4_header_checksum_is_valid (ip));
3635 }
3636
3637 static_always_inline int
3638 it_fits (vlib_main_t *vm, vlib_buffer_t *b, void *object, size_t size)
3639 {
3640   int result = ((u8 *) object + size <=
3641                 (u8 *) vlib_buffer_get_current (b) + b->current_length) &&
3642                vlib_object_within_buffer_data (vm, b, object, size);
3643   return result;
3644 }
3645
3646 static_always_inline int
3647 nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
3648                             ip4_header_t *ip, nat_6t_flow_t *f)
3649 {
3650   if (IP_PROTOCOL_ICMP != ip->protocol)
3651     return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3652
3653   icmp46_header_t *icmp = ip4_next_header (ip);
3654   nat_icmp_echo_header_t *echo = (nat_icmp_echo_header_t *) (icmp + 1);
3655
3656   if ((!vnet_buffer (b)->ip.reass.is_non_first_fragment))
3657     {
3658       if (!it_fits (vm, b, icmp, sizeof (*icmp)))
3659         {
3660           return NAT_ED_TRNSL_ERR_PACKET_TRUNCATED;
3661         }
3662
3663       if (!icmp_type_is_error_message (icmp->type))
3664         {
3665           if ((f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE) &&
3666               (f->rewrite.icmp_id != echo->identifier))
3667             {
3668               ip_csum_t sum = icmp->checksum;
3669               sum = ip_csum_update (sum, echo->identifier, f->rewrite.icmp_id,
3670                                     nat_icmp_echo_header_t,
3671                                     identifier /* changed member */);
3672               echo->identifier = f->rewrite.icmp_id;
3673               icmp->checksum = ip_csum_fold (sum);
3674             }
3675         }
3676       else
3677         {
3678           ip_csum_t sum = ip_incremental_checksum (
3679             0, icmp,
3680             clib_net_to_host_u16 (ip->length) - ip4_header_bytes (ip));
3681           sum = (u16) ~ip_csum_fold (sum);
3682           if (sum != 0)
3683             {
3684               return NAT_ED_TRNSL_ERR_INVALID_CSUM;
3685             }
3686
3687           // errors are not fragmented
3688           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3689
3690           if (!ip4_header_checksum_is_valid (inner_ip))
3691             {
3692               return NAT_ED_TRNSL_ERR_INNER_IP_CORRUPT;
3693             }
3694
3695           ip_protocol_t inner_proto = inner_ip->protocol;
3696
3697           ip_csum_t old_icmp_sum = icmp->checksum;
3698           ip_csum_t old_inner_ip_sum = inner_ip->checksum;
3699           ip_csum_t old_udp_sum;
3700           ip_csum_t old_tcp_sum;
3701           ip_csum_t new_icmp_sum;
3702           udp_header_t *udp;
3703           tcp_header_t *tcp;
3704
3705           switch (inner_proto)
3706             {
3707             case IP_PROTOCOL_UDP:
3708               udp = (udp_header_t *) (inner_ip + 1);
3709               if (!it_fits (vm, b, udp, sizeof (*udp)))
3710                 {
3711                   return NAT_ED_TRNSL_ERR_PACKET_TRUNCATED;
3712                 }
3713               old_udp_sum = udp->checksum;
3714               nat_6t_flow_ip4_translate (sm, b, inner_ip, f, inner_proto,
3715                                          1 /* is_icmp_inner_ip4 */,
3716                                          0 /* skip_saddr_rewrite */);
3717               new_icmp_sum = ip_csum_sub_even (old_icmp_sum, f->l3_csum_delta);
3718               new_icmp_sum = ip_csum_sub_even (new_icmp_sum, f->l4_csum_delta);
3719               new_icmp_sum =
3720                 ip_csum_update (new_icmp_sum, old_inner_ip_sum,
3721                                 inner_ip->checksum, ip4_header_t, checksum);
3722               new_icmp_sum =
3723                 ip_csum_update (new_icmp_sum, old_udp_sum, udp->checksum,
3724                                 udp_header_t, checksum);
3725               new_icmp_sum = ip_csum_fold (new_icmp_sum);
3726               icmp->checksum = new_icmp_sum;
3727               break;
3728             case IP_PROTOCOL_TCP:
3729               tcp = (tcp_header_t *) (inner_ip + 1);
3730               if (!it_fits (vm, b, tcp, sizeof (*tcp)))
3731                 {
3732                   return NAT_ED_TRNSL_ERR_PACKET_TRUNCATED;
3733                 }
3734               old_tcp_sum = tcp->checksum;
3735               nat_6t_flow_ip4_translate (sm, b, inner_ip, f, inner_proto,
3736                                          1 /* is_icmp_inner_ip4 */,
3737                                          0 /* skip_saddr_rewrite */);
3738               new_icmp_sum = ip_csum_sub_even (old_icmp_sum, f->l3_csum_delta);
3739               new_icmp_sum = ip_csum_sub_even (new_icmp_sum, f->l4_csum_delta);
3740               new_icmp_sum =
3741                 ip_csum_update (new_icmp_sum, old_inner_ip_sum,
3742                                 inner_ip->checksum, ip4_header_t, checksum);
3743               new_icmp_sum =
3744                 ip_csum_update (new_icmp_sum, old_tcp_sum, tcp->checksum,
3745                                 tcp_header_t, checksum);
3746               new_icmp_sum = ip_csum_fold (new_icmp_sum);
3747               icmp->checksum = new_icmp_sum;
3748               break;
3749             case IP_PROTOCOL_ICMP:
3750               nat_6t_flow_ip4_translate (sm, b, inner_ip, f, inner_proto,
3751                                          1 /* is_icmp_inner_ip4 */,
3752                                          0 /* skip_saddr_rewrite */);
3753               if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
3754                 {
3755                   icmp46_header_t *inner_icmp = ip4_next_header (inner_ip);
3756                   if (!it_fits (vm, b, inner_icmp, sizeof (*inner_icmp)))
3757                     {
3758                       return NAT_ED_TRNSL_ERR_PACKET_TRUNCATED;
3759                     }
3760                   nat_icmp_echo_header_t *inner_echo =
3761                     (nat_icmp_echo_header_t *) (inner_icmp + 1);
3762                   if (f->rewrite.icmp_id != inner_echo->identifier)
3763                     {
3764                       ip_csum_t sum = icmp->checksum;
3765                       sum = ip_csum_update (sum, inner_echo->identifier,
3766                                             f->rewrite.icmp_id,
3767                                             nat_icmp_echo_header_t,
3768                                             identifier /* changed member */);
3769                       icmp->checksum = ip_csum_fold (sum);
3770                       ip_csum_t inner_sum = inner_icmp->checksum;
3771                       inner_sum = ip_csum_update (
3772                         sum, inner_echo->identifier, f->rewrite.icmp_id,
3773                         nat_icmp_echo_header_t,
3774                         identifier /* changed member */);
3775                       inner_icmp->checksum = ip_csum_fold (inner_sum);
3776                       inner_echo->identifier = f->rewrite.icmp_id;
3777                     }
3778                 }
3779               break;
3780             default:
3781               clib_warning ("unexpected NAT protocol value `%d'", inner_proto);
3782               return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3783             }
3784         }
3785     }
3786
3787   return NAT_ED_TRNSL_ERR_SUCCESS;
3788 }
3789
3790 static_always_inline nat_translation_error_e
3791 nat_6t_flow_buf_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
3792                            ip4_header_t *ip, nat_6t_flow_t *f,
3793                            ip_protocol_t proto, int is_output_feature,
3794                            int is_i2o)
3795 {
3796   if (!is_output_feature && f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3797     {
3798       vnet_buffer (b)->sw_if_index[VLIB_TX] = f->rewrite.fib_index;
3799     }
3800
3801   if (IP_PROTOCOL_ICMP == proto)
3802     {
3803       if (ip->src_address.as_u32 != f->rewrite.saddr.as_u32)
3804         {
3805           // packet is returned from a router, not from destination
3806           // skip source address rewrite if in o2i path
3807           nat_6t_flow_ip4_translate (sm, b, ip, f, proto,
3808                                      0 /* is_icmp_inner_ip4 */,
3809                                      !is_i2o /* skip_saddr_rewrite */);
3810         }
3811       else
3812         {
3813           nat_6t_flow_ip4_translate (sm, b, ip, f, proto,
3814                                      0 /* is_icmp_inner_ip4 */,
3815                                      0 /* skip_saddr_rewrite */);
3816         }
3817       return nat_6t_flow_icmp_translate (vm, sm, b, ip, f);
3818     }
3819
3820   nat_6t_flow_ip4_translate (sm, b, ip, f, proto, 0 /* is_icmp_inner_ip4 */,
3821                              0 /* skip_saddr_rewrite */);
3822
3823   return NAT_ED_TRNSL_ERR_SUCCESS;
3824 }
3825
3826 nat_translation_error_e
3827 nat_6t_flow_buf_translate_i2o (vlib_main_t *vm, snat_main_t *sm,
3828                                vlib_buffer_t *b, ip4_header_t *ip,
3829                                nat_6t_flow_t *f, ip_protocol_t proto,
3830                                int is_output_feature)
3831 {
3832   return nat_6t_flow_buf_translate (vm, sm, b, ip, f, proto, is_output_feature,
3833                                     1 /* is_i2o */);
3834 }
3835
3836 nat_translation_error_e
3837 nat_6t_flow_buf_translate_o2i (vlib_main_t *vm, snat_main_t *sm,
3838                                vlib_buffer_t *b, ip4_header_t *ip,
3839                                nat_6t_flow_t *f, ip_protocol_t proto,
3840                                int is_output_feature)
3841 {
3842   return nat_6t_flow_buf_translate (vm, sm, b, ip, f, proto, is_output_feature,
3843                                     0 /* is_i2o */);
3844 }
3845
3846 static_always_inline void
3847 nat_syslog_nat44_sess (u32 ssubix, u32 sfibix, ip4_address_t *isaddr,
3848                        u16 isport, ip4_address_t *xsaddr, u16 xsport,
3849                        ip4_address_t *idaddr, u16 idport,
3850                        ip4_address_t *xdaddr, u16 xdport, u8 proto, u8 is_add,
3851                        u8 is_twicenat)
3852 {
3853   syslog_msg_t syslog_msg;
3854   fib_table_t *fib;
3855
3856   if (!syslog_is_enabled ())
3857     return;
3858
3859   if (syslog_severity_filter_block (SADD_SDEL_SEVERITY))
3860     return;
3861
3862   fib = fib_table_get (sfibix, FIB_PROTOCOL_IP4);
3863
3864   syslog_msg_init (&syslog_msg, NAT_FACILITY, SADD_SDEL_SEVERITY, NAT_APPNAME,
3865                    is_add ? SADD_MSGID : SDEL_MSGID);
3866
3867   syslog_msg_sd_init (&syslog_msg, NSESS_SDID);
3868   syslog_msg_add_sd_param (&syslog_msg, SSUBIX_SDPARAM_NAME, "%d", ssubix);
3869   syslog_msg_add_sd_param (&syslog_msg, SVLAN_SDPARAM_NAME, "%d",
3870                            fib->ft_table_id);
3871   syslog_msg_add_sd_param (&syslog_msg, IATYP_SDPARAM_NAME, IATYP_IPV4);
3872   syslog_msg_add_sd_param (&syslog_msg, ISADDR_SDPARAM_NAME, "%U",
3873                            format_ip4_address, isaddr);
3874   syslog_msg_add_sd_param (&syslog_msg, ISPORT_SDPARAM_NAME, "%d",
3875                            clib_net_to_host_u16 (isport));
3876   syslog_msg_add_sd_param (&syslog_msg, XATYP_SDPARAM_NAME, IATYP_IPV4);
3877   syslog_msg_add_sd_param (&syslog_msg, XSADDR_SDPARAM_NAME, "%U",
3878                            format_ip4_address, xsaddr);
3879   syslog_msg_add_sd_param (&syslog_msg, XSPORT_SDPARAM_NAME, "%d",
3880                            clib_net_to_host_u16 (xsport));
3881   syslog_msg_add_sd_param (&syslog_msg, PROTO_SDPARAM_NAME, "%d", proto);
3882   syslog_msg_add_sd_param (&syslog_msg, XDADDR_SDPARAM_NAME, "%U",
3883                            format_ip4_address, xdaddr);
3884   syslog_msg_add_sd_param (&syslog_msg, XDPORT_SDPARAM_NAME, "%d",
3885                            clib_net_to_host_u16 (xdport));
3886   if (is_twicenat)
3887     {
3888       syslog_msg_add_sd_param (&syslog_msg, IDADDR_SDPARAM_NAME, "%U",
3889                                format_ip4_address, idaddr);
3890       syslog_msg_add_sd_param (&syslog_msg, IDPORT_SDPARAM_NAME, "%d",
3891                                clib_net_to_host_u16 (idport));
3892     }
3893
3894   syslog_msg_send (&syslog_msg);
3895 }
3896
3897 void
3898 nat_syslog_nat44_sadd (u32 ssubix, u32 sfibix, ip4_address_t *isaddr,
3899                        u16 isport, ip4_address_t *idaddr, u16 idport,
3900                        ip4_address_t *xsaddr, u16 xsport,
3901                        ip4_address_t *xdaddr, u16 xdport, u8 proto,
3902                        u8 is_twicenat)
3903 {
3904   nat_syslog_nat44_sess (ssubix, sfibix, isaddr, isport, xsaddr, xsport,
3905                          idaddr, idport, xdaddr, xdport, proto, 1,
3906                          is_twicenat);
3907 }
3908
3909 void
3910 nat_syslog_nat44_sdel (u32 ssubix, u32 sfibix, ip4_address_t *isaddr,
3911                        u16 isport, ip4_address_t *idaddr, u16 idport,
3912                        ip4_address_t *xsaddr, u16 xsport,
3913                        ip4_address_t *xdaddr, u16 xdport, u8 proto,
3914                        u8 is_twicenat)
3915 {
3916   nat_syslog_nat44_sess (ssubix, sfibix, isaddr, isport, xsaddr, xsport,
3917                          idaddr, idport, xdaddr, xdport, proto, 0,
3918                          is_twicenat);
3919 }
3920
3921 /*
3922  * fd.io coding-style-patch-verification: ON
3923  *
3924  * Local Variables:
3925  * eval: (c-set-style "gnu")
3926  * End:
3927  */