nat: refactoring NAT44ED cfg functions
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed.c
1 /*
2  * snat.c - simple nat plugin
3  *
4  * Copyright (c) 2016 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vpp/app/version.h>
19
20 #include <vnet/vnet.h>
21 #include <vnet/ip/ip.h>
22 #include <vnet/ip/ip4.h>
23 #include <vnet/ip/ip_table.h>
24 #include <vnet/ip/reass/ip4_sv_reass.h>
25 #include <vnet/fib/fib_table.h>
26 #include <vnet/fib/ip4_fib.h>
27 #include <vnet/plugin/plugin.h>
28 #include <vppinfra/bihash_16_8.h>
29
30 #include <nat/lib/log.h>
31 #include <nat/lib/nat_syslog.h>
32 #include <nat/lib/nat_inlines.h>
33 #include <nat/lib/ipfix_logging.h>
34
35 #include <nat/nat44-ed/nat44_ed.h>
36 #include <nat/nat44-ed/nat44_ed_affinity.h>
37 #include <nat/nat44-ed/nat44_ed_inlines.h>
38
39 #include <vpp/stats/stat_segment.h>
40
41 snat_main_t snat_main;
42
43 static_always_inline void nat_validate_interface_counters (snat_main_t *sm,
44                                                            u32 sw_if_index);
45
/*
 * Guard macros keyed on snat_main.enabled.  Each one expands to a statement
 * that may return from the *calling* function, so they are only usable in
 * functions whose return type matches (void for skip_*, integer for fail_*).
 */

/* Return (void) from the caller when the plugin is not enabled. */
#define skip_if_disabled()                                                    \
  do                                                                          \
    {                                                                         \
      if (PREDICT_FALSE (!snat_main.enabled))                                 \
        return;                                                               \
    }                                                                         \
  while (0)

/* Log "plugin enabled" and return 1 from the caller when the plugin is
 * already enabled; otherwise fall through. */
#define fail_if_enabled()                                                     \
  do                                                                          \
    {                                                                         \
      snat_main_t *sm = &snat_main;                                           \
      if (PREDICT_TRUE (!sm->enabled))                                        \
        break;                                                                \
      nat_log_err ("plugin enabled");                                         \
      return 1;                                                               \
    }                                                                         \
  while (0)

/* Log "plugin disabled" and return 1 from the caller when the plugin is
 * not enabled; otherwise fall through. */
#define fail_if_disabled()                                                    \
  do                                                                          \
    {                                                                         \
      snat_main_t *sm = &snat_main;                                           \
      if (PREDICT_TRUE (sm->enabled))                                         \
        break;                                                                \
      nat_log_err ("plugin disabled");                                        \
      return 1;                                                               \
    }                                                                         \
  while (0)
78
79 /* Hook up input features */
80 VNET_FEATURE_INIT (nat_pre_in2out, static) = {
81   .arc_name = "ip4-unicast",
82   .node_name = "nat-pre-in2out",
83   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
84                                "ip4-sv-reassembly-feature"),
85 };
86 VNET_FEATURE_INIT (nat_pre_out2in, static) = {
87   .arc_name = "ip4-unicast",
88   .node_name = "nat-pre-out2in",
89   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
90                                "ip4-dhcp-client-detect",
91                                "ip4-sv-reassembly-feature"),
92 };
93 VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = {
94   .arc_name = "ip4-unicast",
95   .node_name = "nat44-in2out-worker-handoff",
96   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
97 };
98 VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = {
99   .arc_name = "ip4-unicast",
100   .node_name = "nat44-out2in-worker-handoff",
101   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
102                                "ip4-dhcp-client-detect"),
103 };
104 VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
105   .arc_name = "ip4-unicast",
106   .node_name = "nat44-in2out",
107   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
108 };
109 VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
110   .arc_name = "ip4-unicast",
111   .node_name = "nat44-out2in",
112   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
113                                "ip4-dhcp-client-detect"),
114 };
115 VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = {
116   .arc_name = "ip4-unicast",
117   .node_name = "nat44-ed-in2out",
118   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
119 };
120 VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = {
121   .arc_name = "ip4-unicast",
122   .node_name = "nat44-ed-out2in",
123   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
124                                "ip4-dhcp-client-detect"),
125 };
126 VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
127   .arc_name = "ip4-unicast",
128   .node_name = "nat44-ed-classify",
129   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
130 };
131 VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
132   .arc_name = "ip4-unicast",
133   .node_name = "nat44-handoff-classify",
134   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
135 };
136 VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
137   .arc_name = "ip4-unicast",
138   .node_name = "nat44-in2out-fast",
139   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
140 };
141 VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
142   .arc_name = "ip4-unicast",
143   .node_name = "nat44-out2in-fast",
144   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
145                                "ip4-dhcp-client-detect"),
146 };
147
148 /* Hook up output features */
149 VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = {
150   .arc_name = "ip4-output",
151   .node_name = "nat44-in2out-output",
152   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
153   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
154 };
155 VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
156   .arc_name = "ip4-output",
157   .node_name = "nat44-in2out-output-worker-handoff",
158   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
159   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
160 };
161 VNET_FEATURE_INIT (nat_pre_in2out_output, static) = {
162   .arc_name = "ip4-output",
163   .node_name = "nat-pre-in2out-output",
164   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
165   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
166 };
167 VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = {
168   .arc_name = "ip4-output",
169   .node_name = "nat44-ed-in2out-output",
170   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
171   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
172 };
173
174 VLIB_PLUGIN_REGISTER () = {
175     .version = VPP_BUILD_VER,
176     .description = "Network Address Translation (NAT)",
177 };
178
179 static void nat44_ed_db_init (u32 translations, u32 translation_buckets);
180
181 static void nat44_ed_db_free ();
182
183 u32 nat_calc_bihash_buckets (u32 n_elts);
184
185 u8 *
186 format_session_kvp (u8 * s, va_list * args)
187 {
188   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
189
190   s = format (s, "%U thread-index %llu session-index %llu", format_snat_key,
191               v->key, nat_value_get_thread_index (v),
192               nat_value_get_session_index (v));
193
194   return s;
195 }
196
197 u8 *
198 format_static_mapping_kvp (u8 * s, va_list * args)
199 {
200   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
201
202   s = format (s, "%U static-mapping-index %llu",
203               format_snat_key, v->key, v->value);
204
205   return s;
206 }
207
208 u8 *
209 format_ed_session_kvp (u8 * s, va_list * args)
210 {
211   clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
212
213   u8 proto;
214   u16 r_port, l_port;
215   ip4_address_t l_addr, r_addr;
216   u32 fib_index;
217
218   split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port);
219   s = format (s,
220               "local %U:%d remote %U:%d proto %U fib %d thread-index %u "
221               "session-index %u",
222               format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port),
223               format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port),
224               format_ip_protocol, proto, fib_index,
225               ed_value_get_thread_index (v), ed_value_get_session_index (v));
226
227   return s;
228 }
229
230 void
231 nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index,
232                        u8 is_ha)
233 {
234       per_vrf_sessions_unregister_session (s, thread_index);
235
236       if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 0))
237         nat_elog_warn (sm, "flow hash del failed");
238
239       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0))
240         nat_elog_warn (sm, "flow hash del failed");
241
242   if (is_fwd_bypass_session (s))
243     {
244       return;
245     }
246
247       if (is_affinity_sessions (s))
248         nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
249                              s->nat_proto, s->out2in.port);
250
251       if (!is_ha)
252         nat_syslog_nat44_sdel (
253           0, s->in2out.fib_index, &s->in2out.addr, s->in2out.port,
254           &s->ext_host_nat_addr, s->ext_host_nat_port, &s->out2in.addr,
255           s->out2in.port, &s->ext_host_addr, s->ext_host_port, s->nat_proto,
256           is_twice_nat_session (s));
257
258   if (snat_is_unk_proto_session (s))
259     return;
260
261   if (!is_ha)
262     {
263       /* log NAT event */
264       nat_ipfix_logging_nat44_ses_delete (thread_index,
265                                           s->in2out.addr.as_u32,
266                                           s->out2in.addr.as_u32,
267                                           s->nat_proto,
268                                           s->in2out.port,
269                                           s->out2in.port,
270                                           s->in2out.fib_index);
271     }
272
273   /* Twice NAT address and port for external host */
274   if (is_twice_nat_session (s))
275     {
276       snat_free_outside_address_and_port (sm->twice_nat_addresses,
277                                           thread_index,
278                                           &s->ext_host_nat_addr,
279                                           s->ext_host_nat_port, s->nat_proto);
280     }
281
282   if (snat_is_session_static (s))
283     return;
284
285   snat_free_outside_address_and_port (sm->addresses, thread_index,
286                                       &s->out2in.addr, s->out2in.port,
287                                       s->nat_proto);
288 }
289
290 void
291 snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
292                           int is_add)
293 {
294   snat_main_t *sm = &snat_main;
295   fib_prefix_t prefix = {
296     .fp_len = p_len,
297     .fp_proto = FIB_PROTOCOL_IP4,
298     .fp_addr = {
299                 .ip4.as_u32 = addr->as_u32,
300                 },
301   };
302   u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
303
304   if (is_add)
305     fib_table_entry_update_one_path (fib_index,
306                                      &prefix,
307                                      sm->fib_src_low,
308                                      (FIB_ENTRY_FLAG_CONNECTED |
309                                       FIB_ENTRY_FLAG_LOCAL |
310                                       FIB_ENTRY_FLAG_EXCLUSIVE),
311                                      DPO_PROTO_IP4,
312                                      NULL,
313                                      sw_if_index,
314                                      ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
315   else
316     fib_table_entry_delete (fib_index, &prefix, sm->fib_src_low);
317 }
318
319 int
320 snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id,
321                   u8 twice_nat)
322 {
323   snat_address_t *ap;
324   snat_interface_t *i;
325   vlib_thread_main_t *tm = vlib_get_thread_main ();
326
327   /* Check if address already exists */
328   vec_foreach (ap, twice_nat ? sm->twice_nat_addresses : sm->addresses)
329     {
330       if (ap->addr.as_u32 == addr->as_u32)
331         {
332           nat_log_err ("address exist");
333           return VNET_API_ERROR_VALUE_EXIST;
334         }
335     }
336
337   if (twice_nat)
338     vec_add2 (sm->twice_nat_addresses, ap, 1);
339   else
340     vec_add2 (sm->addresses, ap, 1);
341
342   ap->addr = *addr;
343   if (vrf_id != ~0)
344     ap->fib_index =
345       fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
346                                          sm->fib_src_low);
347   else
348     ap->fib_index = ~0;
349
350   #define _(N, i, n, s) \
351     clib_memset(ap->busy_##n##_port_refcounts, 0, sizeof(ap->busy_##n##_port_refcounts));\
352     ap->busy_##n##_ports = 0; \
353     ap->busy_##n##_ports_per_thread = 0;\
354     vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
355     foreach_nat_protocol
356   #undef _
357
358   if (twice_nat)
359     return 0;
360
361   /* Add external address to FIB */
362   pool_foreach (i, sm->interfaces)
363    {
364      if (nat_interface_is_inside (i))
365        continue;
366
367      snat_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
368      break;
369   }
370   pool_foreach (i, sm->output_feature_interfaces)
371    {
372      if (nat_interface_is_inside (i))
373        continue;
374
375      snat_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
376      break;
377   }
378
379   return 0;
380 }
381
382 static int
383 is_snat_address_used_in_static_mapping (snat_main_t * sm, ip4_address_t addr)
384 {
385   snat_static_mapping_t *m;
386   pool_foreach (m, sm->static_mappings)
387    {
388       if (is_addr_only_static_mapping (m) ||
389           is_out2in_only_static_mapping (m) ||
390           is_identity_static_mapping (m))
391         continue;
392       if (m->external_addr.as_u32 == addr.as_u32)
393         return 1;
394   }
395
396   return 0;
397 }
398
399 static void
400 snat_add_static_mapping_when_resolved (snat_main_t *sm, ip4_address_t l_addr,
401                                        u16 l_port, u32 sw_if_index, u16 e_port,
402                                        u32 vrf_id, nat_protocol_t proto,
403                                        int addr_only, u8 *tag, int twice_nat,
404                                        int out2in_only, int identity_nat,
405                                        ip4_address_t pool_addr, int exact)
406 {
407   snat_static_map_resolve_t *rp;
408
409   vec_add2 (sm->to_resolve, rp, 1);
410   rp->l_addr.as_u32 = l_addr.as_u32;
411   rp->l_port = l_port;
412   rp->sw_if_index = sw_if_index;
413   rp->e_port = e_port;
414   rp->vrf_id = vrf_id;
415   rp->proto = proto;
416   rp->addr_only = addr_only;
417   rp->twice_nat = twice_nat;
418   rp->out2in_only = out2in_only;
419   rp->identity_nat = identity_nat;
420   rp->tag = vec_dup (tag);
421   rp->pool_addr = pool_addr;
422   rp->exact = exact;
423 }
424
425 u32
426 get_thread_idx_by_port (u16 e_port)
427 {
428   snat_main_t *sm = &snat_main;
429   u32 thread_idx = sm->num_workers;
430   if (sm->num_workers > 1)
431     {
432       thread_idx =
433         sm->first_worker_index +
434         sm->workers[(e_port - 1024) / sm->port_per_thread];
435     }
436   return thread_idx;
437 }
438
439 void
440 nat_ed_static_mapping_del_sessions (snat_main_t * sm,
441                                     snat_main_per_thread_data_t * tsm,
442                                     ip4_address_t l_addr,
443                                     u16 l_port,
444                                     u8 protocol,
445                                     u32 fib_index, int addr_only,
446                                     ip4_address_t e_addr, u16 e_port)
447 {
448   snat_session_t *s;
449   u32 *indexes_to_free = NULL;
450   pool_foreach (s, tsm->sessions) {
451     if (s->in2out.fib_index != fib_index ||
452         s->in2out.addr.as_u32 != l_addr.as_u32)
453       {
454         continue;
455       }
456     if (!addr_only)
457       {
458         if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
459             s->out2in.port != e_port ||
460             s->in2out.port != l_port ||
461             s->nat_proto != protocol)
462           continue;
463       }
464
465     if (is_lb_session (s))
466       continue;
467     if (!snat_is_session_static (s))
468       continue;
469     nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
470     vec_add1 (indexes_to_free, s - tsm->sessions);
471     if (!addr_only)
472       break;
473   }
474   u32 *ses_index;
475   vec_foreach (ses_index, indexes_to_free)
476   {
477     s = pool_elt_at_index (tsm->sessions, *ses_index);
478     nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
479   }
480   vec_free (indexes_to_free);
481 }
482
483 int
484 snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
485                          u16 l_port, u16 e_port, u32 vrf_id, int addr_only,
486                          u32 sw_if_index, nat_protocol_t proto, int is_add,
487                          twice_nat_type_t twice_nat, u8 out2in_only, u8 *tag,
488                          u8 identity_nat, ip4_address_t pool_addr, int exact)
489 {
490   snat_main_t *sm = &snat_main;
491   snat_static_mapping_t *m;
492   clib_bihash_kv_8_8_t kv, value;
493   snat_address_t *a = 0;
494   u32 fib_index = ~0;
495   snat_interface_t *interface;
496   snat_main_per_thread_data_t *tsm;
497   snat_static_map_resolve_t *rp, *rp_match = 0;
498   nat44_lb_addr_port_t *local;
499   u32 find = ~0;
500   int i;
501
502   /* If the external address is a specific interface address */
503   if (sw_if_index != ~0)
504     {
505       ip4_address_t *first_int_addr;
506
507       for (i = 0; i < vec_len (sm->to_resolve); i++)
508         {
509           rp = sm->to_resolve + i;
510           if (rp->sw_if_index != sw_if_index ||
511               rp->l_addr.as_u32 != l_addr.as_u32 ||
512               rp->vrf_id != vrf_id || rp->addr_only != addr_only)
513             continue;
514
515           if (!addr_only)
516             {
517               if ((rp->l_port != l_port && rp->e_port != e_port)
518                   || rp->proto != proto)
519                 continue;
520             }
521
522           rp_match = rp;
523           break;
524         }
525
526       /* Might be already set... */
527       first_int_addr = ip4_interface_first_address
528         (sm->ip4_main, sw_if_index, 0 /* just want the address */ );
529
530       if (is_add)
531         {
532           if (rp_match)
533             return VNET_API_ERROR_VALUE_EXIST;
534
535           snat_add_static_mapping_when_resolved (
536             sm, l_addr, l_port, sw_if_index, e_port, vrf_id, proto, addr_only,
537             tag, twice_nat, out2in_only, identity_nat, pool_addr, exact);
538
539           /* DHCP resolution required? */
540           if (first_int_addr == 0)
541             {
542               return 0;
543             }
544           else
545             {
546               e_addr.as_u32 = first_int_addr->as_u32;
547               /* Identity mapping? */
548               if (l_addr.as_u32 == 0)
549                 l_addr.as_u32 = e_addr.as_u32;
550             }
551         }
552       else
553         {
554           if (!rp_match)
555             return VNET_API_ERROR_NO_SUCH_ENTRY;
556
557           vec_del1 (sm->to_resolve, i);
558
559           if (first_int_addr)
560             {
561               e_addr.as_u32 = first_int_addr->as_u32;
562               /* Identity mapping? */
563               if (l_addr.as_u32 == 0)
564                 l_addr.as_u32 = e_addr.as_u32;
565             }
566           else
567             return 0;
568         }
569     }
570
571   init_nat_k (&kv, e_addr, addr_only ? 0 : e_port, 0, addr_only ? 0 : proto);
572   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
573     m = 0;
574   else
575     m = pool_elt_at_index (sm->static_mappings, value.value);
576
577   if (is_add)
578     {
579       if (m)
580         {
581           if (is_identity_static_mapping (m))
582             {
583               pool_foreach (local, m->locals)
584                {
585                 if (local->vrf_id == vrf_id)
586                   return VNET_API_ERROR_VALUE_EXIST;
587               }
588               pool_get (m->locals, local);
589               local->vrf_id = vrf_id;
590               local->fib_index =
591                 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
592                                                    sm->fib_src_low);
593               init_nat_kv (&kv, m->local_addr, m->local_port, local->fib_index,
594                            m->proto, 0, m - sm->static_mappings);
595               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
596               return 0;
597             }
598           else
599             return VNET_API_ERROR_VALUE_EXIST;
600         }
601
602       if (twice_nat && addr_only)
603         return VNET_API_ERROR_UNSUPPORTED;
604
605       /* Convert VRF id to FIB index */
606       if (vrf_id != ~0)
607         fib_index =
608           fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
609                                              sm->fib_src_low);
610       /* If not specified use inside VRF id from SNAT plugin startup config */
611       else
612         {
613           fib_index = sm->inside_fib_index;
614           vrf_id = sm->inside_vrf_id;
615           fib_table_lock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
616         }
617
618       if (!(out2in_only || identity_nat))
619         {
620           init_nat_k (&kv, l_addr, addr_only ? 0 : l_port, fib_index,
621                       addr_only ? 0 : proto);
622           if (!clib_bihash_search_8_8
623               (&sm->static_mapping_by_local, &kv, &value))
624             return VNET_API_ERROR_VALUE_EXIST;
625         }
626
627       /* Find external address in allocated addresses and reserve port for
628          address and port pair mapping when dynamic translations enabled */
629       if (!(addr_only || sm->static_mapping_only || out2in_only))
630         {
631           for (i = 0; i < vec_len (sm->addresses); i++)
632             {
633               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
634                 {
635                   a = sm->addresses + i;
636                   /* External port must be unused */
637                   switch (proto)
638                     {
639 #define _(N, j, n, s) \
640                     case NAT_PROTOCOL_##N: \
641                       if (a->busy_##n##_port_refcounts[e_port]) \
642                         return VNET_API_ERROR_INVALID_VALUE; \
643                       ++a->busy_##n##_port_refcounts[e_port]; \
644                       if (e_port > 1024) \
645                         { \
646                           a->busy_##n##_ports++; \
647                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
648                         } \
649                       break;
650                       foreach_nat_protocol
651 #undef _
652                         default : nat_elog_info (sm, "unknown protocol");
653                       return VNET_API_ERROR_INVALID_VALUE_2;
654                     }
655                   break;
656                 }
657             }
658           /* External address must be allocated */
659           if (!a && (l_addr.as_u32 != e_addr.as_u32))
660             {
661               if (sw_if_index != ~0)
662                 {
663                   for (i = 0; i < vec_len (sm->to_resolve); i++)
664                     {
665                       rp = sm->to_resolve + i;
666                       if (rp->addr_only)
667                         continue;
668                       if (rp->sw_if_index != sw_if_index &&
669                           rp->l_addr.as_u32 != l_addr.as_u32 &&
670                           rp->vrf_id != vrf_id && rp->l_port != l_port &&
671                           rp->e_port != e_port && rp->proto != proto)
672                         continue;
673
674                       vec_del1 (sm->to_resolve, i);
675                       break;
676                     }
677                 }
678               return VNET_API_ERROR_NO_SUCH_ENTRY;
679             }
680         }
681
682       pool_get (sm->static_mappings, m);
683       clib_memset (m, 0, sizeof (*m));
684       m->tag = vec_dup (tag);
685       m->local_addr = l_addr;
686       m->external_addr = e_addr;
687       m->twice_nat = twice_nat;
688
689       if (twice_nat == TWICE_NAT && exact)
690         {
691           m->flags |= NAT_STATIC_MAPPING_FLAG_EXACT_ADDRESS;
692           m->pool_addr = pool_addr;
693         }
694
695       if (out2in_only)
696         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
697       if (addr_only)
698         m->flags |= NAT_STATIC_MAPPING_FLAG_ADDR_ONLY;
699       if (identity_nat)
700         {
701           m->flags |= NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT;
702           pool_get (m->locals, local);
703           local->vrf_id = vrf_id;
704           local->fib_index = fib_index;
705         }
706       else
707         {
708           m->vrf_id = vrf_id;
709           m->fib_index = fib_index;
710         }
711       if (!addr_only)
712         {
713           m->local_port = l_port;
714           m->external_port = e_port;
715           m->proto = proto;
716         }
717
718       if (sm->num_workers > 1)
719         {
720           ip4_header_t ip = {
721             .src_address = m->local_addr,
722           };
723           vec_add1 (m->workers, nat44_ed_get_in2out_worker_index (
724                                   0, &ip, m->fib_index, 0));
725           tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
726         }
727       else
728         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
729
730       if (!out2in_only)
731         {
732           init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto,
733                        0, m - sm->static_mappings);
734           clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
735         }
736
737       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
738                    m - sm->static_mappings);
739       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1);
740     }
741   else
742     {
743       if (!m)
744         {
745           if (sw_if_index != ~0)
746             return 0;
747           else
748             return VNET_API_ERROR_NO_SUCH_ENTRY;
749         }
750
751       if (identity_nat)
752         {
753           if (vrf_id == ~0)
754             vrf_id = sm->inside_vrf_id;
755
756           pool_foreach (local, m->locals)
757            {
758             if (local->vrf_id == vrf_id)
759               find = local - m->locals;
760           }
761           if (find == ~0)
762             return VNET_API_ERROR_NO_SUCH_ENTRY;
763
764           local = pool_elt_at_index (m->locals, find);
765           fib_index = local->fib_index;
766           pool_put (m->locals, local);
767         }
768       else
769         fib_index = m->fib_index;
770
771       /* Free external address port */
772       if (!(addr_only || sm->static_mapping_only || out2in_only))
773         {
774           for (i = 0; i < vec_len (sm->addresses); i++)
775             {
776               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
777                 {
778                   a = sm->addresses + i;
779                   switch (proto)
780                     {
781 #define _(N, j, n, s) \
782                     case NAT_PROTOCOL_##N: \
783                       --a->busy_##n##_port_refcounts[e_port]; \
784                       if (e_port > 1024) \
785                         { \
786                           a->busy_##n##_ports--; \
787                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
788                         } \
789                       break;
790                       foreach_nat_protocol
791 #undef _
792                         default : nat_elog_info (sm, "unknown protocol");
793                       return VNET_API_ERROR_INVALID_VALUE_2;
794                     }
795                   break;
796                 }
797             }
798         }
799
800       if (sm->num_workers > 1)
801         tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
802       else
803         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
804
805       init_nat_k (&kv, m->local_addr, m->local_port, fib_index, m->proto);
806       if (!out2in_only)
807         clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0);
808
809       /* Delete session(s) for static mapping if exist */
810       if (!(sm->static_mapping_only) ||
811           (sm->static_mapping_only && sm->static_mapping_connection_tracking))
812         {
813           nat_ed_static_mapping_del_sessions (
814             sm, tsm, m->local_addr, m->local_port, m->proto, fib_index,
815             addr_only, e_addr, e_port);
816         }
817
818       fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
819       if (pool_elts (m->locals))
820         return 0;
821
822       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
823       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0);
824
825       vec_free (m->tag);
826       vec_free (m->workers);
827       /* Delete static mapping from pool */
828       pool_put (sm->static_mappings, m);
829     }
830
831   if (!addr_only || (l_addr.as_u32 == e_addr.as_u32))
832     return 0;
833
834   /* Add/delete external address to FIB */
835   pool_foreach (interface, sm->interfaces)
836    {
837      if (nat_interface_is_inside (interface))
838        continue;
839
840      snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, is_add);
841      break;
842   }
843   pool_foreach (interface, sm->output_feature_interfaces)
844    {
845      if (nat_interface_is_inside (interface))
846        continue;
847
848      snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, is_add);
849      break;
850   }
851
852   return 0;
853 }
854
855 int
856 nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
857                                  nat_protocol_t proto,
858                                  nat44_lb_addr_port_t * locals, u8 is_add,
859                                  twice_nat_type_t twice_nat, u8 out2in_only,
860                                  u8 * tag, u32 affinity)
861 {
862   snat_main_t *sm = &snat_main;
863   snat_static_mapping_t *m;
864   clib_bihash_kv_8_8_t kv, value;
865   snat_address_t *a = 0;
866   int i;
867   nat44_lb_addr_port_t *local;
868   snat_main_per_thread_data_t *tsm;
869   snat_session_t *s;
870   uword *bitmap = 0;
871
872   init_nat_k (&kv, e_addr, e_port, 0, proto);
873   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
874     m = 0;
875   else
876     m = pool_elt_at_index (sm->static_mappings, value.value);
877
878   if (is_add)
879     {
880       if (m)
881         return VNET_API_ERROR_VALUE_EXIST;
882
883       if (vec_len (locals) < 2)
884         return VNET_API_ERROR_INVALID_VALUE;
885
886       /* Find external address in allocated addresses and reserve port for
887          address and port pair mapping when dynamic translations enabled */
888       if (!(sm->static_mapping_only || out2in_only))
889         {
890           for (i = 0; i < vec_len (sm->addresses); i++)
891             {
892               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
893                 {
894                   a = sm->addresses + i;
895                   /* External port must be unused */
896                   switch (proto)
897                     {
898 #define _(N, j, n, s) \
899                     case NAT_PROTOCOL_##N: \
900                       if (a->busy_##n##_port_refcounts[e_port]) \
901                         return VNET_API_ERROR_INVALID_VALUE; \
902                       ++a->busy_##n##_port_refcounts[e_port]; \
903                       if (e_port > 1024) \
904                         { \
905                           a->busy_##n##_ports++; \
906                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
907                         } \
908                       break;
909                       foreach_nat_protocol
910 #undef _
911                         default : nat_elog_info (sm, "unknown protocol");
912                       return VNET_API_ERROR_INVALID_VALUE_2;
913                     }
914                   break;
915                 }
916             }
917           /* External address must be allocated */
918           if (!a)
919             return VNET_API_ERROR_NO_SUCH_ENTRY;
920         }
921
922       pool_get (sm->static_mappings, m);
923       clib_memset (m, 0, sizeof (*m));
924       m->tag = vec_dup (tag);
925       m->external_addr = e_addr;
926       m->external_port = e_port;
927       m->proto = proto;
928       m->twice_nat = twice_nat;
929       m->flags |= NAT_STATIC_MAPPING_FLAG_LB;
930       if (out2in_only)
931         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
932       m->affinity = affinity;
933
934       if (affinity)
935         m->affinity_per_service_list_head_index =
936           nat_affinity_get_per_service_list_head_index ();
937       else
938         m->affinity_per_service_list_head_index = ~0;
939
940       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
941                    m - sm->static_mappings);
942       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1))
943         {
944           nat_elog_err (sm, "static_mapping_by_external key add failed");
945           return VNET_API_ERROR_UNSPECIFIED;
946         }
947
948       for (i = 0; i < vec_len (locals); i++)
949         {
950           locals[i].fib_index =
951             fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
952                                                locals[i].vrf_id,
953                                                sm->fib_src_low);
954           if (!out2in_only)
955             {
956               init_nat_kv (&kv, locals[i].addr, locals[i].port,
957                            locals[i].fib_index, m->proto, 0,
958                            m - sm->static_mappings);
959               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
960             }
961           locals[i].prefix = (i == 0) ? locals[i].probability :
962             (locals[i - 1].prefix + locals[i].probability);
963           pool_get (m->locals, local);
964           *local = locals[i];
965           if (sm->num_workers > 1)
966             {
967               ip4_header_t ip = {
968                 .src_address = locals[i].addr,
969               };
970               bitmap = clib_bitmap_set (
971                 bitmap,
972                 nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index, 0), 1);
973             }
974         }
975
976       /* Assign workers */
977       if (sm->num_workers > 1)
978         {
979           clib_bitmap_foreach (i, bitmap)
980              {
981                vec_add1(m->workers, i);
982             }
983         }
984     }
985   else
986     {
987       if (!m)
988         return VNET_API_ERROR_NO_SUCH_ENTRY;
989
990       if (!is_lb_static_mapping (m))
991         return VNET_API_ERROR_INVALID_VALUE;
992
993       /* Free external address port */
994       if (!(sm->static_mapping_only || out2in_only))
995         {
996           for (i = 0; i < vec_len (sm->addresses); i++)
997             {
998               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
999                 {
1000                   a = sm->addresses + i;
1001                   switch (proto)
1002                     {
1003 #define _(N, j, n, s) \
1004                     case NAT_PROTOCOL_##N: \
1005                       --a->busy_##n##_port_refcounts[e_port]; \
1006                       if (e_port > 1024) \
1007                         { \
1008                           a->busy_##n##_ports--; \
1009                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1010                         } \
1011                       break;
1012                       foreach_nat_protocol
1013 #undef _
1014                         default : nat_elog_info (sm, "unknown protocol");
1015                       return VNET_API_ERROR_INVALID_VALUE_2;
1016                     }
1017                   break;
1018                 }
1019             }
1020         }
1021
1022       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
1023       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0))
1024         {
1025           nat_elog_err (sm, "static_mapping_by_external key del failed");
1026           return VNET_API_ERROR_UNSPECIFIED;
1027         }
1028
1029       pool_foreach (local, m->locals)
1030       {
1031           fib_table_unlock (local->fib_index, FIB_PROTOCOL_IP4,
1032                             sm->fib_src_low);
1033           if (!out2in_only)
1034             {
1035               init_nat_k (&kv, local->addr, local->port, local->fib_index,
1036                           m->proto);
1037               if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv,
1038                                            0))
1039                 {
1040                   nat_elog_err (sm, "static_mapping_by_local key del failed");
1041                   return VNET_API_ERROR_UNSPECIFIED;
1042                 }
1043             }
1044
1045           if (sm->num_workers > 1)
1046             {
1047               ip4_header_t ip = {
1048                 .src_address = local->addr,
1049               };
1050               tsm = vec_elt_at_index (
1051                 sm->per_thread_data,
1052                 nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index, 0));
1053             }
1054           else
1055             tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1056
1057           /* Delete sessions */
1058           pool_foreach (s, tsm->sessions)
1059             {
1060               if (!(is_lb_session (s)))
1061                 continue;
1062
1063               if ((s->in2out.addr.as_u32 != local->addr.as_u32) ||
1064                   s->in2out.port != local->port)
1065                 continue;
1066
1067               nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1068               nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1069             }
1070       }
1071       if (m->affinity)
1072         nat_affinity_flush_service (m->affinity_per_service_list_head_index);
1073       pool_free (m->locals);
1074       vec_free (m->tag);
1075       vec_free (m->workers);
1076
1077       pool_put (sm->static_mappings, m);
1078     }
1079
1080   return 0;
1081 }
1082
1083 int
1084 nat44_lb_static_mapping_add_del_local (ip4_address_t e_addr, u16 e_port,
1085                                        ip4_address_t l_addr, u16 l_port,
1086                                        nat_protocol_t proto, u32 vrf_id,
1087                                        u8 probability, u8 is_add)
1088 {
1089   snat_main_t *sm = &snat_main;
1090   snat_static_mapping_t *m = 0;
1091   clib_bihash_kv_8_8_t kv, value;
1092   nat44_lb_addr_port_t *local, *prev_local, *match_local = 0;
1093   snat_main_per_thread_data_t *tsm;
1094   snat_session_t *s;
1095   u32 *locals = 0;
1096   uword *bitmap = 0;
1097   int i;
1098
1099   init_nat_k (&kv, e_addr, e_port, 0, proto);
1100   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1101     m = pool_elt_at_index (sm->static_mappings, value.value);
1102
1103   if (!m)
1104     return VNET_API_ERROR_NO_SUCH_ENTRY;
1105
1106   if (!is_lb_static_mapping (m))
1107     return VNET_API_ERROR_INVALID_VALUE;
1108
1109   pool_foreach (local, m->locals)
1110    {
1111     if ((local->addr.as_u32 == l_addr.as_u32) && (local->port == l_port) &&
1112         (local->vrf_id == vrf_id))
1113       {
1114         match_local = local;
1115         break;
1116       }
1117   }
1118
1119   if (is_add)
1120     {
1121       if (match_local)
1122         return VNET_API_ERROR_VALUE_EXIST;
1123
1124       pool_get (m->locals, local);
1125       clib_memset (local, 0, sizeof (*local));
1126       local->addr.as_u32 = l_addr.as_u32;
1127       local->port = l_port;
1128       local->probability = probability;
1129       local->vrf_id = vrf_id;
1130       local->fib_index =
1131         fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1132                                            sm->fib_src_low);
1133
1134       if (!is_out2in_only_static_mapping (m))
1135         {
1136           init_nat_kv (&kv, l_addr, l_port, local->fib_index, proto, 0,
1137                        m - sm->static_mappings);
1138           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1))
1139             nat_elog_err (sm, "static_mapping_by_local key add failed");
1140         }
1141     }
1142   else
1143     {
1144       if (!match_local)
1145         return VNET_API_ERROR_NO_SUCH_ENTRY;
1146
1147       if (pool_elts (m->locals) < 3)
1148         return VNET_API_ERROR_UNSPECIFIED;
1149
1150       fib_table_unlock (match_local->fib_index, FIB_PROTOCOL_IP4,
1151                         sm->fib_src_low);
1152
1153       if (!is_out2in_only_static_mapping (m))
1154         {
1155           init_nat_k (&kv, l_addr, l_port, match_local->fib_index, proto);
1156           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0))
1157             nat_elog_err (sm, "static_mapping_by_local key del failed");
1158         }
1159
1160       if (sm->num_workers > 1)
1161         {
1162           ip4_header_t ip = {
1163             .src_address = local->addr,
1164           };
1165           tsm = vec_elt_at_index (
1166             sm->per_thread_data,
1167             nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index, 0));
1168         }
1169       else
1170         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1171
1172       /* Delete sessions */
1173       pool_foreach (s, tsm->sessions) {
1174         if (!(is_lb_session (s)))
1175           continue;
1176
1177         if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) ||
1178             s->in2out.port != match_local->port)
1179           continue;
1180
1181         nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1182         nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1183       }
1184
1185       pool_put (m->locals, match_local);
1186     }
1187
1188   vec_free (m->workers);
1189
1190   pool_foreach (local, m->locals)
1191    {
1192     vec_add1 (locals, local - m->locals);
1193     if (sm->num_workers > 1)
1194       {
1195         ip4_header_t ip;
1196         ip.src_address.as_u32 = local->addr.as_u32,
1197         bitmap = clib_bitmap_set (
1198           bitmap,
1199           nat44_ed_get_in2out_worker_index (0, &ip, local->fib_index, 0), 1);
1200       }
1201   }
1202
1203   ASSERT (vec_len (locals) > 1);
1204
1205   local = pool_elt_at_index (m->locals, locals[0]);
1206   local->prefix = local->probability;
1207   for (i = 1; i < vec_len (locals); i++)
1208     {
1209       local = pool_elt_at_index (m->locals, locals[i]);
1210       prev_local = pool_elt_at_index (m->locals, locals[i - 1]);
1211       local->prefix = local->probability + prev_local->prefix;
1212     }
1213
1214   /* Assign workers */
1215   if (sm->num_workers > 1)
1216     {
1217       clib_bitmap_foreach (i, bitmap)  { vec_add1(m->workers, i); }
1218     }
1219
1220   return 0;
1221 }
1222
1223 int
1224 snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm,
1225                   u8 twice_nat)
1226 {
1227   snat_address_t *a = 0;
1228   snat_session_t *ses;
1229   u32 *ses_to_be_removed = 0, *ses_index;
1230   snat_main_per_thread_data_t *tsm;
1231   snat_static_mapping_t *m;
1232   snat_interface_t *interface;
1233   int i;
1234   snat_address_t *addresses =
1235     twice_nat ? sm->twice_nat_addresses : sm->addresses;
1236
1237   /* Find SNAT address */
1238   for (i = 0; i < vec_len (addresses); i++)
1239     {
1240       if (addresses[i].addr.as_u32 == addr.as_u32)
1241         {
1242           a = addresses + i;
1243           break;
1244         }
1245     }
1246   if (!a)
1247     {
1248       nat_log_err ("no such address");
1249       return VNET_API_ERROR_NO_SUCH_ENTRY;
1250     }
1251
1252   if (delete_sm)
1253     {
1254       ip4_address_t pool_addr = { 0 };
1255       pool_foreach (m, sm->static_mappings)
1256        {
1257           if (m->external_addr.as_u32 == addr.as_u32)
1258             (void) snat_add_static_mapping (m->local_addr, m->external_addr,
1259                                             m->local_port, m->external_port,
1260                                             m->vrf_id,
1261                                             is_addr_only_static_mapping(m), ~0,
1262                                             m->proto, 0 /* is_add */,
1263                                             m->twice_nat,
1264                                             is_out2in_only_static_mapping(m),
1265                                             m->tag,
1266                                             is_identity_static_mapping(m),
1267                                             pool_addr, 0);
1268       }
1269     }
1270   else
1271     {
1272       /* Check if address is used in some static mapping */
1273       if (is_snat_address_used_in_static_mapping (sm, addr))
1274         {
1275           nat_log_err ("address used in static mapping");
1276           return VNET_API_ERROR_UNSPECIFIED;
1277         }
1278     }
1279
1280   if (a->fib_index != ~0)
1281     fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
1282
1283   /* Delete sessions using address */
1284   if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
1285     {
1286       vec_foreach (tsm, sm->per_thread_data)
1287       {
1288         pool_foreach (ses, tsm->sessions)  {
1289           if (ses->out2in.addr.as_u32 == addr.as_u32)
1290             {
1291               nat_free_session_data (sm, ses, tsm - sm->per_thread_data, 0);
1292               vec_add1 (ses_to_be_removed, ses - tsm->sessions);
1293             }
1294         }
1295
1296             vec_foreach (ses_index, ses_to_be_removed)
1297             {
1298               ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1299               nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1);
1300             }
1301
1302         vec_free (ses_to_be_removed);
1303       }
1304     }
1305
1306 #define _(N, i, n, s) \
1307   vec_free (a->busy_##n##_ports_per_thread);
1308   foreach_nat_protocol
1309 #undef _
1310
1311     if (twice_nat)
1312   {
1313     vec_del1 (sm->twice_nat_addresses, i);
1314     return 0;
1315   }
1316   else vec_del1 (sm->addresses, i);
1317
1318   /* Delete external address from FIB */
1319   pool_foreach (interface, sm->interfaces)
1320     {
1321       if (nat_interface_is_inside (interface))
1322         continue;
1323
1324       snat_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
1325       break;
1326     }
1327   pool_foreach (interface, sm->output_feature_interfaces)
1328    {
1329      if (nat_interface_is_inside (interface))
1330        continue;
1331
1332      snat_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
1333      break;
1334   }
1335
1336   return 0;
1337 }
1338
1339 void
1340 expire_per_vrf_sessions (u32 fib_index)
1341 {
1342   per_vrf_sessions_t *per_vrf_sessions;
1343   snat_main_per_thread_data_t *tsm;
1344   snat_main_t *sm = &snat_main;
1345
1346   vec_foreach (tsm, sm->per_thread_data)
1347     {
1348       vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
1349         {
1350           if ((per_vrf_sessions->rx_fib_index == fib_index) ||
1351               (per_vrf_sessions->tx_fib_index == fib_index))
1352             {
1353               per_vrf_sessions->expired = 1;
1354             }
1355         }
1356     }
1357 }
1358
1359 void
1360 update_per_vrf_sessions_vec (u32 fib_index, int is_del)
1361 {
1362   snat_main_t *sm = &snat_main;
1363   nat_fib_t *fib;
1364
1365   // we don't care if it is outside/inside fib
1366   // we just care about their ref_count
1367   // if it reaches 0 sessions should expire
1368   // because the fib isn't valid for NAT anymore
1369
1370   vec_foreach (fib, sm->fibs)
1371   {
1372     if (fib->fib_index == fib_index)
1373       {
1374         if (is_del)
1375           {
1376             fib->ref_count--;
1377             if (!fib->ref_count)
1378               {
1379                 vec_del1 (sm->fibs, fib - sm->fibs);
1380                 expire_per_vrf_sessions (fib_index);
1381               }
1382             return;
1383           }
1384         else
1385           fib->ref_count++;
1386       }
1387   }
1388   if (!is_del)
1389     {
1390       vec_add2 (sm->fibs, fib, 1);
1391       fib->ref_count = 1;
1392       fib->fib_index = fib_index;
1393     }
1394 }
1395
1396 static_always_inline nat_outside_fib_t *
1397 nat44_ed_get_outside_fib (nat_outside_fib_t *outside_fibs, u32 fib_index)
1398 {
1399   nat_outside_fib_t *f;
1400   vec_foreach (f, outside_fibs)
1401     {
1402       if (f->fib_index == fib_index)
1403         {
1404           return f;
1405         }
1406     }
1407   return 0;
1408 }
1409
1410 static_always_inline snat_interface_t *
1411 nat44_ed_get_interface (snat_interface_t *interfaces, u32 sw_if_index)
1412 {
1413   snat_interface_t *i;
1414   pool_foreach (i, interfaces)
1415     {
1416       if (i->sw_if_index == sw_if_index)
1417         {
1418           return i;
1419         }
1420     }
1421   return 0;
1422 }
1423
1424 int
1425 nat44_ed_add_interface (u32 sw_if_index, u8 is_inside)
1426 {
1427   const char *del_feature_name, *feature_name;
1428   snat_main_t *sm = &snat_main;
1429
1430   nat_outside_fib_t *outside_fib;
1431   snat_static_mapping_t *m;
1432   snat_interface_t *i;
1433   snat_address_t *ap;
1434   u32 fib_index;
1435   int rv;
1436
1437   if (!sm->enabled)
1438     {
1439       nat_log_err ("nat44 is disabled");
1440       return VNET_API_ERROR_UNSUPPORTED;
1441     }
1442
1443   if (nat44_ed_get_interface (sm->output_feature_interfaces, sw_if_index))
1444     {
1445       nat_log_err ("error interface already configured");
1446       return VNET_API_ERROR_VALUE_EXIST;
1447     }
1448
1449   i = nat44_ed_get_interface (sm->interfaces, sw_if_index);
1450   if (i)
1451     {
1452       if ((nat_interface_is_inside (i) && is_inside) ||
1453           (nat_interface_is_outside (i) && !is_inside))
1454         {
1455           return 0;
1456         }
1457       if (sm->num_workers > 1)
1458         {
1459           del_feature_name = !is_inside ? "nat44-in2out-worker-handoff" :
1460                                           "nat44-out2in-worker-handoff";
1461           feature_name = "nat44-handoff-classify";
1462         }
1463       else
1464         {
1465           del_feature_name = !is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1466
1467           feature_name = "nat44-ed-classify";
1468         }
1469
1470       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1471       if (rv)
1472         return rv;
1473       vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1474                                    sw_if_index, 0, 0, 0);
1475       vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1,
1476                                    0, 0);
1477     }
1478   else
1479     {
1480       if (sm->num_workers > 1)
1481         {
1482           feature_name = is_inside ? "nat44-in2out-worker-handoff" :
1483                                      "nat44-out2in-worker-handoff";
1484         }
1485       else
1486         {
1487           feature_name = is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1488         }
1489
1490       nat_validate_interface_counters (sm, sw_if_index);
1491       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1492       if (rv)
1493         return rv;
1494       vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1,
1495                                    0, 0);
1496
1497       pool_get (sm->interfaces, i);
1498       i->sw_if_index = sw_if_index;
1499       i->flags = 0;
1500     }
1501
1502   fib_index =
1503     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
1504
1505   update_per_vrf_sessions_vec (fib_index, 0 /*is_del*/);
1506
1507   if (!is_inside)
1508     {
1509       i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
1510
1511       outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
1512       if (outside_fib)
1513         {
1514           outside_fib->refcount++;
1515         }
1516       else
1517         {
1518           vec_add2 (sm->outside_fibs, outside_fib, 1);
1519           outside_fib->fib_index = fib_index;
1520           outside_fib->refcount = 1;
1521         }
1522
1523       vec_foreach (ap, sm->addresses)
1524         {
1525           snat_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, 1);
1526         }
1527       pool_foreach (m, sm->static_mappings)
1528         {
1529           if (!(is_addr_only_static_mapping (m)) ||
1530               (m->local_addr.as_u32 == m->external_addr.as_u32))
1531             {
1532               continue;
1533             }
1534           snat_add_del_addr_to_fib (&m->external_addr, 32, sw_if_index, 1);
1535         }
1536     }
1537   else
1538     {
1539       i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
1540     }
1541
1542   return 0;
1543 }
1544
1545 int
1546 nat44_ed_del_interface (u32 sw_if_index, u8 is_inside)
1547 {
1548   const char *del_feature_name, *feature_name;
1549   snat_main_t *sm = &snat_main;
1550
1551   nat_outside_fib_t *outside_fib;
1552   snat_static_mapping_t *m;
1553   snat_interface_t *i;
1554   snat_address_t *ap;
1555   u32 fib_index;
1556   int rv;
1557
1558   if (!sm->enabled)
1559     {
1560       nat_log_err ("nat44 is disabled");
1561       return VNET_API_ERROR_UNSUPPORTED;
1562     }
1563
1564   i = nat44_ed_get_interface (sm->interfaces, sw_if_index);
1565   if (i == 0)
1566     {
1567       nat_log_err ("error interface couldn't be found");
1568       return VNET_API_ERROR_NO_SUCH_ENTRY;
1569     }
1570
1571   if (nat_interface_is_inside (i) && nat_interface_is_outside (i))
1572     {
1573       if (sm->num_workers > 1)
1574         {
1575           del_feature_name = "nat44-handoff-classify";
1576           feature_name = !is_inside ? "nat44-in2out-worker-handoff" :
1577                                       "nat44-out2in-worker-handoff";
1578         }
1579       else
1580         {
1581           del_feature_name = "nat44-ed-classify";
1582           feature_name = !is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1583         }
1584
1585       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1586       if (rv)
1587         {
1588           return rv;
1589         }
1590       vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1591                                    sw_if_index, 0, 0, 0);
1592       vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1,
1593                                    0, 0);
1594
1595       if (is_inside)
1596         {
1597           i->flags &= ~NAT_INTERFACE_FLAG_IS_INSIDE;
1598         }
1599       else
1600         {
1601           i->flags &= ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
1602         }
1603     }
1604   else
1605     {
1606       if (sm->num_workers > 1)
1607         {
1608           feature_name = is_inside ? "nat44-in2out-worker-handoff" :
1609                                      "nat44-out2in-worker-handoff";
1610         }
1611       else
1612         {
1613           feature_name = is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1614         }
1615
1616       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1617       if (rv)
1618         {
1619           return rv;
1620         }
1621       vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 0,
1622                                    0, 0);
1623
1624       // remove interface
1625       pool_put (sm->interfaces, i);
1626     }
1627
1628   fib_index =
1629     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
1630
1631   update_per_vrf_sessions_vec (fib_index, 1 /*is_del*/);
1632
1633   if (!is_inside)
1634     {
1635       outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
1636       if (outside_fib)
1637         {
1638           outside_fib->refcount--;
1639           if (!outside_fib->refcount)
1640             {
1641               vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1642             }
1643         }
1644
1645       vec_foreach (ap, sm->addresses)
1646         {
1647           snat_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, 0);
1648         }
1649
1650       pool_foreach (m, sm->static_mappings)
1651         {
1652           if (!(is_addr_only_static_mapping (m)) ||
1653               (m->local_addr.as_u32 == m->external_addr.as_u32))
1654             {
1655               continue;
1656             }
1657           snat_add_del_addr_to_fib (&m->external_addr, 32, sw_if_index, 0);
1658         }
1659     }
1660
1661   return 0;
1662 }
1663
1664 int
1665 nat44_ed_add_output_interface (u32 sw_if_index)
1666 {
1667   snat_main_t *sm = &snat_main;
1668
1669   nat_outside_fib_t *outside_fib;
1670   snat_static_mapping_t *m;
1671   snat_interface_t *i;
1672   snat_address_t *ap;
1673   u32 fib_index;
1674   int rv;
1675
1676   if (!sm->enabled)
1677     {
1678       nat_log_err ("nat44 is disabled");
1679       return VNET_API_ERROR_UNSUPPORTED;
1680     }
1681
1682   if (nat44_ed_get_interface (sm->interfaces, sw_if_index))
1683     {
1684       nat_log_err ("error interface already configured");
1685       return VNET_API_ERROR_VALUE_EXIST;
1686     }
1687
1688   if (nat44_ed_get_interface (sm->output_feature_interfaces, sw_if_index))
1689     {
1690       nat_log_err ("error interface already configured");
1691       return VNET_API_ERROR_VALUE_EXIST;
1692     }
1693
1694   if (sm->num_workers > 1)
1695     {
1696       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1697       if (rv)
1698         {
1699           return rv;
1700         }
1701
1702       rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 1);
1703       if (rv)
1704         {
1705           return rv;
1706         }
1707
1708       vnet_feature_enable_disable (
1709         "ip4-unicast", "nat44-out2in-worker-handoff", sw_if_index, 1, 0, 0);
1710       vnet_feature_enable_disable ("ip4-output",
1711                                    "nat44-in2out-output-worker-handoff",
1712                                    sw_if_index, 1, 0, 0);
1713     }
1714   else
1715     {
1716       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1717       if (rv)
1718         {
1719           return rv;
1720         }
1721
1722       rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 1);
1723       if (rv)
1724         {
1725           return rv;
1726         }
1727
1728       vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in",
1729                                    sw_if_index, 1, 0, 0);
1730       vnet_feature_enable_disable ("ip4-output", "nat-pre-in2out-output",
1731                                    sw_if_index, 1, 0, 0);
1732     }
1733
1734   nat_validate_interface_counters (sm, sw_if_index);
1735
1736   pool_get (sm->output_feature_interfaces, i);
1737   i->sw_if_index = sw_if_index;
1738   i->flags = 0;
1739   i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
1740   i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
1741
1742   fib_index =
1743     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
1744   update_per_vrf_sessions_vec (fib_index, 0 /*is_del*/);
1745
1746   outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
1747   if (outside_fib)
1748     {
1749       outside_fib->refcount++;
1750     }
1751   else
1752     {
1753       vec_add2 (sm->outside_fibs, outside_fib, 1);
1754       outside_fib->fib_index = fib_index;
1755       outside_fib->refcount = 1;
1756     }
1757
1758   vec_foreach (ap, sm->addresses)
1759     {
1760       snat_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, 1);
1761     }
1762
1763   pool_foreach (m, sm->static_mappings)
1764     {
1765       if (!((is_addr_only_static_mapping (m))) ||
1766           (m->local_addr.as_u32 == m->external_addr.as_u32))
1767         {
1768           continue;
1769         }
1770       snat_add_del_addr_to_fib (&m->external_addr, 32, sw_if_index, 1);
1771     }
1772
1773   return 0;
1774 }
1775
1776 int
1777 nat44_ed_del_output_interface (u32 sw_if_index)
1778 {
1779   snat_main_t *sm = &snat_main;
1780
1781   nat_outside_fib_t *outside_fib;
1782   snat_static_mapping_t *m;
1783   snat_interface_t *i;
1784   snat_address_t *ap;
1785   u32 fib_index;
1786   int rv;
1787
1788   if (!sm->enabled)
1789     {
1790       nat_log_err ("nat44 is disabled");
1791       return VNET_API_ERROR_UNSUPPORTED;
1792     }
1793
1794   i = nat44_ed_get_interface (sm->output_feature_interfaces, sw_if_index);
1795   if (!i)
1796     {
1797       nat_log_err ("error interface couldn't be found");
1798       return VNET_API_ERROR_NO_SUCH_ENTRY;
1799     }
1800
1801   if (sm->num_workers > 1)
1802     {
1803       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1804       if (rv)
1805         {
1806           return rv;
1807         }
1808
1809       rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 0);
1810       if (rv)
1811         {
1812           return rv;
1813         }
1814
1815       vnet_feature_enable_disable (
1816         "ip4-unicast", "nat44-out2in-worker-handoff", sw_if_index, 0, 0, 0);
1817       vnet_feature_enable_disable ("ip4-output",
1818                                    "nat44-in2out-output-worker-handoff",
1819                                    sw_if_index, 0, 0, 0);
1820     }
1821   else
1822     {
1823       rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1824       if (rv)
1825         {
1826           return rv;
1827         }
1828
1829       rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 0);
1830       if (rv)
1831         {
1832           return rv;
1833         }
1834
1835       vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in",
1836                                    sw_if_index, 0, 0, 0);
1837       vnet_feature_enable_disable ("ip4-output", "nat-pre-in2out-output",
1838                                    sw_if_index, 0, 0, 0);
1839     }
1840
1841   // remove interface
1842   pool_put (sm->output_feature_interfaces, i);
1843
1844   fib_index =
1845     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
1846   update_per_vrf_sessions_vec (fib_index, 1 /*is_del*/);
1847
1848   outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
1849   if (outside_fib)
1850     {
1851       outside_fib->refcount--;
1852       if (!outside_fib->refcount)
1853         {
1854           vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1855         }
1856     }
1857
1858   vec_foreach (ap, sm->addresses)
1859     {
1860       snat_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, 0);
1861     }
1862
1863   pool_foreach (m, sm->static_mappings)
1864     {
1865       if (!((is_addr_only_static_mapping (m))) ||
1866           (m->local_addr.as_u32 == m->external_addr.as_u32))
1867         {
1868           continue;
1869         }
1870       snat_add_del_addr_to_fib (&m->external_addr, 32, sw_if_index, 0);
1871     }
1872
1873   return 0;
1874 }
1875
1876 int
1877 snat_set_workers (uword * bitmap)
1878 {
1879   snat_main_t *sm = &snat_main;
1880   int i, j = 0;
1881
1882   if (sm->num_workers < 2)
1883     return VNET_API_ERROR_FEATURE_DISABLED;
1884
1885   if (clib_bitmap_last_set (bitmap) >= sm->num_workers)
1886     return VNET_API_ERROR_INVALID_WORKER;
1887
1888   vec_free (sm->workers);
1889   clib_bitmap_foreach (i, bitmap)
1890     {
1891       vec_add1(sm->workers, i);
1892       sm->per_thread_data[sm->first_worker_index + i].snat_thread_index = j;
1893       sm->per_thread_data[sm->first_worker_index + i].thread_index = i;
1894       j++;
1895     }
1896
1897   sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
1898
1899   return 0;
1900 }
1901
1902 int
1903 nat44_ed_set_frame_queue_nelts (u32 frame_queue_nelts)
1904 {
1905   fail_if_enabled ();
1906   snat_main_t *sm = &snat_main;
1907   sm->frame_queue_nelts = frame_queue_nelts;
1908   return 0;
1909 }
1910
1911 static void
1912 snat_update_outside_fib (ip4_main_t * im, uword opaque,
1913                          u32 sw_if_index, u32 new_fib_index,
1914                          u32 old_fib_index)
1915 {
1916   snat_main_t *sm = &snat_main;
1917   nat_outside_fib_t *outside_fib;
1918   snat_interface_t *i;
1919   u8 is_add = 1;
1920   u8 match = 0;
1921
1922   if (!sm->enabled || (new_fib_index == old_fib_index)
1923       || (!vec_len (sm->outside_fibs)))
1924     {
1925       return;
1926     }
1927
1928   pool_foreach (i, sm->interfaces)
1929      {
1930       if (i->sw_if_index == sw_if_index)
1931         {
1932           if (!(nat_interface_is_outside (i)))
1933             return;
1934           match = 1;
1935         }
1936     }
1937
1938   pool_foreach (i, sm->output_feature_interfaces)
1939      {
1940       if (i->sw_if_index == sw_if_index)
1941         {
1942           if (!(nat_interface_is_outside (i)))
1943             return;
1944           match = 1;
1945         }
1946     }
1947
1948   if (!match)
1949     return;
1950
1951   vec_foreach (outside_fib, sm->outside_fibs)
1952   {
1953     if (outside_fib->fib_index == old_fib_index)
1954       {
1955         outside_fib->refcount--;
1956         if (!outside_fib->refcount)
1957           vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1958         break;
1959       }
1960   }
1961
1962   vec_foreach (outside_fib, sm->outside_fibs)
1963   {
1964     if (outside_fib->fib_index == new_fib_index)
1965       {
1966         outside_fib->refcount++;
1967         is_add = 0;
1968         break;
1969       }
1970   }
1971
1972   if (is_add)
1973     {
1974       vec_add2 (sm->outside_fibs, outside_fib, 1);
1975       outside_fib->refcount = 1;
1976       outside_fib->fib_index = new_fib_index;
1977     }
1978 }
1979
1980 static void
1981 snat_update_outside_fib (ip4_main_t * im, uword opaque,
1982                          u32 sw_if_index, u32 new_fib_index,
1983                          u32 old_fib_index);
1984
1985 static void
1986 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
1987                                        uword opaque,
1988                                        u32 sw_if_index,
1989                                        ip4_address_t * address,
1990                                        u32 address_length,
1991                                        u32 if_address_index, u32 is_delete);
1992
1993 static void
1994 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
1995                                  uword opaque,
1996                                  u32 sw_if_index,
1997                                  ip4_address_t * address,
1998                                  u32 address_length,
1999                                  u32 if_address_index, u32 is_delete);
2000
2001 void
2002 test_key_calc_split ()
2003 {
2004   ip4_address_t l_addr;
2005   l_addr.as_u8[0] = 1;
2006   l_addr.as_u8[1] = 1;
2007   l_addr.as_u8[2] = 1;
2008   l_addr.as_u8[3] = 1;
2009   ip4_address_t r_addr;
2010   r_addr.as_u8[0] = 2;
2011   r_addr.as_u8[1] = 2;
2012   r_addr.as_u8[2] = 2;
2013   r_addr.as_u8[3] = 2;
2014   u16 l_port = 40001;
2015   u16 r_port = 40301;
2016   u8 proto = 9;
2017   u32 fib_index = 9000001;
2018   u32 thread_index = 3000000001;
2019   u32 session_index = 3000000221;
2020   clib_bihash_kv_16_8_t kv;
2021   init_ed_kv (&kv, l_addr, l_port, r_addr, r_port, fib_index, proto,
2022               thread_index, session_index);
2023   ip4_address_t l_addr2;
2024   ip4_address_t r_addr2;
2025   clib_memset (&l_addr2, 0, sizeof (l_addr2));
2026   clib_memset (&r_addr2, 0, sizeof (r_addr2));
2027   u16 l_port2 = 0;
2028   u16 r_port2 = 0;
2029   u8 proto2 = 0;
2030   u32 fib_index2 = 0;
2031   split_ed_kv (&kv, &l_addr2, &r_addr2, &proto2, &fib_index2, &l_port2,
2032                &r_port2);
2033   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
2034   ASSERT (r_addr.as_u32 == r_addr2.as_u32);
2035   ASSERT (l_port == l_port2);
2036   ASSERT (r_port == r_port2);
2037   ASSERT (proto == proto2);
2038   ASSERT (fib_index == fib_index2);
2039   ASSERT (thread_index == ed_value_get_thread_index (&kv));
2040   ASSERT (session_index == ed_value_get_session_index (&kv));
2041
2042   fib_index = 7001;
2043   proto = 5;
2044   nat_protocol_t proto3 = ~0;
2045   u64 key = calc_nat_key (l_addr, l_port, fib_index, proto);
2046   split_nat_key (key, &l_addr2, &l_port2, &fib_index2, &proto3);
2047   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
2048   ASSERT (l_port == l_port2);
2049   ASSERT (proto == proto3);
2050   ASSERT (fib_index == fib_index2);
2051 }
2052
2053 static clib_error_t *
2054 nat_ip_table_add_del (vnet_main_t * vnm, u32 table_id, u32 is_add)
2055 {
2056   u32 fib_index;
2057   if (!is_add)
2058     {
2059       fib_index = ip4_fib_index_from_table_id (table_id);
2060       if (fib_index != ~0)
2061         {
2062           expire_per_vrf_sessions (fib_index);
2063         }
2064     }
2065   return 0;
2066 }
2067
2068 VNET_IP_TABLE_ADD_DEL_FUNCTION (nat_ip_table_add_del);
2069
2070 void
2071 nat44_set_node_indexes (snat_main_t * sm, vlib_main_t * vm)
2072 {
2073   vlib_node_t *node;
2074
2075   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
2076   sm->out2in_node_index = node->index;
2077
2078   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
2079   sm->in2out_node_index = node->index;
2080
2081   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-output");
2082   sm->in2out_output_node_index = node->index;
2083 }
2084
/*
 * Validate simple counter c at index i and reset it to zero.
 *
 * Expands to a single statement without a trailing semicolon, so it can be
 * used as `if (x) nat_validate_simple_counter (c, i); else ...`.
 */
#define nat_validate_simple_counter(c, i)                                     \
  do                                                                          \
    {                                                                         \
      vlib_validate_simple_counter (&(c), (i));                               \
      vlib_zero_simple_counter (&(c), (i));                                   \
    }                                                                         \
  while (0)

/*
 * Set the name (n) and stat segment name (sn) of simple counter c, then
 * validate and zero it at index 0.
 *
 * Expands to a single statement without a trailing semicolon.
 */
#define nat_init_simple_counter(c, n, sn)                                     \
  do                                                                          \
    {                                                                         \
      (c).name = (n);                                                         \
      (c).stat_segment_name = (sn);                                           \
      nat_validate_simple_counter (c, 0);                                     \
    }                                                                         \
  while (0)
2101
2102 static_always_inline void
2103 nat_validate_interface_counters (snat_main_t *sm, u32 sw_if_index)
2104 {
2105 #define _(x)                                                                  \
2106   nat_validate_simple_counter (sm->counters.fastpath.in2out.x, sw_if_index);  \
2107   nat_validate_simple_counter (sm->counters.fastpath.out2in.x, sw_if_index);  \
2108   nat_validate_simple_counter (sm->counters.slowpath.in2out.x, sw_if_index);  \
2109   nat_validate_simple_counter (sm->counters.slowpath.out2in.x, sw_if_index);
2110   foreach_nat_counter;
2111 #undef _
2112   nat_validate_simple_counter (sm->counters.hairpinning, sw_if_index);
2113 }
2114
2115 static clib_error_t *
2116 nat_init (vlib_main_t * vm)
2117 {
2118   snat_main_t *sm = &snat_main;
2119   vlib_thread_main_t *tm = vlib_get_thread_main ();
2120   vlib_thread_registration_t *tr;
2121   ip4_add_del_interface_address_callback_t cbi = { 0 };
2122   ip4_table_bind_callback_t cbt = { 0 };
2123   u32 i, num_threads = 0;
2124   uword *p, *bitmap = 0;
2125
2126   clib_memset (sm, 0, sizeof (*sm));
2127
2128   // required
2129   sm->vnet_main = vnet_get_main ();
2130   // convenience
2131   sm->ip4_main = &ip4_main;
2132   sm->api_main = vlibapi_get_main ();
2133   sm->ip4_lookup_main = &ip4_main.lookup_main;
2134
2135   // frame queue indices used for handoff
2136   sm->fq_out2in_index = ~0;
2137   sm->fq_in2out_index = ~0;
2138   sm->fq_in2out_output_index = ~0;
2139
2140   sm->log_level = NAT_LOG_ERROR;
2141
2142   nat44_set_node_indexes (sm, vm);
2143
2144   sm->log_class = vlib_log_register_class ("nat", 0);
2145   nat_ipfix_logging_init (vm);
2146
2147   nat_init_simple_counter (sm->total_sessions, "total-sessions",
2148                            "/nat44-ed/total-sessions");
2149   sm->max_cfg_sessions_gauge = stat_segment_new_entry (
2150     (u8 *) "/nat44-ed/max-cfg-sessions", STAT_DIR_TYPE_SCALAR_INDEX);
2151
2152 #define _(x)                                                                  \
2153   nat_init_simple_counter (sm->counters.fastpath.in2out.x, #x,                \
2154                            "/nat44-ed/in2out/fastpath/" #x);                  \
2155   nat_init_simple_counter (sm->counters.fastpath.out2in.x, #x,                \
2156                            "/nat44-ed/out2in/fastpath/" #x);                  \
2157   nat_init_simple_counter (sm->counters.slowpath.in2out.x, #x,                \
2158                            "/nat44-ed/in2out/slowpath/" #x);                  \
2159   nat_init_simple_counter (sm->counters.slowpath.out2in.x, #x,                \
2160                            "/nat44-ed/out2in/slowpath/" #x);
2161   foreach_nat_counter;
2162 #undef _
2163   nat_init_simple_counter (sm->counters.hairpinning, "hairpinning",
2164                            "/nat44-ed/hairpinning");
2165
2166   p = hash_get_mem (tm->thread_registrations_by_name, "workers");
2167   if (p)
2168     {
2169       tr = (vlib_thread_registration_t *) p[0];
2170       if (tr)
2171         {
2172           sm->num_workers = tr->count;
2173           sm->first_worker_index = tr->first_index;
2174         }
2175     }
2176   num_threads = tm->n_vlib_mains - 1;
2177   sm->port_per_thread = 0xffff - 1024;
2178   vec_validate (sm->per_thread_data, num_threads);
2179
2180   /* Use all available workers by default */
2181   if (sm->num_workers > 1)
2182     {
2183       for (i = 0; i < sm->num_workers; i++)
2184         bitmap = clib_bitmap_set (bitmap, i, 1);
2185       snat_set_workers (bitmap);
2186       clib_bitmap_free (bitmap);
2187     }
2188   else
2189     {
2190       sm->per_thread_data[0].snat_thread_index = 0;
2191     }
2192
2193   /* callbacks to call when interface address changes. */
2194   cbi.function = snat_ip4_add_del_interface_address_cb;
2195   vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
2196   cbi.function = nat_ip4_add_del_addr_only_sm_cb;
2197   vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
2198
2199   /* callbacks to call when interface to table biding changes */
2200   cbt.function = snat_update_outside_fib;
2201   vec_add1 (sm->ip4_main->table_bind_callbacks, cbt);
2202
2203   sm->fib_src_low =
2204     fib_source_allocate ("nat-low", FIB_SOURCE_PRIORITY_LOW,
2205                          FIB_SOURCE_BH_SIMPLE);
2206   sm->fib_src_hi =
2207     fib_source_allocate ("nat-hi", FIB_SOURCE_PRIORITY_HI,
2208                          FIB_SOURCE_BH_SIMPLE);
2209
2210   nat_affinity_init (vm);
2211   test_key_calc_split ();
2212
2213   return nat44_api_hookup (vm);
2214 }
2215
2216 VLIB_INIT_FUNCTION (nat_init);
2217
2218 int
2219 nat44_plugin_enable (nat44_config_t c)
2220 {
2221   snat_main_t *sm = &snat_main;
2222
2223   fail_if_enabled ();
2224
2225   if (c.static_mapping_only && !c.connection_tracking)
2226     {
2227       nat_log_err ("unsupported combination of configuration");
2228       return 1;
2229     }
2230
2231   sm->static_mapping_only = c.static_mapping_only;
2232   sm->static_mapping_connection_tracking = c.connection_tracking;
2233
2234   sm->forwarding_enabled = 0;
2235   sm->mss_clamping = 0;
2236   sm->pat = (!c.static_mapping_only ||
2237              (c.static_mapping_only && c.connection_tracking));
2238
2239   if (!c.sessions)
2240     c.sessions = 63 * 1024;
2241
2242   sm->max_translations_per_thread = c.sessions;
2243   stat_segment_set_state_counter (sm->max_cfg_sessions_gauge,
2244                                   sm->max_translations_per_thread);
2245   sm->translation_buckets = nat_calc_bihash_buckets (c.sessions);
2246
2247   vec_add1 (sm->max_translations_per_fib, sm->max_translations_per_thread);
2248
2249   sm->inside_vrf_id = c.inside_vrf;
2250   sm->inside_fib_index =
2251     fib_table_find_or_create_and_lock
2252     (FIB_PROTOCOL_IP4, c.inside_vrf, sm->fib_src_hi);
2253
2254   sm->outside_vrf_id = c.outside_vrf;
2255   sm->outside_fib_index = fib_table_find_or_create_and_lock (
2256     FIB_PROTOCOL_IP4, c.outside_vrf, sm->fib_src_hi);
2257
2258   nat44_ed_db_init (sm->max_translations_per_thread, sm->translation_buckets);
2259
2260   nat_affinity_enable ();
2261
2262   nat_reset_timeouts (&sm->timeouts);
2263
2264   vlib_zero_simple_counter (&sm->total_sessions, 0);
2265
2266   if (!sm->frame_queue_nelts)
2267     {
2268       sm->frame_queue_nelts = NAT_FQ_NELTS_DEFAULT;
2269     }
2270
2271   if (sm->num_workers > 1)
2272     {
2273       if (sm->fq_in2out_index == ~0)
2274         {
2275           sm->fq_in2out_index = vlib_frame_queue_main_init (
2276             sm->in2out_node_index, sm->frame_queue_nelts);
2277         }
2278       if (sm->fq_out2in_index == ~0)
2279         {
2280           sm->fq_out2in_index = vlib_frame_queue_main_init (
2281             sm->out2in_node_index, sm->frame_queue_nelts);
2282         }
2283       if (sm->fq_in2out_output_index == ~0)
2284         {
2285           sm->fq_in2out_output_index = vlib_frame_queue_main_init (
2286             sm->in2out_output_node_index, sm->frame_queue_nelts);
2287         }
2288     }
2289
2290   sm->enabled = 1;
2291   sm->rconfig = c;
2292
2293   return 0;
2294 }
2295
2296 void
2297 nat44_addresses_free (snat_address_t ** addresses)
2298 {
2299   snat_address_t *ap;
2300   vec_foreach (ap, *addresses)
2301     {
2302     #define _(N, i, n, s) \
2303       vec_free (ap->busy_##n##_ports_per_thread);
2304       foreach_nat_protocol
2305     #undef _
2306     }
2307   vec_free (*addresses);
2308   *addresses = 0;
2309 }
2310
2311 int
2312 nat44_plugin_disable ()
2313 {
2314   snat_main_t *sm = &snat_main;
2315   snat_interface_t *i, *pool;
2316   int error = 0;
2317
2318   fail_if_disabled ();
2319
2320   pool = pool_dup (sm->interfaces);
2321   pool_foreach (i, pool)
2322     {
2323       if (nat_interface_is_inside (i))
2324         {
2325           error = nat44_ed_del_interface (i->sw_if_index, 1);
2326         }
2327       if (nat_interface_is_outside (i))
2328         {
2329           error = nat44_ed_del_interface (i->sw_if_index, 0);
2330         }
2331       if (error)
2332         {
2333           nat_log_err ("error occurred while removing interface %u",
2334                        i->sw_if_index);
2335         }
2336     }
2337   pool_free (sm->interfaces);
2338   pool_free (pool);
2339   sm->interfaces = 0;
2340
2341   pool = pool_dup (sm->output_feature_interfaces);
2342   pool_foreach (i, pool)
2343     {
2344       error = nat44_ed_del_output_interface (i->sw_if_index);
2345       if (error)
2346         {
2347           nat_log_err ("error occurred while removing interface %u",
2348                        i->sw_if_index);
2349         }
2350     }
2351   pool_free (sm->output_feature_interfaces);
2352   pool_free (pool);
2353   sm->output_feature_interfaces = 0;
2354
2355   vec_free (sm->max_translations_per_fib);
2356
2357   nat44_ed_db_free ();
2358
2359   nat44_addresses_free (&sm->addresses);
2360   nat44_addresses_free (&sm->twice_nat_addresses);
2361
2362   vec_free (sm->to_resolve);
2363   vec_free (sm->auto_add_sw_if_indices);
2364   vec_free (sm->auto_add_sw_if_indices_twice_nat);
2365
2366   sm->to_resolve = 0;
2367   sm->auto_add_sw_if_indices = 0;
2368   sm->auto_add_sw_if_indices_twice_nat = 0;
2369
2370   sm->forwarding_enabled = 0;
2371
2372   sm->enabled = 0;
2373   clib_memset (&sm->rconfig, 0, sizeof (sm->rconfig));
2374
2375   return 0;
2376 }
2377
2378 void
2379 nat44_ed_forwarding_enable_disable (u8 is_enable)
2380 {
2381   snat_main_per_thread_data_t *tsm;
2382   snat_main_t *sm = &snat_main;
2383   snat_session_t *s;
2384
2385   u32 *ses_to_be_removed = 0, *ses_index;
2386
2387   sm->forwarding_enabled = is_enable != 0;
2388
2389   if (is_enable)
2390     return;
2391
2392   vec_foreach (tsm, sm->per_thread_data)
2393     {
2394       pool_foreach (s, tsm->sessions)
2395         {
2396           if (is_fwd_bypass_session (s))
2397             {
2398               vec_add1 (ses_to_be_removed, s - tsm->sessions);
2399             }
2400         }
2401       vec_foreach (ses_index, ses_to_be_removed)
2402         {
2403           s = pool_elt_at_index (tsm->sessions, ses_index[0]);
2404           nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
2405           nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
2406         }
2407
2408       vec_free (ses_to_be_removed);
2409     }
2410 }
2411
2412 void
2413 snat_free_outside_address_and_port (snat_address_t *addresses,
2414                                     u32 thread_index, ip4_address_t *addr,
2415                                     u16 port, nat_protocol_t protocol)
2416 {
2417   snat_main_t *sm = &snat_main;
2418   snat_address_t *a;
2419   u32 address_index;
2420   u16 port_host_byte_order = clib_net_to_host_u16 (port);
2421
2422   for (address_index = 0; address_index < vec_len (addresses);
2423        address_index++)
2424     {
2425       if (addresses[address_index].addr.as_u32 == addr->as_u32)
2426         break;
2427     }
2428
2429   ASSERT (address_index < vec_len (addresses));
2430
2431   a = addresses + address_index;
2432
2433   switch (protocol)
2434     {
2435 #define _(N, i, n, s) \
2436     case NAT_PROTOCOL_##N: \
2437       ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \
2438       --a->busy_##n##_port_refcounts[port_host_byte_order]; \
2439       a->busy_##n##_ports--; \
2440       a->busy_##n##_ports_per_thread[thread_index]--; \
2441       break;
2442       foreach_nat_protocol
2443 #undef _
2444         default : nat_elog_info (sm, "unknown protocol");
2445       return;
2446     }
2447 }
2448
2449 int
2450 nat_set_outside_address_and_port (snat_address_t *addresses, u32 thread_index,
2451                                   ip4_address_t addr, u16 port,
2452                                   nat_protocol_t protocol)
2453 {
2454   snat_main_t *sm = &snat_main;
2455   snat_address_t *a = 0;
2456   u32 address_index;
2457   u16 port_host_byte_order = clib_net_to_host_u16 (port);
2458
2459   for (address_index = 0; address_index < vec_len (addresses);
2460        address_index++)
2461     {
2462       if (addresses[address_index].addr.as_u32 != addr.as_u32)
2463         continue;
2464
2465       a = addresses + address_index;
2466       switch (protocol)
2467         {
2468 #define _(N, j, n, s) \
2469         case NAT_PROTOCOL_##N: \
2470           if (a->busy_##n##_port_refcounts[port_host_byte_order]) \
2471             return VNET_API_ERROR_INSTANCE_IN_USE; \
2472           ++a->busy_##n##_port_refcounts[port_host_byte_order]; \
2473           a->busy_##n##_ports_per_thread[thread_index]++; \
2474           a->busy_##n##_ports++; \
2475           return 0;
2476           foreach_nat_protocol
2477 #undef _
2478             default : nat_elog_info (sm, "unknown protocol");
2479           return 1;
2480         }
2481     }
2482
2483   return VNET_API_ERROR_NO_SUCH_ENTRY;
2484 }
2485
2486 int
2487 snat_static_mapping_match (vlib_main_t *vm, snat_main_t *sm,
2488                            ip4_address_t match_addr, u16 match_port,
2489                            u32 match_fib_index, nat_protocol_t match_protocol,
2490                            ip4_address_t *mapping_addr, u16 *mapping_port,
2491                            u32 *mapping_fib_index, u8 by_external,
2492                            u8 *is_addr_only, twice_nat_type_t *twice_nat,
2493                            lb_nat_type_t *lb, ip4_address_t *ext_host_addr,
2494                            u8 *is_identity_nat, snat_static_mapping_t **out)
2495 {
2496   clib_bihash_kv_8_8_t kv, value;
2497   clib_bihash_8_8_t *mapping_hash;
2498   snat_static_mapping_t *m;
2499   u32 rand, lo = 0, hi, mid, *tmp = 0, i;
2500   nat44_lb_addr_port_t *local;
2501   u8 backend_index;
2502
2503   if (!by_external)
2504     {
2505       mapping_hash = &sm->static_mapping_by_local;
2506       init_nat_k (&kv, match_addr, match_port, match_fib_index,
2507                   match_protocol);
2508       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2509         {
2510           /* Try address only mapping */
2511           init_nat_k (&kv, match_addr, 0, match_fib_index, 0);
2512           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2513             return 1;
2514         }
2515     }
2516   else
2517     {
2518       mapping_hash = &sm->static_mapping_by_external;
2519       init_nat_k (&kv, match_addr, match_port, 0, match_protocol);
2520       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2521         {
2522           /* Try address only mapping */
2523           init_nat_k (&kv, match_addr, 0, 0, 0);
2524           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2525             return 1;
2526         }
2527     }
2528
2529   m = pool_elt_at_index (sm->static_mappings, value.value);
2530
2531   if (by_external)
2532     {
2533       if (is_lb_static_mapping (m))
2534         {
2535           if (PREDICT_FALSE (lb != 0))
2536             *lb = m->affinity ? AFFINITY_LB_NAT : LB_NAT;
2537           if (m->affinity && !nat_affinity_find_and_lock (
2538                                vm, ext_host_addr[0], match_addr,
2539                                match_protocol, match_port, &backend_index))
2540             {
2541               local = pool_elt_at_index (m->locals, backend_index);
2542               *mapping_addr = local->addr;
2543               *mapping_port = local->port;
2544               *mapping_fib_index = local->fib_index;
2545               goto end;
2546             }
2547           // pick locals matching this worker
2548           if (PREDICT_FALSE (sm->num_workers > 1))
2549             {
2550               u32 thread_index = vlib_get_thread_index ();
2551               pool_foreach_index (i, m->locals)
2552                {
2553                 local = pool_elt_at_index (m->locals, i);
2554
2555                 ip4_header_t ip = {
2556                   .src_address = local->addr,
2557                 };
2558
2559                 if (nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index,
2560                                                       0) == thread_index)
2561                   {
2562                     vec_add1 (tmp, i);
2563                   }
2564                }
2565               ASSERT (vec_len (tmp) != 0);
2566             }
2567           else
2568             {
2569               pool_foreach_index (i, m->locals)
2570                {
2571                 vec_add1 (tmp, i);
2572               }
2573             }
2574           hi = vec_len (tmp) - 1;
2575           local = pool_elt_at_index (m->locals, tmp[hi]);
2576           rand = 1 + (random_u32 (&sm->random_seed) % local->prefix);
2577           while (lo < hi)
2578             {
2579               mid = ((hi - lo) >> 1) + lo;
2580               local = pool_elt_at_index (m->locals, tmp[mid]);
2581               (rand > local->prefix) ? (lo = mid + 1) : (hi = mid);
2582             }
2583           local = pool_elt_at_index (m->locals, tmp[lo]);
2584           if (!(local->prefix >= rand))
2585             return 1;
2586           *mapping_addr = local->addr;
2587           *mapping_port = local->port;
2588           *mapping_fib_index = local->fib_index;
2589           if (m->affinity)
2590             {
2591               if (nat_affinity_create_and_lock (ext_host_addr[0], match_addr,
2592                                                 match_protocol, match_port,
2593                                                 tmp[lo], m->affinity,
2594                                                 m->affinity_per_service_list_head_index))
2595                 nat_elog_info (sm, "create affinity record failed");
2596             }
2597           vec_free (tmp);
2598         }
2599       else
2600         {
2601           if (PREDICT_FALSE (lb != 0))
2602             *lb = NO_LB_NAT;
2603           *mapping_fib_index = m->fib_index;
2604           *mapping_addr = m->local_addr;
2605           /* Address only mapping doesn't change port */
2606           *mapping_port = is_addr_only_static_mapping (m) ? match_port
2607             : m->local_port;
2608         }
2609     }
2610   else
2611     {
2612       *mapping_addr = m->external_addr;
2613       /* Address only mapping doesn't change port */
2614       *mapping_port = is_addr_only_static_mapping (m) ? match_port
2615         : m->external_port;
2616       *mapping_fib_index = sm->outside_fib_index;
2617     }
2618
2619 end:
2620   if (PREDICT_FALSE (is_addr_only != 0))
2621     *is_addr_only = is_addr_only_static_mapping (m);
2622
2623   if (PREDICT_FALSE (twice_nat != 0))
2624     *twice_nat = m->twice_nat;
2625
2626   if (PREDICT_FALSE (is_identity_nat != 0))
2627     *is_identity_nat = is_identity_static_mapping (m);
2628
2629   if (out != 0)
2630     *out = m;
2631
2632   return 0;
2633 }
2634
2635 u32
2636 nat44_ed_get_in2out_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
2637                                   u32 rx_fib_index, u8 is_output)
2638 {
2639   snat_main_t *sm = &snat_main;
2640   u32 next_worker_index = sm->first_worker_index;
2641   u32 hash;
2642
2643   clib_bihash_kv_16_8_t kv16, value16;
2644
2645   u32 fib_index = rx_fib_index;
2646   if (b)
2647     {
2648       if (PREDICT_FALSE (is_output))
2649         {
2650           fib_index = sm->outside_fib_index;
2651           nat_outside_fib_t *outside_fib;
2652           fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
2653           fib_prefix_t pfx = {
2654                   .fp_proto = FIB_PROTOCOL_IP4,
2655                   .fp_len = 32,
2656                   .fp_addr = {
2657                           .ip4.as_u32 = ip->dst_address.as_u32,
2658                   } ,
2659           };
2660
2661           switch (vec_len (sm->outside_fibs))
2662             {
2663             case 0:
2664               fib_index = sm->outside_fib_index;
2665               break;
2666             case 1:
2667               fib_index = sm->outside_fibs[0].fib_index;
2668               break;
2669             default:
2670               vec_foreach (outside_fib, sm->outside_fibs)
2671                 {
2672                   fei = fib_table_lookup (outside_fib->fib_index, &pfx);
2673                   if (FIB_NODE_INDEX_INVALID != fei)
2674                     {
2675                       if (fib_entry_get_resolving_interface (fei) != ~0)
2676                         {
2677                           fib_index = outside_fib->fib_index;
2678                           break;
2679                         }
2680                     }
2681                 }
2682               break;
2683             }
2684         }
2685
2686       init_ed_k (&kv16, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
2687                  ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
2688                  fib_index, ip->protocol);
2689
2690       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
2691         {
2692           next_worker_index = ed_value_get_thread_index (&value16);
2693           vnet_buffer2 (b)->nat.cached_session_index =
2694             ed_value_get_session_index (&value16);
2695           goto out;
2696         }
2697
2698       // dst NAT
2699       init_ed_k (&kv16, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
2700                  ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
2701                  rx_fib_index, ip->protocol);
2702       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
2703         {
2704           next_worker_index = ed_value_get_thread_index (&value16);
2705           vnet_buffer2 (b)->nat.cached_dst_nat_session_index =
2706             ed_value_get_session_index (&value16);
2707           goto out;
2708         }
2709     }
2710
2711   hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
2712     (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
2713
2714   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
2715     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
2716   else
2717     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
2718
2719 out:
2720   if (PREDICT_TRUE (!is_output))
2721     {
2722       nat_elog_debug_handoff (sm, "HANDOFF IN2OUT", next_worker_index,
2723                               rx_fib_index,
2724                               clib_net_to_host_u32 (ip->src_address.as_u32),
2725                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2726     }
2727   else
2728     {
2729       nat_elog_debug_handoff (sm, "HANDOFF IN2OUT-OUTPUT-FEATURE",
2730                               next_worker_index, rx_fib_index,
2731                               clib_net_to_host_u32 (ip->src_address.as_u32),
2732                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2733     }
2734
2735   return next_worker_index;
2736 }
2737
2738 u32
2739 nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
2740                                   u32 rx_fib_index, u8 is_output)
2741 {
2742   snat_main_t *sm = &snat_main;
2743   clib_bihash_kv_8_8_t kv, value;
2744   clib_bihash_kv_16_8_t kv16, value16;
2745
2746   u32 proto, next_worker_index = 0;
2747   u16 port;
2748   snat_static_mapping_t *m;
2749   u32 hash;
2750
2751   proto = ip_proto_to_nat_proto (ip->protocol);
2752
2753   if (PREDICT_FALSE (proto == NAT_PROTOCOL_ICMP))
2754     {
2755       ip4_address_t lookup_saddr, lookup_daddr;
2756       u16 lookup_sport, lookup_dport;
2757       u8 lookup_protocol;
2758       if (!nat_get_icmp_session_lookup_values (
2759             b, ip, &lookup_saddr, &lookup_sport, &lookup_daddr, &lookup_dport,
2760             &lookup_protocol))
2761         {
2762           init_ed_k (&kv16, lookup_saddr, lookup_sport, lookup_daddr,
2763                      lookup_dport, rx_fib_index, lookup_protocol);
2764           if (PREDICT_TRUE (
2765                 !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
2766             {
2767               next_worker_index = ed_value_get_thread_index (&value16);
2768               nat_elog_debug_handoff (
2769                 sm, "HANDOFF OUT2IN (session)", next_worker_index,
2770                 rx_fib_index, clib_net_to_host_u32 (ip->src_address.as_u32),
2771                 clib_net_to_host_u32 (ip->dst_address.as_u32));
2772               return next_worker_index;
2773             }
2774         }
2775     }
2776
2777   init_ed_k (&kv16, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
2778              ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
2779              rx_fib_index, ip->protocol);
2780
2781   if (PREDICT_TRUE (
2782         !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
2783     {
2784       vnet_buffer2 (b)->nat.cached_session_index =
2785         ed_value_get_session_index (&value16);
2786       next_worker_index = ed_value_get_thread_index (&value16);
2787       nat_elog_debug_handoff (sm, "HANDOFF OUT2IN (session)",
2788                               next_worker_index, rx_fib_index,
2789                               clib_net_to_host_u32 (ip->src_address.as_u32),
2790                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2791       return next_worker_index;
2792     }
2793
2794   /* first try static mappings without port */
2795   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
2796     {
2797       init_nat_k (&kv, ip->dst_address, 0, 0, 0);
2798       if (!clib_bihash_search_8_8
2799           (&sm->static_mapping_by_external, &kv, &value))
2800         {
2801           m = pool_elt_at_index (sm->static_mappings, value.value);
2802           next_worker_index = m->workers[0];
2803           goto done;
2804         }
2805     }
2806
2807   /* unknown protocol */
2808   if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
2809     {
2810       /* use current thread */
2811       next_worker_index = vlib_get_thread_index ();
2812       goto done;
2813     }
2814
2815   port = vnet_buffer (b)->ip.reass.l4_dst_port;
2816
2817   if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
2818     {
2819       udp_header_t *udp = ip4_next_header (ip);
2820       icmp46_header_t *icmp = (icmp46_header_t *) udp;
2821       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
2822       if (!icmp_type_is_error_message
2823           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
2824         port = vnet_buffer (b)->ip.reass.l4_src_port;
2825       else
2826         {
2827           /* if error message, then it's not fragmented and we can access it */
2828           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
2829           proto = ip_proto_to_nat_proto (inner_ip->protocol);
2830           void *l4_header = ip4_next_header (inner_ip);
2831           switch (proto)
2832             {
2833             case NAT_PROTOCOL_ICMP:
2834               icmp = (icmp46_header_t *) l4_header;
2835               echo = (icmp_echo_header_t *) (icmp + 1);
2836               port = echo->identifier;
2837               break;
2838             case NAT_PROTOCOL_UDP:
2839             case NAT_PROTOCOL_TCP:
2840               port = ((tcp_udp_header_t *) l4_header)->src_port;
2841               break;
2842             default:
2843               next_worker_index = vlib_get_thread_index ();
2844               goto done;
2845             }
2846         }
2847     }
2848
2849   /* try static mappings with port */
2850   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
2851     {
2852       init_nat_k (&kv, ip->dst_address, port, 0, proto);
2853       if (!clib_bihash_search_8_8
2854           (&sm->static_mapping_by_external, &kv, &value))
2855         {
2856           m = pool_elt_at_index (sm->static_mappings, value.value);
2857           if (!is_lb_static_mapping (m))
2858             {
2859               next_worker_index = m->workers[0];
2860               goto done;
2861             }
2862
2863           hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
2864             (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
2865
2866           if (PREDICT_TRUE (is_pow2 (_vec_len (m->workers))))
2867             next_worker_index =
2868               m->workers[hash & (_vec_len (m->workers) - 1)];
2869           else
2870             next_worker_index = m->workers[hash % _vec_len (m->workers)];
2871           goto done;
2872         }
2873     }
2874
2875   /* worker by outside port */
2876   next_worker_index = sm->first_worker_index;
2877   next_worker_index +=
2878     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
2879
2880 done:
2881   nat_elog_debug_handoff (sm, "HANDOFF OUT2IN", next_worker_index,
2882                           rx_fib_index,
2883                           clib_net_to_host_u32 (ip->src_address.as_u32),
2884                           clib_net_to_host_u32 (ip->dst_address.as_u32));
2885   return next_worker_index;
2886 }
2887
2888 u32
2889 nat44_get_max_session_limit ()
2890 {
2891   snat_main_t *sm = &snat_main;
2892   u32 max_limit = 0, len = 0;
2893
2894   for (; len < vec_len (sm->max_translations_per_fib); len++)
2895     {
2896       if (max_limit < sm->max_translations_per_fib[len])
2897         max_limit = sm->max_translations_per_fib[len];
2898     }
2899   return max_limit;
2900 }
2901
2902 int
2903 nat44_set_session_limit (u32 session_limit, u32 vrf_id)
2904 {
2905   snat_main_t *sm = &snat_main;
2906   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
2907   u32 len = vec_len (sm->max_translations_per_fib);
2908
2909   if (len <= fib_index)
2910     {
2911       vec_validate (sm->max_translations_per_fib, fib_index + 1);
2912
2913       for (; len < vec_len (sm->max_translations_per_fib); len++)
2914         sm->max_translations_per_fib[len] = sm->max_translations_per_thread;
2915     }
2916
2917   sm->max_translations_per_fib[fib_index] = session_limit;
2918   return 0;
2919 }
2920
2921 int
2922 nat44_update_session_limit (u32 session_limit, u32 vrf_id)
2923 {
2924   snat_main_t *sm = &snat_main;
2925
2926   if (nat44_set_session_limit (session_limit, vrf_id))
2927     return 1;
2928   sm->max_translations_per_thread = nat44_get_max_session_limit ();
2929
2930   stat_segment_set_state_counter (sm->max_cfg_sessions_gauge,
2931                                   sm->max_translations_per_thread);
2932
2933   sm->translation_buckets =
2934     nat_calc_bihash_buckets (sm->max_translations_per_thread);
2935
2936   nat44_ed_sessions_clear ();
2937   return 0;
2938 }
2939
2940 static void
2941 nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations,
2942                          u32 translation_buckets)
2943 {
2944   dlist_elt_t *head;
2945
2946   pool_alloc (tsm->sessions, translations);
2947   pool_alloc (tsm->lru_pool, translations);
2948
2949   pool_get (tsm->lru_pool, head);
2950   tsm->tcp_trans_lru_head_index = head - tsm->lru_pool;
2951   clib_dlist_init (tsm->lru_pool, tsm->tcp_trans_lru_head_index);
2952
2953   pool_get (tsm->lru_pool, head);
2954   tsm->tcp_estab_lru_head_index = head - tsm->lru_pool;
2955   clib_dlist_init (tsm->lru_pool, tsm->tcp_estab_lru_head_index);
2956
2957   pool_get (tsm->lru_pool, head);
2958   tsm->udp_lru_head_index = head - tsm->lru_pool;
2959   clib_dlist_init (tsm->lru_pool, tsm->udp_lru_head_index);
2960
2961   pool_get (tsm->lru_pool, head);
2962   tsm->icmp_lru_head_index = head - tsm->lru_pool;
2963   clib_dlist_init (tsm->lru_pool, tsm->icmp_lru_head_index);
2964
2965   pool_get (tsm->lru_pool, head);
2966   tsm->unk_proto_lru_head_index = head - tsm->lru_pool;
2967   clib_dlist_init (tsm->lru_pool, tsm->unk_proto_lru_head_index);
2968 }
2969
2970 static void
2971 reinit_ed_flow_hash ()
2972 {
2973   snat_main_t *sm = &snat_main;
2974   // we expect 2 flows per session, so multiply translation_buckets by 2
2975   clib_bihash_init_16_8 (
2976     &sm->flow_hash, "ed-flow-hash",
2977     clib_max (1, sm->num_workers) * 2 * sm->translation_buckets, 0);
2978   clib_bihash_set_kvp_format_fn_16_8 (&sm->flow_hash, format_ed_session_kvp);
2979 }
2980
2981 static void
2982 nat44_ed_db_init (u32 translations, u32 translation_buckets)
2983 {
2984   snat_main_t *sm = &snat_main;
2985   snat_main_per_thread_data_t *tsm;
2986   u32 static_mapping_buckets = 1024;
2987   u32 static_mapping_memory_size = 64 << 20;
2988
2989   reinit_ed_flow_hash ();
2990
2991   clib_bihash_init_8_8 (&sm->static_mapping_by_local,
2992                         "static_mapping_by_local", static_mapping_buckets,
2993                         static_mapping_memory_size);
2994   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_local,
2995                                      format_static_mapping_kvp);
2996
2997   clib_bihash_init_8_8 (&sm->static_mapping_by_external,
2998                         "static_mapping_by_external", static_mapping_buckets,
2999                         static_mapping_memory_size);
3000   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_external,
3001                                      format_static_mapping_kvp);
3002
3003   if (sm->pat)
3004     {
3005       vec_foreach (tsm, sm->per_thread_data)
3006         {
3007           nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
3008                                    sm->translation_buckets);
3009         }
3010     }
3011 }
3012
3013 static void
3014 nat44_ed_worker_db_free (snat_main_per_thread_data_t *tsm)
3015 {
3016   pool_free (tsm->lru_pool);
3017   pool_free (tsm->sessions);
3018   vec_free (tsm->per_vrf_sessions_vec);
3019 }
3020
3021 static void
3022 nat44_ed_db_free ()
3023 {
3024   snat_main_t *sm = &snat_main;
3025   snat_main_per_thread_data_t *tsm;
3026
3027   pool_free (sm->static_mappings);
3028   clib_bihash_free_16_8 (&sm->flow_hash);
3029   clib_bihash_free_8_8 (&sm->static_mapping_by_local);
3030   clib_bihash_free_8_8 (&sm->static_mapping_by_external);
3031
3032   if (sm->pat)
3033     {
3034       vec_foreach (tsm, sm->per_thread_data)
3035         {
3036           nat44_ed_worker_db_free (tsm);
3037         }
3038     }
3039 }
3040
3041 void
3042 nat44_ed_sessions_clear ()
3043 {
3044   snat_main_t *sm = &snat_main;
3045   snat_main_per_thread_data_t *tsm;
3046
3047   reinit_ed_flow_hash ();
3048
3049   if (sm->pat)
3050     {
3051       vec_foreach (tsm, sm->per_thread_data)
3052         {
3053
3054           nat44_ed_worker_db_free (tsm);
3055           nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
3056                                    sm->translation_buckets);
3057         }
3058     }
3059   vlib_zero_simple_counter (&sm->total_sessions, 0);
3060 }
3061
3062 static void
3063 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
3064                                  uword opaque,
3065                                  u32 sw_if_index,
3066                                  ip4_address_t * address,
3067                                  u32 address_length,
3068                                  u32 if_address_index, u32 is_delete)
3069 {
3070   snat_main_t *sm = &snat_main;
3071   snat_static_map_resolve_t *rp;
3072   snat_static_mapping_t *m;
3073   clib_bihash_kv_8_8_t kv, value;
3074   int i, rv;
3075   ip4_address_t l_addr;
3076
3077   if (!sm->enabled)
3078     return;
3079
3080   for (i = 0; i < vec_len (sm->to_resolve); i++)
3081     {
3082       rp = sm->to_resolve + i;
3083       if (rp->addr_only == 0)
3084         continue;
3085       if (rp->sw_if_index == sw_if_index)
3086         goto match;
3087     }
3088
3089   return;
3090
3091 match:
3092   init_nat_k (&kv, *address, rp->addr_only ? 0 : rp->e_port,
3093               sm->outside_fib_index, rp->addr_only ? 0 : rp->proto);
3094   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
3095     m = 0;
3096   else
3097     m = pool_elt_at_index (sm->static_mappings, value.value);
3098
3099   if (!is_delete)
3100     {
3101       /* Don't trip over lease renewal, static config */
3102       if (m)
3103         return;
3104     }
3105   else
3106     {
3107       if (!m)
3108         return;
3109     }
3110
3111   /* Indetity mapping? */
3112   if (rp->l_addr.as_u32 == 0)
3113     l_addr.as_u32 = address[0].as_u32;
3114   else
3115     l_addr.as_u32 = rp->l_addr.as_u32;
3116   /* Add the static mapping */
3117   rv = snat_add_static_mapping (l_addr,
3118                                 address[0],
3119                                 rp->l_port,
3120                                 rp->e_port,
3121                                 rp->vrf_id,
3122                                 rp->addr_only, ~0 /* sw_if_index */ ,
3123                                 rp->proto, !is_delete, rp->twice_nat,
3124                                 rp->out2in_only, rp->tag, rp->identity_nat,
3125                                 rp->pool_addr, rp->exact);
3126   if (rv)
3127     nat_elog_notice_X1 (sm, "snat_add_static_mapping returned %d", "i4", rv);
3128 }
3129
3130 static void
3131 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
3132                                        uword opaque,
3133                                        u32 sw_if_index,
3134                                        ip4_address_t * address,
3135                                        u32 address_length,
3136                                        u32 if_address_index, u32 is_delete)
3137 {
3138   snat_main_t *sm = &snat_main;
3139   snat_static_map_resolve_t *rp;
3140   ip4_address_t l_addr;
3141   int i, j;
3142   int rv;
3143   u8 twice_nat = 0;
3144   snat_address_t *addresses = sm->addresses;
3145
3146   if (!sm->enabled)
3147     return;
3148
3149   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices); i++)
3150     {
3151       if (sw_if_index == sm->auto_add_sw_if_indices[i])
3152         goto match;
3153     }
3154
3155   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices_twice_nat); i++)
3156     {
3157       twice_nat = 1;
3158       addresses = sm->twice_nat_addresses;
3159       if (sw_if_index == sm->auto_add_sw_if_indices_twice_nat[i])
3160         goto match;
3161     }
3162
3163   return;
3164
3165 match:
3166   if (!is_delete)
3167     {
3168       /* Don't trip over lease renewal, static config */
3169       for (j = 0; j < vec_len (addresses); j++)
3170         if (addresses[j].addr.as_u32 == address->as_u32)
3171           return;
3172
3173       (void) snat_add_address (sm, address, ~0, twice_nat);
3174       /* Scan static map resolution vector */
3175       for (j = 0; j < vec_len (sm->to_resolve); j++)
3176         {
3177           rp = sm->to_resolve + j;
3178           if (rp->addr_only)
3179             continue;
3180           /* On this interface? */
3181           if (rp->sw_if_index == sw_if_index)
3182             {
3183               /* Indetity mapping? */
3184               if (rp->l_addr.as_u32 == 0)
3185                 l_addr.as_u32 = address[0].as_u32;
3186               else
3187                 l_addr.as_u32 = rp->l_addr.as_u32;
3188               /* Add the static mapping */
3189               rv = snat_add_static_mapping (
3190                 l_addr, address[0], rp->l_port, rp->e_port, rp->vrf_id,
3191                 rp->addr_only, ~0 /* sw_if_index */, rp->proto, 1,
3192                 rp->twice_nat, rp->out2in_only, rp->tag, rp->identity_nat,
3193                 rp->pool_addr, rp->exact);
3194               if (rv)
3195                 nat_elog_notice_X1 (sm, "snat_add_static_mapping returned %d",
3196                                     "i4", rv);
3197             }
3198         }
3199       return;
3200     }
3201   else
3202     {
3203       (void) snat_del_address (sm, address[0], 1, twice_nat);
3204       return;
3205     }
3206 }
3207
3208 int
3209 snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del,
3210                             u8 twice_nat)
3211 {
3212   ip4_main_t *ip4_main = sm->ip4_main;
3213   ip4_address_t *first_int_addr;
3214   snat_static_map_resolve_t *rp;
3215   u32 *indices_to_delete = 0;
3216   int i, j;
3217   u32 *auto_add_sw_if_indices =
3218     twice_nat ? sm->
3219     auto_add_sw_if_indices_twice_nat : sm->auto_add_sw_if_indices;
3220
3221   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0        /* just want the address */
3222     );
3223
3224   for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
3225     {
3226       if (auto_add_sw_if_indices[i] == sw_if_index)
3227         {
3228           if (is_del)
3229             {
3230               /* if have address remove it */
3231               if (first_int_addr)
3232                 (void) snat_del_address (sm, first_int_addr[0], 1, twice_nat);
3233               else
3234                 {
3235                   for (j = 0; j < vec_len (sm->to_resolve); j++)
3236                     {
3237                       rp = sm->to_resolve + j;
3238                       if (rp->sw_if_index == sw_if_index)
3239                         vec_add1 (indices_to_delete, j);
3240                     }
3241                   if (vec_len (indices_to_delete))
3242                     {
3243                       for (j = vec_len (indices_to_delete) - 1; j >= 0; j--)
3244                         vec_del1 (sm->to_resolve, j);
3245                       vec_free (indices_to_delete);
3246                     }
3247                 }
3248               if (twice_nat)
3249                 vec_del1 (sm->auto_add_sw_if_indices_twice_nat, i);
3250               else
3251                 vec_del1 (sm->auto_add_sw_if_indices, i);
3252             }
3253           else
3254             return VNET_API_ERROR_VALUE_EXIST;
3255
3256           return 0;
3257         }
3258     }
3259
3260   if (is_del)
3261     return VNET_API_ERROR_NO_SUCH_ENTRY;
3262
3263   /* add to the auto-address list */
3264   if (twice_nat)
3265     vec_add1 (sm->auto_add_sw_if_indices_twice_nat, sw_if_index);
3266   else
3267     vec_add1 (sm->auto_add_sw_if_indices, sw_if_index);
3268
3269   /* If the address is already bound - or static - add it now */
3270   if (first_int_addr)
3271     (void) snat_add_address (sm, first_int_addr, ~0, twice_nat);
3272
3273   return 0;
3274 }
3275
3276 int
3277 nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
3278                       ip4_address_t * eh_addr, u16 eh_port, u8 proto,
3279                       u32 vrf_id, int is_in)
3280 {
3281   ip4_header_t ip;
3282   clib_bihash_kv_16_8_t kv, value;
3283   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
3284   snat_session_t *s;
3285   snat_main_per_thread_data_t *tsm;
3286
3287   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
3288   if (sm->num_workers > 1)
3289     tsm = vec_elt_at_index (
3290       sm->per_thread_data,
3291       nat44_ed_get_in2out_worker_index (0, &ip, fib_index, 0));
3292   else
3293     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
3294
3295   init_ed_k (&kv, *addr, port, *eh_addr, eh_port, fib_index, proto);
3296   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
3297     {
3298       return VNET_API_ERROR_NO_SUCH_ENTRY;
3299     }
3300
3301   if (pool_is_free_index (tsm->sessions, ed_value_get_session_index (&value)))
3302     return VNET_API_ERROR_UNSPECIFIED;
3303   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
3304   nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
3305   nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
3306   return 0;
3307 }
3308
3309 VLIB_NODE_FN (nat_default_node) (vlib_main_t * vm,
3310                                  vlib_node_runtime_t * node,
3311                                  vlib_frame_t * frame)
3312 {
3313   return 0;
3314 }
3315
3316 VLIB_REGISTER_NODE (nat_default_node) = {
3317   .name = "nat-default",
3318   .vector_size = sizeof (u32),
3319   .format_trace = 0,
3320   .type = VLIB_NODE_TYPE_INTERNAL,
3321   .n_errors = 0,
3322   .n_next_nodes = NAT_N_NEXT,
3323   .next_nodes = {
3324     [NAT_NEXT_DROP] = "error-drop",
3325     [NAT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3326     [NAT_NEXT_IN2OUT_ED_FAST_PATH] = "nat44-ed-in2out",
3327     [NAT_NEXT_IN2OUT_ED_SLOW_PATH] = "nat44-ed-in2out-slowpath",
3328     [NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH] = "nat44-ed-in2out-output",
3329     [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
3330     [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in",
3331     [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath",
3332     [NAT_NEXT_IN2OUT_CLASSIFY] = "nat44-in2out-worker-handoff",
3333     [NAT_NEXT_OUT2IN_CLASSIFY] = "nat44-out2in-worker-handoff",
3334   },
3335 };
3336
3337 void
3338 nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f)
3339 {
3340   f->l3_csum_delta = 0;
3341   f->l4_csum_delta = 0;
3342   if (f->ops & NAT_FLOW_OP_SADDR_REWRITE &&
3343       f->rewrite.saddr.as_u32 != f->match.saddr.as_u32)
3344     {
3345       f->l3_csum_delta =
3346         ip_csum_add_even (f->l3_csum_delta, f->rewrite.saddr.as_u32);
3347       f->l3_csum_delta =
3348         ip_csum_sub_even (f->l3_csum_delta, f->match.saddr.as_u32);
3349     }
3350   else
3351     {
3352       f->rewrite.saddr.as_u32 = f->match.saddr.as_u32;
3353     }
3354   if (f->ops & NAT_FLOW_OP_DADDR_REWRITE &&
3355       f->rewrite.daddr.as_u32 != f->match.daddr.as_u32)
3356     {
3357       f->l3_csum_delta =
3358         ip_csum_add_even (f->l3_csum_delta, f->rewrite.daddr.as_u32);
3359       f->l3_csum_delta =
3360         ip_csum_sub_even (f->l3_csum_delta, f->match.daddr.as_u32);
3361     }
3362   else
3363     {
3364       f->rewrite.daddr.as_u32 = f->match.daddr.as_u32;
3365     }
3366   if (f->ops & NAT_FLOW_OP_SPORT_REWRITE && f->rewrite.sport != f->match.sport)
3367     {
3368       f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.sport);
3369       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport);
3370     }
3371   else
3372     {
3373       f->rewrite.sport = f->match.sport;
3374     }
3375   if (f->ops & NAT_FLOW_OP_DPORT_REWRITE && f->rewrite.dport != f->match.dport)
3376     {
3377       f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.dport);
3378       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.dport);
3379     }
3380   else
3381     {
3382       f->rewrite.dport = f->match.dport;
3383     }
3384   if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE &&
3385       f->rewrite.icmp_id != f->match.sport)
3386     {
3387       f->l4_csum_delta =
3388         ip_csum_add_even (f->l4_csum_delta, f->rewrite.icmp_id);
3389       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport);
3390     }
3391   else
3392     {
3393       f->rewrite.icmp_id = f->match.sport;
3394     }
3395   if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3396     {
3397     }
3398   else
3399     {
3400       f->rewrite.fib_index = f->match.fib_index;
3401     }
3402 }
3403
3404 static_always_inline int nat_6t_flow_icmp_translate (snat_main_t *sm,
3405                                                      vlib_buffer_t *b,
3406                                                      ip4_header_t *ip,
3407                                                      nat_6t_flow_t *f);
3408
3409 static_always_inline void
3410 nat_6t_flow_ip4_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
3411                            nat_6t_flow_t *f, nat_protocol_t proto,
3412                            int is_icmp_inner_ip4, int skip_saddr_rewrite)
3413 {
3414   udp_header_t *udp = ip4_next_header (ip);
3415   tcp_header_t *tcp = (tcp_header_t *) udp;
3416
3417   if ((NAT_PROTOCOL_TCP == proto || NAT_PROTOCOL_UDP == proto) &&
3418       !vnet_buffer (b)->ip.reass.is_non_first_fragment)
3419     {
3420       if (!is_icmp_inner_ip4)
3421         { // regular case
3422           ip->src_address = f->rewrite.saddr;
3423           ip->dst_address = f->rewrite.daddr;
3424           udp->src_port = f->rewrite.sport;
3425           udp->dst_port = f->rewrite.dport;
3426         }
3427       else
3428         { // icmp inner ip4 - reversed saddr/daddr
3429           ip->src_address = f->rewrite.daddr;
3430           ip->dst_address = f->rewrite.saddr;
3431           udp->src_port = f->rewrite.dport;
3432           udp->dst_port = f->rewrite.sport;
3433         }
3434
3435       if (NAT_PROTOCOL_TCP == proto)
3436         {
3437           ip_csum_t tcp_sum = tcp->checksum;
3438           tcp_sum = ip_csum_sub_even (tcp_sum, f->l3_csum_delta);
3439           tcp_sum = ip_csum_sub_even (tcp_sum, f->l4_csum_delta);
3440           mss_clamping (sm->mss_clamping, tcp, &tcp_sum);
3441           tcp->checksum = ip_csum_fold (tcp_sum);
3442         }
3443       else if (proto == NAT_PROTOCOL_UDP && udp->checksum)
3444         {
3445           ip_csum_t udp_sum = udp->checksum;
3446           udp_sum = ip_csum_sub_even (udp_sum, f->l3_csum_delta);
3447           udp_sum = ip_csum_sub_even (udp_sum, f->l4_csum_delta);
3448           udp->checksum = ip_csum_fold (udp_sum);
3449         }
3450     }
3451   else
3452     {
3453       if (!is_icmp_inner_ip4)
3454         { // regular case
3455           if (!skip_saddr_rewrite)
3456             {
3457               ip->src_address = f->rewrite.saddr;
3458             }
3459           ip->dst_address = f->rewrite.daddr;
3460         }
3461       else
3462         { // icmp inner ip4 - reversed saddr/daddr
3463           ip->src_address = f->rewrite.daddr;
3464           ip->dst_address = f->rewrite.saddr;
3465         }
3466     }
3467
3468   if (skip_saddr_rewrite)
3469     {
3470       ip->checksum = ip4_header_checksum (ip);
3471     }
3472   else
3473     {
3474       ip_csum_t ip_sum = ip->checksum;
3475       ip_sum = ip_csum_sub_even (ip_sum, f->l3_csum_delta);
3476       ip->checksum = ip_csum_fold (ip_sum);
3477     }
3478   if (0xffff == ip->checksum)
3479     ip->checksum = 0;
3480   ASSERT (ip4_header_checksum_is_valid (ip));
3481 }
3482
3483 static_always_inline int
3484 nat_6t_flow_icmp_translate (snat_main_t *sm, vlib_buffer_t *b,
3485                             ip4_header_t *ip, nat_6t_flow_t *f)
3486 {
3487   if (IP_PROTOCOL_ICMP != ip->protocol)
3488     return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3489
3490   icmp46_header_t *icmp = ip4_next_header (ip);
3491   icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3492
3493   if ((!vnet_buffer (b)->ip.reass.is_non_first_fragment))
3494     {
3495       if (icmp->checksum == 0)
3496         icmp->checksum = 0xffff;
3497
3498       if (!icmp_type_is_error_message (icmp->type))
3499         {
3500           if ((f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE) &&
3501               (f->rewrite.icmp_id != echo->identifier))
3502             {
3503               ip_csum_t sum = icmp->checksum;
3504               sum = ip_csum_update (sum, echo->identifier, f->rewrite.icmp_id,
3505                                     icmp_echo_header_t,
3506                                     identifier /* changed member */);
3507               echo->identifier = f->rewrite.icmp_id;
3508               icmp->checksum = ip_csum_fold (sum);
3509             }
3510         }
3511       else
3512         {
3513           // errors are not fragmented
3514           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3515
3516           if (!ip4_header_checksum_is_valid (inner_ip))
3517             {
3518               return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3519             }
3520
3521           nat_protocol_t inner_proto =
3522             ip_proto_to_nat_proto (inner_ip->protocol);
3523
3524           ip_csum_t old_icmp_sum = icmp->checksum;
3525           ip_csum_t old_inner_ip_sum = inner_ip->checksum;
3526           ip_csum_t old_udp_sum;
3527           ip_csum_t old_tcp_sum;
3528           ip_csum_t new_icmp_sum;
3529           udp_header_t *udp;
3530           tcp_header_t *tcp;
3531
3532           switch (inner_proto)
3533             {
3534             case NAT_PROTOCOL_UDP:
3535               udp = (udp_header_t *) (inner_ip + 1);
3536               old_udp_sum = udp->checksum;
3537               nat_6t_flow_ip4_translate (sm, b, inner_ip, f, inner_proto,
3538                                          1 /* is_icmp_inner_ip4 */,
3539                                          0 /* skip_saddr_rewrite */);
3540               new_icmp_sum = ip_csum_sub_even (old_icmp_sum, f->l3_csum_delta);
3541               new_icmp_sum = ip_csum_sub_even (new_icmp_sum, f->l4_csum_delta);
3542               new_icmp_sum =
3543                 ip_csum_update (new_icmp_sum, old_inner_ip_sum,
3544                                 inner_ip->checksum, ip4_header_t, checksum);
3545               new_icmp_sum =
3546                 ip_csum_update (new_icmp_sum, old_udp_sum, udp->checksum,
3547                                 udp_header_t, checksum);
3548               new_icmp_sum = ip_csum_fold (new_icmp_sum);
3549               if (0xffff == new_icmp_sum)
3550                 new_icmp_sum = 0;
3551               icmp->checksum = new_icmp_sum;
3552               break;
3553             case NAT_PROTOCOL_TCP:
3554               tcp = (tcp_header_t *) (inner_ip + 1);
3555               old_tcp_sum = tcp->checksum;
3556               nat_6t_flow_ip4_translate (sm, b, inner_ip, f, inner_proto,
3557                                          1 /* is_icmp_inner_ip4 */,
3558                                          0 /* skip_saddr_rewrite */);
3559               new_icmp_sum = ip_csum_sub_even (old_icmp_sum, f->l3_csum_delta);
3560               new_icmp_sum = ip_csum_sub_even (new_icmp_sum, f->l4_csum_delta);
3561               new_icmp_sum =
3562                 ip_csum_update (new_icmp_sum, old_inner_ip_sum,
3563                                 inner_ip->checksum, ip4_header_t, checksum);
3564               new_icmp_sum =
3565                 ip_csum_update (new_icmp_sum, old_tcp_sum, tcp->checksum,
3566                                 tcp_header_t, checksum);
3567               new_icmp_sum = ip_csum_fold (new_icmp_sum);
3568               if (0xffff == new_icmp_sum)
3569                 new_icmp_sum = 0;
3570               icmp->checksum = new_icmp_sum;
3571               break;
3572             case NAT_PROTOCOL_ICMP:
3573               if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
3574                 {
3575                   icmp46_header_t *inner_icmp = ip4_next_header (inner_ip);
3576                   icmp_echo_header_t *inner_echo =
3577                     (icmp_echo_header_t *) (inner_icmp + 1);
3578                   if (f->rewrite.icmp_id != inner_echo->identifier)
3579                     {
3580                       ip_csum_t sum = icmp->checksum;
3581                       sum = ip_csum_update (
3582                         sum, inner_echo->identifier, f->rewrite.icmp_id,
3583                         icmp_echo_header_t, identifier /* changed member */);
3584                       icmp->checksum = ip_csum_fold (sum);
3585                       ip_csum_t inner_sum = inner_icmp->checksum;
3586                       inner_sum = ip_csum_update (
3587                         sum, inner_echo->identifier, f->rewrite.icmp_id,
3588                         icmp_echo_header_t, identifier /* changed member */);
3589                       inner_icmp->checksum = ip_csum_fold (inner_sum);
3590                       inner_echo->identifier = f->rewrite.icmp_id;
3591                     }
3592                 }
3593               break;
3594             default:
3595               clib_warning ("unexpected NAT protocol value `%d'", inner_proto);
3596               return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3597             }
3598         }
3599     }
3600
3601   return NAT_ED_TRNSL_ERR_SUCCESS;
3602 }
3603
3604 static_always_inline nat_translation_error_e
3605 nat_6t_flow_buf_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
3606                            nat_6t_flow_t *f, nat_protocol_t proto,
3607                            int is_output_feature, int is_i2o)
3608 {
3609   if (!is_output_feature && f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3610     {
3611       vnet_buffer (b)->sw_if_index[VLIB_TX] = f->rewrite.fib_index;
3612     }
3613
3614   if (NAT_PROTOCOL_ICMP == proto)
3615     {
3616       if (ip->src_address.as_u32 != f->rewrite.saddr.as_u32)
3617         {
3618           // packet is returned from a router, not from destination
3619           // skip source address rewrite if in o2i path
3620           nat_6t_flow_ip4_translate (sm, b, ip, f, proto,
3621                                      0 /* is_icmp_inner_ip4 */,
3622                                      !is_i2o /* skip_saddr_rewrite */);
3623         }
3624       else
3625         {
3626           nat_6t_flow_ip4_translate (sm, b, ip, f, proto,
3627                                      0 /* is_icmp_inner_ip4 */,
3628                                      0 /* skip_saddr_rewrite */);
3629         }
3630       return nat_6t_flow_icmp_translate (sm, b, ip, f);
3631     }
3632
3633   nat_6t_flow_ip4_translate (sm, b, ip, f, proto, 0 /* is_icmp_inner_ip4 */,
3634                              0 /* skip_saddr_rewrite */);
3635
3636   return NAT_ED_TRNSL_ERR_SUCCESS;
3637 }
3638
3639 nat_translation_error_e
3640 nat_6t_flow_buf_translate_i2o (snat_main_t *sm, vlib_buffer_t *b,
3641                                ip4_header_t *ip, nat_6t_flow_t *f,
3642                                nat_protocol_t proto, int is_output_feature)
3643 {
3644   return nat_6t_flow_buf_translate (sm, b, ip, f, proto, is_output_feature,
3645                                     1 /* is_i2o */);
3646 }
3647
3648 nat_translation_error_e
3649 nat_6t_flow_buf_translate_o2i (snat_main_t *sm, vlib_buffer_t *b,
3650                                ip4_header_t *ip, nat_6t_flow_t *f,
3651                                nat_protocol_t proto, int is_output_feature)
3652 {
3653   return nat_6t_flow_buf_translate (sm, b, ip, f, proto, is_output_feature,
3654                                     0 /* is_i2o */);
3655 }
3656
3657 u8 *
3658 format_nat_6t (u8 *s, va_list *args)
3659 {
3660   nat_6t_t *t = va_arg (*args, nat_6t_t *);
3661
3662   s = format (s, "saddr %U sport %u daddr %U dport %u proto %U fib_idx %u",
3663               format_ip4_address, t->saddr.as_u8,
3664               clib_net_to_host_u16 (t->sport), format_ip4_address,
3665               t->daddr.as_u8, clib_net_to_host_u16 (t->dport),
3666               format_ip_protocol, t->proto, t->fib_index);
3667   return s;
3668 }
3669
3670 u8 *
3671 format_nat_ed_translation_error (u8 *s, va_list *args)
3672 {
3673   nat_translation_error_e e = va_arg (*args, nat_translation_error_e);
3674
3675   switch (e)
3676     {
3677     case NAT_ED_TRNSL_ERR_SUCCESS:
3678       s = format (s, "success");
3679       break;
3680     case NAT_ED_TRNSL_ERR_TRANSLATION_FAILED:
3681       s = format (s, "translation-failed");
3682       break;
3683     case NAT_ED_TRNSL_ERR_FLOW_MISMATCH:
3684       s = format (s, "flow-mismatch");
3685       break;
3686     }
3687   return s;
3688 }
3689
3690 u8 *
3691 format_nat_6t_flow (u8 *s, va_list *args)
3692 {
3693   nat_6t_flow_t *f = va_arg (*args, nat_6t_flow_t *);
3694
3695   s = format (s, "match: %U ", format_nat_6t, &f->match);
3696   int r = 0;
3697   if (f->ops & NAT_FLOW_OP_SADDR_REWRITE)
3698     {
3699       s = format (s, "rewrite: saddr %U ", format_ip4_address,
3700                   f->rewrite.saddr.as_u8);
3701       r = 1;
3702     }
3703   if (f->ops & NAT_FLOW_OP_SPORT_REWRITE)
3704     {
3705       if (!r)
3706         {
3707           s = format (s, "rewrite: ");
3708           r = 1;
3709         }
3710       s = format (s, "sport %u ", clib_net_to_host_u16 (f->rewrite.sport));
3711     }
3712   if (f->ops & NAT_FLOW_OP_DADDR_REWRITE)
3713     {
3714       if (!r)
3715         {
3716           s = format (s, "rewrite: ");
3717           r = 1;
3718         }
3719       s = format (s, "daddr %U ", format_ip4_address, f->rewrite.daddr.as_u8);
3720     }
3721   if (f->ops & NAT_FLOW_OP_DPORT_REWRITE)
3722     {
3723       if (!r)
3724         {
3725           s = format (s, "rewrite: ");
3726           r = 1;
3727         }
3728       s = format (s, "dport %u ", clib_net_to_host_u16 (f->rewrite.dport));
3729     }
3730   if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
3731     {
3732       if (!r)
3733         {
3734           s = format (s, "rewrite: ");
3735           r = 1;
3736         }
3737       s = format (s, "icmp-id %u ", clib_net_to_host_u16 (f->rewrite.icmp_id));
3738     }
3739   if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3740     {
3741       if (!r)
3742         {
3743           s = format (s, "rewrite: ");
3744           r = 1;
3745         }
3746       s = format (s, "txfib %u ", f->rewrite.fib_index);
3747     }
3748   return s;
3749 }
3750
3751 /*
3752  * fd.io coding-style-patch-verification: ON
3753  *
3754  * Local Variables:
3755  * eval: (c-set-style "gnu")
3756  * End:
3757  */