d42d303d95c7bc87ed4edaac6d5fd78a8764cbd2
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed.c
1 /*
2  * snat.c - simple nat plugin
3  *
4  * Copyright (c) 2016 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vpp/app/version.h>
19
20 #include <vnet/vnet.h>
21 #include <vnet/ip/ip.h>
22 #include <vnet/ip/ip4.h>
23 #include <vnet/ip/ip_table.h>
24 #include <vnet/ip/reass/ip4_sv_reass.h>
25 #include <vnet/fib/fib_table.h>
26 #include <vnet/fib/ip4_fib.h>
27 #include <vnet/plugin/plugin.h>
28 #include <vppinfra/bihash_16_8.h>
29
30 #include <nat/lib/log.h>
31 #include <nat/lib/nat_syslog.h>
32 #include <nat/lib/nat_inlines.h>
33 #include <nat/lib/ipfix_logging.h>
34
35 #include <nat/nat44-ed/nat44_ed.h>
36 #include <nat/nat44-ed/nat44_ed_affinity.h>
37 #include <nat/nat44-ed/nat44_ed_inlines.h>
38
39 #include <vpp/stats/stat_segment.h>
40
41 snat_main_t snat_main;
42
43 static_always_inline void nat_validate_interface_counters (snat_main_t *sm,
44                                                            u32 sw_if_index);
45
/* Return from the enclosing (void) function right away when the plugin is
   not enabled. */
#define skip_if_disabled()                                                    \
  do                                                                          \
    {                                                                         \
      if (PREDICT_FALSE (!snat_main.enabled))                                 \
        return;                                                               \
    }                                                                         \
  while (0)
54
/* Log an error and make the enclosing function return 1 when the plugin is
   already enabled. */
#define fail_if_enabled()                                                     \
  do                                                                          \
    {                                                                         \
      if (PREDICT_FALSE (snat_main.enabled))                                  \
        {                                                                     \
          nat_log_err ("plugin enabled");                                     \
          return 1;                                                           \
        }                                                                     \
    }                                                                         \
  while (0)
66
/* Log an error and make the enclosing function return 1 when the plugin is
   not enabled. */
#define fail_if_disabled()                                                    \
  do                                                                          \
    {                                                                         \
      if (PREDICT_FALSE (!snat_main.enabled))                                 \
        {                                                                     \
          nat_log_err ("plugin disabled");                                    \
          return 1;                                                           \
        }                                                                     \
    }                                                                         \
  while (0)
78
79 /* Hook up input features */
80 VNET_FEATURE_INIT (nat_pre_in2out, static) = {
81   .arc_name = "ip4-unicast",
82   .node_name = "nat-pre-in2out",
83   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
84                                "ip4-sv-reassembly-feature"),
85 };
86 VNET_FEATURE_INIT (nat_pre_out2in, static) = {
87   .arc_name = "ip4-unicast",
88   .node_name = "nat-pre-out2in",
89   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
90                                "ip4-dhcp-client-detect",
91                                "ip4-sv-reassembly-feature"),
92 };
93 VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = {
94   .arc_name = "ip4-unicast",
95   .node_name = "nat44-in2out-worker-handoff",
96   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
97 };
98 VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = {
99   .arc_name = "ip4-unicast",
100   .node_name = "nat44-out2in-worker-handoff",
101   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
102                                "ip4-dhcp-client-detect"),
103 };
104 VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
105   .arc_name = "ip4-unicast",
106   .node_name = "nat44-in2out",
107   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
108 };
109 VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
110   .arc_name = "ip4-unicast",
111   .node_name = "nat44-out2in",
112   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
113                                "ip4-dhcp-client-detect"),
114 };
115 VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = {
116   .arc_name = "ip4-unicast",
117   .node_name = "nat44-ed-in2out",
118   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
119 };
120 VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = {
121   .arc_name = "ip4-unicast",
122   .node_name = "nat44-ed-out2in",
123   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
124                                "ip4-dhcp-client-detect"),
125 };
126 VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
127   .arc_name = "ip4-unicast",
128   .node_name = "nat44-ed-classify",
129   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
130 };
131 VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
132   .arc_name = "ip4-unicast",
133   .node_name = "nat44-handoff-classify",
134   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
135 };
136 VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
137   .arc_name = "ip4-unicast",
138   .node_name = "nat44-in2out-fast",
139   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
140 };
141 VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
142   .arc_name = "ip4-unicast",
143   .node_name = "nat44-out2in-fast",
144   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
145                                "ip4-dhcp-client-detect"),
146 };
147
148 /* Hook up output features */
149 VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = {
150   .arc_name = "ip4-output",
151   .node_name = "nat44-in2out-output",
152   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
153   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
154 };
155 VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
156   .arc_name = "ip4-output",
157   .node_name = "nat44-in2out-output-worker-handoff",
158   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
159   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
160 };
161 VNET_FEATURE_INIT (nat_pre_in2out_output, static) = {
162   .arc_name = "ip4-output",
163   .node_name = "nat-pre-in2out-output",
164   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
165   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
166 };
167 VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = {
168   .arc_name = "ip4-output",
169   .node_name = "nat44-ed-in2out-output",
170   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
171   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
172 };
173
174 VLIB_PLUGIN_REGISTER () = {
175     .version = VPP_BUILD_VER,
176     .description = "Network Address Translation (NAT)",
177 };
178
179 static void nat44_ed_db_init (u32 translations, u32 translation_buckets);
180
181 static void nat44_ed_db_free ();
182
183 u32 nat_calc_bihash_buckets (u32 n_elts);
184
185 u8 *
186 format_session_kvp (u8 * s, va_list * args)
187 {
188   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
189
190   s = format (s, "%U thread-index %llu session-index %llu", format_snat_key,
191               v->key, nat_value_get_thread_index (v),
192               nat_value_get_session_index (v));
193
194   return s;
195 }
196
197 u8 *
198 format_static_mapping_kvp (u8 * s, va_list * args)
199 {
200   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
201
202   s = format (s, "%U static-mapping-index %llu",
203               format_snat_key, v->key, v->value);
204
205   return s;
206 }
207
208 u8 *
209 format_ed_session_kvp (u8 * s, va_list * args)
210 {
211   clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
212
213   u8 proto;
214   u16 r_port, l_port;
215   ip4_address_t l_addr, r_addr;
216   u32 fib_index;
217
218   split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port);
219   s = format (s,
220               "local %U:%d remote %U:%d proto %U fib %d thread-index %u "
221               "session-index %u",
222               format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port),
223               format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port),
224               format_ip_protocol, proto, fib_index,
225               ed_value_get_thread_index (v), ed_value_get_session_index (v));
226
227   return s;
228 }
229
230 void
231 nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index,
232                        u8 is_ha)
233 {
234       per_vrf_sessions_unregister_session (s, thread_index);
235
236       if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 0))
237         nat_elog_warn (sm, "flow hash del failed");
238
239       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0))
240         nat_elog_warn (sm, "flow hash del failed");
241
242   if (is_fwd_bypass_session (s))
243     {
244       return;
245     }
246
247       if (is_affinity_sessions (s))
248         nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
249                              s->nat_proto, s->out2in.port);
250
251       if (!is_ha)
252         nat_syslog_nat44_sdel (
253           0, s->in2out.fib_index, &s->in2out.addr, s->in2out.port,
254           &s->ext_host_nat_addr, s->ext_host_nat_port, &s->out2in.addr,
255           s->out2in.port, &s->ext_host_addr, s->ext_host_port, s->nat_proto,
256           is_twice_nat_session (s));
257
258   if (snat_is_unk_proto_session (s))
259     return;
260
261   if (!is_ha)
262     {
263       /* log NAT event */
264       nat_ipfix_logging_nat44_ses_delete (thread_index,
265                                           s->in2out.addr.as_u32,
266                                           s->out2in.addr.as_u32,
267                                           s->nat_proto,
268                                           s->in2out.port,
269                                           s->out2in.port,
270                                           s->in2out.fib_index);
271     }
272
273   /* Twice NAT address and port for external host */
274   if (is_twice_nat_session (s))
275     {
276       snat_free_outside_address_and_port (sm->twice_nat_addresses,
277                                           thread_index,
278                                           &s->ext_host_nat_addr,
279                                           s->ext_host_nat_port, s->nat_proto);
280     }
281
282   if (snat_is_session_static (s))
283     return;
284
285   snat_free_outside_address_and_port (sm->addresses, thread_index,
286                                       &s->out2in.addr, s->out2in.port,
287                                       s->nat_proto);
288 }
289
290 void
291 snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
292                           int is_add)
293 {
294   snat_main_t *sm = &snat_main;
295   fib_prefix_t prefix = {
296     .fp_len = p_len,
297     .fp_proto = FIB_PROTOCOL_IP4,
298     .fp_addr = {
299                 .ip4.as_u32 = addr->as_u32,
300                 },
301   };
302   u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
303
304   if (is_add)
305     fib_table_entry_update_one_path (fib_index,
306                                      &prefix,
307                                      sm->fib_src_low,
308                                      (FIB_ENTRY_FLAG_CONNECTED |
309                                       FIB_ENTRY_FLAG_LOCAL |
310                                       FIB_ENTRY_FLAG_EXCLUSIVE),
311                                      DPO_PROTO_IP4,
312                                      NULL,
313                                      sw_if_index,
314                                      ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
315   else
316     fib_table_entry_delete (fib_index, &prefix, sm->fib_src_low);
317 }
318
319 int
320 snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id,
321                   u8 twice_nat)
322 {
323   snat_address_t *ap;
324   snat_interface_t *i;
325   vlib_thread_main_t *tm = vlib_get_thread_main ();
326
327   /* Check if address already exists */
328   vec_foreach (ap, twice_nat ? sm->twice_nat_addresses : sm->addresses)
329     {
330       if (ap->addr.as_u32 == addr->as_u32)
331         {
332           nat_log_err ("address exist");
333           return VNET_API_ERROR_VALUE_EXIST;
334         }
335     }
336
337   if (twice_nat)
338     vec_add2 (sm->twice_nat_addresses, ap, 1);
339   else
340     vec_add2 (sm->addresses, ap, 1);
341
342   ap->addr = *addr;
343   if (vrf_id != ~0)
344     ap->fib_index =
345       fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
346                                          sm->fib_src_low);
347   else
348     ap->fib_index = ~0;
349
350   #define _(N, i, n, s) \
351     clib_memset(ap->busy_##n##_port_refcounts, 0, sizeof(ap->busy_##n##_port_refcounts));\
352     ap->busy_##n##_ports = 0; \
353     ap->busy_##n##_ports_per_thread = 0;\
354     vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
355     foreach_nat_protocol
356   #undef _
357
358   if (twice_nat)
359     return 0;
360
361   /* Add external address to FIB */
362   pool_foreach (i, sm->interfaces)
363    {
364      if (nat_interface_is_inside (i))
365        continue;
366
367      snat_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
368      break;
369   }
370   pool_foreach (i, sm->output_feature_interfaces)
371    {
372      if (nat_interface_is_inside (i))
373        continue;
374
375      snat_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
376      break;
377   }
378
379   return 0;
380 }
381
382 static int
383 is_snat_address_used_in_static_mapping (snat_main_t * sm, ip4_address_t addr)
384 {
385   snat_static_mapping_t *m;
386   pool_foreach (m, sm->static_mappings)
387    {
388       if (is_addr_only_static_mapping (m) ||
389           is_out2in_only_static_mapping (m) ||
390           is_identity_static_mapping (m))
391         continue;
392       if (m->external_addr.as_u32 == addr.as_u32)
393         return 1;
394   }
395
396   return 0;
397 }
398
399 static void
400 snat_add_static_mapping_when_resolved (snat_main_t *sm, ip4_address_t l_addr,
401                                        u16 l_port, u32 sw_if_index, u16 e_port,
402                                        u32 vrf_id, nat_protocol_t proto,
403                                        int addr_only, u8 *tag, int twice_nat,
404                                        int out2in_only, int identity_nat,
405                                        ip4_address_t pool_addr, int exact)
406 {
407   snat_static_map_resolve_t *rp;
408
409   vec_add2 (sm->to_resolve, rp, 1);
410   rp->l_addr.as_u32 = l_addr.as_u32;
411   rp->l_port = l_port;
412   rp->sw_if_index = sw_if_index;
413   rp->e_port = e_port;
414   rp->vrf_id = vrf_id;
415   rp->proto = proto;
416   rp->addr_only = addr_only;
417   rp->twice_nat = twice_nat;
418   rp->out2in_only = out2in_only;
419   rp->identity_nat = identity_nat;
420   rp->tag = vec_dup (tag);
421   rp->pool_addr = pool_addr;
422   rp->exact = exact;
423 }
424
425 u32
426 get_thread_idx_by_port (u16 e_port)
427 {
428   snat_main_t *sm = &snat_main;
429   u32 thread_idx = sm->num_workers;
430   if (sm->num_workers > 1)
431     {
432       thread_idx =
433         sm->first_worker_index +
434         sm->workers[(e_port - 1024) / sm->port_per_thread];
435     }
436   return thread_idx;
437 }
438
439 void
440 nat_ed_static_mapping_del_sessions (snat_main_t * sm,
441                                     snat_main_per_thread_data_t * tsm,
442                                     ip4_address_t l_addr,
443                                     u16 l_port,
444                                     u8 protocol,
445                                     u32 fib_index, int addr_only,
446                                     ip4_address_t e_addr, u16 e_port)
447 {
448   snat_session_t *s;
449   u32 *indexes_to_free = NULL;
450   pool_foreach (s, tsm->sessions) {
451     if (s->in2out.fib_index != fib_index ||
452         s->in2out.addr.as_u32 != l_addr.as_u32)
453       {
454         continue;
455       }
456     if (!addr_only)
457       {
458         if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
459             s->out2in.port != e_port ||
460             s->in2out.port != l_port ||
461             s->nat_proto != protocol)
462           continue;
463       }
464
465     if (is_lb_session (s))
466       continue;
467     if (!snat_is_session_static (s))
468       continue;
469     nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
470     vec_add1 (indexes_to_free, s - tsm->sessions);
471     if (!addr_only)
472       break;
473   }
474   u32 *ses_index;
475   vec_foreach (ses_index, indexes_to_free)
476   {
477     s = pool_elt_at_index (tsm->sessions, *ses_index);
478     nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
479   }
480   vec_free (indexes_to_free);
481 }
482
483 int
484 snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
485                          u16 l_port, u16 e_port, u32 vrf_id, int addr_only,
486                          u32 sw_if_index, nat_protocol_t proto, int is_add,
487                          twice_nat_type_t twice_nat, u8 out2in_only, u8 *tag,
488                          u8 identity_nat, ip4_address_t pool_addr, int exact)
489 {
490   snat_main_t *sm = &snat_main;
491   snat_static_mapping_t *m;
492   clib_bihash_kv_8_8_t kv, value;
493   snat_address_t *a = 0;
494   u32 fib_index = ~0;
495   snat_interface_t *interface;
496   snat_main_per_thread_data_t *tsm;
497   snat_static_map_resolve_t *rp, *rp_match = 0;
498   nat44_lb_addr_port_t *local;
499   u32 find = ~0;
500   int i;
501
502   /* If the external address is a specific interface address */
503   if (sw_if_index != ~0)
504     {
505       ip4_address_t *first_int_addr;
506
507       for (i = 0; i < vec_len (sm->to_resolve); i++)
508         {
509           rp = sm->to_resolve + i;
510           if (rp->sw_if_index != sw_if_index ||
511               rp->l_addr.as_u32 != l_addr.as_u32 ||
512               rp->vrf_id != vrf_id || rp->addr_only != addr_only)
513             continue;
514
515           if (!addr_only)
516             {
517               if ((rp->l_port != l_port && rp->e_port != e_port)
518                   || rp->proto != proto)
519                 continue;
520             }
521
522           rp_match = rp;
523           break;
524         }
525
526       /* Might be already set... */
527       first_int_addr = ip4_interface_first_address
528         (sm->ip4_main, sw_if_index, 0 /* just want the address */ );
529
530       if (is_add)
531         {
532           if (rp_match)
533             return VNET_API_ERROR_VALUE_EXIST;
534
535           snat_add_static_mapping_when_resolved (
536             sm, l_addr, l_port, sw_if_index, e_port, vrf_id, proto, addr_only,
537             tag, twice_nat, out2in_only, identity_nat, pool_addr, exact);
538
539           /* DHCP resolution required? */
540           if (first_int_addr == 0)
541             {
542               return 0;
543             }
544           else
545             {
546               e_addr.as_u32 = first_int_addr->as_u32;
547               /* Identity mapping? */
548               if (l_addr.as_u32 == 0)
549                 l_addr.as_u32 = e_addr.as_u32;
550             }
551         }
552       else
553         {
554           if (!rp_match)
555             return VNET_API_ERROR_NO_SUCH_ENTRY;
556
557           vec_del1 (sm->to_resolve, i);
558
559           if (first_int_addr)
560             {
561               e_addr.as_u32 = first_int_addr->as_u32;
562               /* Identity mapping? */
563               if (l_addr.as_u32 == 0)
564                 l_addr.as_u32 = e_addr.as_u32;
565             }
566           else
567             return 0;
568         }
569     }
570
571   init_nat_k (&kv, e_addr, addr_only ? 0 : e_port, 0, addr_only ? 0 : proto);
572   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
573     m = 0;
574   else
575     m = pool_elt_at_index (sm->static_mappings, value.value);
576
577   if (is_add)
578     {
579       if (m)
580         {
581           if (is_identity_static_mapping (m))
582             {
583               pool_foreach (local, m->locals)
584                {
585                 if (local->vrf_id == vrf_id)
586                   return VNET_API_ERROR_VALUE_EXIST;
587               }
588               pool_get (m->locals, local);
589               local->vrf_id = vrf_id;
590               local->fib_index =
591                 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
592                                                    sm->fib_src_low);
593               init_nat_kv (&kv, m->local_addr, m->local_port, local->fib_index,
594                            m->proto, 0, m - sm->static_mappings);
595               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
596               return 0;
597             }
598           else
599             return VNET_API_ERROR_VALUE_EXIST;
600         }
601
602       if (twice_nat && addr_only)
603         return VNET_API_ERROR_UNSUPPORTED;
604
605       /* Convert VRF id to FIB index */
606       if (vrf_id != ~0)
607         fib_index =
608           fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
609                                              sm->fib_src_low);
610       /* If not specified use inside VRF id from SNAT plugin startup config */
611       else
612         {
613           fib_index = sm->inside_fib_index;
614           vrf_id = sm->inside_vrf_id;
615           fib_table_lock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
616         }
617
618       if (!(out2in_only || identity_nat))
619         {
620           init_nat_k (&kv, l_addr, addr_only ? 0 : l_port, fib_index,
621                       addr_only ? 0 : proto);
622           if (!clib_bihash_search_8_8
623               (&sm->static_mapping_by_local, &kv, &value))
624             return VNET_API_ERROR_VALUE_EXIST;
625         }
626
627       /* Find external address in allocated addresses and reserve port for
628          address and port pair mapping when dynamic translations enabled */
629       if (!(addr_only || sm->static_mapping_only || out2in_only))
630         {
631           for (i = 0; i < vec_len (sm->addresses); i++)
632             {
633               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
634                 {
635                   a = sm->addresses + i;
636                   /* External port must be unused */
637                   switch (proto)
638                     {
639 #define _(N, j, n, s) \
640                     case NAT_PROTOCOL_##N: \
641                       if (a->busy_##n##_port_refcounts[e_port]) \
642                         return VNET_API_ERROR_INVALID_VALUE; \
643                       ++a->busy_##n##_port_refcounts[e_port]; \
644                       if (e_port > 1024) \
645                         { \
646                           a->busy_##n##_ports++; \
647                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
648                         } \
649                       break;
650                       foreach_nat_protocol
651 #undef _
652                         default : nat_elog_info (sm, "unknown protocol");
653                       return VNET_API_ERROR_INVALID_VALUE_2;
654                     }
655                   break;
656                 }
657             }
658           /* External address must be allocated */
659           if (!a && (l_addr.as_u32 != e_addr.as_u32))
660             {
661               if (sw_if_index != ~0)
662                 {
663                   for (i = 0; i < vec_len (sm->to_resolve); i++)
664                     {
665                       rp = sm->to_resolve + i;
666                       if (rp->addr_only)
667                         continue;
668                       if (rp->sw_if_index != sw_if_index &&
669                           rp->l_addr.as_u32 != l_addr.as_u32 &&
670                           rp->vrf_id != vrf_id && rp->l_port != l_port &&
671                           rp->e_port != e_port && rp->proto != proto)
672                         continue;
673
674                       vec_del1 (sm->to_resolve, i);
675                       break;
676                     }
677                 }
678               return VNET_API_ERROR_NO_SUCH_ENTRY;
679             }
680         }
681
682       pool_get (sm->static_mappings, m);
683       clib_memset (m, 0, sizeof (*m));
684       m->tag = vec_dup (tag);
685       m->local_addr = l_addr;
686       m->external_addr = e_addr;
687       m->twice_nat = twice_nat;
688
689       if (twice_nat == TWICE_NAT && exact)
690         {
691           m->flags |= NAT_STATIC_MAPPING_FLAG_EXACT_ADDRESS;
692           m->pool_addr = pool_addr;
693         }
694
695       if (out2in_only)
696         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
697       if (addr_only)
698         m->flags |= NAT_STATIC_MAPPING_FLAG_ADDR_ONLY;
699       if (identity_nat)
700         {
701           m->flags |= NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT;
702           pool_get (m->locals, local);
703           local->vrf_id = vrf_id;
704           local->fib_index = fib_index;
705         }
706       else
707         {
708           m->vrf_id = vrf_id;
709           m->fib_index = fib_index;
710         }
711       if (!addr_only)
712         {
713           m->local_port = l_port;
714           m->external_port = e_port;
715           m->proto = proto;
716         }
717
718       if (sm->num_workers > 1)
719         {
720           ip4_header_t ip = {
721             .src_address = m->local_addr,
722           };
723           vec_add1 (m->workers, nat44_ed_get_in2out_worker_index (
724                                   0, &ip, m->fib_index, 0));
725           tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
726         }
727       else
728         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
729
730       if (!out2in_only)
731         {
732           init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto,
733                        0, m - sm->static_mappings);
734           clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
735         }
736
737       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
738                    m - sm->static_mappings);
739       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1);
740     }
741   else
742     {
743       if (!m)
744         {
745           if (sw_if_index != ~0)
746             return 0;
747           else
748             return VNET_API_ERROR_NO_SUCH_ENTRY;
749         }
750
751       if (identity_nat)
752         {
753           if (vrf_id == ~0)
754             vrf_id = sm->inside_vrf_id;
755
756           pool_foreach (local, m->locals)
757            {
758             if (local->vrf_id == vrf_id)
759               find = local - m->locals;
760           }
761           if (find == ~0)
762             return VNET_API_ERROR_NO_SUCH_ENTRY;
763
764           local = pool_elt_at_index (m->locals, find);
765           fib_index = local->fib_index;
766           pool_put (m->locals, local);
767         }
768       else
769         fib_index = m->fib_index;
770
771       /* Free external address port */
772       if (!(addr_only || sm->static_mapping_only || out2in_only))
773         {
774           for (i = 0; i < vec_len (sm->addresses); i++)
775             {
776               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
777                 {
778                   a = sm->addresses + i;
779                   switch (proto)
780                     {
781 #define _(N, j, n, s) \
782                     case NAT_PROTOCOL_##N: \
783                       --a->busy_##n##_port_refcounts[e_port]; \
784                       if (e_port > 1024) \
785                         { \
786                           a->busy_##n##_ports--; \
787                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
788                         } \
789                       break;
790                       foreach_nat_protocol
791 #undef _
792                         default : nat_elog_info (sm, "unknown protocol");
793                       return VNET_API_ERROR_INVALID_VALUE_2;
794                     }
795                   break;
796                 }
797             }
798         }
799
800       if (sm->num_workers > 1)
801         tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
802       else
803         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
804
805       init_nat_k (&kv, m->local_addr, m->local_port, fib_index, m->proto);
806       if (!out2in_only)
807         clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0);
808
809       /* Delete session(s) for static mapping if exist */
810       if (!(sm->static_mapping_only) ||
811           (sm->static_mapping_only && sm->static_mapping_connection_tracking))
812         {
813           nat_ed_static_mapping_del_sessions (
814             sm, tsm, m->local_addr, m->local_port, m->proto, fib_index,
815             addr_only, e_addr, e_port);
816         }
817
818       fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
819       if (pool_elts (m->locals))
820         return 0;
821
822       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
823       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0);
824
825       vec_free (m->tag);
826       vec_free (m->workers);
827       /* Delete static mapping from pool */
828       pool_put (sm->static_mappings, m);
829     }
830
831   if (!addr_only || (l_addr.as_u32 == e_addr.as_u32))
832     return 0;
833
834   /* Add/delete external address to FIB */
835   pool_foreach (interface, sm->interfaces)
836    {
837      if (nat_interface_is_inside (interface))
838        continue;
839
840      snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, is_add);
841      break;
842   }
843   pool_foreach (interface, sm->output_feature_interfaces)
844    {
845      if (nat_interface_is_inside (interface))
846        continue;
847
848      snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, is_add);
849      break;
850   }
851
852   return 0;
853 }
854
855 int
856 nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
857                                  nat_protocol_t proto,
858                                  nat44_lb_addr_port_t * locals, u8 is_add,
859                                  twice_nat_type_t twice_nat, u8 out2in_only,
860                                  u8 * tag, u32 affinity)
861 {
862   snat_main_t *sm = &snat_main;
863   snat_static_mapping_t *m;
864   clib_bihash_kv_8_8_t kv, value;
865   snat_address_t *a = 0;
866   int i;
867   nat44_lb_addr_port_t *local;
868   snat_main_per_thread_data_t *tsm;
869   snat_session_t *s;
870   uword *bitmap = 0;
871
872   init_nat_k (&kv, e_addr, e_port, 0, proto);
873   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
874     m = 0;
875   else
876     m = pool_elt_at_index (sm->static_mappings, value.value);
877
878   if (is_add)
879     {
880       if (m)
881         return VNET_API_ERROR_VALUE_EXIST;
882
883       if (vec_len (locals) < 2)
884         return VNET_API_ERROR_INVALID_VALUE;
885
886       /* Find external address in allocated addresses and reserve port for
887          address and port pair mapping when dynamic translations enabled */
888       if (!(sm->static_mapping_only || out2in_only))
889         {
890           for (i = 0; i < vec_len (sm->addresses); i++)
891             {
892               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
893                 {
894                   a = sm->addresses + i;
895                   /* External port must be unused */
896                   switch (proto)
897                     {
898 #define _(N, j, n, s) \
899                     case NAT_PROTOCOL_##N: \
900                       if (a->busy_##n##_port_refcounts[e_port]) \
901                         return VNET_API_ERROR_INVALID_VALUE; \
902                       ++a->busy_##n##_port_refcounts[e_port]; \
903                       if (e_port > 1024) \
904                         { \
905                           a->busy_##n##_ports++; \
906                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
907                         } \
908                       break;
909                       foreach_nat_protocol
910 #undef _
911                         default : nat_elog_info (sm, "unknown protocol");
912                       return VNET_API_ERROR_INVALID_VALUE_2;
913                     }
914                   break;
915                 }
916             }
917           /* External address must be allocated */
918           if (!a)
919             return VNET_API_ERROR_NO_SUCH_ENTRY;
920         }
921
922       pool_get (sm->static_mappings, m);
923       clib_memset (m, 0, sizeof (*m));
924       m->tag = vec_dup (tag);
925       m->external_addr = e_addr;
926       m->external_port = e_port;
927       m->proto = proto;
928       m->twice_nat = twice_nat;
929       m->flags |= NAT_STATIC_MAPPING_FLAG_LB;
930       if (out2in_only)
931         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
932       m->affinity = affinity;
933
934       if (affinity)
935         m->affinity_per_service_list_head_index =
936           nat_affinity_get_per_service_list_head_index ();
937       else
938         m->affinity_per_service_list_head_index = ~0;
939
940       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
941                    m - sm->static_mappings);
942       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1))
943         {
944           nat_elog_err (sm, "static_mapping_by_external key add failed");
945           return VNET_API_ERROR_UNSPECIFIED;
946         }
947
948       for (i = 0; i < vec_len (locals); i++)
949         {
950           locals[i].fib_index =
951             fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
952                                                locals[i].vrf_id,
953                                                sm->fib_src_low);
954           if (!out2in_only)
955             {
956               init_nat_kv (&kv, locals[i].addr, locals[i].port,
957                            locals[i].fib_index, m->proto, 0,
958                            m - sm->static_mappings);
959               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
960             }
961           locals[i].prefix = (i == 0) ? locals[i].probability :
962             (locals[i - 1].prefix + locals[i].probability);
963           pool_get (m->locals, local);
964           *local = locals[i];
965           if (sm->num_workers > 1)
966             {
967               ip4_header_t ip = {
968                 .src_address = locals[i].addr,
969               };
970               bitmap = clib_bitmap_set (
971                 bitmap,
972                 nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index, 0), 1);
973             }
974         }
975
976       /* Assign workers */
977       if (sm->num_workers > 1)
978         {
979           clib_bitmap_foreach (i, bitmap)
980              {
981                vec_add1(m->workers, i);
982             }
983         }
984     }
985   else
986     {
987       if (!m)
988         return VNET_API_ERROR_NO_SUCH_ENTRY;
989
990       if (!is_lb_static_mapping (m))
991         return VNET_API_ERROR_INVALID_VALUE;
992
993       /* Free external address port */
994       if (!(sm->static_mapping_only || out2in_only))
995         {
996           for (i = 0; i < vec_len (sm->addresses); i++)
997             {
998               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
999                 {
1000                   a = sm->addresses + i;
1001                   switch (proto)
1002                     {
1003 #define _(N, j, n, s) \
1004                     case NAT_PROTOCOL_##N: \
1005                       --a->busy_##n##_port_refcounts[e_port]; \
1006                       if (e_port > 1024) \
1007                         { \
1008                           a->busy_##n##_ports--; \
1009                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1010                         } \
1011                       break;
1012                       foreach_nat_protocol
1013 #undef _
1014                         default : nat_elog_info (sm, "unknown protocol");
1015                       return VNET_API_ERROR_INVALID_VALUE_2;
1016                     }
1017                   break;
1018                 }
1019             }
1020         }
1021
1022       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
1023       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0))
1024         {
1025           nat_elog_err (sm, "static_mapping_by_external key del failed");
1026           return VNET_API_ERROR_UNSPECIFIED;
1027         }
1028
1029       pool_foreach (local, m->locals)
1030       {
1031           fib_table_unlock (local->fib_index, FIB_PROTOCOL_IP4,
1032                             sm->fib_src_low);
1033           if (!out2in_only)
1034             {
1035               init_nat_k (&kv, local->addr, local->port, local->fib_index,
1036                           m->proto);
1037               if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv,
1038                                            0))
1039                 {
1040                   nat_elog_err (sm, "static_mapping_by_local key del failed");
1041                   return VNET_API_ERROR_UNSPECIFIED;
1042                 }
1043             }
1044
1045           if (sm->num_workers > 1)
1046             {
1047               ip4_header_t ip = {
1048                 .src_address = local->addr,
1049               };
1050               tsm = vec_elt_at_index (
1051                 sm->per_thread_data,
1052                 nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index, 0));
1053             }
1054           else
1055             tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1056
1057           /* Delete sessions */
1058           pool_foreach (s, tsm->sessions)
1059             {
1060               if (!(is_lb_session (s)))
1061                 continue;
1062
1063               if ((s->in2out.addr.as_u32 != local->addr.as_u32) ||
1064                   s->in2out.port != local->port)
1065                 continue;
1066
1067               nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1068               nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1069             }
1070       }
1071       if (m->affinity)
1072         nat_affinity_flush_service (m->affinity_per_service_list_head_index);
1073       pool_free (m->locals);
1074       vec_free (m->tag);
1075       vec_free (m->workers);
1076
1077       pool_put (sm->static_mappings, m);
1078     }
1079
1080   return 0;
1081 }
1082
1083 int
1084 nat44_lb_static_mapping_add_del_local (ip4_address_t e_addr, u16 e_port,
1085                                        ip4_address_t l_addr, u16 l_port,
1086                                        nat_protocol_t proto, u32 vrf_id,
1087                                        u8 probability, u8 is_add)
1088 {
1089   snat_main_t *sm = &snat_main;
1090   snat_static_mapping_t *m = 0;
1091   clib_bihash_kv_8_8_t kv, value;
1092   nat44_lb_addr_port_t *local, *prev_local, *match_local = 0;
1093   snat_main_per_thread_data_t *tsm;
1094   snat_session_t *s;
1095   u32 *locals = 0;
1096   uword *bitmap = 0;
1097   int i;
1098
1099   init_nat_k (&kv, e_addr, e_port, 0, proto);
1100   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1101     m = pool_elt_at_index (sm->static_mappings, value.value);
1102
1103   if (!m)
1104     return VNET_API_ERROR_NO_SUCH_ENTRY;
1105
1106   if (!is_lb_static_mapping (m))
1107     return VNET_API_ERROR_INVALID_VALUE;
1108
1109   pool_foreach (local, m->locals)
1110    {
1111     if ((local->addr.as_u32 == l_addr.as_u32) && (local->port == l_port) &&
1112         (local->vrf_id == vrf_id))
1113       {
1114         match_local = local;
1115         break;
1116       }
1117   }
1118
1119   if (is_add)
1120     {
1121       if (match_local)
1122         return VNET_API_ERROR_VALUE_EXIST;
1123
1124       pool_get (m->locals, local);
1125       clib_memset (local, 0, sizeof (*local));
1126       local->addr.as_u32 = l_addr.as_u32;
1127       local->port = l_port;
1128       local->probability = probability;
1129       local->vrf_id = vrf_id;
1130       local->fib_index =
1131         fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1132                                            sm->fib_src_low);
1133
1134       if (!is_out2in_only_static_mapping (m))
1135         {
1136           init_nat_kv (&kv, l_addr, l_port, local->fib_index, proto, 0,
1137                        m - sm->static_mappings);
1138           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1))
1139             nat_elog_err (sm, "static_mapping_by_local key add failed");
1140         }
1141     }
1142   else
1143     {
1144       if (!match_local)
1145         return VNET_API_ERROR_NO_SUCH_ENTRY;
1146
1147       if (pool_elts (m->locals) < 3)
1148         return VNET_API_ERROR_UNSPECIFIED;
1149
1150       fib_table_unlock (match_local->fib_index, FIB_PROTOCOL_IP4,
1151                         sm->fib_src_low);
1152
1153       if (!is_out2in_only_static_mapping (m))
1154         {
1155           init_nat_k (&kv, l_addr, l_port, match_local->fib_index, proto);
1156           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0))
1157             nat_elog_err (sm, "static_mapping_by_local key del failed");
1158         }
1159
1160       if (sm->num_workers > 1)
1161         {
1162           ip4_header_t ip = {
1163             .src_address = local->addr,
1164           };
1165           tsm = vec_elt_at_index (
1166             sm->per_thread_data,
1167             nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index, 0));
1168         }
1169       else
1170         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1171
1172       /* Delete sessions */
1173       pool_foreach (s, tsm->sessions) {
1174         if (!(is_lb_session (s)))
1175           continue;
1176
1177         if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) ||
1178             s->in2out.port != match_local->port)
1179           continue;
1180
1181         nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1182         nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1183       }
1184
1185       pool_put (m->locals, match_local);
1186     }
1187
1188   vec_free (m->workers);
1189
1190   pool_foreach (local, m->locals)
1191    {
1192     vec_add1 (locals, local - m->locals);
1193     if (sm->num_workers > 1)
1194       {
1195         ip4_header_t ip;
1196         ip.src_address.as_u32 = local->addr.as_u32,
1197         bitmap = clib_bitmap_set (
1198           bitmap,
1199           nat44_ed_get_in2out_worker_index (0, &ip, local->fib_index, 0), 1);
1200       }
1201   }
1202
1203   ASSERT (vec_len (locals) > 1);
1204
1205   local = pool_elt_at_index (m->locals, locals[0]);
1206   local->prefix = local->probability;
1207   for (i = 1; i < vec_len (locals); i++)
1208     {
1209       local = pool_elt_at_index (m->locals, locals[i]);
1210       prev_local = pool_elt_at_index (m->locals, locals[i - 1]);
1211       local->prefix = local->probability + prev_local->prefix;
1212     }
1213
1214   /* Assign workers */
1215   if (sm->num_workers > 1)
1216     {
1217       clib_bitmap_foreach (i, bitmap)  { vec_add1(m->workers, i); }
1218     }
1219
1220   return 0;
1221 }
1222
1223 int
1224 snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm,
1225                   u8 twice_nat)
1226 {
1227   snat_address_t *a = 0;
1228   snat_session_t *ses;
1229   u32 *ses_to_be_removed = 0, *ses_index;
1230   snat_main_per_thread_data_t *tsm;
1231   snat_static_mapping_t *m;
1232   snat_interface_t *interface;
1233   int i;
1234   snat_address_t *addresses =
1235     twice_nat ? sm->twice_nat_addresses : sm->addresses;
1236
1237   /* Find SNAT address */
1238   for (i = 0; i < vec_len (addresses); i++)
1239     {
1240       if (addresses[i].addr.as_u32 == addr.as_u32)
1241         {
1242           a = addresses + i;
1243           break;
1244         }
1245     }
1246   if (!a)
1247     {
1248       nat_log_err ("no such address");
1249       return VNET_API_ERROR_NO_SUCH_ENTRY;
1250     }
1251
1252   if (delete_sm)
1253     {
1254       ip4_address_t pool_addr = { 0 };
1255       pool_foreach (m, sm->static_mappings)
1256        {
1257           if (m->external_addr.as_u32 == addr.as_u32)
1258             (void) snat_add_static_mapping (m->local_addr, m->external_addr,
1259                                             m->local_port, m->external_port,
1260                                             m->vrf_id,
1261                                             is_addr_only_static_mapping(m), ~0,
1262                                             m->proto, 0 /* is_add */,
1263                                             m->twice_nat,
1264                                             is_out2in_only_static_mapping(m),
1265                                             m->tag,
1266                                             is_identity_static_mapping(m),
1267                                             pool_addr, 0);
1268       }
1269     }
1270   else
1271     {
1272       /* Check if address is used in some static mapping */
1273       if (is_snat_address_used_in_static_mapping (sm, addr))
1274         {
1275           nat_log_err ("address used in static mapping");
1276           return VNET_API_ERROR_UNSPECIFIED;
1277         }
1278     }
1279
1280   if (a->fib_index != ~0)
1281     fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
1282
1283   /* Delete sessions using address */
1284   if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
1285     {
1286       vec_foreach (tsm, sm->per_thread_data)
1287       {
1288         pool_foreach (ses, tsm->sessions)  {
1289           if (ses->out2in.addr.as_u32 == addr.as_u32)
1290             {
1291               nat_free_session_data (sm, ses, tsm - sm->per_thread_data, 0);
1292               vec_add1 (ses_to_be_removed, ses - tsm->sessions);
1293             }
1294         }
1295
1296             vec_foreach (ses_index, ses_to_be_removed)
1297             {
1298               ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1299               nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1);
1300             }
1301
1302         vec_free (ses_to_be_removed);
1303       }
1304     }
1305
1306 #define _(N, i, n, s) \
1307   vec_free (a->busy_##n##_ports_per_thread);
1308   foreach_nat_protocol
1309 #undef _
1310
1311     if (twice_nat)
1312   {
1313     vec_del1 (sm->twice_nat_addresses, i);
1314     return 0;
1315   }
1316   else vec_del1 (sm->addresses, i);
1317
1318   /* Delete external address from FIB */
1319   pool_foreach (interface, sm->interfaces)
1320     {
1321       if (nat_interface_is_inside (interface))
1322         continue;
1323
1324       snat_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
1325       break;
1326     }
1327   pool_foreach (interface, sm->output_feature_interfaces)
1328    {
1329      if (nat_interface_is_inside (interface))
1330        continue;
1331
1332      snat_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
1333      break;
1334   }
1335
1336   return 0;
1337 }
1338
1339 void
1340 expire_per_vrf_sessions (u32 fib_index)
1341 {
1342   per_vrf_sessions_t *per_vrf_sessions;
1343   snat_main_per_thread_data_t *tsm;
1344   snat_main_t *sm = &snat_main;
1345
1346   vec_foreach (tsm, sm->per_thread_data)
1347     {
1348       vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
1349         {
1350           if ((per_vrf_sessions->rx_fib_index == fib_index) ||
1351               (per_vrf_sessions->tx_fib_index == fib_index))
1352             {
1353               per_vrf_sessions->expired = 1;
1354             }
1355         }
1356     }
1357 }
1358
1359 void
1360 update_per_vrf_sessions_vec (u32 fib_index, int is_del)
1361 {
1362   snat_main_t *sm = &snat_main;
1363   nat_fib_t *fib;
1364
1365   // we don't care if it is outside/inside fib
1366   // we just care about their ref_count
1367   // if it reaches 0 sessions should expire
1368   // because the fib isn't valid for NAT anymore
1369
1370   vec_foreach (fib, sm->fibs)
1371   {
1372     if (fib->fib_index == fib_index)
1373       {
1374         if (is_del)
1375           {
1376             fib->ref_count--;
1377             if (!fib->ref_count)
1378               {
1379                 vec_del1 (sm->fibs, fib - sm->fibs);
1380                 expire_per_vrf_sessions (fib_index);
1381               }
1382             return;
1383           }
1384         else
1385           fib->ref_count++;
1386       }
1387   }
1388   if (!is_del)
1389     {
1390       vec_add2 (sm->fibs, fib, 1);
1391       fib->ref_count = 1;
1392       fib->fib_index = fib_index;
1393     }
1394 }
1395
1396 int
1397 snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
1398 {
1399   snat_main_t *sm = &snat_main;
1400   snat_interface_t *i;
1401   const char *feature_name, *del_feature_name;
1402   snat_address_t *ap;
1403   snat_static_mapping_t *m;
1404   nat_outside_fib_t *outside_fib;
1405   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1406                                                        sw_if_index);
1407
1408   if (!sm->enabled)
1409     {
1410       nat_log_err ("nat44 is disabled");
1411       return VNET_API_ERROR_UNSUPPORTED;
1412     }
1413
1414   pool_foreach (i, sm->output_feature_interfaces)
1415    {
1416     if (i->sw_if_index == sw_if_index)
1417       {
1418         nat_log_err ("error interface already configured");
1419         return VNET_API_ERROR_VALUE_EXIST;
1420       }
1421   }
1422
1423   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
1424     feature_name = is_inside ? "nat44-in2out-fast" : "nat44-out2in-fast";
1425   else
1426     {
1427       if (sm->num_workers > 1)
1428         feature_name =
1429           is_inside ? "nat44-in2out-worker-handoff" :
1430           "nat44-out2in-worker-handoff";
1431       else
1432         feature_name = is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1433     }
1434
1435   ASSERT (sm->frame_queue_nelts > 0);
1436
1437   if (sm->fq_in2out_index == ~0 && sm->num_workers > 1)
1438     sm->fq_in2out_index = vlib_frame_queue_main_init (sm->in2out_node_index,
1439                                                       sm->frame_queue_nelts);
1440
1441   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
1442     sm->fq_out2in_index = vlib_frame_queue_main_init (sm->out2in_node_index,
1443                                                       sm->frame_queue_nelts);
1444
1445   update_per_vrf_sessions_vec (fib_index, is_del);
1446
1447   if (!is_inside)
1448     {
1449       vec_foreach (outside_fib, sm->outside_fibs)
1450         {
1451           if (outside_fib->fib_index == fib_index)
1452             {
1453               if (is_del)
1454                 {
1455                   outside_fib->refcount--;
1456                   if (!outside_fib->refcount)
1457                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1458                 }
1459               else
1460                 outside_fib->refcount++;
1461               goto feature_set;
1462             }
1463         }
1464       if (!is_del)
1465         {
1466           vec_add2 (sm->outside_fibs, outside_fib, 1);
1467           outside_fib->refcount = 1;
1468           outside_fib->fib_index = fib_index;
1469         }
1470     }
1471
1472 feature_set:
1473   pool_foreach (i, sm->interfaces)
1474    {
1475     if (i->sw_if_index == sw_if_index)
1476       {
1477         if (is_del)
1478           {
1479             if (nat_interface_is_inside(i) && nat_interface_is_outside(i))
1480               {
1481                 if (is_inside)
1482                   i->flags &= ~NAT_INTERFACE_FLAG_IS_INSIDE;
1483                 else
1484                   i->flags &= ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
1485
1486                 if (sm->num_workers > 1)
1487                   {
1488                     del_feature_name = "nat44-handoff-classify";
1489                     feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
1490                                                  "nat44-out2in-worker-handoff";
1491                   }
1492                 else
1493                   {
1494                     del_feature_name = "nat44-ed-classify";
1495                     feature_name =
1496                       !is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1497                   }
1498
1499                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1500                 if (rv)
1501                   return rv;
1502                 vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1503                                              sw_if_index, 0, 0, 0);
1504                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
1505                                              sw_if_index, 1, 0, 0);
1506               }
1507             else
1508               {
1509                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1510                 if (rv)
1511                   return rv;
1512                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
1513                                              sw_if_index, 0, 0, 0);
1514                 pool_put (sm->interfaces, i);
1515               }
1516           }
1517         else
1518           {
1519             if ((nat_interface_is_inside (i) && is_inside) ||
1520                 (nat_interface_is_outside (i) && !is_inside))
1521               return 0;
1522
1523             if (sm->num_workers > 1)
1524               {
1525                 del_feature_name = !is_inside ? "nat44-in2out-worker-handoff" :
1526                                                 "nat44-out2in-worker-handoff";
1527                 feature_name = "nat44-handoff-classify";
1528               }
1529             else
1530               {
1531                 del_feature_name =
1532                   !is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1533
1534                 feature_name = "nat44-ed-classify";
1535               }
1536
1537             int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1538             if (rv)
1539               return rv;
1540             vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1541                                          sw_if_index, 0, 0, 0);
1542             vnet_feature_enable_disable ("ip4-unicast", feature_name,
1543                                          sw_if_index, 1, 0, 0);
1544             goto set_flags;
1545           }
1546
1547         goto fib;
1548       }
1549   }
1550
1551   if (is_del)
1552     {
1553       nat_log_err ("error interface couldn't be found");
1554       return VNET_API_ERROR_NO_SUCH_ENTRY;
1555     }
1556
1557   pool_get (sm->interfaces, i);
1558   i->sw_if_index = sw_if_index;
1559   i->flags = 0;
1560   nat_validate_interface_counters (sm, sw_if_index);
1561
1562   vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1, 0,
1563                                0);
1564
1565   int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1566   if (rv)
1567     return rv;
1568
1569 set_flags:
1570   if (is_inside)
1571     {
1572       i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
1573       return 0;
1574     }
1575   else
1576     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
1577
1578   /* Add/delete external addresses to FIB */
1579 fib:
1580   vec_foreach (ap, sm->addresses)
1581     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
1582
1583   pool_foreach (m, sm->static_mappings)
1584    {
1585     if (!(is_addr_only_static_mapping(m)) || (m->local_addr.as_u32 == m->external_addr.as_u32))
1586       continue;
1587
1588     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
1589   }
1590
1591   return 0;
1592 }
1593
1594 int
1595 snat_interface_add_del_output_feature (u32 sw_if_index,
1596                                        u8 is_inside, int is_del)
1597 {
1598   snat_main_t *sm = &snat_main;
1599   snat_interface_t *i;
1600   snat_address_t *ap;
1601   snat_static_mapping_t *m;
1602   nat_outside_fib_t *outside_fib;
1603   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1604                                                        sw_if_index);
1605
1606   if (!sm->enabled)
1607     {
1608       nat_log_err ("nat44 is disabled");
1609       return VNET_API_ERROR_UNSUPPORTED;
1610     }
1611
1612   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
1613     {
1614       nat_log_err ("error unsupported");
1615       return VNET_API_ERROR_UNSUPPORTED;
1616     }
1617
1618   pool_foreach (i, sm->interfaces)
1619    {
1620     if (i->sw_if_index == sw_if_index)
1621       {
1622         nat_log_err ("error interface already configured");
1623         return VNET_API_ERROR_VALUE_EXIST;
1624       }
1625   }
1626
1627   update_per_vrf_sessions_vec (fib_index, is_del);
1628
1629   if (!is_inside)
1630     {
1631       vec_foreach (outside_fib, sm->outside_fibs)
1632         {
1633           if (outside_fib->fib_index == fib_index)
1634             {
1635               if (is_del)
1636                 {
1637                   outside_fib->refcount--;
1638                   if (!outside_fib->refcount)
1639                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1640                 }
1641               else
1642                 outside_fib->refcount++;
1643               goto feature_set;
1644             }
1645         }
1646       if (!is_del)
1647         {
1648           vec_add2 (sm->outside_fibs, outside_fib, 1);
1649           outside_fib->refcount = 1;
1650           outside_fib->fib_index = fib_index;
1651         }
1652     }
1653
1654 feature_set:
1655   if (is_inside)
1656     {
1657           int rv =
1658             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
1659           if (rv)
1660             return rv;
1661           rv =
1662             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
1663                                                             !is_del);
1664           if (rv)
1665             return rv;
1666       goto fq;
1667     }
1668
1669   if (sm->num_workers > 1)
1670     {
1671       int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
1672       if (rv)
1673         return rv;
1674       rv =
1675         ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del);
1676       if (rv)
1677         return rv;
1678       vnet_feature_enable_disable ("ip4-unicast",
1679                                    "nat44-out2in-worker-handoff",
1680                                    sw_if_index, !is_del, 0, 0);
1681       vnet_feature_enable_disable ("ip4-output",
1682                                    "nat44-in2out-output-worker-handoff",
1683                                    sw_if_index, !is_del, 0, 0);
1684     }
1685   else
1686     {
1687           int rv =
1688             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
1689           if (rv)
1690             return rv;
1691           rv =
1692             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
1693                                                             !is_del);
1694           if (rv)
1695             return rv;
1696           vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in",
1697                                        sw_if_index, !is_del, 0, 0);
1698           vnet_feature_enable_disable ("ip4-output", "nat-pre-in2out-output",
1699                                        sw_if_index, !is_del, 0, 0);
1700     }
1701
1702 fq:
1703   if (sm->fq_in2out_output_index == ~0 && sm->num_workers > 1)
1704     sm->fq_in2out_output_index =
1705       vlib_frame_queue_main_init (sm->in2out_output_node_index, 0);
1706
1707   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
1708     sm->fq_out2in_index =
1709       vlib_frame_queue_main_init (sm->out2in_node_index, 0);
1710
1711   pool_foreach (i, sm->output_feature_interfaces)
1712    {
1713     if (i->sw_if_index == sw_if_index)
1714       {
1715         if (is_del)
1716           pool_put (sm->output_feature_interfaces, i);
1717         else
1718           return VNET_API_ERROR_VALUE_EXIST;
1719
1720         goto fib;
1721       }
1722   }
1723
1724   if (is_del)
1725     {
1726       nat_log_err ("error interface couldn't be found");
1727       return VNET_API_ERROR_NO_SUCH_ENTRY;
1728     }
1729
1730   pool_get (sm->output_feature_interfaces, i);
1731   i->sw_if_index = sw_if_index;
1732   i->flags = 0;
1733   nat_validate_interface_counters (sm, sw_if_index);
1734   if (is_inside)
1735     i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
1736   else
1737     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
1738
1739   /* Add/delete external addresses to FIB */
1740 fib:
1741   if (is_inside)
1742     return 0;
1743
1744   vec_foreach (ap, sm->addresses)
1745     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
1746
1747   pool_foreach (m, sm->static_mappings)
1748    {
1749     if (!((is_addr_only_static_mapping(m)))  || (m->local_addr.as_u32 == m->external_addr.as_u32))
1750       continue;
1751
1752     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
1753   }
1754
1755   return 0;
1756 }
1757
1758 int
1759 snat_set_workers (uword * bitmap)
1760 {
1761   snat_main_t *sm = &snat_main;
1762   int i, j = 0;
1763
1764   if (sm->num_workers < 2)
1765     return VNET_API_ERROR_FEATURE_DISABLED;
1766
1767   if (clib_bitmap_last_set (bitmap) >= sm->num_workers)
1768     return VNET_API_ERROR_INVALID_WORKER;
1769
1770   vec_free (sm->workers);
1771   clib_bitmap_foreach (i, bitmap)
1772     {
1773       vec_add1(sm->workers, i);
1774       sm->per_thread_data[sm->first_worker_index + i].snat_thread_index = j;
1775       sm->per_thread_data[sm->first_worker_index + i].thread_index = i;
1776       j++;
1777     }
1778
1779   sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
1780
1781   return 0;
1782 }
1783
1784 int
1785 nat44_ed_set_frame_queue_nelts (u32 frame_queue_nelts)
1786 {
1787   fail_if_enabled ();
1788   snat_main_t *sm = &snat_main;
1789   sm->frame_queue_nelts = frame_queue_nelts;
1790   return 0;
1791 }
1792
1793 static void
1794 snat_update_outside_fib (ip4_main_t * im, uword opaque,
1795                          u32 sw_if_index, u32 new_fib_index,
1796                          u32 old_fib_index)
1797 {
1798   snat_main_t *sm = &snat_main;
1799   nat_outside_fib_t *outside_fib;
1800   snat_interface_t *i;
1801   u8 is_add = 1;
1802   u8 match = 0;
1803
1804   if (!sm->enabled || (new_fib_index == old_fib_index)
1805       || (!vec_len (sm->outside_fibs)))
1806     {
1807       return;
1808     }
1809
1810   pool_foreach (i, sm->interfaces)
1811      {
1812       if (i->sw_if_index == sw_if_index)
1813         {
1814           if (!(nat_interface_is_outside (i)))
1815             return;
1816           match = 1;
1817         }
1818     }
1819
1820   pool_foreach (i, sm->output_feature_interfaces)
1821      {
1822       if (i->sw_if_index == sw_if_index)
1823         {
1824           if (!(nat_interface_is_outside (i)))
1825             return;
1826           match = 1;
1827         }
1828     }
1829
1830   if (!match)
1831     return;
1832
1833   vec_foreach (outside_fib, sm->outside_fibs)
1834   {
1835     if (outside_fib->fib_index == old_fib_index)
1836       {
1837         outside_fib->refcount--;
1838         if (!outside_fib->refcount)
1839           vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1840         break;
1841       }
1842   }
1843
1844   vec_foreach (outside_fib, sm->outside_fibs)
1845   {
1846     if (outside_fib->fib_index == new_fib_index)
1847       {
1848         outside_fib->refcount++;
1849         is_add = 0;
1850         break;
1851       }
1852   }
1853
1854   if (is_add)
1855     {
1856       vec_add2 (sm->outside_fibs, outside_fib, 1);
1857       outside_fib->refcount = 1;
1858       outside_fib->fib_index = new_fib_index;
1859     }
1860 }
1861
1862 static void
1863 snat_update_outside_fib (ip4_main_t * im, uword opaque,
1864                          u32 sw_if_index, u32 new_fib_index,
1865                          u32 old_fib_index);
1866
1867 static void
1868 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
1869                                        uword opaque,
1870                                        u32 sw_if_index,
1871                                        ip4_address_t * address,
1872                                        u32 address_length,
1873                                        u32 if_address_index, u32 is_delete);
1874
1875 static void
1876 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
1877                                  uword opaque,
1878                                  u32 sw_if_index,
1879                                  ip4_address_t * address,
1880                                  u32 address_length,
1881                                  u32 if_address_index, u32 is_delete);
1882
1883 void
1884 test_key_calc_split ()
1885 {
1886   ip4_address_t l_addr;
1887   l_addr.as_u8[0] = 1;
1888   l_addr.as_u8[1] = 1;
1889   l_addr.as_u8[2] = 1;
1890   l_addr.as_u8[3] = 1;
1891   ip4_address_t r_addr;
1892   r_addr.as_u8[0] = 2;
1893   r_addr.as_u8[1] = 2;
1894   r_addr.as_u8[2] = 2;
1895   r_addr.as_u8[3] = 2;
1896   u16 l_port = 40001;
1897   u16 r_port = 40301;
1898   u8 proto = 9;
1899   u32 fib_index = 9000001;
1900   u32 thread_index = 3000000001;
1901   u32 session_index = 3000000221;
1902   clib_bihash_kv_16_8_t kv;
1903   init_ed_kv (&kv, l_addr, l_port, r_addr, r_port, fib_index, proto,
1904               thread_index, session_index);
1905   ip4_address_t l_addr2;
1906   ip4_address_t r_addr2;
1907   clib_memset (&l_addr2, 0, sizeof (l_addr2));
1908   clib_memset (&r_addr2, 0, sizeof (r_addr2));
1909   u16 l_port2 = 0;
1910   u16 r_port2 = 0;
1911   u8 proto2 = 0;
1912   u32 fib_index2 = 0;
1913   split_ed_kv (&kv, &l_addr2, &r_addr2, &proto2, &fib_index2, &l_port2,
1914                &r_port2);
1915   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
1916   ASSERT (r_addr.as_u32 == r_addr2.as_u32);
1917   ASSERT (l_port == l_port2);
1918   ASSERT (r_port == r_port2);
1919   ASSERT (proto == proto2);
1920   ASSERT (fib_index == fib_index2);
1921   ASSERT (thread_index == ed_value_get_thread_index (&kv));
1922   ASSERT (session_index == ed_value_get_session_index (&kv));
1923
1924   fib_index = 7001;
1925   proto = 5;
1926   nat_protocol_t proto3 = ~0;
1927   u64 key = calc_nat_key (l_addr, l_port, fib_index, proto);
1928   split_nat_key (key, &l_addr2, &l_port2, &fib_index2, &proto3);
1929   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
1930   ASSERT (l_port == l_port2);
1931   ASSERT (proto == proto3);
1932   ASSERT (fib_index == fib_index2);
1933 }
1934
1935 static clib_error_t *
1936 nat_ip_table_add_del (vnet_main_t * vnm, u32 table_id, u32 is_add)
1937 {
1938   u32 fib_index;
1939
1940       // TODO: consider removing all NAT interfaces
1941       if (!is_add)
1942         {
1943           fib_index = ip4_fib_index_from_table_id (table_id);
1944           if (fib_index != ~0)
1945             expire_per_vrf_sessions (fib_index);
1946         }
1947   return 0;
1948 }
1949
1950 VNET_IP_TABLE_ADD_DEL_FUNCTION (nat_ip_table_add_del);
1951
1952 void
1953 nat44_set_node_indexes (snat_main_t * sm, vlib_main_t * vm)
1954 {
1955   vlib_node_t *node;
1956
1957   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
1958   sm->out2in_node_index = node->index;
1959
1960   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
1961   sm->in2out_node_index = node->index;
1962
1963   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-output");
1964   sm->in2out_output_node_index = node->index;
1965 }
1966
/*
 * Call vlib_validate_simple_counter () and then vlib_zero_simple_counter ()
 * on counter c for index i.  c must be an lvalue; i is evaluated twice.
 *
 * There is deliberately no semicolon after "while (0)": the caller supplies
 * it, so the macro behaves as one statement and is safe in an unbraced
 * if/else.
 */
#define nat_validate_simple_counter(c, i)                                     \
  do                                                                          \
    {                                                                         \
      vlib_validate_simple_counter (&(c), (i));                               \
      vlib_zero_simple_counter (&(c), (i));                                   \
    }                                                                         \
  while (0)

/*
 * Set the name (n) and stat segment name (sn) of counter c, then validate
 * and zero its index 0.  c must be an lvalue and is evaluated several times.
 */
#define nat_init_simple_counter(c, n, sn)                                     \
  do                                                                          \
    {                                                                         \
      (c).name = (n);                                                         \
      (c).stat_segment_name = (sn);                                           \
      nat_validate_simple_counter (c, 0);                                     \
    }                                                                         \
  while (0)
1983
1984 static_always_inline void
1985 nat_validate_interface_counters (snat_main_t *sm, u32 sw_if_index)
1986 {
1987 #define _(x)                                                                  \
1988   nat_validate_simple_counter (sm->counters.fastpath.in2out.x, sw_if_index);  \
1989   nat_validate_simple_counter (sm->counters.fastpath.out2in.x, sw_if_index);  \
1990   nat_validate_simple_counter (sm->counters.slowpath.in2out.x, sw_if_index);  \
1991   nat_validate_simple_counter (sm->counters.slowpath.out2in.x, sw_if_index);
1992   foreach_nat_counter;
1993 #undef _
1994   nat_validate_simple_counter (sm->counters.hairpinning, sw_if_index);
1995 }
1996
1997 static clib_error_t *
1998 nat_init (vlib_main_t * vm)
1999 {
2000   snat_main_t *sm = &snat_main;
2001   vlib_thread_main_t *tm = vlib_get_thread_main ();
2002   vlib_thread_registration_t *tr;
2003   ip4_add_del_interface_address_callback_t cbi = { 0 };
2004   ip4_table_bind_callback_t cbt = { 0 };
2005   u32 i, num_threads = 0;
2006   uword *p, *bitmap = 0;
2007
2008   clib_memset (sm, 0, sizeof (*sm));
2009
2010   // required
2011   sm->vnet_main = vnet_get_main ();
2012   // convenience
2013   sm->ip4_main = &ip4_main;
2014   sm->api_main = vlibapi_get_main ();
2015   sm->ip4_lookup_main = &ip4_main.lookup_main;
2016
2017   // frame queue indices used for handoff
2018   sm->fq_out2in_index = ~0;
2019   sm->fq_in2out_index = ~0;
2020   sm->fq_in2out_output_index = ~0;
2021
2022   sm->log_level = NAT_LOG_ERROR;
2023
2024   nat44_set_node_indexes (sm, vm);
2025   sm->log_class = vlib_log_register_class ("nat", 0);
2026   nat_ipfix_logging_init (vm);
2027
2028   nat_init_simple_counter (sm->total_sessions, "total-sessions",
2029                            "/nat44-ed/total-sessions");
2030   sm->max_cfg_sessions_gauge = stat_segment_new_entry (
2031     (u8 *) "/nat44-ed/max-cfg-sessions", STAT_DIR_TYPE_SCALAR_INDEX);
2032
2033 #define _(x)                                                                  \
2034   nat_init_simple_counter (sm->counters.fastpath.in2out.x, #x,                \
2035                            "/nat44-ed/in2out/fastpath/" #x);                  \
2036   nat_init_simple_counter (sm->counters.fastpath.out2in.x, #x,                \
2037                            "/nat44-ed/out2in/fastpath/" #x);                  \
2038   nat_init_simple_counter (sm->counters.slowpath.in2out.x, #x,                \
2039                            "/nat44-ed/in2out/slowpath/" #x);                  \
2040   nat_init_simple_counter (sm->counters.slowpath.out2in.x, #x,                \
2041                            "/nat44-ed/out2in/slowpath/" #x);
2042   foreach_nat_counter;
2043 #undef _
2044   nat_init_simple_counter (sm->counters.hairpinning, "hairpinning",
2045                            "/nat44-ed/hairpinning");
2046
2047   p = hash_get_mem (tm->thread_registrations_by_name, "workers");
2048   if (p)
2049     {
2050       tr = (vlib_thread_registration_t *) p[0];
2051       if (tr)
2052         {
2053           sm->num_workers = tr->count;
2054           sm->first_worker_index = tr->first_index;
2055         }
2056     }
2057   num_threads = tm->n_vlib_mains - 1;
2058   sm->port_per_thread = 0xffff - 1024;
2059   vec_validate (sm->per_thread_data, num_threads);
2060
2061   /* Use all available workers by default */
2062   if (sm->num_workers > 1)
2063     {
2064
2065       for (i = 0; i < sm->num_workers; i++)
2066         bitmap = clib_bitmap_set (bitmap, i, 1);
2067       snat_set_workers (bitmap);
2068       clib_bitmap_free (bitmap);
2069     }
2070   else
2071     sm->per_thread_data[0].snat_thread_index = 0;
2072
2073   /* callbacks to call when interface address changes. */
2074   cbi.function = snat_ip4_add_del_interface_address_cb;
2075   vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
2076   cbi.function = nat_ip4_add_del_addr_only_sm_cb;
2077   vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
2078
2079   /* callbacks to call when interface to table biding changes */
2080   cbt.function = snat_update_outside_fib;
2081   vec_add1 (sm->ip4_main->table_bind_callbacks, cbt);
2082
2083   sm->fib_src_low =
2084     fib_source_allocate ("nat-low", FIB_SOURCE_PRIORITY_LOW,
2085                          FIB_SOURCE_BH_SIMPLE);
2086   sm->fib_src_hi =
2087     fib_source_allocate ("nat-hi", FIB_SOURCE_PRIORITY_HI,
2088                          FIB_SOURCE_BH_SIMPLE);
2089
2090   nat_affinity_init (vm);
2091   test_key_calc_split ();
2092
2093   return nat44_api_hookup (vm);
2094 }
2095
2096 VLIB_INIT_FUNCTION (nat_init);
2097
2098 int
2099 nat44_plugin_enable (nat44_config_t c)
2100 {
2101   snat_main_t *sm = &snat_main;
2102
2103   fail_if_enabled ();
2104
2105   // UPDATE based on these appropriate API/CLI
2106   // c.static_mapping_only + c.connection_tracking
2107   //  - supported in NAT EI & NAT ED
2108   // c.out2in_dpo, c.static_mapping_only
2109   //  - supported in NAT EI
2110
2111   if (c.static_mapping_only && !c.connection_tracking)
2112     {
2113       nat_log_err ("unsupported combination of configuration");
2114       return 1;
2115     }
2116
2117   // nat44 feature configuration
2118   sm->static_mapping_only = c.static_mapping_only;
2119   sm->static_mapping_connection_tracking = c.connection_tracking;
2120
2121   sm->forwarding_enabled = 0;
2122   sm->mss_clamping = 0;
2123   sm->pat = (!c.static_mapping_only ||
2124              (c.static_mapping_only && c.connection_tracking));
2125
2126   if (!c.sessions)
2127     c.sessions = 63 * 1024;
2128
2129   sm->max_translations_per_thread = c.sessions;
2130   stat_segment_set_state_counter (sm->max_cfg_sessions_gauge,
2131                                   sm->max_translations_per_thread);
2132   sm->translation_buckets = nat_calc_bihash_buckets (c.sessions);
2133
2134   // ED only feature
2135   vec_add1 (sm->max_translations_per_fib, sm->max_translations_per_thread);
2136
2137   sm->inside_vrf_id = c.inside_vrf;
2138   sm->inside_fib_index =
2139     fib_table_find_or_create_and_lock
2140     (FIB_PROTOCOL_IP4, c.inside_vrf, sm->fib_src_hi);
2141
2142   sm->outside_vrf_id = c.outside_vrf;
2143   sm->outside_fib_index = fib_table_find_or_create_and_lock (
2144     FIB_PROTOCOL_IP4, c.outside_vrf, sm->fib_src_hi);
2145
2146   nat44_ed_db_init (sm->max_translations_per_thread, sm->translation_buckets);
2147
2148   nat_affinity_enable ();
2149
2150   nat_reset_timeouts (&sm->timeouts);
2151
2152   vlib_zero_simple_counter (&sm->total_sessions, 0);
2153
2154   if (!sm->frame_queue_nelts)
2155     sm->frame_queue_nelts = NAT_FQ_NELTS_DEFAULT;
2156
2157   sm->enabled = 1;
2158   sm->rconfig = c;
2159
2160   return 0;
2161 }
2162
2163 void
2164 nat44_addresses_free (snat_address_t ** addresses)
2165 {
2166   snat_address_t *ap;
2167   vec_foreach (ap, *addresses)
2168     {
2169     #define _(N, i, n, s) \
2170       vec_free (ap->busy_##n##_ports_per_thread);
2171       foreach_nat_protocol
2172     #undef _
2173     }
2174   vec_free (*addresses);
2175   *addresses = 0;
2176 }
2177
2178 int
2179 nat44_plugin_disable ()
2180 {
2181   snat_main_t *sm = &snat_main;
2182   snat_interface_t *i, *vec;
2183   int error = 0;
2184
2185   fail_if_disabled ();
2186
2187   // first unregister all nodes from interfaces
2188   vec = vec_dup (sm->interfaces);
2189   vec_foreach (i, vec)
2190     {
2191       if (nat_interface_is_inside(i))
2192         error = snat_interface_add_del (i->sw_if_index, 1, 1);
2193       if (nat_interface_is_outside(i))
2194         error = snat_interface_add_del (i->sw_if_index, 0, 1);
2195
2196       if (error)
2197         {
2198           nat_log_err ("error occurred while removing interface %u",
2199                        i->sw_if_index);
2200         }
2201     }
2202   vec_free (vec);
2203   sm->interfaces = 0;
2204
2205   vec = vec_dup (sm->output_feature_interfaces);
2206   vec_foreach (i, vec)
2207     {
2208       if (nat_interface_is_inside(i))
2209         error = snat_interface_add_del_output_feature (i->sw_if_index, 1, 1);
2210       if (nat_interface_is_outside(i))
2211         error = snat_interface_add_del_output_feature (i->sw_if_index, 0, 1);
2212
2213       if (error)
2214         {
2215           nat_log_err ("error occurred while removing interface %u",
2216                        i->sw_if_index);
2217         }
2218     }
2219   vec_free (vec);
2220   sm->output_feature_interfaces = 0;
2221
2222   vec_free (sm->max_translations_per_fib);
2223
2224   nat44_ed_db_free ();
2225
2226   nat44_addresses_free (&sm->addresses);
2227   nat44_addresses_free (&sm->twice_nat_addresses);
2228
2229   vec_free (sm->to_resolve);
2230   vec_free (sm->auto_add_sw_if_indices);
2231   vec_free (sm->auto_add_sw_if_indices_twice_nat);
2232
2233   sm->to_resolve = 0;
2234   sm->auto_add_sw_if_indices = 0;
2235   sm->auto_add_sw_if_indices_twice_nat = 0;
2236
2237   sm->forwarding_enabled = 0;
2238
2239   sm->enabled = 0;
2240   clib_memset (&sm->rconfig, 0, sizeof (sm->rconfig));
2241
2242   return 0;
2243 }
2244
2245 void
2246 nat44_ed_forwarding_enable_disable (u8 is_enable)
2247 {
2248   snat_main_per_thread_data_t *tsm;
2249   snat_main_t *sm = &snat_main;
2250   snat_session_t *s;
2251
2252   u32 *ses_to_be_removed = 0, *ses_index;
2253
2254   sm->forwarding_enabled = is_enable != 0;
2255
2256   if (is_enable)
2257     return;
2258
2259   vec_foreach (tsm, sm->per_thread_data)
2260     {
2261       pool_foreach (s, tsm->sessions)
2262         {
2263           if (is_fwd_bypass_session (s))
2264             {
2265               vec_add1 (ses_to_be_removed, s - tsm->sessions);
2266             }
2267         }
2268       vec_foreach (ses_index, ses_to_be_removed)
2269         {
2270           s = pool_elt_at_index (tsm->sessions, ses_index[0]);
2271           nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
2272           nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
2273         }
2274
2275       vec_free (ses_to_be_removed);
2276     }
2277 }
2278
2279 void
2280 snat_free_outside_address_and_port (snat_address_t *addresses,
2281                                     u32 thread_index, ip4_address_t *addr,
2282                                     u16 port, nat_protocol_t protocol)
2283 {
2284   snat_main_t *sm = &snat_main;
2285   snat_address_t *a;
2286   u32 address_index;
2287   u16 port_host_byte_order = clib_net_to_host_u16 (port);
2288
2289   for (address_index = 0; address_index < vec_len (addresses);
2290        address_index++)
2291     {
2292       if (addresses[address_index].addr.as_u32 == addr->as_u32)
2293         break;
2294     }
2295
2296   ASSERT (address_index < vec_len (addresses));
2297
2298   a = addresses + address_index;
2299
2300   switch (protocol)
2301     {
2302 #define _(N, i, n, s) \
2303     case NAT_PROTOCOL_##N: \
2304       ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \
2305       --a->busy_##n##_port_refcounts[port_host_byte_order]; \
2306       a->busy_##n##_ports--; \
2307       a->busy_##n##_ports_per_thread[thread_index]--; \
2308       break;
2309       foreach_nat_protocol
2310 #undef _
2311         default : nat_elog_info (sm, "unknown protocol");
2312       return;
2313     }
2314 }
2315
2316 int
2317 nat_set_outside_address_and_port (snat_address_t *addresses, u32 thread_index,
2318                                   ip4_address_t addr, u16 port,
2319                                   nat_protocol_t protocol)
2320 {
2321   snat_main_t *sm = &snat_main;
2322   snat_address_t *a = 0;
2323   u32 address_index;
2324   u16 port_host_byte_order = clib_net_to_host_u16 (port);
2325
2326   for (address_index = 0; address_index < vec_len (addresses);
2327        address_index++)
2328     {
2329       if (addresses[address_index].addr.as_u32 != addr.as_u32)
2330         continue;
2331
2332       a = addresses + address_index;
2333       switch (protocol)
2334         {
2335 #define _(N, j, n, s) \
2336         case NAT_PROTOCOL_##N: \
2337           if (a->busy_##n##_port_refcounts[port_host_byte_order]) \
2338             return VNET_API_ERROR_INSTANCE_IN_USE; \
2339           ++a->busy_##n##_port_refcounts[port_host_byte_order]; \
2340           a->busy_##n##_ports_per_thread[thread_index]++; \
2341           a->busy_##n##_ports++; \
2342           return 0;
2343           foreach_nat_protocol
2344 #undef _
2345             default : nat_elog_info (sm, "unknown protocol");
2346           return 1;
2347         }
2348     }
2349
2350   return VNET_API_ERROR_NO_SUCH_ENTRY;
2351 }
2352
2353 int
2354 snat_static_mapping_match (vlib_main_t *vm, snat_main_t *sm,
2355                            ip4_address_t match_addr, u16 match_port,
2356                            u32 match_fib_index, nat_protocol_t match_protocol,
2357                            ip4_address_t *mapping_addr, u16 *mapping_port,
2358                            u32 *mapping_fib_index, u8 by_external,
2359                            u8 *is_addr_only, twice_nat_type_t *twice_nat,
2360                            lb_nat_type_t *lb, ip4_address_t *ext_host_addr,
2361                            u8 *is_identity_nat, snat_static_mapping_t **out)
2362 {
2363   clib_bihash_kv_8_8_t kv, value;
2364   clib_bihash_8_8_t *mapping_hash;
2365   snat_static_mapping_t *m;
2366   u32 rand, lo = 0, hi, mid, *tmp = 0, i;
2367   nat44_lb_addr_port_t *local;
2368   u8 backend_index;
2369
2370   if (!by_external)
2371     {
2372       mapping_hash = &sm->static_mapping_by_local;
2373       init_nat_k (&kv, match_addr, match_port, match_fib_index,
2374                   match_protocol);
2375       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2376         {
2377           /* Try address only mapping */
2378           init_nat_k (&kv, match_addr, 0, match_fib_index, 0);
2379           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2380             return 1;
2381         }
2382     }
2383   else
2384     {
2385       mapping_hash = &sm->static_mapping_by_external;
2386       init_nat_k (&kv, match_addr, match_port, 0, match_protocol);
2387       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2388         {
2389           /* Try address only mapping */
2390           init_nat_k (&kv, match_addr, 0, 0, 0);
2391           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2392             return 1;
2393         }
2394     }
2395
2396   m = pool_elt_at_index (sm->static_mappings, value.value);
2397
2398   if (by_external)
2399     {
2400       if (is_lb_static_mapping (m))
2401         {
2402           if (PREDICT_FALSE (lb != 0))
2403             *lb = m->affinity ? AFFINITY_LB_NAT : LB_NAT;
2404           if (m->affinity && !nat_affinity_find_and_lock (
2405                                vm, ext_host_addr[0], match_addr,
2406                                match_protocol, match_port, &backend_index))
2407             {
2408               local = pool_elt_at_index (m->locals, backend_index);
2409               *mapping_addr = local->addr;
2410               *mapping_port = local->port;
2411               *mapping_fib_index = local->fib_index;
2412               goto end;
2413             }
2414           // pick locals matching this worker
2415           if (PREDICT_FALSE (sm->num_workers > 1))
2416             {
2417               u32 thread_index = vlib_get_thread_index ();
2418               pool_foreach_index (i, m->locals)
2419                {
2420                 local = pool_elt_at_index (m->locals, i);
2421
2422                 ip4_header_t ip = {
2423                   .src_address = local->addr,
2424                 };
2425
2426                 if (nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index,
2427                                                       0) == thread_index)
2428                   {
2429                     vec_add1 (tmp, i);
2430                   }
2431                }
2432               ASSERT (vec_len (tmp) != 0);
2433             }
2434           else
2435             {
2436               pool_foreach_index (i, m->locals)
2437                {
2438                 vec_add1 (tmp, i);
2439               }
2440             }
2441           hi = vec_len (tmp) - 1;
2442           local = pool_elt_at_index (m->locals, tmp[hi]);
2443           rand = 1 + (random_u32 (&sm->random_seed) % local->prefix);
2444           while (lo < hi)
2445             {
2446               mid = ((hi - lo) >> 1) + lo;
2447               local = pool_elt_at_index (m->locals, tmp[mid]);
2448               (rand > local->prefix) ? (lo = mid + 1) : (hi = mid);
2449             }
2450           local = pool_elt_at_index (m->locals, tmp[lo]);
2451           if (!(local->prefix >= rand))
2452             return 1;
2453           *mapping_addr = local->addr;
2454           *mapping_port = local->port;
2455           *mapping_fib_index = local->fib_index;
2456           if (m->affinity)
2457             {
2458               if (nat_affinity_create_and_lock (ext_host_addr[0], match_addr,
2459                                                 match_protocol, match_port,
2460                                                 tmp[lo], m->affinity,
2461                                                 m->affinity_per_service_list_head_index))
2462                 nat_elog_info (sm, "create affinity record failed");
2463             }
2464           vec_free (tmp);
2465         }
2466       else
2467         {
2468           if (PREDICT_FALSE (lb != 0))
2469             *lb = NO_LB_NAT;
2470           *mapping_fib_index = m->fib_index;
2471           *mapping_addr = m->local_addr;
2472           /* Address only mapping doesn't change port */
2473           *mapping_port = is_addr_only_static_mapping (m) ? match_port
2474             : m->local_port;
2475         }
2476     }
2477   else
2478     {
2479       *mapping_addr = m->external_addr;
2480       /* Address only mapping doesn't change port */
2481       *mapping_port = is_addr_only_static_mapping (m) ? match_port
2482         : m->external_port;
2483       *mapping_fib_index = sm->outside_fib_index;
2484     }
2485
2486 end:
2487   if (PREDICT_FALSE (is_addr_only != 0))
2488     *is_addr_only = is_addr_only_static_mapping (m);
2489
2490   if (PREDICT_FALSE (twice_nat != 0))
2491     *twice_nat = m->twice_nat;
2492
2493   if (PREDICT_FALSE (is_identity_nat != 0))
2494     *is_identity_nat = is_identity_static_mapping (m);
2495
2496   if (out != 0)
2497     *out = m;
2498
2499   return 0;
2500 }
2501
2502 u32
2503 nat44_ed_get_in2out_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
2504                                   u32 rx_fib_index, u8 is_output)
2505 {
2506   snat_main_t *sm = &snat_main;
2507   u32 next_worker_index = sm->first_worker_index;
2508   u32 hash;
2509
2510   clib_bihash_kv_16_8_t kv16, value16;
2511
2512   u32 fib_index = rx_fib_index;
2513   if (b)
2514     {
2515       if (PREDICT_FALSE (is_output))
2516         {
2517           fib_index = sm->outside_fib_index;
2518           nat_outside_fib_t *outside_fib;
2519           fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
2520           fib_prefix_t pfx = {
2521                   .fp_proto = FIB_PROTOCOL_IP4,
2522                   .fp_len = 32,
2523                   .fp_addr = {
2524                           .ip4.as_u32 = ip->dst_address.as_u32,
2525                   } ,
2526           };
2527
2528           switch (vec_len (sm->outside_fibs))
2529             {
2530             case 0:
2531               fib_index = sm->outside_fib_index;
2532               break;
2533             case 1:
2534               fib_index = sm->outside_fibs[0].fib_index;
2535               break;
2536             default:
2537               vec_foreach (outside_fib, sm->outside_fibs)
2538                 {
2539                   fei = fib_table_lookup (outside_fib->fib_index, &pfx);
2540                   if (FIB_NODE_INDEX_INVALID != fei)
2541                     {
2542                       if (fib_entry_get_resolving_interface (fei) != ~0)
2543                         {
2544                           fib_index = outside_fib->fib_index;
2545                           break;
2546                         }
2547                     }
2548                 }
2549               break;
2550             }
2551         }
2552
2553       init_ed_k (&kv16, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
2554                  ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
2555                  fib_index, ip->protocol);
2556
2557       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
2558         {
2559           next_worker_index = ed_value_get_thread_index (&value16);
2560           vnet_buffer2 (b)->nat.cached_session_index =
2561             ed_value_get_session_index (&value16);
2562           goto out;
2563         }
2564
2565       // dst NAT
2566       init_ed_k (&kv16, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
2567                  ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
2568                  rx_fib_index, ip->protocol);
2569       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
2570         {
2571           next_worker_index = ed_value_get_thread_index (&value16);
2572           vnet_buffer2 (b)->nat.cached_dst_nat_session_index =
2573             ed_value_get_session_index (&value16);
2574           goto out;
2575         }
2576     }
2577
2578   hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
2579     (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
2580
2581   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
2582     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
2583   else
2584     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
2585
2586 out:
2587   if (PREDICT_TRUE (!is_output))
2588     {
2589       nat_elog_debug_handoff (sm, "HANDOFF IN2OUT", next_worker_index,
2590                               rx_fib_index,
2591                               clib_net_to_host_u32 (ip->src_address.as_u32),
2592                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2593     }
2594   else
2595     {
2596       nat_elog_debug_handoff (sm, "HANDOFF IN2OUT-OUTPUT-FEATURE",
2597                               next_worker_index, rx_fib_index,
2598                               clib_net_to_host_u32 (ip->src_address.as_u32),
2599                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2600     }
2601
2602   return next_worker_index;
2603 }
2604
2605 u32
2606 nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
2607                                   u32 rx_fib_index, u8 is_output)
2608 {
2609   snat_main_t *sm = &snat_main;
2610   clib_bihash_kv_8_8_t kv, value;
2611   clib_bihash_kv_16_8_t kv16, value16;
2612
2613   u32 proto, next_worker_index = 0;
2614   u16 port;
2615   snat_static_mapping_t *m;
2616   u32 hash;
2617
2618   proto = ip_proto_to_nat_proto (ip->protocol);
2619
2620   if (PREDICT_FALSE (proto == NAT_PROTOCOL_ICMP))
2621     {
2622       ip4_address_t lookup_saddr, lookup_daddr;
2623       u16 lookup_sport, lookup_dport;
2624       u8 lookup_protocol;
2625       if (!nat_get_icmp_session_lookup_values (
2626             b, ip, &lookup_saddr, &lookup_sport, &lookup_daddr, &lookup_dport,
2627             &lookup_protocol))
2628         {
2629           init_ed_k (&kv16, lookup_saddr, lookup_sport, lookup_daddr,
2630                      lookup_dport, rx_fib_index, lookup_protocol);
2631           if (PREDICT_TRUE (
2632                 !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
2633             {
2634               next_worker_index = ed_value_get_thread_index (&value16);
2635               nat_elog_debug_handoff (
2636                 sm, "HANDOFF OUT2IN (session)", next_worker_index,
2637                 rx_fib_index, clib_net_to_host_u32 (ip->src_address.as_u32),
2638                 clib_net_to_host_u32 (ip->dst_address.as_u32));
2639               return next_worker_index;
2640             }
2641         }
2642     }
2643
2644   init_ed_k (&kv16, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
2645              ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
2646              rx_fib_index, ip->protocol);
2647
2648   if (PREDICT_TRUE (
2649         !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
2650     {
2651       vnet_buffer2 (b)->nat.cached_session_index =
2652         ed_value_get_session_index (&value16);
2653       next_worker_index = ed_value_get_thread_index (&value16);
2654       nat_elog_debug_handoff (sm, "HANDOFF OUT2IN (session)",
2655                               next_worker_index, rx_fib_index,
2656                               clib_net_to_host_u32 (ip->src_address.as_u32),
2657                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2658       return next_worker_index;
2659     }
2660
2661   /* first try static mappings without port */
2662   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
2663     {
2664       init_nat_k (&kv, ip->dst_address, 0, 0, 0);
2665       if (!clib_bihash_search_8_8
2666           (&sm->static_mapping_by_external, &kv, &value))
2667         {
2668           m = pool_elt_at_index (sm->static_mappings, value.value);
2669           next_worker_index = m->workers[0];
2670           goto done;
2671         }
2672     }
2673
2674   /* unknown protocol */
2675   if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
2676     {
2677       /* use current thread */
2678       next_worker_index = vlib_get_thread_index ();
2679       goto done;
2680     }
2681
2682   port = vnet_buffer (b)->ip.reass.l4_dst_port;
2683
2684   if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
2685     {
2686       udp_header_t *udp = ip4_next_header (ip);
2687       icmp46_header_t *icmp = (icmp46_header_t *) udp;
2688       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
2689       if (!icmp_type_is_error_message
2690           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
2691         port = vnet_buffer (b)->ip.reass.l4_src_port;
2692       else
2693         {
2694           /* if error message, then it's not fragmented and we can access it */
2695           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
2696           proto = ip_proto_to_nat_proto (inner_ip->protocol);
2697           void *l4_header = ip4_next_header (inner_ip);
2698           switch (proto)
2699             {
2700             case NAT_PROTOCOL_ICMP:
2701               icmp = (icmp46_header_t *) l4_header;
2702               echo = (icmp_echo_header_t *) (icmp + 1);
2703               port = echo->identifier;
2704               break;
2705             case NAT_PROTOCOL_UDP:
2706             case NAT_PROTOCOL_TCP:
2707               port = ((tcp_udp_header_t *) l4_header)->src_port;
2708               break;
2709             default:
2710               next_worker_index = vlib_get_thread_index ();
2711               goto done;
2712             }
2713         }
2714     }
2715
2716   /* try static mappings with port */
2717   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
2718     {
2719       init_nat_k (&kv, ip->dst_address, port, 0, proto);
2720       if (!clib_bihash_search_8_8
2721           (&sm->static_mapping_by_external, &kv, &value))
2722         {
2723           m = pool_elt_at_index (sm->static_mappings, value.value);
2724           if (!is_lb_static_mapping (m))
2725             {
2726               next_worker_index = m->workers[0];
2727               goto done;
2728             }
2729
2730           hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
2731             (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
2732
2733           if (PREDICT_TRUE (is_pow2 (_vec_len (m->workers))))
2734             next_worker_index =
2735               m->workers[hash & (_vec_len (m->workers) - 1)];
2736           else
2737             next_worker_index = m->workers[hash % _vec_len (m->workers)];
2738           goto done;
2739         }
2740     }
2741
2742   /* worker by outside port */
2743   next_worker_index = sm->first_worker_index;
2744   next_worker_index +=
2745     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
2746
2747 done:
2748   nat_elog_debug_handoff (sm, "HANDOFF OUT2IN", next_worker_index,
2749                           rx_fib_index,
2750                           clib_net_to_host_u32 (ip->src_address.as_u32),
2751                           clib_net_to_host_u32 (ip->dst_address.as_u32));
2752   return next_worker_index;
2753 }
2754
2755 u32
2756 nat44_get_max_session_limit ()
2757 {
2758   snat_main_t *sm = &snat_main;
2759   u32 max_limit = 0, len = 0;
2760
2761   for (; len < vec_len (sm->max_translations_per_fib); len++)
2762     {
2763       if (max_limit < sm->max_translations_per_fib[len])
2764         max_limit = sm->max_translations_per_fib[len];
2765     }
2766   return max_limit;
2767 }
2768
2769 int
2770 nat44_set_session_limit (u32 session_limit, u32 vrf_id)
2771 {
2772   snat_main_t *sm = &snat_main;
2773   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
2774   u32 len = vec_len (sm->max_translations_per_fib);
2775
2776   if (len <= fib_index)
2777     {
2778       vec_validate (sm->max_translations_per_fib, fib_index + 1);
2779
2780       for (; len < vec_len (sm->max_translations_per_fib); len++)
2781         sm->max_translations_per_fib[len] = sm->max_translations_per_thread;
2782     }
2783
2784   sm->max_translations_per_fib[fib_index] = session_limit;
2785   return 0;
2786 }
2787
2788 int
2789 nat44_update_session_limit (u32 session_limit, u32 vrf_id)
2790 {
2791   snat_main_t *sm = &snat_main;
2792
2793   if (nat44_set_session_limit (session_limit, vrf_id))
2794     return 1;
2795   sm->max_translations_per_thread = nat44_get_max_session_limit ();
2796
2797   stat_segment_set_state_counter (sm->max_cfg_sessions_gauge,
2798                                   sm->max_translations_per_thread);
2799
2800   sm->translation_buckets =
2801     nat_calc_bihash_buckets (sm->max_translations_per_thread);
2802
2803   nat44_ed_sessions_clear ();
2804   return 0;
2805 }
2806
2807 static void
2808 nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations,
2809                          u32 translation_buckets)
2810 {
2811   dlist_elt_t *head;
2812
2813   pool_alloc (tsm->sessions, translations);
2814   pool_alloc (tsm->lru_pool, translations);
2815
2816   pool_get (tsm->lru_pool, head);
2817   tsm->tcp_trans_lru_head_index = head - tsm->lru_pool;
2818   clib_dlist_init (tsm->lru_pool, tsm->tcp_trans_lru_head_index);
2819
2820   pool_get (tsm->lru_pool, head);
2821   tsm->tcp_estab_lru_head_index = head - tsm->lru_pool;
2822   clib_dlist_init (tsm->lru_pool, tsm->tcp_estab_lru_head_index);
2823
2824   pool_get (tsm->lru_pool, head);
2825   tsm->udp_lru_head_index = head - tsm->lru_pool;
2826   clib_dlist_init (tsm->lru_pool, tsm->udp_lru_head_index);
2827
2828   pool_get (tsm->lru_pool, head);
2829   tsm->icmp_lru_head_index = head - tsm->lru_pool;
2830   clib_dlist_init (tsm->lru_pool, tsm->icmp_lru_head_index);
2831
2832   pool_get (tsm->lru_pool, head);
2833   tsm->unk_proto_lru_head_index = head - tsm->lru_pool;
2834   clib_dlist_init (tsm->lru_pool, tsm->unk_proto_lru_head_index);
2835 }
2836
2837 static void
2838 reinit_ed_flow_hash ()
2839 {
2840   snat_main_t *sm = &snat_main;
2841   // we expect 2 flows per session, so multiply translation_buckets by 2
2842   clib_bihash_init_16_8 (
2843     &sm->flow_hash, "ed-flow-hash",
2844     clib_max (1, sm->num_workers) * 2 * sm->translation_buckets, 0);
2845   clib_bihash_set_kvp_format_fn_16_8 (&sm->flow_hash, format_ed_session_kvp);
2846 }
2847
2848 static void
2849 nat44_ed_db_init (u32 translations, u32 translation_buckets)
2850 {
2851   snat_main_t *sm = &snat_main;
2852   snat_main_per_thread_data_t *tsm;
2853   u32 static_mapping_buckets = 1024;
2854   u32 static_mapping_memory_size = 64 << 20;
2855
2856   reinit_ed_flow_hash ();
2857
2858   clib_bihash_init_8_8 (&sm->static_mapping_by_local,
2859                         "static_mapping_by_local", static_mapping_buckets,
2860                         static_mapping_memory_size);
2861   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_local,
2862                                      format_static_mapping_kvp);
2863
2864   clib_bihash_init_8_8 (&sm->static_mapping_by_external,
2865                         "static_mapping_by_external", static_mapping_buckets,
2866                         static_mapping_memory_size);
2867   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_external,
2868                                      format_static_mapping_kvp);
2869
2870   if (sm->pat)
2871     {
2872       vec_foreach (tsm, sm->per_thread_data)
2873         {
2874           nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
2875                                    sm->translation_buckets);
2876         }
2877     }
2878 }
2879
2880 static void
2881 nat44_ed_worker_db_free (snat_main_per_thread_data_t *tsm)
2882 {
2883   pool_free (tsm->lru_pool);
2884   pool_free (tsm->sessions);
2885   vec_free (tsm->per_vrf_sessions_vec);
2886 }
2887
2888 static void
2889 nat44_ed_db_free ()
2890 {
2891   snat_main_t *sm = &snat_main;
2892   snat_main_per_thread_data_t *tsm;
2893
2894   pool_free (sm->static_mappings);
2895   clib_bihash_free_16_8 (&sm->flow_hash);
2896   clib_bihash_free_8_8 (&sm->static_mapping_by_local);
2897   clib_bihash_free_8_8 (&sm->static_mapping_by_external);
2898
2899   if (sm->pat)
2900     {
2901       vec_foreach (tsm, sm->per_thread_data)
2902         {
2903           nat44_ed_worker_db_free (tsm);
2904         }
2905     }
2906 }
2907
2908 void
2909 nat44_ed_sessions_clear ()
2910 {
2911   snat_main_t *sm = &snat_main;
2912   snat_main_per_thread_data_t *tsm;
2913
2914   reinit_ed_flow_hash ();
2915
2916   if (sm->pat)
2917     {
2918       vec_foreach (tsm, sm->per_thread_data)
2919         {
2920
2921           nat44_ed_worker_db_free (tsm);
2922           nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
2923                                    sm->translation_buckets);
2924         }
2925     }
2926   vlib_zero_simple_counter (&sm->total_sessions, 0);
2927 }
2928
2929 static void
2930 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
2931                                  uword opaque,
2932                                  u32 sw_if_index,
2933                                  ip4_address_t * address,
2934                                  u32 address_length,
2935                                  u32 if_address_index, u32 is_delete)
2936 {
2937   snat_main_t *sm = &snat_main;
2938   snat_static_map_resolve_t *rp;
2939   snat_static_mapping_t *m;
2940   clib_bihash_kv_8_8_t kv, value;
2941   int i, rv;
2942   ip4_address_t l_addr;
2943
2944   if (!sm->enabled)
2945     return;
2946
2947   for (i = 0; i < vec_len (sm->to_resolve); i++)
2948     {
2949       rp = sm->to_resolve + i;
2950       if (rp->addr_only == 0)
2951         continue;
2952       if (rp->sw_if_index == sw_if_index)
2953         goto match;
2954     }
2955
2956   return;
2957
2958 match:
2959   init_nat_k (&kv, *address, rp->addr_only ? 0 : rp->e_port,
2960               sm->outside_fib_index, rp->addr_only ? 0 : rp->proto);
2961   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
2962     m = 0;
2963   else
2964     m = pool_elt_at_index (sm->static_mappings, value.value);
2965
2966   if (!is_delete)
2967     {
2968       /* Don't trip over lease renewal, static config */
2969       if (m)
2970         return;
2971     }
2972   else
2973     {
2974       if (!m)
2975         return;
2976     }
2977
2978   /* Indetity mapping? */
2979   if (rp->l_addr.as_u32 == 0)
2980     l_addr.as_u32 = address[0].as_u32;
2981   else
2982     l_addr.as_u32 = rp->l_addr.as_u32;
2983   /* Add the static mapping */
2984   rv = snat_add_static_mapping (l_addr,
2985                                 address[0],
2986                                 rp->l_port,
2987                                 rp->e_port,
2988                                 rp->vrf_id,
2989                                 rp->addr_only, ~0 /* sw_if_index */ ,
2990                                 rp->proto, !is_delete, rp->twice_nat,
2991                                 rp->out2in_only, rp->tag, rp->identity_nat,
2992                                 rp->pool_addr, rp->exact);
2993   if (rv)
2994     nat_elog_notice_X1 (sm, "snat_add_static_mapping returned %d", "i4", rv);
2995 }
2996
2997 static void
2998 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
2999                                        uword opaque,
3000                                        u32 sw_if_index,
3001                                        ip4_address_t * address,
3002                                        u32 address_length,
3003                                        u32 if_address_index, u32 is_delete)
3004 {
3005   snat_main_t *sm = &snat_main;
3006   snat_static_map_resolve_t *rp;
3007   ip4_address_t l_addr;
3008   int i, j;
3009   int rv;
3010   u8 twice_nat = 0;
3011   snat_address_t *addresses = sm->addresses;
3012
3013   if (!sm->enabled)
3014     return;
3015
3016   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices); i++)
3017     {
3018       if (sw_if_index == sm->auto_add_sw_if_indices[i])
3019         goto match;
3020     }
3021
3022   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices_twice_nat); i++)
3023     {
3024       twice_nat = 1;
3025       addresses = sm->twice_nat_addresses;
3026       if (sw_if_index == sm->auto_add_sw_if_indices_twice_nat[i])
3027         goto match;
3028     }
3029
3030   return;
3031
3032 match:
3033   if (!is_delete)
3034     {
3035       /* Don't trip over lease renewal, static config */
3036       for (j = 0; j < vec_len (addresses); j++)
3037         if (addresses[j].addr.as_u32 == address->as_u32)
3038           return;
3039
3040       (void) snat_add_address (sm, address, ~0, twice_nat);
3041       /* Scan static map resolution vector */
3042       for (j = 0; j < vec_len (sm->to_resolve); j++)
3043         {
3044           rp = sm->to_resolve + j;
3045           if (rp->addr_only)
3046             continue;
3047           /* On this interface? */
3048           if (rp->sw_if_index == sw_if_index)
3049             {
3050               /* Indetity mapping? */
3051               if (rp->l_addr.as_u32 == 0)
3052                 l_addr.as_u32 = address[0].as_u32;
3053               else
3054                 l_addr.as_u32 = rp->l_addr.as_u32;
3055               /* Add the static mapping */
3056               rv = snat_add_static_mapping (
3057                 l_addr, address[0], rp->l_port, rp->e_port, rp->vrf_id,
3058                 rp->addr_only, ~0 /* sw_if_index */, rp->proto, 1,
3059                 rp->twice_nat, rp->out2in_only, rp->tag, rp->identity_nat,
3060                 rp->pool_addr, rp->exact);
3061               if (rv)
3062                 nat_elog_notice_X1 (sm, "snat_add_static_mapping returned %d",
3063                                     "i4", rv);
3064             }
3065         }
3066       return;
3067     }
3068   else
3069     {
3070       (void) snat_del_address (sm, address[0], 1, twice_nat);
3071       return;
3072     }
3073 }
3074
3075 int
3076 snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del,
3077                             u8 twice_nat)
3078 {
3079   ip4_main_t *ip4_main = sm->ip4_main;
3080   ip4_address_t *first_int_addr;
3081   snat_static_map_resolve_t *rp;
3082   u32 *indices_to_delete = 0;
3083   int i, j;
3084   u32 *auto_add_sw_if_indices =
3085     twice_nat ? sm->
3086     auto_add_sw_if_indices_twice_nat : sm->auto_add_sw_if_indices;
3087
3088   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0        /* just want the address */
3089     );
3090
3091   for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
3092     {
3093       if (auto_add_sw_if_indices[i] == sw_if_index)
3094         {
3095           if (is_del)
3096             {
3097               /* if have address remove it */
3098               if (first_int_addr)
3099                 (void) snat_del_address (sm, first_int_addr[0], 1, twice_nat);
3100               else
3101                 {
3102                   for (j = 0; j < vec_len (sm->to_resolve); j++)
3103                     {
3104                       rp = sm->to_resolve + j;
3105                       if (rp->sw_if_index == sw_if_index)
3106                         vec_add1 (indices_to_delete, j);
3107                     }
3108                   if (vec_len (indices_to_delete))
3109                     {
3110                       for (j = vec_len (indices_to_delete) - 1; j >= 0; j--)
3111                         vec_del1 (sm->to_resolve, j);
3112                       vec_free (indices_to_delete);
3113                     }
3114                 }
3115               if (twice_nat)
3116                 vec_del1 (sm->auto_add_sw_if_indices_twice_nat, i);
3117               else
3118                 vec_del1 (sm->auto_add_sw_if_indices, i);
3119             }
3120           else
3121             return VNET_API_ERROR_VALUE_EXIST;
3122
3123           return 0;
3124         }
3125     }
3126
3127   if (is_del)
3128     return VNET_API_ERROR_NO_SUCH_ENTRY;
3129
3130   /* add to the auto-address list */
3131   if (twice_nat)
3132     vec_add1 (sm->auto_add_sw_if_indices_twice_nat, sw_if_index);
3133   else
3134     vec_add1 (sm->auto_add_sw_if_indices, sw_if_index);
3135
3136   /* If the address is already bound - or static - add it now */
3137   if (first_int_addr)
3138     (void) snat_add_address (sm, first_int_addr, ~0, twice_nat);
3139
3140   return 0;
3141 }
3142
3143 int
3144 nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
3145                       ip4_address_t * eh_addr, u16 eh_port, u8 proto,
3146                       u32 vrf_id, int is_in)
3147 {
3148   ip4_header_t ip;
3149   clib_bihash_kv_16_8_t kv, value;
3150   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
3151   snat_session_t *s;
3152   snat_main_per_thread_data_t *tsm;
3153
3154   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
3155   if (sm->num_workers > 1)
3156     tsm = vec_elt_at_index (
3157       sm->per_thread_data,
3158       nat44_ed_get_in2out_worker_index (0, &ip, fib_index, 0));
3159   else
3160     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
3161
3162   init_ed_k (&kv, *addr, port, *eh_addr, eh_port, fib_index, proto);
3163   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
3164     {
3165       return VNET_API_ERROR_NO_SUCH_ENTRY;
3166     }
3167
3168   if (pool_is_free_index (tsm->sessions, ed_value_get_session_index (&value)))
3169     return VNET_API_ERROR_UNSPECIFIED;
3170   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
3171   nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
3172   nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
3173   return 0;
3174 }
3175
3176 VLIB_NODE_FN (nat_default_node) (vlib_main_t * vm,
3177                                  vlib_node_runtime_t * node,
3178                                  vlib_frame_t * frame)
3179 {
3180   return 0;
3181 }
3182
3183 VLIB_REGISTER_NODE (nat_default_node) = {
3184   .name = "nat-default",
3185   .vector_size = sizeof (u32),
3186   .format_trace = 0,
3187   .type = VLIB_NODE_TYPE_INTERNAL,
3188   .n_errors = 0,
3189   .n_next_nodes = NAT_N_NEXT,
3190   .next_nodes = {
3191     [NAT_NEXT_DROP] = "error-drop",
3192     [NAT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3193     [NAT_NEXT_IN2OUT_ED_FAST_PATH] = "nat44-ed-in2out",
3194     [NAT_NEXT_IN2OUT_ED_SLOW_PATH] = "nat44-ed-in2out-slowpath",
3195     [NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH] = "nat44-ed-in2out-output",
3196     [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
3197     [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in",
3198     [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath",
3199     [NAT_NEXT_IN2OUT_CLASSIFY] = "nat44-in2out-worker-handoff",
3200     [NAT_NEXT_OUT2IN_CLASSIFY] = "nat44-out2in-worker-handoff",
3201   },
3202 };
3203
3204 void
3205 nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f)
3206 {
3207   f->l3_csum_delta = 0;
3208   f->l4_csum_delta = 0;
3209   if (f->ops & NAT_FLOW_OP_SADDR_REWRITE &&
3210       f->rewrite.saddr.as_u32 != f->match.saddr.as_u32)
3211     {
3212       f->l3_csum_delta =
3213         ip_csum_add_even (f->l3_csum_delta, f->rewrite.saddr.as_u32);
3214       f->l3_csum_delta =
3215         ip_csum_sub_even (f->l3_csum_delta, f->match.saddr.as_u32);
3216     }
3217   else
3218     {
3219       f->rewrite.saddr.as_u32 = f->match.saddr.as_u32;
3220     }
3221   if (f->ops & NAT_FLOW_OP_DADDR_REWRITE &&
3222       f->rewrite.daddr.as_u32 != f->match.daddr.as_u32)
3223     {
3224       f->l3_csum_delta =
3225         ip_csum_add_even (f->l3_csum_delta, f->rewrite.daddr.as_u32);
3226       f->l3_csum_delta =
3227         ip_csum_sub_even (f->l3_csum_delta, f->match.daddr.as_u32);
3228     }
3229   else
3230     {
3231       f->rewrite.daddr.as_u32 = f->match.daddr.as_u32;
3232     }
3233   if (f->ops & NAT_FLOW_OP_SPORT_REWRITE && f->rewrite.sport != f->match.sport)
3234     {
3235       f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.sport);
3236       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport);
3237     }
3238   else
3239     {
3240       f->rewrite.sport = f->match.sport;
3241     }
3242   if (f->ops & NAT_FLOW_OP_DPORT_REWRITE && f->rewrite.dport != f->match.dport)
3243     {
3244       f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.dport);
3245       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.dport);
3246     }
3247   else
3248     {
3249       f->rewrite.dport = f->match.dport;
3250     }
3251   if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE &&
3252       f->rewrite.icmp_id != f->match.sport)
3253     {
3254       f->l4_csum_delta =
3255         ip_csum_add_even (f->l4_csum_delta, f->rewrite.icmp_id);
3256       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport);
3257     }
3258   else
3259     {
3260       f->rewrite.icmp_id = f->match.sport;
3261     }
3262   if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3263     {
3264     }
3265   else
3266     {
3267       f->rewrite.fib_index = f->match.fib_index;
3268     }
3269 }
3270
3271 static_always_inline int nat_6t_flow_icmp_translate (snat_main_t *sm,
3272                                                      vlib_buffer_t *b,
3273                                                      ip4_header_t *ip,
3274                                                      nat_6t_flow_t *f);
3275
3276 static_always_inline void
3277 nat_6t_flow_ip4_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
3278                            nat_6t_flow_t *f, nat_protocol_t proto,
3279                            int is_icmp_inner_ip4)
3280 {
3281   udp_header_t *udp = ip4_next_header (ip);
3282   tcp_header_t *tcp = (tcp_header_t *) udp;
3283
3284   if ((NAT_PROTOCOL_TCP == proto || NAT_PROTOCOL_UDP == proto) &&
3285       !vnet_buffer (b)->ip.reass.is_non_first_fragment)
3286     {
3287       if (!is_icmp_inner_ip4)
3288         { // regular case
3289           ip->src_address = f->rewrite.saddr;
3290           ip->dst_address = f->rewrite.daddr;
3291           udp->src_port = f->rewrite.sport;
3292           udp->dst_port = f->rewrite.dport;
3293         }
3294       else
3295         { // icmp inner ip4 - reversed saddr/daddr
3296           ip->src_address = f->rewrite.daddr;
3297           ip->dst_address = f->rewrite.saddr;
3298           udp->src_port = f->rewrite.dport;
3299           udp->dst_port = f->rewrite.sport;
3300         }
3301
3302       if (NAT_PROTOCOL_TCP == proto)
3303         {
3304           ip_csum_t tcp_sum = tcp->checksum;
3305           tcp_sum = ip_csum_sub_even (tcp_sum, f->l3_csum_delta);
3306           tcp_sum = ip_csum_sub_even (tcp_sum, f->l4_csum_delta);
3307           mss_clamping (sm->mss_clamping, tcp, &tcp_sum);
3308           tcp->checksum = ip_csum_fold (tcp_sum);
3309         }
3310       else if (proto == NAT_PROTOCOL_UDP && udp->checksum)
3311         {
3312           ip_csum_t udp_sum = udp->checksum;
3313           udp_sum = ip_csum_sub_even (udp_sum, f->l3_csum_delta);
3314           udp_sum = ip_csum_sub_even (udp_sum, f->l4_csum_delta);
3315           udp->checksum = ip_csum_fold (udp_sum);
3316         }
3317     }
3318   else
3319     {
3320       if (!is_icmp_inner_ip4)
3321         { // regular case
3322           ip->src_address = f->rewrite.saddr;
3323           ip->dst_address = f->rewrite.daddr;
3324         }
3325       else
3326         { // icmp inner ip4 - reversed saddr/daddr
3327           ip->src_address = f->rewrite.daddr;
3328           ip->dst_address = f->rewrite.saddr;
3329         }
3330     }
3331
3332   ip_csum_t ip_sum = ip->checksum;
3333   ip_sum = ip_csum_sub_even (ip_sum, f->l3_csum_delta);
3334   ip->checksum = ip_csum_fold (ip_sum);
3335   if (0xffff == ip->checksum)
3336     ip->checksum = 0;
3337   ASSERT (ip4_header_checksum_is_valid (ip));
3338 }
3339
3340 static_always_inline int
3341 nat_6t_flow_icmp_translate (snat_main_t *sm, vlib_buffer_t *b,
3342                             ip4_header_t *ip, nat_6t_flow_t *f)
3343 {
3344   if (IP_PROTOCOL_ICMP != ip->protocol)
3345     return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3346
3347   icmp46_header_t *icmp = ip4_next_header (ip);
3348   icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3349
3350   if ((!vnet_buffer (b)->ip.reass.is_non_first_fragment))
3351     {
3352       if (icmp->checksum == 0)
3353         icmp->checksum = 0xffff;
3354
3355       if (!icmp_type_is_error_message (icmp->type))
3356         {
3357           if ((f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE) &&
3358               (f->rewrite.icmp_id != echo->identifier))
3359             {
3360               ip_csum_t sum = icmp->checksum;
3361               sum = ip_csum_update (sum, echo->identifier, f->rewrite.icmp_id,
3362                                     icmp_echo_header_t,
3363                                     identifier /* changed member */);
3364               echo->identifier = f->rewrite.icmp_id;
3365               icmp->checksum = ip_csum_fold (sum);
3366             }
3367         }
3368       else
3369         {
3370           // errors are not fragmented
3371           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3372
3373           if (!ip4_header_checksum_is_valid (inner_ip))
3374             {
3375               return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3376             }
3377
3378           nat_protocol_t inner_proto =
3379             ip_proto_to_nat_proto (inner_ip->protocol);
3380
3381           ip_csum_t icmp_sum = icmp->checksum;
3382
3383           switch (inner_proto)
3384             {
3385             case NAT_PROTOCOL_UDP:
3386             case NAT_PROTOCOL_TCP:
3387               nat_6t_flow_ip4_translate (sm, b, inner_ip, f, inner_proto,
3388                                          1 /* is_icmp_inner_ip4 */);
3389               icmp_sum = ip_csum_sub_even (icmp_sum, f->l3_csum_delta);
3390               icmp->checksum = ip_csum_fold (icmp_sum);
3391               break;
3392             case NAT_PROTOCOL_ICMP:
3393               if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
3394                 {
3395                   icmp46_header_t *inner_icmp = ip4_next_header (inner_ip);
3396                   icmp_echo_header_t *inner_echo =
3397                     (icmp_echo_header_t *) (inner_icmp + 1);
3398                   if (f->rewrite.icmp_id != inner_echo->identifier)
3399                     {
3400                       ip_csum_t sum = icmp->checksum;
3401                       sum = ip_csum_update (
3402                         sum, inner_echo->identifier, f->rewrite.icmp_id,
3403                         icmp_echo_header_t, identifier /* changed member */);
3404                       icmp->checksum = ip_csum_fold (sum);
3405                       ip_csum_t inner_sum = inner_icmp->checksum;
3406                       inner_sum = ip_csum_update (
3407                         sum, inner_echo->identifier, f->rewrite.icmp_id,
3408                         icmp_echo_header_t, identifier /* changed member */);
3409                       inner_icmp->checksum = ip_csum_fold (inner_sum);
3410                       inner_echo->identifier = f->rewrite.icmp_id;
3411                     }
3412                 }
3413               break;
3414             default:
3415               clib_warning ("unexpected NAT protocol value `%d'", inner_proto);
3416               return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3417             }
3418         }
3419     }
3420   return NAT_ED_TRNSL_ERR_SUCCESS;
3421 }
3422
3423 nat_translation_error_e
3424 nat_6t_flow_buf_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
3425                            nat_6t_flow_t *f, nat_protocol_t proto,
3426                            int is_output_feature)
3427 {
3428   if (!is_output_feature && f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3429     {
3430       vnet_buffer (b)->sw_if_index[VLIB_TX] = f->rewrite.fib_index;
3431     }
3432
3433   nat_6t_flow_ip4_translate (sm, b, ip, f, proto, 0 /* is_icmp_inner_ip4 */);
3434
3435   if (NAT_PROTOCOL_ICMP == proto)
3436     {
3437       return nat_6t_flow_icmp_translate (sm, b, ip, f);
3438     }
3439
3440   return NAT_ED_TRNSL_ERR_SUCCESS;
3441 }
3442
3443 u8 *
3444 format_nat_6t (u8 *s, va_list *args)
3445 {
3446   nat_6t_t *t = va_arg (*args, nat_6t_t *);
3447
3448   s = format (s, "saddr %U sport %u daddr %U dport %u proto %U fib_idx %u",
3449               format_ip4_address, t->saddr.as_u8,
3450               clib_net_to_host_u16 (t->sport), format_ip4_address,
3451               t->daddr.as_u8, clib_net_to_host_u16 (t->dport),
3452               format_ip_protocol, t->proto, t->fib_index);
3453   return s;
3454 }
3455
3456 u8 *
3457 format_nat_ed_translation_error (u8 *s, va_list *args)
3458 {
3459   nat_translation_error_e e = va_arg (*args, nat_translation_error_e);
3460
3461   switch (e)
3462     {
3463     case NAT_ED_TRNSL_ERR_SUCCESS:
3464       s = format (s, "success");
3465       break;
3466     case NAT_ED_TRNSL_ERR_TRANSLATION_FAILED:
3467       s = format (s, "translation-failed");
3468       break;
3469     case NAT_ED_TRNSL_ERR_FLOW_MISMATCH:
3470       s = format (s, "flow-mismatch");
3471       break;
3472     }
3473   return s;
3474 }
3475
3476 u8 *
3477 format_nat_6t_flow (u8 *s, va_list *args)
3478 {
3479   nat_6t_flow_t *f = va_arg (*args, nat_6t_flow_t *);
3480
3481   s = format (s, "match: %U ", format_nat_6t, &f->match);
3482   int r = 0;
3483   if (f->ops & NAT_FLOW_OP_SADDR_REWRITE)
3484     {
3485       s = format (s, "rewrite: saddr %U ", format_ip4_address,
3486                   f->rewrite.saddr.as_u8);
3487       r = 1;
3488     }
3489   if (f->ops & NAT_FLOW_OP_SPORT_REWRITE)
3490     {
3491       if (!r)
3492         {
3493           s = format (s, "rewrite: ");
3494           r = 1;
3495         }
3496       s = format (s, "sport %u ", clib_net_to_host_u16 (f->rewrite.sport));
3497     }
3498   if (f->ops & NAT_FLOW_OP_DADDR_REWRITE)
3499     {
3500       if (!r)
3501         {
3502           s = format (s, "rewrite: ");
3503           r = 1;
3504         }
3505       s = format (s, "daddr %U ", format_ip4_address, f->rewrite.daddr.as_u8);
3506     }
3507   if (f->ops & NAT_FLOW_OP_DPORT_REWRITE)
3508     {
3509       if (!r)
3510         {
3511           s = format (s, "rewrite: ");
3512           r = 1;
3513         }
3514       s = format (s, "dport %u ", clib_net_to_host_u16 (f->rewrite.dport));
3515     }
3516   if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
3517     {
3518       if (!r)
3519         {
3520           s = format (s, "rewrite: ");
3521           r = 1;
3522         }
3523       s = format (s, "icmp-id %u ", clib_net_to_host_u16 (f->rewrite.icmp_id));
3524     }
3525   if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3526     {
3527       if (!r)
3528         {
3529           s = format (s, "rewrite: ");
3530           r = 1;
3531         }
3532       s = format (s, "txfib %u ", f->rewrite.fib_index);
3533     }
3534   return s;
3535 }
3536
3537 /*
3538  * fd.io coding-style-patch-verification: ON
3539  *
3540  * Local Variables:
3541  * eval: (c-set-style "gnu")
3542  * End:
3543  */