0d62e788ec11d637ed1dbd2b95e7d840d538efa2
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed.c
1 /*
2  * nat44_ed.c - simple nat plugin
3  *
4  * Copyright (c) 2016 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vpp/app/version.h>
19
20 #include <vnet/vnet.h>
21 #include <vnet/ip/ip.h>
22 #include <vnet/ip/ip4.h>
23 #include <vnet/ip/ip_table.h>
24 #include <vnet/ip/reass/ip4_sv_reass.h>
25 #include <vnet/fib/fib_table.h>
26 #include <vnet/fib/ip4_fib.h>
27 #include <vnet/plugin/plugin.h>
28 #include <vppinfra/bihash_16_8.h>
29
30 #include <nat/lib/log.h>
31 #include <nat/lib/nat_syslog.h>
32 #include <nat/lib/nat_inlines.h>
33 #include <nat/lib/ipfix_logging.h>
34
35 #include <nat/nat44-ed/nat44_ed.h>
36 #include <nat/nat44-ed/nat44_ed_affinity.h>
37 #include <nat/nat44-ed/nat44_ed_inlines.h>
38
/* Single global instance of the plugin's main state structure; the code in
 * this file reaches it through &snat_main. */
39 snat_main_t snat_main;
40
/* Forward declaration only; the definition is not in this part of the file. */
41 static_always_inline void nat_validate_interface_counters (snat_main_t *sm,
42                                                            u32 sw_if_index);
43
/*
 * Return (without a value) from the enclosing function when
 * snat_main.enabled is not set.  Declares its own block-local `sm`.
 */
44 #define skip_if_disabled()                                                    \
45   do                                                                          \
46     {                                                                         \
47       snat_main_t *sm = &snat_main;                                           \
48       if (PREDICT_FALSE (!sm->enabled))                                       \
49         return;                                                               \
50     }                                                                         \
51   while (0)
52
/*
 * Log "plugin enabled" and return 1 from the enclosing function when
 * snat_main.enabled is set.  Declares its own block-local `sm`.
 */
53 #define fail_if_enabled()                                                     \
54   do                                                                          \
55     {                                                                         \
56       snat_main_t *sm = &snat_main;                                           \
57       if (PREDICT_FALSE (sm->enabled))                                        \
58         {                                                                     \
59           nat_log_err ("plugin enabled");                                     \
60           return 1;                                                           \
61         }                                                                     \
62     }                                                                         \
63   while (0)
64
/*
 * Log "plugin disabled" and return 1 from the enclosing function when
 * snat_main.enabled is not set.  Declares its own block-local `sm`.
 */
65 #define fail_if_disabled()                                                    \
66   do                                                                          \
67     {                                                                         \
68       snat_main_t *sm = &snat_main;                                           \
69       if (PREDICT_FALSE (!sm->enabled))                                       \
70         {                                                                     \
71           nat_log_err ("plugin disabled");                                    \
72           return 1;                                                           \
73         }                                                                     \
74     }                                                                         \
75   while (0)
76
77 /* *INDENT-OFF* */
78 /* Hook up input features */
/*
 * Registrations on the "ip4-unicast" arc.  Each entry names one node
 * (.node_name) and lists in .runs_after the features it is ordered after.
 * The out2in-direction entries additionally list "ip4-dhcp-client-detect".
 */
79 VNET_FEATURE_INIT (nat_pre_in2out, static) = {
80   .arc_name = "ip4-unicast",
81   .node_name = "nat-pre-in2out",
82   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
83                                "ip4-sv-reassembly-feature"),
84 };
85 VNET_FEATURE_INIT (nat_pre_out2in, static) = {
86   .arc_name = "ip4-unicast",
87   .node_name = "nat-pre-out2in",
88   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
89                                "ip4-dhcp-client-detect",
90                                "ip4-sv-reassembly-feature"),
91 };
/* The two worker-handoff entries do not list "ip4-sv-reassembly-feature". */
92 VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = {
93   .arc_name = "ip4-unicast",
94   .node_name = "nat44-in2out-worker-handoff",
95   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
96 };
97 VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = {
98   .arc_name = "ip4-unicast",
99   .node_name = "nat44-out2in-worker-handoff",
100   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
101                                "ip4-dhcp-client-detect"),
102 };
103 VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
104   .arc_name = "ip4-unicast",
105   .node_name = "nat44-in2out",
106   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
107 };
108 VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
109   .arc_name = "ip4-unicast",
110   .node_name = "nat44-out2in",
111   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
112                                "ip4-dhcp-client-detect"),
113 };
114 VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = {
115   .arc_name = "ip4-unicast",
116   .node_name = "nat44-ed-in2out",
117   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
118 };
119 VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = {
120   .arc_name = "ip4-unicast",
121   .node_name = "nat44-ed-out2in",
122   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
123                                "ip4-dhcp-client-detect"),
124 };
125 VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
126   .arc_name = "ip4-unicast",
127   .node_name = "nat44-ed-classify",
128   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
129 };
130 VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
131   .arc_name = "ip4-unicast",
132   .node_name = "nat44-handoff-classify",
133   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
134 };
135 VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
136   .arc_name = "ip4-unicast",
137   .node_name = "nat44-in2out-fast",
138   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
139 };
140 VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
141   .arc_name = "ip4-unicast",
142   .node_name = "nat44-out2in-fast",
143   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
144                                "ip4-dhcp-client-detect"),
145 };
146
147 /* Hook up output features */
/*
 * Registrations on the "ip4-output" arc.  The two nat44-in2out-output
 * entries are ordered after "acl-plugin-out-ip4-fa"; nat-pre-in2out-output
 * and nat44-ed-in2out-output are instead ordered before it (.runs_before).
 */
148 VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = {
149   .arc_name = "ip4-output",
150   .node_name = "nat44-in2out-output",
151   .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
152 };
153 VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
154   .arc_name = "ip4-output",
155   .node_name = "nat44-in2out-output-worker-handoff",
156   .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
157 };
158 VNET_FEATURE_INIT (nat_pre_in2out_output, static) = {
159   .arc_name = "ip4-output",
160   .node_name = "nat-pre-in2out-output",
161   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
162   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
163 };
164 VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = {
165   .arc_name = "ip4-output",
166   .node_name = "nat44-ed-in2out-output",
167   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
168   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
169 };
170
/* Plugin registration record: build version and description string. */
171 VLIB_PLUGIN_REGISTER () = {
172     .version = VPP_BUILD_VER,
173     .description = "Network Address Translation (NAT)",
174 };
175 /* *INDENT-ON* */
176
/* Forward declarations; none of these are defined in this part of the file. */
177 static void nat44_ed_db_init (u32 translations, u32 translation_buckets);
178
/* NOTE(review): the empty parentheses declare the function without a
 * prototype in C11; `(void)` would make the no-argument contract explicit. */
179 static void nat44_ed_db_free ();
180
/* Worker-selection callbacks for the out2in and in2out directions; both
 * share the same signature and return a u32. */
181 static u32
182 nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip,
183                                u32 rx_fib_index, u8 is_output);
184
185 static u32 nat44_ed_get_worker_in2out_cb (vlib_buffer_t *b, ip4_header_t *ip,
186                                           u32 rx_fib_index, u8 is_output);
187
188 u32 nat_calc_bihash_buckets (u32 n_elts);
189
190 u8 *
191 format_session_kvp (u8 * s, va_list * args)
192 {
193   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
194
195   s = format (s, "%U thread-index %llu session-index %llu", format_snat_key,
196               v->key, nat_value_get_thread_index (v),
197               nat_value_get_session_index (v));
198
199   return s;
200 }
201
202 u8 *
203 format_static_mapping_kvp (u8 * s, va_list * args)
204 {
205   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
206
207   s = format (s, "%U static-mapping-index %llu",
208               format_snat_key, v->key, v->value);
209
210   return s;
211 }
212
213 u8 *
214 format_ed_session_kvp (u8 * s, va_list * args)
215 {
216   clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
217
218   u8 proto;
219   u16 r_port, l_port;
220   ip4_address_t l_addr, r_addr;
221   u32 fib_index;
222
223   split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port);
224   s = format (s,
225               "local %U:%d remote %U:%d proto %U fib %d thread-index %u "
226               "session-index %u",
227               format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port),
228               format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port),
229               format_ip_protocol, proto, fib_index,
230               ed_value_get_thread_index (v), ed_value_get_session_index (v));
231
232   return s;
233 }
234
235 void
236 nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index,
237                        u8 is_ha)
238 {
239       per_vrf_sessions_unregister_session (s, thread_index);
240
241       if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 0))
242         nat_elog_warn (sm, "flow hash del failed");
243
244       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0))
245         nat_elog_warn (sm, "flow hash del failed");
246
247   if (is_fwd_bypass_session (s))
248     {
249       return;
250     }
251
252       if (is_affinity_sessions (s))
253         nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
254                              s->nat_proto, s->out2in.port);
255
256       if (!is_ha)
257         nat_syslog_nat44_sdel (
258           0, s->in2out.fib_index, &s->in2out.addr, s->in2out.port,
259           &s->ext_host_nat_addr, s->ext_host_nat_port, &s->out2in.addr,
260           s->out2in.port, &s->ext_host_addr, s->ext_host_port, s->nat_proto,
261           is_twice_nat_session (s));
262
263   if (snat_is_unk_proto_session (s))
264     return;
265
266   if (!is_ha)
267     {
268       /* log NAT event */
269       nat_ipfix_logging_nat44_ses_delete (thread_index,
270                                           s->in2out.addr.as_u32,
271                                           s->out2in.addr.as_u32,
272                                           s->nat_proto,
273                                           s->in2out.port,
274                                           s->out2in.port,
275                                           s->in2out.fib_index);
276     }
277
278   /* Twice NAT address and port for external host */
279   if (is_twice_nat_session (s))
280     {
281       snat_free_outside_address_and_port (sm->twice_nat_addresses,
282                                           thread_index,
283                                           &s->ext_host_nat_addr,
284                                           s->ext_host_nat_port, s->nat_proto);
285     }
286
287   if (snat_is_session_static (s))
288     return;
289
290   snat_free_outside_address_and_port (sm->addresses, thread_index,
291                                       &s->out2in.addr, s->out2in.port,
292                                       s->nat_proto);
293 }
294
295 void
296 snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
297                           int is_add)
298 {
299   snat_main_t *sm = &snat_main;
300   fib_prefix_t prefix = {
301     .fp_len = p_len,
302     .fp_proto = FIB_PROTOCOL_IP4,
303     .fp_addr = {
304                 .ip4.as_u32 = addr->as_u32,
305                 },
306   };
307   u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
308
309   if (is_add)
310     fib_table_entry_update_one_path (fib_index,
311                                      &prefix,
312                                      sm->fib_src_low,
313                                      (FIB_ENTRY_FLAG_CONNECTED |
314                                       FIB_ENTRY_FLAG_LOCAL |
315                                       FIB_ENTRY_FLAG_EXCLUSIVE),
316                                      DPO_PROTO_IP4,
317                                      NULL,
318                                      sw_if_index,
319                                      ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
320   else
321     fib_table_entry_delete (fib_index, &prefix, sm->fib_src_low);
322 }
323
324 int
325 snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id,
326                   u8 twice_nat)
327 {
328   snat_address_t *ap;
329   snat_interface_t *i;
330   vlib_thread_main_t *tm = vlib_get_thread_main ();
331
332   /* Check if address already exists */
333   /* *INDENT-OFF* */
334   vec_foreach (ap, twice_nat ? sm->twice_nat_addresses : sm->addresses)
335     {
336       if (ap->addr.as_u32 == addr->as_u32)
337         {
338           nat_log_err ("address exist");
339           return VNET_API_ERROR_VALUE_EXIST;
340         }
341     }
342   /* *INDENT-ON* */
343
344   if (twice_nat)
345     vec_add2 (sm->twice_nat_addresses, ap, 1);
346   else
347     vec_add2 (sm->addresses, ap, 1);
348
349   ap->addr = *addr;
350   if (vrf_id != ~0)
351     ap->fib_index =
352       fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
353                                          sm->fib_src_low);
354   else
355     ap->fib_index = ~0;
356
357   /* *INDENT-OFF* */
358   #define _(N, i, n, s) \
359     clib_memset(ap->busy_##n##_port_refcounts, 0, sizeof(ap->busy_##n##_port_refcounts));\
360     ap->busy_##n##_ports = 0; \
361     ap->busy_##n##_ports_per_thread = 0;\
362     vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
363     foreach_nat_protocol
364   #undef _
365   /* *INDENT-ON* */
366
367   if (twice_nat)
368     return 0;
369
370   /* Add external address to FIB */
371   /* *INDENT-OFF* */
372   pool_foreach (i, sm->interfaces)
373    {
374      if (nat_interface_is_inside (i))
375        continue;
376
377      snat_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
378      break;
379   }
380   pool_foreach (i, sm->output_feature_interfaces)
381    {
382      if (nat_interface_is_inside (i))
383        continue;
384
385      snat_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
386      break;
387   }
388   /* *INDENT-ON* */
389
390   return 0;
391 }
392
393 static int
394 is_snat_address_used_in_static_mapping (snat_main_t * sm, ip4_address_t addr)
395 {
396   snat_static_mapping_t *m;
397   /* *INDENT-OFF* */
398   pool_foreach (m, sm->static_mappings)
399    {
400       if (is_addr_only_static_mapping (m) ||
401           is_out2in_only_static_mapping (m) ||
402           is_identity_static_mapping (m))
403         continue;
404       if (m->external_addr.as_u32 == addr.as_u32)
405         return 1;
406   }
407   /* *INDENT-ON* */
408
409   return 0;
410 }
411
412 static void
413 snat_add_static_mapping_when_resolved (snat_main_t *sm, ip4_address_t l_addr,
414                                        u16 l_port, u32 sw_if_index, u16 e_port,
415                                        u32 vrf_id, nat_protocol_t proto,
416                                        int addr_only, u8 *tag, int twice_nat,
417                                        int out2in_only, int identity_nat,
418                                        ip4_address_t pool_addr, int exact)
419 {
420   snat_static_map_resolve_t *rp;
421
422   vec_add2 (sm->to_resolve, rp, 1);
423   rp->l_addr.as_u32 = l_addr.as_u32;
424   rp->l_port = l_port;
425   rp->sw_if_index = sw_if_index;
426   rp->e_port = e_port;
427   rp->vrf_id = vrf_id;
428   rp->proto = proto;
429   rp->addr_only = addr_only;
430   rp->twice_nat = twice_nat;
431   rp->out2in_only = out2in_only;
432   rp->identity_nat = identity_nat;
433   rp->tag = vec_dup (tag);
434   rp->pool_addr = pool_addr;
435   rp->exact = exact;
436 }
437
438 u32
439 get_thread_idx_by_port (u16 e_port)
440 {
441   snat_main_t *sm = &snat_main;
442   u32 thread_idx = sm->num_workers;
443   if (sm->num_workers > 1)
444     {
445       thread_idx =
446         sm->first_worker_index +
447         sm->workers[(e_port - 1024) / sm->port_per_thread];
448     }
449   return thread_idx;
450 }
451
452 void
453 nat_ed_static_mapping_del_sessions (snat_main_t * sm,
454                                     snat_main_per_thread_data_t * tsm,
455                                     ip4_address_t l_addr,
456                                     u16 l_port,
457                                     u8 protocol,
458                                     u32 fib_index, int addr_only,
459                                     ip4_address_t e_addr, u16 e_port)
460 {
461   snat_session_t *s;
462   u32 *indexes_to_free = NULL;
463   /* *INDENT-OFF* */
464   pool_foreach (s, tsm->sessions) {
465     if (s->in2out.fib_index != fib_index ||
466         s->in2out.addr.as_u32 != l_addr.as_u32)
467       {
468         continue;
469       }
470     if (!addr_only)
471       {
472         if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
473             s->out2in.port != e_port ||
474             s->in2out.port != l_port ||
475             s->nat_proto != protocol)
476           continue;
477       }
478
479     if (is_lb_session (s))
480       continue;
481     if (!snat_is_session_static (s))
482       continue;
483     nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
484     vec_add1 (indexes_to_free, s - tsm->sessions);
485     if (!addr_only)
486       break;
487   }
488   /* *INDENT-ON* */
489   u32 *ses_index;
490   vec_foreach (ses_index, indexes_to_free)
491   {
492     s = pool_elt_at_index (tsm->sessions, *ses_index);
493     nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
494   }
495   vec_free (indexes_to_free);
496 }
497
/*
 * Add (is_add != 0) or delete a static mapping between l_addr[:l_port] and
 * e_addr[:e_port].
 *
 * When sw_if_index is not ~0 the external address is taken from the first
 * address of that interface and a record is kept in sm->to_resolve; if the
 * interface has no address yet the function returns 0 after updating that
 * record.  When addr_only is set, ports and protocol are not part of the
 * lookup keys.  identity_nat mappings keep a pool of per-VRF "locals" on a
 * single mapping entry.
 *
 * Returns 0 on success or one of the VNET_API_ERROR_* codes returned below
 * (VALUE_EXIST, NO_SUCH_ENTRY, UNSUPPORTED, INVALID_VALUE, INVALID_VALUE_2).
 */
498 int
499 snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
500                          u16 l_port, u16 e_port, u32 vrf_id, int addr_only,
501                          u32 sw_if_index, nat_protocol_t proto, int is_add,
502                          twice_nat_type_t twice_nat, u8 out2in_only, u8 *tag,
503                          u8 identity_nat, ip4_address_t pool_addr, int exact)
504 {
505   snat_main_t *sm = &snat_main;
506   snat_static_mapping_t *m;
507   clib_bihash_kv_8_8_t kv, value;
508   snat_address_t *a = 0;
509   u32 fib_index = ~0;
510   snat_interface_t *interface;
511   snat_main_per_thread_data_t *tsm;
512   snat_static_map_resolve_t *rp, *rp_match = 0;
513   nat44_lb_addr_port_t *local;
514   u32 find = ~0;
515   int i;
516
517   /* If the external address is a specific interface address */
518   if (sw_if_index != ~0)
519     {
520       ip4_address_t *first_int_addr;
521
      /* Look for an existing to_resolve record describing this mapping;
         on a match `i` is left at the record's index (used by vec_del1
         in the delete path below). */
522       for (i = 0; i < vec_len (sm->to_resolve); i++)
523         {
524           rp = sm->to_resolve + i;
525           if (rp->sw_if_index != sw_if_index ||
526               rp->l_addr.as_u32 != l_addr.as_u32 ||
527               rp->vrf_id != vrf_id || rp->addr_only != addr_only)
528             continue;
529
530           if (!addr_only)
531             {
              /* NOTE(review): the two port comparisons are joined with &&,
                 so a record is skipped on ports only when BOTH differ;
                 confirm this is intended rather than ||. */
532               if ((rp->l_port != l_port && rp->e_port != e_port)
533                   || rp->proto != proto)
534                 continue;
535             }
536
537           rp_match = rp;
538           break;
539         }
540
541       /* Might be already set... */
542       first_int_addr = ip4_interface_first_address
543         (sm->ip4_main, sw_if_index, 0 /* just want the address */ );
544
545       if (is_add)
546         {
547           if (rp_match)
548             return VNET_API_ERROR_VALUE_EXIST;
549
          /* The record is queued before any of the validation further
             down; only the NO_SUCH_ENTRY path below attempts to remove
             a record again. */
550           snat_add_static_mapping_when_resolved (
551             sm, l_addr, l_port, sw_if_index, e_port, vrf_id, proto, addr_only,
552             tag, twice_nat, out2in_only, identity_nat, pool_addr, exact);
553
554           /* DHCP resolution required? */
555           if (first_int_addr == 0)
556             {
557               return 0;
558             }
559           else
560             {
561               e_addr.as_u32 = first_int_addr->as_u32;
562               /* Identity mapping? */
563               if (l_addr.as_u32 == 0)
564                 l_addr.as_u32 = e_addr.as_u32;
565             }
566         }
567       else
568         {
569           if (!rp_match)
570             return VNET_API_ERROR_NO_SUCH_ENTRY;
571
572           vec_del1 (sm->to_resolve, i);
573
574           if (first_int_addr)
575             {
576               e_addr.as_u32 = first_int_addr->as_u32;
577               /* Identity mapping? */
578               if (l_addr.as_u32 == 0)
579                 l_addr.as_u32 = e_addr.as_u32;
580             }
581           else
582             return 0;
583         }
584     }
585
  /* Look up an existing mapping by external address; port and protocol
     are zeroed in the key for addr-only mappings. */
586   init_nat_k (&kv, e_addr, addr_only ? 0 : e_port, 0, addr_only ? 0 : proto);
587   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
588     m = 0;
589   else
590     m = pool_elt_at_index (sm->static_mappings, value.value);
591
592   if (is_add)
593     {
594       if (m)
595         {
          /* An existing identity mapping may gain one more local per VRF;
             any other existing mapping is a duplicate. */
596           if (is_identity_static_mapping (m))
597             {
598               /* *INDENT-OFF* */
599               pool_foreach (local, m->locals)
600                {
601                 if (local->vrf_id == vrf_id)
602                   return VNET_API_ERROR_VALUE_EXIST;
603               }
604               /* *INDENT-ON* */
605               pool_get (m->locals, local);
606               local->vrf_id = vrf_id;
607               local->fib_index =
608                 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
609                                                    sm->fib_src_low);
610               init_nat_kv (&kv, m->local_addr, m->local_port, local->fib_index,
611                            m->proto, 0, m - sm->static_mappings);
612               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
613               return 0;
614             }
615           else
616             return VNET_API_ERROR_VALUE_EXIST;
617         }
618
619       if (twice_nat && addr_only)
620         return VNET_API_ERROR_UNSUPPORTED;
621
622       /* Convert VRF id to FIB index */
623       if (vrf_id != ~0)
624         fib_index =
625           fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
626                                              sm->fib_src_low);
627       /* If not specified use inside VRF id from SNAT plugin startup config */
628       else
629         {
630           fib_index = sm->inside_fib_index;
631           vrf_id = sm->inside_vrf_id;
632           fib_table_lock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
633         }
634
      /* NOTE(review): from here on a FIB table lock is held, but the
         error returns below (VALUE_EXIST, INVALID_VALUE, INVALID_VALUE_2,
         NO_SUCH_ENTRY) do not call fib_table_unlock - looks like a lock
         leak on failure; confirm. */
635       if (!(out2in_only || identity_nat))
636         {
637           init_nat_k (&kv, l_addr, addr_only ? 0 : l_port, fib_index,
638                       addr_only ? 0 : proto);
639           if (!clib_bihash_search_8_8
640               (&sm->static_mapping_by_local, &kv, &value))
641             return VNET_API_ERROR_VALUE_EXIST;
642         }
643
644       /* Find external address in allocated addresses and reserve port for
645          address and port pair mapping when dynamic translations enabled */
646       if (!(addr_only || sm->static_mapping_only || out2in_only))
647         {
648           for (i = 0; i < vec_len (sm->addresses); i++)
649             {
650               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
651                 {
652                   a = sm->addresses + i;
653                   /* External port must be unused */
654                   switch (proto)
655                     {
656 #define _(N, j, n, s) \
657                     case NAT_PROTOCOL_##N: \
658                       if (a->busy_##n##_port_refcounts[e_port]) \
659                         return VNET_API_ERROR_INVALID_VALUE; \
660                       ++a->busy_##n##_port_refcounts[e_port]; \
661                       if (e_port > 1024) \
662                         { \
663                           a->busy_##n##_ports++; \
664                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
665                         } \
666                       break;
667                       foreach_nat_protocol
668 #undef _
669                         default : nat_elog_info (sm, "unknown protocol");
670                       return VNET_API_ERROR_INVALID_VALUE_2;
671                     }
672                   break;
673                 }
674             }
675           /* External address must be allocated */
676           if (!a && (l_addr.as_u32 != e_addr.as_u32))
677             {
678               if (sw_if_index != ~0)
679                 {
680                   for (i = 0; i < vec_len (sm->to_resolve); i++)
681                     {
682                       rp = sm->to_resolve + i;
683                       if (rp->addr_only)
684                         continue;
                      /* NOTE(review): every comparison is joined with &&,
                         so a record is skipped only if ALL fields differ;
                         the first non-addr-only record sharing any single
                         field is deleted.  Probably meant ||; confirm. */
685                       if (rp->sw_if_index != sw_if_index &&
686                           rp->l_addr.as_u32 != l_addr.as_u32 &&
687                           rp->vrf_id != vrf_id && rp->l_port != l_port &&
688                           rp->e_port != e_port && rp->proto != proto)
689                         continue;
690
691                       vec_del1 (sm->to_resolve, i);
692                       break;
693                     }
694                 }
695               return VNET_API_ERROR_NO_SUCH_ENTRY;
696             }
697         }
698
      /* Allocate and fill in the new mapping entry */
699       pool_get (sm->static_mappings, m);
700       clib_memset (m, 0, sizeof (*m));
701       m->tag = vec_dup (tag);
702       m->local_addr = l_addr;
703       m->external_addr = e_addr;
704       m->twice_nat = twice_nat;
705
706       if (twice_nat == TWICE_NAT && exact)
707         {
708           m->flags |= NAT_STATIC_MAPPING_FLAG_EXACT_ADDRESS;
709           m->pool_addr = pool_addr;
710         }
711
712       if (out2in_only)
713         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
714       if (addr_only)
715         m->flags |= NAT_STATIC_MAPPING_FLAG_ADDR_ONLY;
716       if (identity_nat)
717         {
718           m->flags |= NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT;
719           pool_get (m->locals, local);
720           local->vrf_id = vrf_id;
721           local->fib_index = fib_index;
722         }
723       else
724         {
725           m->vrf_id = vrf_id;
726           m->fib_index = fib_index;
727         }
728       if (!addr_only)
729         {
730           m->local_port = l_port;
731           m->external_port = e_port;
732           m->proto = proto;
733         }
734
      /* With multiple workers, remember which worker the in2out callback
         selects for the local address.
         NOTE(review): m->fib_index is passed here, which is still 0 from
         the memset above for identity mappings (only local->fib_index is
         set for those); confirm.  Also, tsm is assigned on this add path
         but only consumed on the delete path. */
735       if (sm->num_workers > 1)
736         {
737           ip4_header_t ip = {
738             .src_address = m->local_addr,
739           };
740           vec_add1 (m->workers,
741                     sm->worker_in2out_cb (0, &ip, m->fib_index, 0));
742           tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
743         }
744       else
745         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
746
      /* Publish the mapping in the by-local (unless out2in-only) and
         by-external lookup tables; the value is the pool index of m. */
747       if (!out2in_only)
748         {
749           init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto,
750                        0, m - sm->static_mappings);
751           clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
752         }
753
754       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
755                    m - sm->static_mappings);
756       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1);
757     }
758   else
759     {
760       if (!m)
761         {
          /* For interface-bound mappings the to_resolve record was already
             removed above, so a missing entry is not an error. */
762           if (sw_if_index != ~0)
763             return 0;
764           else
765             return VNET_API_ERROR_NO_SUCH_ENTRY;
766         }
767
768       if (identity_nat)
769         {
770           if (vrf_id == ~0)
771             vrf_id = sm->inside_vrf_id;
772
          /* Find and release the local belonging to this VRF */
773           /* *INDENT-OFF* */
774           pool_foreach (local, m->locals)
775            {
776             if (local->vrf_id == vrf_id)
777               find = local - m->locals;
778           }
779           /* *INDENT-ON* */
780           if (find == ~0)
781             return VNET_API_ERROR_NO_SUCH_ENTRY;
782
783           local = pool_elt_at_index (m->locals, find);
784           fib_index = local->fib_index;
785           pool_put (m->locals, local);
786         }
787       else
788         fib_index = m->fib_index;
789
790       /* Free external address port */
      /* NOTE(review): the refcount is decremented without checking that it
         is non-zero, using the caller-supplied e_port/proto rather than
         the values stored in m; confirm callers always pass matching
         values. */
791       if (!(addr_only || sm->static_mapping_only || out2in_only))
792         {
793           for (i = 0; i < vec_len (sm->addresses); i++)
794             {
795               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
796                 {
797                   a = sm->addresses + i;
798                   switch (proto)
799                     {
800 #define _(N, j, n, s) \
801                     case NAT_PROTOCOL_##N: \
802                       --a->busy_##n##_port_refcounts[e_port]; \
803                       if (e_port > 1024) \
804                         { \
805                           a->busy_##n##_ports--; \
806                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
807                         } \
808                       break;
809                       foreach_nat_protocol
810 #undef _
811                         default : nat_elog_info (sm, "unknown protocol");
812                       return VNET_API_ERROR_INVALID_VALUE_2;
813                     }
814                   break;
815                 }
816             }
817         }
818
      /* Pick the per-thread data recorded for this mapping at add time */
819       if (sm->num_workers > 1)
820         tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
821       else
822         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
823
824       init_nat_k (&kv, m->local_addr, m->local_port, fib_index, m->proto);
825       if (!out2in_only)
826         clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0);
827
828       /* Delete session(s) for static mapping if exist */
      /* (condition is equivalent to: !static_mapping_only ||
         static_mapping_connection_tracking) */
829       if (!(sm->static_mapping_only) ||
830           (sm->static_mapping_only && sm->static_mapping_connection_tracking))
831         {
832           nat_ed_static_mapping_del_sessions (
833             sm, tsm, m->local_addr, m->local_port, m->proto, fib_index,
834             addr_only, e_addr, e_port);
835         }
836
837       fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
      /* Keep the mapping entry while other locals still reference it */
838       if (pool_elts (m->locals))
839         return 0;
840
841       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
842       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0);
843
844       vec_free (m->tag);
845       vec_free (m->workers);
846       /* Delete static mapping from pool */
847       pool_put (sm->static_mappings, m);
848     }
849
  /* Only addr-only mappings whose local and external addresses differ
     need the external address added to / removed from the FIB. */
850   if (!addr_only || (l_addr.as_u32 == e_addr.as_u32))
851     return 0;
852
853   /* Add/delete external address to FIB */
854   /* *INDENT-OFF* */
855   pool_foreach (interface, sm->interfaces)
856    {
857      if (nat_interface_is_inside (interface))
858        continue;
859
860      snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, is_add);
861      break;
862   }
863   pool_foreach (interface, sm->output_feature_interfaces)
864    {
865      if (nat_interface_is_inside (interface))
866        continue;
867
868      snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, is_add);
869      break;
870   }
871   /* *INDENT-ON* */
872
873   return 0;
874 }
875
876 int
877 nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
878                                  nat_protocol_t proto,
879                                  nat44_lb_addr_port_t * locals, u8 is_add,
880                                  twice_nat_type_t twice_nat, u8 out2in_only,
881                                  u8 * tag, u32 affinity)
882 {
883   snat_main_t *sm = &snat_main;
884   snat_static_mapping_t *m;
885   clib_bihash_kv_8_8_t kv, value;
886   snat_address_t *a = 0;
887   int i;
888   nat44_lb_addr_port_t *local;
889   snat_main_per_thread_data_t *tsm;
890   snat_session_t *s;
891   uword *bitmap = 0;
892
893   init_nat_k (&kv, e_addr, e_port, 0, proto);
894   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
895     m = 0;
896   else
897     m = pool_elt_at_index (sm->static_mappings, value.value);
898
899   if (is_add)
900     {
901       if (m)
902         return VNET_API_ERROR_VALUE_EXIST;
903
904       if (vec_len (locals) < 2)
905         return VNET_API_ERROR_INVALID_VALUE;
906
907       /* Find external address in allocated addresses and reserve port for
908          address and port pair mapping when dynamic translations enabled */
909       if (!(sm->static_mapping_only || out2in_only))
910         {
911           for (i = 0; i < vec_len (sm->addresses); i++)
912             {
913               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
914                 {
915                   a = sm->addresses + i;
916                   /* External port must be unused */
917                   switch (proto)
918                     {
919 #define _(N, j, n, s) \
920                     case NAT_PROTOCOL_##N: \
921                       if (a->busy_##n##_port_refcounts[e_port]) \
922                         return VNET_API_ERROR_INVALID_VALUE; \
923                       ++a->busy_##n##_port_refcounts[e_port]; \
924                       if (e_port > 1024) \
925                         { \
926                           a->busy_##n##_ports++; \
927                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
928                         } \
929                       break;
930                       foreach_nat_protocol
931 #undef _
932                         default : nat_elog_info (sm, "unknown protocol");
933                       return VNET_API_ERROR_INVALID_VALUE_2;
934                     }
935                   break;
936                 }
937             }
938           /* External address must be allocated */
939           if (!a)
940             return VNET_API_ERROR_NO_SUCH_ENTRY;
941         }
942
943       pool_get (sm->static_mappings, m);
944       clib_memset (m, 0, sizeof (*m));
945       m->tag = vec_dup (tag);
946       m->external_addr = e_addr;
947       m->external_port = e_port;
948       m->proto = proto;
949       m->twice_nat = twice_nat;
950       m->flags |= NAT_STATIC_MAPPING_FLAG_LB;
951       if (out2in_only)
952         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
953       m->affinity = affinity;
954
955       if (affinity)
956         m->affinity_per_service_list_head_index =
957           nat_affinity_get_per_service_list_head_index ();
958       else
959         m->affinity_per_service_list_head_index = ~0;
960
961       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
962                    m - sm->static_mappings);
963       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1))
964         {
965           nat_elog_err (sm, "static_mapping_by_external key add failed");
966           return VNET_API_ERROR_UNSPECIFIED;
967         }
968
969       for (i = 0; i < vec_len (locals); i++)
970         {
971           locals[i].fib_index =
972             fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
973                                                locals[i].vrf_id,
974                                                sm->fib_src_low);
975           if (!out2in_only)
976             {
977               init_nat_kv (&kv, locals[i].addr, locals[i].port,
978                            locals[i].fib_index, m->proto, 0,
979                            m - sm->static_mappings);
980               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
981             }
982           locals[i].prefix = (i == 0) ? locals[i].probability :
983             (locals[i - 1].prefix + locals[i].probability);
984           pool_get (m->locals, local);
985           *local = locals[i];
986           if (sm->num_workers > 1)
987             {
988               ip4_header_t ip = {
989                 .src_address = locals[i].addr,
990               };
991               bitmap = clib_bitmap_set (
992                 bitmap, sm->worker_in2out_cb (0, &ip, m->fib_index, 0), 1);
993             }
994         }
995
996       /* Assign workers */
997       if (sm->num_workers > 1)
998         {
999           /* *INDENT-OFF* */
1000           clib_bitmap_foreach (i, bitmap)
1001              {
1002                vec_add1(m->workers, i);
1003             }
1004           /* *INDENT-ON* */
1005         }
1006     }
1007   else
1008     {
1009       if (!m)
1010         return VNET_API_ERROR_NO_SUCH_ENTRY;
1011
1012       if (!is_lb_static_mapping (m))
1013         return VNET_API_ERROR_INVALID_VALUE;
1014
1015       /* Free external address port */
1016       if (!(sm->static_mapping_only || out2in_only))
1017         {
1018           for (i = 0; i < vec_len (sm->addresses); i++)
1019             {
1020               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1021                 {
1022                   a = sm->addresses + i;
1023                   switch (proto)
1024                     {
1025 #define _(N, j, n, s) \
1026                     case NAT_PROTOCOL_##N: \
1027                       --a->busy_##n##_port_refcounts[e_port]; \
1028                       if (e_port > 1024) \
1029                         { \
1030                           a->busy_##n##_ports--; \
1031                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1032                         } \
1033                       break;
1034                       foreach_nat_protocol
1035 #undef _
1036                         default : nat_elog_info (sm, "unknown protocol");
1037                       return VNET_API_ERROR_INVALID_VALUE_2;
1038                     }
1039                   break;
1040                 }
1041             }
1042         }
1043
1044       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
1045       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0))
1046         {
1047           nat_elog_err (sm, "static_mapping_by_external key del failed");
1048           return VNET_API_ERROR_UNSPECIFIED;
1049         }
1050
1051       /* *INDENT-OFF* */
1052       pool_foreach (local, m->locals)
1053       {
1054           fib_table_unlock (local->fib_index, FIB_PROTOCOL_IP4,
1055                             sm->fib_src_low);
1056           if (!out2in_only)
1057             {
1058               init_nat_k (&kv, local->addr, local->port, local->fib_index,
1059                           m->proto);
1060               if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv,
1061                                            0))
1062                 {
1063                   nat_elog_err (sm, "static_mapping_by_local key del failed");
1064                   return VNET_API_ERROR_UNSPECIFIED;
1065                 }
1066             }
1067
1068           if (sm->num_workers > 1)
1069             {
1070               ip4_header_t ip = {
1071                 .src_address = local->addr,
1072               };
1073               tsm = vec_elt_at_index (
1074                 sm->per_thread_data,
1075                 sm->worker_in2out_cb (0, &ip, m->fib_index, 0));
1076             }
1077           else
1078             tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1079
1080           /* Delete sessions */
1081           pool_foreach (s, tsm->sessions)
1082             {
1083               if (!(is_lb_session (s)))
1084                 continue;
1085
1086               if ((s->in2out.addr.as_u32 != local->addr.as_u32) ||
1087                   s->in2out.port != local->port)
1088                 continue;
1089
1090               nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1091               nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1092             }
1093       }
1094       /* *INDENT-ON* */
1095       if (m->affinity)
1096         nat_affinity_flush_service (m->affinity_per_service_list_head_index);
1097       pool_free (m->locals);
1098       vec_free (m->tag);
1099       vec_free (m->workers);
1100
1101       pool_put (sm->static_mappings, m);
1102     }
1103
1104   return 0;
1105 }
1106
1107 int
1108 nat44_lb_static_mapping_add_del_local (ip4_address_t e_addr, u16 e_port,
1109                                        ip4_address_t l_addr, u16 l_port,
1110                                        nat_protocol_t proto, u32 vrf_id,
1111                                        u8 probability, u8 is_add)
1112 {
1113   snat_main_t *sm = &snat_main;
1114   snat_static_mapping_t *m = 0;
1115   clib_bihash_kv_8_8_t kv, value;
1116   nat44_lb_addr_port_t *local, *prev_local, *match_local = 0;
1117   snat_main_per_thread_data_t *tsm;
1118   snat_session_t *s;
1119   u32 *locals = 0;
1120   uword *bitmap = 0;
1121   int i;
1122
1123   init_nat_k (&kv, e_addr, e_port, 0, proto);
1124   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1125     m = pool_elt_at_index (sm->static_mappings, value.value);
1126
1127   if (!m)
1128     return VNET_API_ERROR_NO_SUCH_ENTRY;
1129
1130   if (!is_lb_static_mapping (m))
1131     return VNET_API_ERROR_INVALID_VALUE;
1132
1133   /* *INDENT-OFF* */
1134   pool_foreach (local, m->locals)
1135    {
1136     if ((local->addr.as_u32 == l_addr.as_u32) && (local->port == l_port) &&
1137         (local->vrf_id == vrf_id))
1138       {
1139         match_local = local;
1140         break;
1141       }
1142   }
1143   /* *INDENT-ON* */
1144
1145   if (is_add)
1146     {
1147       if (match_local)
1148         return VNET_API_ERROR_VALUE_EXIST;
1149
1150       pool_get (m->locals, local);
1151       clib_memset (local, 0, sizeof (*local));
1152       local->addr.as_u32 = l_addr.as_u32;
1153       local->port = l_port;
1154       local->probability = probability;
1155       local->vrf_id = vrf_id;
1156       local->fib_index =
1157         fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1158                                            sm->fib_src_low);
1159
1160       if (!is_out2in_only_static_mapping (m))
1161         {
1162           init_nat_kv (&kv, l_addr, l_port, local->fib_index, proto, 0,
1163                        m - sm->static_mappings);
1164           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1))
1165             nat_elog_err (sm, "static_mapping_by_local key add failed");
1166         }
1167     }
1168   else
1169     {
1170       if (!match_local)
1171         return VNET_API_ERROR_NO_SUCH_ENTRY;
1172
1173       if (pool_elts (m->locals) < 3)
1174         return VNET_API_ERROR_UNSPECIFIED;
1175
1176       fib_table_unlock (match_local->fib_index, FIB_PROTOCOL_IP4,
1177                         sm->fib_src_low);
1178
1179       if (!is_out2in_only_static_mapping (m))
1180         {
1181           init_nat_k (&kv, l_addr, l_port, match_local->fib_index, proto);
1182           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0))
1183             nat_elog_err (sm, "static_mapping_by_local key del failed");
1184         }
1185
1186       if (sm->num_workers > 1)
1187         {
1188           ip4_header_t ip = {
1189             .src_address = local->addr,
1190           };
1191           tsm =
1192             vec_elt_at_index (sm->per_thread_data,
1193                               sm->worker_in2out_cb (0, &ip, m->fib_index, 0));
1194         }
1195       else
1196         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1197
1198       /* Delete sessions */
1199       /* *INDENT-OFF* */
1200       pool_foreach (s, tsm->sessions) {
1201         if (!(is_lb_session (s)))
1202           continue;
1203
1204         if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) ||
1205             s->in2out.port != match_local->port)
1206           continue;
1207
1208         nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1209         nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1210       }
1211       /* *INDENT-ON* */
1212
1213       pool_put (m->locals, match_local);
1214     }
1215
1216   vec_free (m->workers);
1217
1218   /* *INDENT-OFF* */
1219   pool_foreach (local, m->locals)
1220    {
1221     vec_add1 (locals, local - m->locals);
1222     if (sm->num_workers > 1)
1223       {
1224         ip4_header_t ip;
1225         ip.src_address.as_u32 = local->addr.as_u32,
1226         bitmap = clib_bitmap_set (
1227           bitmap, sm->worker_in2out_cb (0, &ip, local->fib_index, 0), 1);
1228       }
1229   }
1230   /* *INDENT-ON* */
1231
1232   ASSERT (vec_len (locals) > 1);
1233
1234   local = pool_elt_at_index (m->locals, locals[0]);
1235   local->prefix = local->probability;
1236   for (i = 1; i < vec_len (locals); i++)
1237     {
1238       local = pool_elt_at_index (m->locals, locals[i]);
1239       prev_local = pool_elt_at_index (m->locals, locals[i - 1]);
1240       local->prefix = local->probability + prev_local->prefix;
1241     }
1242
1243   /* Assign workers */
1244   if (sm->num_workers > 1)
1245     {
1246       /* *INDENT-OFF* */
1247       clib_bitmap_foreach (i, bitmap)  { vec_add1(m->workers, i); }
1248       /* *INDENT-ON* */
1249     }
1250
1251   return 0;
1252 }
1253
1254 int
1255 snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm,
1256                   u8 twice_nat)
1257 {
1258   snat_address_t *a = 0;
1259   snat_session_t *ses;
1260   u32 *ses_to_be_removed = 0, *ses_index;
1261   snat_main_per_thread_data_t *tsm;
1262   snat_static_mapping_t *m;
1263   snat_interface_t *interface;
1264   int i;
1265   snat_address_t *addresses =
1266     twice_nat ? sm->twice_nat_addresses : sm->addresses;
1267
1268   /* Find SNAT address */
1269   for (i = 0; i < vec_len (addresses); i++)
1270     {
1271       if (addresses[i].addr.as_u32 == addr.as_u32)
1272         {
1273           a = addresses + i;
1274           break;
1275         }
1276     }
1277   if (!a)
1278     {
1279       nat_log_err ("no such address");
1280       return VNET_API_ERROR_NO_SUCH_ENTRY;
1281     }
1282
1283   if (delete_sm)
1284     {
1285       ip4_address_t pool_addr = { 0 };
1286       /* *INDENT-OFF* */
1287       pool_foreach (m, sm->static_mappings)
1288        {
1289           if (m->external_addr.as_u32 == addr.as_u32)
1290             (void) snat_add_static_mapping (m->local_addr, m->external_addr,
1291                                             m->local_port, m->external_port,
1292                                             m->vrf_id,
1293                                             is_addr_only_static_mapping(m), ~0,
1294                                             m->proto, 0 /* is_add */,
1295                                             m->twice_nat,
1296                                             is_out2in_only_static_mapping(m),
1297                                             m->tag,
1298                                             is_identity_static_mapping(m),
1299                                             pool_addr, 0);
1300       }
1301       /* *INDENT-ON* */
1302     }
1303   else
1304     {
1305       /* Check if address is used in some static mapping */
1306       if (is_snat_address_used_in_static_mapping (sm, addr))
1307         {
1308           nat_log_err ("address used in static mapping");
1309           return VNET_API_ERROR_UNSPECIFIED;
1310         }
1311     }
1312
1313   if (a->fib_index != ~0)
1314     fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
1315
1316   /* Delete sessions using address */
1317   if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
1318     {
1319       vec_foreach (tsm, sm->per_thread_data)
1320       {
1321         pool_foreach (ses, tsm->sessions)  {
1322           if (ses->out2in.addr.as_u32 == addr.as_u32)
1323             {
1324               nat_free_session_data (sm, ses, tsm - sm->per_thread_data, 0);
1325               vec_add1 (ses_to_be_removed, ses - tsm->sessions);
1326             }
1327         }
1328
1329             vec_foreach (ses_index, ses_to_be_removed)
1330             {
1331               ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1332               nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1);
1333             }
1334
1335         vec_free (ses_to_be_removed);
1336       }
1337     }
1338
1339 #define _(N, i, n, s) \
1340   vec_free (a->busy_##n##_ports_per_thread);
1341   foreach_nat_protocol
1342 #undef _
1343
1344     if (twice_nat)
1345   {
1346     vec_del1 (sm->twice_nat_addresses, i);
1347     return 0;
1348   }
1349   else vec_del1 (sm->addresses, i);
1350
1351   /* Delete external address from FIB */
1352   pool_foreach (interface, sm->interfaces)
1353     {
1354       if (nat_interface_is_inside (interface))
1355         continue;
1356
1357       snat_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
1358       break;
1359     }
1360   pool_foreach (interface, sm->output_feature_interfaces)
1361    {
1362      if (nat_interface_is_inside (interface))
1363        continue;
1364
1365      snat_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
1366      break;
1367   }
1368
1369   return 0;
1370 }
1371
1372 void
1373 expire_per_vrf_sessions (u32 fib_index)
1374 {
1375   per_vrf_sessions_t *per_vrf_sessions;
1376   snat_main_per_thread_data_t *tsm;
1377   snat_main_t *sm = &snat_main;
1378
1379   /* *INDENT-OFF* */
1380   vec_foreach (tsm, sm->per_thread_data)
1381     {
1382       vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
1383         {
1384           if ((per_vrf_sessions->rx_fib_index == fib_index) ||
1385               (per_vrf_sessions->tx_fib_index == fib_index))
1386             {
1387               per_vrf_sessions->expired = 1;
1388             }
1389         }
1390     }
1391   /* *INDENT-ON* */
1392 }
1393
1394 void
1395 update_per_vrf_sessions_vec (u32 fib_index, int is_del)
1396 {
1397   snat_main_t *sm = &snat_main;
1398   nat_fib_t *fib;
1399
1400   // we don't care if it is outside/inside fib
1401   // we just care about their ref_count
1402   // if it reaches 0 sessions should expire
1403   // because the fib isn't valid for NAT anymore
1404
1405   vec_foreach (fib, sm->fibs)
1406   {
1407     if (fib->fib_index == fib_index)
1408       {
1409         if (is_del)
1410           {
1411             fib->ref_count--;
1412             if (!fib->ref_count)
1413               {
1414                 vec_del1 (sm->fibs, fib - sm->fibs);
1415                 expire_per_vrf_sessions (fib_index);
1416               }
1417             return;
1418           }
1419         else
1420           fib->ref_count++;
1421       }
1422   }
1423   if (!is_del)
1424     {
1425       vec_add2 (sm->fibs, fib, 1);
1426       fib->ref_count = 1;
1427       fib->fib_index = fib_index;
1428     }
1429 }
1430
1431 int
1432 snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
1433 {
1434   snat_main_t *sm = &snat_main;
1435   snat_interface_t *i;
1436   const char *feature_name, *del_feature_name;
1437   snat_address_t *ap;
1438   snat_static_mapping_t *m;
1439   nat_outside_fib_t *outside_fib;
1440   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1441                                                        sw_if_index);
1442
1443   if (!sm->enabled)
1444     {
1445       nat_log_err ("nat44 is disabled");
1446       return VNET_API_ERROR_UNSUPPORTED;
1447     }
1448
1449   /* *INDENT-OFF* */
1450   pool_foreach (i, sm->output_feature_interfaces)
1451    {
1452     if (i->sw_if_index == sw_if_index)
1453       {
1454         nat_log_err ("error interface already configured");
1455         return VNET_API_ERROR_VALUE_EXIST;
1456       }
1457   }
1458   /* *INDENT-ON* */
1459
1460   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
1461     feature_name = is_inside ? "nat44-in2out-fast" : "nat44-out2in-fast";
1462   else
1463     {
1464       if (sm->num_workers > 1)
1465         feature_name =
1466           is_inside ? "nat44-in2out-worker-handoff" :
1467           "nat44-out2in-worker-handoff";
1468       else
1469         feature_name = is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1470     }
1471
1472   ASSERT (sm->frame_queue_nelts > 0);
1473
1474   if (sm->fq_in2out_index == ~0 && sm->num_workers > 1)
1475     sm->fq_in2out_index = vlib_frame_queue_main_init (sm->in2out_node_index,
1476                                                       sm->frame_queue_nelts);
1477
1478   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
1479     sm->fq_out2in_index = vlib_frame_queue_main_init (sm->out2in_node_index,
1480                                                       sm->frame_queue_nelts);
1481
1482   update_per_vrf_sessions_vec (fib_index, is_del);
1483
1484   if (!is_inside)
1485     {
1486       /* *INDENT-OFF* */
1487       vec_foreach (outside_fib, sm->outside_fibs)
1488         {
1489           if (outside_fib->fib_index == fib_index)
1490             {
1491               if (is_del)
1492                 {
1493                   outside_fib->refcount--;
1494                   if (!outside_fib->refcount)
1495                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1496                 }
1497               else
1498                 outside_fib->refcount++;
1499               goto feature_set;
1500             }
1501         }
1502       /* *INDENT-ON* */
1503       if (!is_del)
1504         {
1505           vec_add2 (sm->outside_fibs, outside_fib, 1);
1506           outside_fib->refcount = 1;
1507           outside_fib->fib_index = fib_index;
1508         }
1509     }
1510
1511 feature_set:
1512   /* *INDENT-OFF* */
1513   pool_foreach (i, sm->interfaces)
1514    {
1515     if (i->sw_if_index == sw_if_index)
1516       {
1517         if (is_del)
1518           {
1519             if (nat_interface_is_inside(i) && nat_interface_is_outside(i))
1520               {
1521                 if (is_inside)
1522                   i->flags &= ~NAT_INTERFACE_FLAG_IS_INSIDE;
1523                 else
1524                   i->flags &= ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
1525
1526                 if (sm->num_workers > 1)
1527                   {
1528                     del_feature_name = "nat44-handoff-classify";
1529                     feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
1530                                                  "nat44-out2in-worker-handoff";
1531                   }
1532                 else
1533                   {
1534                     del_feature_name = "nat44-ed-classify";
1535                     feature_name =
1536                       !is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1537                   }
1538
1539                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1540                 if (rv)
1541                   return rv;
1542                 vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1543                                              sw_if_index, 0, 0, 0);
1544                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
1545                                              sw_if_index, 1, 0, 0);
1546               }
1547             else
1548               {
1549                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1550                 if (rv)
1551                   return rv;
1552                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
1553                                              sw_if_index, 0, 0, 0);
1554                 pool_put (sm->interfaces, i);
1555               }
1556           }
1557         else
1558           {
1559             if ((nat_interface_is_inside (i) && is_inside) ||
1560                 (nat_interface_is_outside (i) && !is_inside))
1561               return 0;
1562
1563             if (sm->num_workers > 1)
1564               {
1565                 del_feature_name = !is_inside ? "nat44-in2out-worker-handoff" :
1566                                                 "nat44-out2in-worker-handoff";
1567                 feature_name = "nat44-handoff-classify";
1568               }
1569             else
1570               {
1571                 del_feature_name =
1572                   !is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1573
1574                 feature_name = "nat44-ed-classify";
1575               }
1576
1577             int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1578             if (rv)
1579               return rv;
1580             vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1581                                          sw_if_index, 0, 0, 0);
1582             vnet_feature_enable_disable ("ip4-unicast", feature_name,
1583                                          sw_if_index, 1, 0, 0);
1584             goto set_flags;
1585           }
1586
1587         goto fib;
1588       }
1589   }
1590   /* *INDENT-ON* */
1591
1592   if (is_del)
1593     {
1594       nat_log_err ("error interface couldn't be found");
1595       return VNET_API_ERROR_NO_SUCH_ENTRY;
1596     }
1597
1598   pool_get (sm->interfaces, i);
1599   i->sw_if_index = sw_if_index;
1600   i->flags = 0;
1601   nat_validate_interface_counters (sm, sw_if_index);
1602
1603   vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1, 0,
1604                                0);
1605
1606   int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1607   if (rv)
1608     return rv;
1609
1610 set_flags:
1611   if (is_inside)
1612     {
1613       i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
1614       return 0;
1615     }
1616   else
1617     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
1618
1619   /* Add/delete external addresses to FIB */
1620 fib:
1621   /* *INDENT-OFF* */
1622   vec_foreach (ap, sm->addresses)
1623     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
1624
1625   pool_foreach (m, sm->static_mappings)
1626    {
1627     if (!(is_addr_only_static_mapping(m)) || (m->local_addr.as_u32 == m->external_addr.as_u32))
1628       continue;
1629
1630     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
1631   }
1632   /* *INDENT-ON* */
1633
1634   return 0;
1635 }
1636
1637 int
1638 snat_interface_add_del_output_feature (u32 sw_if_index,
1639                                        u8 is_inside, int is_del)
1640 {
1641   snat_main_t *sm = &snat_main;
1642   snat_interface_t *i;
1643   snat_address_t *ap;
1644   snat_static_mapping_t *m;
1645   nat_outside_fib_t *outside_fib;
1646   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1647                                                        sw_if_index);
1648
1649   if (!sm->enabled)
1650     {
1651       nat_log_err ("nat44 is disabled");
1652       return VNET_API_ERROR_UNSUPPORTED;
1653     }
1654
1655   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
1656     {
1657       nat_log_err ("error unsupported");
1658       return VNET_API_ERROR_UNSUPPORTED;
1659     }
1660
1661   /* *INDENT-OFF* */
1662   pool_foreach (i, sm->interfaces)
1663    {
1664     if (i->sw_if_index == sw_if_index)
1665       {
1666         nat_log_err ("error interface already configured");
1667         return VNET_API_ERROR_VALUE_EXIST;
1668       }
1669   }
1670   /* *INDENT-ON* */
1671
1672   update_per_vrf_sessions_vec (fib_index, is_del);
1673
1674   if (!is_inside)
1675     {
1676       /* *INDENT-OFF* */
1677       vec_foreach (outside_fib, sm->outside_fibs)
1678         {
1679           if (outside_fib->fib_index == fib_index)
1680             {
1681               if (is_del)
1682                 {
1683                   outside_fib->refcount--;
1684                   if (!outside_fib->refcount)
1685                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1686                 }
1687               else
1688                 outside_fib->refcount++;
1689               goto feature_set;
1690             }
1691         }
1692       /* *INDENT-ON* */
1693       if (!is_del)
1694         {
1695           vec_add2 (sm->outside_fibs, outside_fib, 1);
1696           outside_fib->refcount = 1;
1697           outside_fib->fib_index = fib_index;
1698         }
1699     }
1700
1701 feature_set:
1702   if (is_inside)
1703     {
1704           int rv =
1705             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
1706           if (rv)
1707             return rv;
1708           rv =
1709             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
1710                                                             !is_del);
1711           if (rv)
1712             return rv;
1713       goto fq;
1714     }
1715
1716   if (sm->num_workers > 1)
1717     {
1718       int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
1719       if (rv)
1720         return rv;
1721       rv =
1722         ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del);
1723       if (rv)
1724         return rv;
1725       vnet_feature_enable_disable ("ip4-unicast",
1726                                    "nat44-out2in-worker-handoff",
1727                                    sw_if_index, !is_del, 0, 0);
1728       vnet_feature_enable_disable ("ip4-output",
1729                                    "nat44-in2out-output-worker-handoff",
1730                                    sw_if_index, !is_del, 0, 0);
1731     }
1732   else
1733     {
1734           int rv =
1735             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
1736           if (rv)
1737             return rv;
1738           rv =
1739             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
1740                                                             !is_del);
1741           if (rv)
1742             return rv;
1743           vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in",
1744                                        sw_if_index, !is_del, 0, 0);
1745           vnet_feature_enable_disable ("ip4-output", "nat-pre-in2out-output",
1746                                        sw_if_index, !is_del, 0, 0);
1747     }
1748
1749 fq:
1750   if (sm->fq_in2out_output_index == ~0 && sm->num_workers > 1)
1751     sm->fq_in2out_output_index =
1752       vlib_frame_queue_main_init (sm->in2out_output_node_index, 0);
1753
1754   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
1755     sm->fq_out2in_index =
1756       vlib_frame_queue_main_init (sm->out2in_node_index, 0);
1757
1758   /* *INDENT-OFF* */
1759   pool_foreach (i, sm->output_feature_interfaces)
1760    {
1761     if (i->sw_if_index == sw_if_index)
1762       {
1763         if (is_del)
1764           pool_put (sm->output_feature_interfaces, i);
1765         else
1766           return VNET_API_ERROR_VALUE_EXIST;
1767
1768         goto fib;
1769       }
1770   }
1771   /* *INDENT-ON* */
1772
1773   if (is_del)
1774     {
1775       nat_log_err ("error interface couldn't be found");
1776       return VNET_API_ERROR_NO_SUCH_ENTRY;
1777     }
1778
1779   pool_get (sm->output_feature_interfaces, i);
1780   i->sw_if_index = sw_if_index;
1781   i->flags = 0;
1782   nat_validate_interface_counters (sm, sw_if_index);
1783   if (is_inside)
1784     i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
1785   else
1786     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
1787
1788   /* Add/delete external addresses to FIB */
1789 fib:
1790   if (is_inside)
1791     return 0;
1792
1793   /* *INDENT-OFF* */
1794   vec_foreach (ap, sm->addresses)
1795     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
1796
1797   pool_foreach (m, sm->static_mappings)
1798    {
1799     if (!((is_addr_only_static_mapping(m)))  || (m->local_addr.as_u32 == m->external_addr.as_u32))
1800       continue;
1801
1802     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
1803   }
1804   /* *INDENT-ON* */
1805
1806   return 0;
1807 }
1808
1809 int
1810 snat_set_workers (uword * bitmap)
1811 {
1812   snat_main_t *sm = &snat_main;
1813   int i, j = 0;
1814
1815   if (sm->num_workers < 2)
1816     return VNET_API_ERROR_FEATURE_DISABLED;
1817
1818   if (clib_bitmap_last_set (bitmap) >= sm->num_workers)
1819     return VNET_API_ERROR_INVALID_WORKER;
1820
1821   vec_free (sm->workers);
1822   /* *INDENT-OFF* */
1823   clib_bitmap_foreach (i, bitmap)
1824     {
1825       vec_add1(sm->workers, i);
1826       sm->per_thread_data[sm->first_worker_index + i].snat_thread_index = j;
1827       sm->per_thread_data[sm->first_worker_index + i].thread_index = i;
1828       j++;
1829     }
1830   /* *INDENT-ON* */
1831
1832   sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
1833
1834   return 0;
1835 }
1836
1837 int
1838 nat44_ed_set_frame_queue_nelts (u32 frame_queue_nelts)
1839 {
1840   fail_if_enabled ();
1841   snat_main_t *sm = &snat_main;
1842   sm->frame_queue_nelts = frame_queue_nelts;
1843   return 0;
1844 }
1845
1846 static void
1847 snat_update_outside_fib (ip4_main_t * im, uword opaque,
1848                          u32 sw_if_index, u32 new_fib_index,
1849                          u32 old_fib_index)
1850 {
1851   snat_main_t *sm = &snat_main;
1852   nat_outside_fib_t *outside_fib;
1853   snat_interface_t *i;
1854   u8 is_add = 1;
1855   u8 match = 0;
1856
1857   if (!sm->enabled || (new_fib_index == old_fib_index)
1858       || (!vec_len (sm->outside_fibs)))
1859     {
1860       return;
1861     }
1862
1863   /* *INDENT-OFF* */
1864   pool_foreach (i, sm->interfaces)
1865      {
1866       if (i->sw_if_index == sw_if_index)
1867         {
1868           if (!(nat_interface_is_outside (i)))
1869             return;
1870           match = 1;
1871         }
1872     }
1873
1874   pool_foreach (i, sm->output_feature_interfaces)
1875      {
1876       if (i->sw_if_index == sw_if_index)
1877         {
1878           if (!(nat_interface_is_outside (i)))
1879             return;
1880           match = 1;
1881         }
1882     }
1883   /* *INDENT-ON* */
1884
1885   if (!match)
1886     return;
1887
1888   vec_foreach (outside_fib, sm->outside_fibs)
1889   {
1890     if (outside_fib->fib_index == old_fib_index)
1891       {
1892         outside_fib->refcount--;
1893         if (!outside_fib->refcount)
1894           vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1895         break;
1896       }
1897   }
1898
1899   vec_foreach (outside_fib, sm->outside_fibs)
1900   {
1901     if (outside_fib->fib_index == new_fib_index)
1902       {
1903         outside_fib->refcount++;
1904         is_add = 0;
1905         break;
1906       }
1907   }
1908
1909   if (is_add)
1910     {
1911       vec_add2 (sm->outside_fibs, outside_fib, 1);
1912       outside_fib->refcount = 1;
1913       outside_fib->fib_index = new_fib_index;
1914     }
1915 }
1916
1917 static void
1918 snat_update_outside_fib (ip4_main_t * im, uword opaque,
1919                          u32 sw_if_index, u32 new_fib_index,
1920                          u32 old_fib_index);
1921
1922 static void
1923 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
1924                                        uword opaque,
1925                                        u32 sw_if_index,
1926                                        ip4_address_t * address,
1927                                        u32 address_length,
1928                                        u32 if_address_index, u32 is_delete);
1929
1930 static void
1931 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
1932                                  uword opaque,
1933                                  u32 sw_if_index,
1934                                  ip4_address_t * address,
1935                                  u32 address_length,
1936                                  u32 if_address_index, u32 is_delete);
1937
1938 void
1939 test_key_calc_split ()
1940 {
1941   ip4_address_t l_addr;
1942   l_addr.as_u8[0] = 1;
1943   l_addr.as_u8[1] = 1;
1944   l_addr.as_u8[2] = 1;
1945   l_addr.as_u8[3] = 1;
1946   ip4_address_t r_addr;
1947   r_addr.as_u8[0] = 2;
1948   r_addr.as_u8[1] = 2;
1949   r_addr.as_u8[2] = 2;
1950   r_addr.as_u8[3] = 2;
1951   u16 l_port = 40001;
1952   u16 r_port = 40301;
1953   u8 proto = 9;
1954   u32 fib_index = 9000001;
1955   u32 thread_index = 3000000001;
1956   u32 session_index = 3000000221;
1957   clib_bihash_kv_16_8_t kv;
1958   init_ed_kv (&kv, l_addr, l_port, r_addr, r_port, fib_index, proto,
1959               thread_index, session_index);
1960   ip4_address_t l_addr2;
1961   ip4_address_t r_addr2;
1962   clib_memset (&l_addr2, 0, sizeof (l_addr2));
1963   clib_memset (&r_addr2, 0, sizeof (r_addr2));
1964   u16 l_port2 = 0;
1965   u16 r_port2 = 0;
1966   u8 proto2 = 0;
1967   u32 fib_index2 = 0;
1968   split_ed_kv (&kv, &l_addr2, &r_addr2, &proto2, &fib_index2, &l_port2,
1969                &r_port2);
1970   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
1971   ASSERT (r_addr.as_u32 == r_addr2.as_u32);
1972   ASSERT (l_port == l_port2);
1973   ASSERT (r_port == r_port2);
1974   ASSERT (proto == proto2);
1975   ASSERT (fib_index == fib_index2);
1976   ASSERT (thread_index == ed_value_get_thread_index (&kv));
1977   ASSERT (session_index == ed_value_get_session_index (&kv));
1978
1979   fib_index = 7001;
1980   proto = 5;
1981   nat_protocol_t proto3 = ~0;
1982   u64 key = calc_nat_key (l_addr, l_port, fib_index, proto);
1983   split_nat_key (key, &l_addr2, &l_port2, &fib_index2, &proto3);
1984   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
1985   ASSERT (l_port == l_port2);
1986   ASSERT (proto == proto3);
1987   ASSERT (fib_index == fib_index2);
1988 }
1989
1990 static clib_error_t *
1991 nat_ip_table_add_del (vnet_main_t * vnm, u32 table_id, u32 is_add)
1992 {
1993   u32 fib_index;
1994
1995       // TODO: consider removing all NAT interfaces
1996       if (!is_add)
1997         {
1998           fib_index = ip4_fib_index_from_table_id (table_id);
1999           if (fib_index != ~0)
2000             expire_per_vrf_sessions (fib_index);
2001         }
2002   return 0;
2003 }
2004
2005 VNET_IP_TABLE_ADD_DEL_FUNCTION (nat_ip_table_add_del);
2006
2007 void
2008 nat44_set_node_indexes (snat_main_t * sm, vlib_main_t * vm)
2009 {
2010   vlib_node_t *node;
2011
2012   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
2013   sm->out2in_node_index = node->index;
2014
2015   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
2016   sm->in2out_node_index = node->index;
2017
2018   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-output");
2019   sm->in2out_output_node_index = node->index;
2020 }
2021
/*
 * Make sure simple counter 'c' has a slot for index 'i' and reset that slot
 * to zero.
 *
 * The expansion deliberately does NOT end in a semicolon: the caller's own
 * ';' terminates the do/while (0), so the macro behaves like a single
 * statement and can be used as the body of an if/else.
 */
#define nat_validate_simple_counter(c, i)                                     \
  do                                                                          \
    {                                                                         \
      vlib_validate_simple_counter (&(c), (i));                               \
      vlib_zero_simple_counter (&(c), (i));                                   \
    }                                                                         \
  while (0)
2029
/*
 * Name simple counter 'c' ('n' is its name, 'sn' its stat segment name),
 * then validate and zero its slot 0.
 *
 * The expansion deliberately does NOT end in a semicolon: the caller's own
 * ';' terminates the do/while (0), so the macro behaves like a single
 * statement and can be used as the body of an if/else.
 */
#define nat_init_simple_counter(c, n, sn)                                     \
  do                                                                          \
    {                                                                         \
      (c).name = (n);                                                         \
      (c).stat_segment_name = (sn);                                           \
      nat_validate_simple_counter (c, 0);                                     \
    }                                                                         \
  while (0)
2038
2039 static_always_inline void
2040 nat_validate_interface_counters (snat_main_t *sm, u32 sw_if_index)
2041 {
2042 #define _(x)                                                                  \
2043   nat_validate_simple_counter (sm->counters.fastpath.in2out.x, sw_if_index);  \
2044   nat_validate_simple_counter (sm->counters.fastpath.out2in.x, sw_if_index);  \
2045   nat_validate_simple_counter (sm->counters.slowpath.in2out.x, sw_if_index);  \
2046   nat_validate_simple_counter (sm->counters.slowpath.out2in.x, sw_if_index);
2047   foreach_nat_counter;
2048 #undef _
2049   nat_validate_simple_counter (sm->counters.hairpinning, sw_if_index);
2050 }
2051
2052 static clib_error_t *
2053 nat_init (vlib_main_t * vm)
2054 {
2055   snat_main_t *sm = &snat_main;
2056   vlib_thread_main_t *tm = vlib_get_thread_main ();
2057   vlib_thread_registration_t *tr;
2058   ip4_add_del_interface_address_callback_t cbi = { 0 };
2059   ip4_table_bind_callback_t cbt = { 0 };
2060   u32 i, num_threads = 0;
2061   uword *p, *bitmap = 0;
2062
2063   clib_memset (sm, 0, sizeof (*sm));
2064
2065   // required
2066   sm->vnet_main = vnet_get_main ();
2067   // convenience
2068   sm->ip4_main = &ip4_main;
2069   sm->api_main = vlibapi_get_main ();
2070   sm->ip4_lookup_main = &ip4_main.lookup_main;
2071
2072   // frame queue indices used for handoff
2073   sm->fq_out2in_index = ~0;
2074   sm->fq_in2out_index = ~0;
2075   sm->fq_in2out_output_index = ~0;
2076
2077   sm->log_level = NAT_LOG_ERROR;
2078
2079   nat44_set_node_indexes (sm, vm);
2080   sm->log_class = vlib_log_register_class ("nat", 0);
2081   nat_ipfix_logging_init (vm);
2082
2083   nat_init_simple_counter (sm->total_sessions, "total-sessions",
2084                            "/nat44-ed/total-sessions");
2085
2086 #define _(x)                                                                  \
2087   nat_init_simple_counter (sm->counters.fastpath.in2out.x, #x,                \
2088                            "/nat44-ed/in2out/fastpath/" #x);                  \
2089   nat_init_simple_counter (sm->counters.fastpath.out2in.x, #x,                \
2090                            "/nat44-ed/out2in/fastpath/" #x);                  \
2091   nat_init_simple_counter (sm->counters.slowpath.in2out.x, #x,                \
2092                            "/nat44-ed/in2out/slowpath/" #x);                  \
2093   nat_init_simple_counter (sm->counters.slowpath.out2in.x, #x,                \
2094                            "/nat44-ed/out2in/slowpath/" #x);
2095   foreach_nat_counter;
2096 #undef _
2097   nat_init_simple_counter (sm->counters.hairpinning, "hairpinning",
2098                            "/nat44-ed/hairpinning");
2099
2100   p = hash_get_mem (tm->thread_registrations_by_name, "workers");
2101   if (p)
2102     {
2103       tr = (vlib_thread_registration_t *) p[0];
2104       if (tr)
2105         {
2106           sm->num_workers = tr->count;
2107           sm->first_worker_index = tr->first_index;
2108         }
2109     }
2110   num_threads = tm->n_vlib_mains - 1;
2111   sm->port_per_thread = 0xffff - 1024;
2112   vec_validate (sm->per_thread_data, num_threads);
2113
2114   /* Use all available workers by default */
2115   if (sm->num_workers > 1)
2116     {
2117
2118       for (i = 0; i < sm->num_workers; i++)
2119         bitmap = clib_bitmap_set (bitmap, i, 1);
2120       snat_set_workers (bitmap);
2121       clib_bitmap_free (bitmap);
2122     }
2123   else
2124     sm->per_thread_data[0].snat_thread_index = 0;
2125
2126   /* callbacks to call when interface address changes. */
2127   cbi.function = snat_ip4_add_del_interface_address_cb;
2128   vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
2129   cbi.function = nat_ip4_add_del_addr_only_sm_cb;
2130   vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
2131
2132   /* callbacks to call when interface to table biding changes */
2133   cbt.function = snat_update_outside_fib;
2134   vec_add1 (sm->ip4_main->table_bind_callbacks, cbt);
2135
2136   sm->fib_src_low =
2137     fib_source_allocate ("nat-low", FIB_SOURCE_PRIORITY_LOW,
2138                          FIB_SOURCE_BH_SIMPLE);
2139   sm->fib_src_hi =
2140     fib_source_allocate ("nat-hi", FIB_SOURCE_PRIORITY_HI,
2141                          FIB_SOURCE_BH_SIMPLE);
2142
2143   nat_affinity_init (vm);
2144   test_key_calc_split ();
2145
2146   return nat44_api_hookup (vm);
2147 }
2148
2149 VLIB_INIT_FUNCTION (nat_init);
2150
2151 int
2152 nat44_plugin_enable (nat44_config_t c)
2153 {
2154   snat_main_t *sm = &snat_main;
2155
2156   fail_if_enabled ();
2157
2158   // UPDATE based on these appropriate API/CLI
2159   // c.static_mapping_only + c.connection_tracking
2160   //  - supported in NAT EI & NAT ED
2161   // c.out2in_dpo, c.static_mapping_only
2162   //  - supported in NAT EI
2163
2164   if (c.static_mapping_only && !c.connection_tracking)
2165     {
2166       nat_log_err ("unsupported combination of configuration");
2167       return 1;
2168     }
2169
2170   // nat44 feature configuration
2171   sm->static_mapping_only = c.static_mapping_only;
2172   sm->static_mapping_connection_tracking = c.connection_tracking;
2173
2174   sm->forwarding_enabled = 0;
2175   sm->mss_clamping = 0;
2176   sm->pat = (!c.static_mapping_only ||
2177              (c.static_mapping_only && c.connection_tracking));
2178
2179   if (!c.sessions)
2180     c.sessions = 63 * 1024;
2181
2182   sm->max_translations_per_thread = c.sessions;
2183   sm->translation_buckets = nat_calc_bihash_buckets (c.sessions);
2184
2185   // ED only feature
2186   vec_add1 (sm->max_translations_per_fib, sm->max_translations_per_thread);
2187
2188   sm->inside_vrf_id = c.inside_vrf;
2189   sm->inside_fib_index =
2190     fib_table_find_or_create_and_lock
2191     (FIB_PROTOCOL_IP4, c.inside_vrf, sm->fib_src_hi);
2192
2193   sm->outside_vrf_id = c.outside_vrf;
2194   sm->outside_fib_index = fib_table_find_or_create_and_lock (
2195     FIB_PROTOCOL_IP4, c.outside_vrf, sm->fib_src_hi);
2196
2197   sm->worker_in2out_cb = nat44_ed_get_worker_in2out_cb;
2198   sm->worker_out2in_cb = nat44_ed_get_worker_out2in_cb;
2199
2200   nat44_ed_db_init (sm->max_translations_per_thread, sm->translation_buckets);
2201
2202   nat_affinity_enable ();
2203
2204   nat_reset_timeouts (&sm->timeouts);
2205
2206   vlib_zero_simple_counter (&sm->total_sessions, 0);
2207
2208   if (!sm->frame_queue_nelts)
2209     sm->frame_queue_nelts = NAT_FQ_NELTS_DEFAULT;
2210
2211   sm->enabled = 1;
2212   sm->rconfig = c;
2213
2214   return 0;
2215 }
2216
2217 void
2218 nat44_addresses_free (snat_address_t ** addresses)
2219 {
2220   snat_address_t *ap;
2221   vec_foreach (ap, *addresses)
2222     {
2223     #define _(N, i, n, s) \
2224       vec_free (ap->busy_##n##_ports_per_thread);
2225       foreach_nat_protocol
2226     #undef _
2227     }
2228   vec_free (*addresses);
2229   *addresses = 0;
2230 }
2231
2232 int
2233 nat44_plugin_disable ()
2234 {
2235   snat_main_t *sm = &snat_main;
2236   snat_interface_t *i, *vec;
2237   int error = 0;
2238
2239   fail_if_disabled ();
2240
2241   // first unregister all nodes from interfaces
2242   vec = vec_dup (sm->interfaces);
2243   vec_foreach (i, vec)
2244     {
2245       if (nat_interface_is_inside(i))
2246         error = snat_interface_add_del (i->sw_if_index, 1, 1);
2247       if (nat_interface_is_outside(i))
2248         error = snat_interface_add_del (i->sw_if_index, 0, 1);
2249
2250       if (error)
2251         {
2252           nat_log_err ("error occurred while removing interface %u",
2253                        i->sw_if_index);
2254         }
2255     }
2256   vec_free (vec);
2257   sm->interfaces = 0;
2258
2259   vec = vec_dup (sm->output_feature_interfaces);
2260   vec_foreach (i, vec)
2261     {
2262       if (nat_interface_is_inside(i))
2263         error = snat_interface_add_del_output_feature (i->sw_if_index, 1, 1);
2264       if (nat_interface_is_outside(i))
2265         error = snat_interface_add_del_output_feature (i->sw_if_index, 0, 1);
2266
2267       if (error)
2268         {
2269           nat_log_err ("error occurred while removing interface %u",
2270                        i->sw_if_index);
2271         }
2272     }
2273   vec_free (vec);
2274   sm->output_feature_interfaces = 0;
2275
2276   vec_free (sm->max_translations_per_fib);
2277
2278   nat44_ed_db_free ();
2279
2280   nat44_addresses_free (&sm->addresses);
2281   nat44_addresses_free (&sm->twice_nat_addresses);
2282
2283   vec_free (sm->to_resolve);
2284   vec_free (sm->auto_add_sw_if_indices);
2285   vec_free (sm->auto_add_sw_if_indices_twice_nat);
2286
2287   sm->to_resolve = 0;
2288   sm->auto_add_sw_if_indices = 0;
2289   sm->auto_add_sw_if_indices_twice_nat = 0;
2290
2291   sm->forwarding_enabled = 0;
2292
2293   sm->enabled = 0;
2294   clib_memset (&sm->rconfig, 0, sizeof (sm->rconfig));
2295
2296   return 0;
2297 }
2298
2299 void
2300 nat44_ed_forwarding_enable_disable (u8 is_enable)
2301 {
2302   snat_main_per_thread_data_t *tsm;
2303   snat_main_t *sm = &snat_main;
2304   snat_session_t *s;
2305
2306   u32 *ses_to_be_removed = 0, *ses_index;
2307
2308   sm->forwarding_enabled = is_enable != 0;
2309
2310   if (is_enable)
2311     return;
2312
2313   vec_foreach (tsm, sm->per_thread_data)
2314     {
2315       pool_foreach (s, tsm->sessions)
2316         {
2317           if (is_fwd_bypass_session (s))
2318             {
2319               vec_add1 (ses_to_be_removed, s - tsm->sessions);
2320             }
2321         }
2322       vec_foreach (ses_index, ses_to_be_removed)
2323         {
2324           s = pool_elt_at_index (tsm->sessions, ses_index[0]);
2325           nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
2326           nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
2327         }
2328
2329       vec_free (ses_to_be_removed);
2330     }
2331 }
2332
2333 void
2334 snat_free_outside_address_and_port (snat_address_t *addresses,
2335                                     u32 thread_index, ip4_address_t *addr,
2336                                     u16 port, nat_protocol_t protocol)
2337 {
2338   snat_main_t *sm = &snat_main;
2339   snat_address_t *a;
2340   u32 address_index;
2341   u16 port_host_byte_order = clib_net_to_host_u16 (port);
2342
2343   for (address_index = 0; address_index < vec_len (addresses);
2344        address_index++)
2345     {
2346       if (addresses[address_index].addr.as_u32 == addr->as_u32)
2347         break;
2348     }
2349
2350   ASSERT (address_index < vec_len (addresses));
2351
2352   a = addresses + address_index;
2353
2354   switch (protocol)
2355     {
2356 #define _(N, i, n, s) \
2357     case NAT_PROTOCOL_##N: \
2358       ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \
2359       --a->busy_##n##_port_refcounts[port_host_byte_order]; \
2360       a->busy_##n##_ports--; \
2361       a->busy_##n##_ports_per_thread[thread_index]--; \
2362       break;
2363       foreach_nat_protocol
2364 #undef _
2365         default : nat_elog_info (sm, "unknown protocol");
2366       return;
2367     }
2368 }
2369
2370 int
2371 nat_set_outside_address_and_port (snat_address_t *addresses, u32 thread_index,
2372                                   ip4_address_t addr, u16 port,
2373                                   nat_protocol_t protocol)
2374 {
2375   snat_main_t *sm = &snat_main;
2376   snat_address_t *a = 0;
2377   u32 address_index;
2378   u16 port_host_byte_order = clib_net_to_host_u16 (port);
2379
2380   for (address_index = 0; address_index < vec_len (addresses);
2381        address_index++)
2382     {
2383       if (addresses[address_index].addr.as_u32 != addr.as_u32)
2384         continue;
2385
2386       a = addresses + address_index;
2387       switch (protocol)
2388         {
2389 #define _(N, j, n, s) \
2390         case NAT_PROTOCOL_##N: \
2391           if (a->busy_##n##_port_refcounts[port_host_byte_order]) \
2392             return VNET_API_ERROR_INSTANCE_IN_USE; \
2393           ++a->busy_##n##_port_refcounts[port_host_byte_order]; \
2394           a->busy_##n##_ports_per_thread[thread_index]++; \
2395           a->busy_##n##_ports++; \
2396           return 0;
2397           foreach_nat_protocol
2398 #undef _
2399             default : nat_elog_info (sm, "unknown protocol");
2400           return 1;
2401         }
2402     }
2403
2404   return VNET_API_ERROR_NO_SUCH_ENTRY;
2405 }
2406
2407 int
2408 snat_static_mapping_match (snat_main_t * sm,
2409                            ip4_address_t match_addr,
2410                            u16 match_port,
2411                            u32 match_fib_index,
2412                            nat_protocol_t match_protocol,
2413                            ip4_address_t * mapping_addr,
2414                            u16 * mapping_port,
2415                            u32 * mapping_fib_index,
2416                            u8 by_external,
2417                            u8 * is_addr_only,
2418                            twice_nat_type_t * twice_nat,
2419                            lb_nat_type_t * lb, ip4_address_t * ext_host_addr,
2420                            u8 * is_identity_nat, snat_static_mapping_t ** out)
2421 {
2422   clib_bihash_kv_8_8_t kv, value;
2423   clib_bihash_8_8_t *mapping_hash;
2424   snat_static_mapping_t *m;
2425   u32 rand, lo = 0, hi, mid, *tmp = 0, i;
2426   nat44_lb_addr_port_t *local;
2427   u8 backend_index;
2428
2429   if (!by_external)
2430     {
2431       mapping_hash = &sm->static_mapping_by_local;
2432       init_nat_k (&kv, match_addr, match_port, match_fib_index,
2433                   match_protocol);
2434       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2435         {
2436           /* Try address only mapping */
2437           init_nat_k (&kv, match_addr, 0, match_fib_index, 0);
2438           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2439             return 1;
2440         }
2441     }
2442   else
2443     {
2444       mapping_hash = &sm->static_mapping_by_external;
2445       init_nat_k (&kv, match_addr, match_port, 0, match_protocol);
2446       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2447         {
2448           /* Try address only mapping */
2449           init_nat_k (&kv, match_addr, 0, 0, 0);
2450           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2451             return 1;
2452         }
2453     }
2454
2455   m = pool_elt_at_index (sm->static_mappings, value.value);
2456
2457   if (by_external)
2458     {
2459       if (is_lb_static_mapping (m))
2460         {
2461           if (PREDICT_FALSE (lb != 0))
2462             *lb = m->affinity ? AFFINITY_LB_NAT : LB_NAT;
2463           if (m->affinity && !nat_affinity_find_and_lock (ext_host_addr[0],
2464                                                           match_addr,
2465                                                           match_protocol,
2466                                                           match_port,
2467                                                           &backend_index))
2468             {
2469               local = pool_elt_at_index (m->locals, backend_index);
2470               *mapping_addr = local->addr;
2471               *mapping_port = local->port;
2472               *mapping_fib_index = local->fib_index;
2473               goto end;
2474             }
2475           // pick locals matching this worker
2476           if (PREDICT_FALSE (sm->num_workers > 1))
2477             {
2478               u32 thread_index = vlib_get_thread_index ();
2479               pool_foreach_index (i, m->locals)
2480                {
2481                 local = pool_elt_at_index (m->locals, i);
2482
2483                 ip4_header_t ip = {
2484                   .src_address = local->addr,
2485                 };
2486
2487                 if (sm->worker_in2out_cb (0, &ip, m->fib_index, 0) ==
2488                     thread_index)
2489                   {
2490                     vec_add1 (tmp, i);
2491                   }
2492                }
2493               ASSERT (vec_len (tmp) != 0);
2494             }
2495           else
2496             {
2497               pool_foreach_index (i, m->locals)
2498                {
2499                 vec_add1 (tmp, i);
2500               }
2501             }
2502           hi = vec_len (tmp) - 1;
2503           local = pool_elt_at_index (m->locals, tmp[hi]);
2504           rand = 1 + (random_u32 (&sm->random_seed) % local->prefix);
2505           while (lo < hi)
2506             {
2507               mid = ((hi - lo) >> 1) + lo;
2508               local = pool_elt_at_index (m->locals, tmp[mid]);
2509               (rand > local->prefix) ? (lo = mid + 1) : (hi = mid);
2510             }
2511           local = pool_elt_at_index (m->locals, tmp[lo]);
2512           if (!(local->prefix >= rand))
2513             return 1;
2514           *mapping_addr = local->addr;
2515           *mapping_port = local->port;
2516           *mapping_fib_index = local->fib_index;
2517           if (m->affinity)
2518             {
2519               if (nat_affinity_create_and_lock (ext_host_addr[0], match_addr,
2520                                                 match_protocol, match_port,
2521                                                 tmp[lo], m->affinity,
2522                                                 m->affinity_per_service_list_head_index))
2523                 nat_elog_info (sm, "create affinity record failed");
2524             }
2525           vec_free (tmp);
2526         }
2527       else
2528         {
2529           if (PREDICT_FALSE (lb != 0))
2530             *lb = NO_LB_NAT;
2531           *mapping_fib_index = m->fib_index;
2532           *mapping_addr = m->local_addr;
2533           /* Address only mapping doesn't change port */
2534           *mapping_port = is_addr_only_static_mapping (m) ? match_port
2535             : m->local_port;
2536         }
2537     }
2538   else
2539     {
2540       *mapping_addr = m->external_addr;
2541       /* Address only mapping doesn't change port */
2542       *mapping_port = is_addr_only_static_mapping (m) ? match_port
2543         : m->external_port;
2544       *mapping_fib_index = sm->outside_fib_index;
2545     }
2546
2547 end:
2548   if (PREDICT_FALSE (is_addr_only != 0))
2549     *is_addr_only = is_addr_only_static_mapping (m);
2550
2551   if (PREDICT_FALSE (twice_nat != 0))
2552     *twice_nat = m->twice_nat;
2553
2554   if (PREDICT_FALSE (is_identity_nat != 0))
2555     *is_identity_nat = is_identity_static_mapping (m);
2556
2557   if (out != 0)
2558     *out = m;
2559
2560   return 0;
2561 }
2562
2563 static u32
2564 nat44_ed_get_worker_in2out_cb (vlib_buffer_t *b, ip4_header_t *ip,
2565                                u32 rx_fib_index, u8 is_output)
2566 {
2567   snat_main_t *sm = &snat_main;
2568   u32 next_worker_index = sm->first_worker_index;
2569   u32 hash;
2570
2571   clib_bihash_kv_16_8_t kv16, value16;
2572
2573   u32 fib_index = rx_fib_index;
2574   if (PREDICT_FALSE (is_output))
2575     {
2576       fib_index = sm->outside_fib_index;
2577       nat_outside_fib_t *outside_fib;
2578       fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
2579       fib_prefix_t pfx = {
2580         .fp_proto = FIB_PROTOCOL_IP4,
2581         .fp_len = 32,
2582         .fp_addr = {
2583                     .ip4.as_u32 = ip->dst_address.as_u32,
2584                     }
2585         ,
2586       };
2587
2588       switch (vec_len (sm->outside_fibs))
2589         {
2590         case 0:
2591           fib_index = sm->outside_fib_index;
2592           break;
2593         case 1:
2594           fib_index = sm->outside_fibs[0].fib_index;
2595           break;
2596         default:
2597             /* *INDENT-OFF* */
2598             vec_foreach (outside_fib, sm->outside_fibs)
2599               {
2600                 fei = fib_table_lookup (outside_fib->fib_index, &pfx);
2601                 if (FIB_NODE_INDEX_INVALID != fei)
2602                   {
2603                     if (fib_entry_get_resolving_interface (fei) != ~0)
2604                       {
2605                         fib_index = outside_fib->fib_index;
2606                         break;
2607                       }
2608                   }
2609               }
2610             /* *INDENT-ON* */
2611           break;
2612         }
2613     }
2614
2615   if (b)
2616     {
2617       init_ed_k (&kv16, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
2618                  ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
2619                  fib_index, ip->protocol);
2620
2621       if (PREDICT_TRUE (
2622             !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
2623         {
2624           next_worker_index = ed_value_get_thread_index (&value16);
2625           vnet_buffer2 (b)->nat.cached_session_index =
2626             ed_value_get_session_index (&value16);
2627           goto out;
2628         }
2629     }
2630
2631   hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
2632     (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
2633
2634   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
2635     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
2636   else
2637     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
2638
2639 out:
2640   if (PREDICT_TRUE (!is_output))
2641     {
2642       nat_elog_debug_handoff (sm, "HANDOFF IN2OUT", next_worker_index,
2643                               rx_fib_index,
2644                               clib_net_to_host_u32 (ip->src_address.as_u32),
2645                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2646     }
2647   else
2648     {
2649       nat_elog_debug_handoff (sm, "HANDOFF IN2OUT-OUTPUT-FEATURE",
2650                               next_worker_index, rx_fib_index,
2651                               clib_net_to_host_u32 (ip->src_address.as_u32),
2652                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2653     }
2654
2655   return next_worker_index;
2656 }
2657
2658 static u32
2659 nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip,
2660                                u32 rx_fib_index, u8 is_output)
2661 {
2662   snat_main_t *sm = &snat_main;
2663   clib_bihash_kv_8_8_t kv, value;
2664   clib_bihash_kv_16_8_t kv16, value16;
2665   snat_main_per_thread_data_t *tsm;
2666
2667   u32 proto, next_worker_index = 0;
2668   udp_header_t *udp;
2669   u16 port;
2670   snat_static_mapping_t *m;
2671   u32 hash;
2672
2673   proto = ip_proto_to_nat_proto (ip->protocol);
2674
2675   if (PREDICT_TRUE (proto == NAT_PROTOCOL_UDP || proto == NAT_PROTOCOL_TCP))
2676     {
2677       udp = ip4_next_header (ip);
2678
2679       init_ed_k (&kv16, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
2680                  ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
2681                  rx_fib_index, ip->protocol);
2682
2683       if (PREDICT_TRUE (
2684             !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
2685         {
2686           tsm =
2687             vec_elt_at_index (sm->per_thread_data,
2688                               ed_value_get_thread_index (&value16));
2689           vnet_buffer2 (b)->nat.cached_session_index =
2690             ed_value_get_session_index (&value16);
2691           next_worker_index = sm->first_worker_index + tsm->thread_index;
2692           nat_elog_debug_handoff (
2693             sm, "HANDOFF OUT2IN (session)", next_worker_index, rx_fib_index,
2694             clib_net_to_host_u32 (ip->src_address.as_u32),
2695             clib_net_to_host_u32 (ip->dst_address.as_u32));
2696           return next_worker_index;
2697         }
2698     }
2699   else if (proto == NAT_PROTOCOL_ICMP)
2700     {
2701       ip4_address_t lookup_saddr, lookup_daddr;
2702       u16 lookup_sport, lookup_dport;
2703       u8 lookup_protocol;
2704       if (!nat_get_icmp_session_lookup_values (
2705             b, ip, &lookup_saddr, &lookup_sport, &lookup_daddr, &lookup_dport,
2706             &lookup_protocol))
2707         {
2708           init_ed_k (&kv16, lookup_saddr, lookup_sport, lookup_daddr,
2709                      lookup_dport, rx_fib_index, lookup_protocol);
2710           if (PREDICT_TRUE (
2711                 !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
2712             {
2713               tsm =
2714                 vec_elt_at_index (sm->per_thread_data,
2715                                   ed_value_get_thread_index (&value16));
2716               next_worker_index = sm->first_worker_index + tsm->thread_index;
2717               nat_elog_debug_handoff (
2718                 sm, "HANDOFF OUT2IN (session)", next_worker_index,
2719                 rx_fib_index, clib_net_to_host_u32 (ip->src_address.as_u32),
2720                 clib_net_to_host_u32 (ip->dst_address.as_u32));
2721               return next_worker_index;
2722             }
2723         }
2724     }
2725
2726   /* first try static mappings without port */
2727   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
2728     {
2729       init_nat_k (&kv, ip->dst_address, 0, 0, 0);
2730       if (!clib_bihash_search_8_8
2731           (&sm->static_mapping_by_external, &kv, &value))
2732         {
2733           m = pool_elt_at_index (sm->static_mappings, value.value);
2734           next_worker_index = m->workers[0];
2735           goto done;
2736         }
2737     }
2738
2739   /* unknown protocol */
2740   if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
2741     {
2742       /* use current thread */
2743       next_worker_index = vlib_get_thread_index ();
2744       goto done;
2745     }
2746
2747   udp = ip4_next_header (ip);
2748   port = vnet_buffer (b)->ip.reass.l4_dst_port;
2749
2750   if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
2751     {
2752       icmp46_header_t *icmp = (icmp46_header_t *) udp;
2753       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
2754       if (!icmp_type_is_error_message
2755           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
2756         port = vnet_buffer (b)->ip.reass.l4_src_port;
2757       else
2758         {
2759           /* if error message, then it's not fragmented and we can access it */
2760           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
2761           proto = ip_proto_to_nat_proto (inner_ip->protocol);
2762           void *l4_header = ip4_next_header (inner_ip);
2763           switch (proto)
2764             {
2765             case NAT_PROTOCOL_ICMP:
2766               icmp = (icmp46_header_t *) l4_header;
2767               echo = (icmp_echo_header_t *) (icmp + 1);
2768               port = echo->identifier;
2769               break;
2770             case NAT_PROTOCOL_UDP:
2771             case NAT_PROTOCOL_TCP:
2772               port = ((tcp_udp_header_t *) l4_header)->src_port;
2773               break;
2774             default:
2775               next_worker_index = vlib_get_thread_index ();
2776               goto done;
2777             }
2778         }
2779     }
2780
2781   /* try static mappings with port */
2782   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
2783     {
2784       init_nat_k (&kv, ip->dst_address, port, 0, proto);
2785       if (!clib_bihash_search_8_8
2786           (&sm->static_mapping_by_external, &kv, &value))
2787         {
2788           m = pool_elt_at_index (sm->static_mappings, value.value);
2789           if (!is_lb_static_mapping (m))
2790             {
2791               next_worker_index = m->workers[0];
2792               goto done;
2793             }
2794
2795           hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
2796             (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
2797
2798           if (PREDICT_TRUE (is_pow2 (_vec_len (m->workers))))
2799             next_worker_index =
2800               m->workers[hash & (_vec_len (m->workers) - 1)];
2801           else
2802             next_worker_index = m->workers[hash % _vec_len (m->workers)];
2803           goto done;
2804         }
2805     }
2806
2807   /* worker by outside port */
2808   next_worker_index = sm->first_worker_index;
2809   next_worker_index +=
2810     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
2811
2812 done:
2813   nat_elog_debug_handoff (sm, "HANDOFF OUT2IN", next_worker_index,
2814                           rx_fib_index,
2815                           clib_net_to_host_u32 (ip->src_address.as_u32),
2816                           clib_net_to_host_u32 (ip->dst_address.as_u32));
2817   return next_worker_index;
2818 }
2819
2820 u32
2821 nat44_get_max_session_limit ()
2822 {
2823   snat_main_t *sm = &snat_main;
2824   u32 max_limit = 0, len = 0;
2825
2826   for (; len < vec_len (sm->max_translations_per_fib); len++)
2827     {
2828       if (max_limit < sm->max_translations_per_fib[len])
2829         max_limit = sm->max_translations_per_fib[len];
2830     }
2831   return max_limit;
2832 }
2833
2834 int
2835 nat44_set_session_limit (u32 session_limit, u32 vrf_id)
2836 {
2837   snat_main_t *sm = &snat_main;
2838   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
2839   u32 len = vec_len (sm->max_translations_per_fib);
2840
2841   if (len <= fib_index)
2842     {
2843       vec_validate (sm->max_translations_per_fib, fib_index + 1);
2844
2845       for (; len < vec_len (sm->max_translations_per_fib); len++)
2846         sm->max_translations_per_fib[len] = sm->max_translations_per_thread;
2847     }
2848
2849   sm->max_translations_per_fib[fib_index] = session_limit;
2850   return 0;
2851 }
2852
2853 int
2854 nat44_update_session_limit (u32 session_limit, u32 vrf_id)
2855 {
2856   snat_main_t *sm = &snat_main;
2857
2858   if (nat44_set_session_limit (session_limit, vrf_id))
2859     return 1;
2860   sm->max_translations_per_thread = nat44_get_max_session_limit ();
2861
2862   sm->translation_buckets =
2863     nat_calc_bihash_buckets (sm->max_translations_per_thread);
2864
2865   nat44_ed_sessions_clear ();
2866   return 0;
2867 }
2868
2869 static void
2870 nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations,
2871                          u32 translation_buckets)
2872 {
2873   dlist_elt_t *head;
2874
2875   pool_alloc (tsm->sessions, translations);
2876   pool_alloc (tsm->lru_pool, translations);
2877
2878   pool_get (tsm->lru_pool, head);
2879   tsm->tcp_trans_lru_head_index = head - tsm->lru_pool;
2880   clib_dlist_init (tsm->lru_pool, tsm->tcp_trans_lru_head_index);
2881
2882   pool_get (tsm->lru_pool, head);
2883   tsm->tcp_estab_lru_head_index = head - tsm->lru_pool;
2884   clib_dlist_init (tsm->lru_pool, tsm->tcp_estab_lru_head_index);
2885
2886   pool_get (tsm->lru_pool, head);
2887   tsm->udp_lru_head_index = head - tsm->lru_pool;
2888   clib_dlist_init (tsm->lru_pool, tsm->udp_lru_head_index);
2889
2890   pool_get (tsm->lru_pool, head);
2891   tsm->icmp_lru_head_index = head - tsm->lru_pool;
2892   clib_dlist_init (tsm->lru_pool, tsm->icmp_lru_head_index);
2893
2894   pool_get (tsm->lru_pool, head);
2895   tsm->unk_proto_lru_head_index = head - tsm->lru_pool;
2896   clib_dlist_init (tsm->lru_pool, tsm->unk_proto_lru_head_index);
2897 }
2898
2899 static void
2900 reinit_ed_flow_hash ()
2901 {
2902   snat_main_t *sm = &snat_main;
2903   // we expect 2 flows per session, so multiply translation_buckets by 2
2904   clib_bihash_init_16_8 (
2905     &sm->flow_hash, "ed-flow-hash",
2906     clib_max (1, sm->num_workers) * 2 * sm->translation_buckets, 0);
2907   clib_bihash_set_kvp_format_fn_16_8 (&sm->flow_hash, format_ed_session_kvp);
2908 }
2909
2910 static void
2911 nat44_ed_db_init (u32 translations, u32 translation_buckets)
2912 {
2913   snat_main_t *sm = &snat_main;
2914   snat_main_per_thread_data_t *tsm;
2915   u32 static_mapping_buckets = 1024;
2916   u32 static_mapping_memory_size = 64 << 20;
2917
2918   reinit_ed_flow_hash ();
2919
2920   clib_bihash_init_8_8 (&sm->static_mapping_by_local,
2921                         "static_mapping_by_local", static_mapping_buckets,
2922                         static_mapping_memory_size);
2923   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_local,
2924                                      format_static_mapping_kvp);
2925
2926   clib_bihash_init_8_8 (&sm->static_mapping_by_external,
2927                         "static_mapping_by_external", static_mapping_buckets,
2928                         static_mapping_memory_size);
2929   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_external,
2930                                      format_static_mapping_kvp);
2931
2932   if (sm->pat)
2933     {
2934       vec_foreach (tsm, sm->per_thread_data)
2935         {
2936           nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
2937                                    sm->translation_buckets);
2938         }
2939     }
2940 }
2941
2942 static void
2943 nat44_ed_worker_db_free (snat_main_per_thread_data_t *tsm)
2944 {
2945   pool_free (tsm->lru_pool);
2946   pool_free (tsm->sessions);
2947   vec_free (tsm->per_vrf_sessions_vec);
2948 }
2949
2950 static void
2951 nat44_ed_db_free ()
2952 {
2953   snat_main_t *sm = &snat_main;
2954   snat_main_per_thread_data_t *tsm;
2955
2956   pool_free (sm->static_mappings);
2957   clib_bihash_free_16_8 (&sm->flow_hash);
2958   clib_bihash_free_8_8 (&sm->static_mapping_by_local);
2959   clib_bihash_free_8_8 (&sm->static_mapping_by_external);
2960
2961   if (sm->pat)
2962     {
2963       vec_foreach (tsm, sm->per_thread_data)
2964         {
2965           nat44_ed_worker_db_free (tsm);
2966         }
2967     }
2968 }
2969
2970 void
2971 nat44_ed_sessions_clear ()
2972 {
2973   snat_main_t *sm = &snat_main;
2974   snat_main_per_thread_data_t *tsm;
2975
2976   reinit_ed_flow_hash ();
2977
2978   if (sm->pat)
2979     {
2980       vec_foreach (tsm, sm->per_thread_data)
2981         {
2982
2983           nat44_ed_worker_db_free (tsm);
2984           nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
2985                                    sm->translation_buckets);
2986         }
2987     }
2988   vlib_zero_simple_counter (&sm->total_sessions, 0);
2989 }
2990
2991 static void
2992 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
2993                                  uword opaque,
2994                                  u32 sw_if_index,
2995                                  ip4_address_t * address,
2996                                  u32 address_length,
2997                                  u32 if_address_index, u32 is_delete)
2998 {
2999   snat_main_t *sm = &snat_main;
3000   snat_static_map_resolve_t *rp;
3001   snat_static_mapping_t *m;
3002   clib_bihash_kv_8_8_t kv, value;
3003   int i, rv;
3004   ip4_address_t l_addr;
3005
3006   if (!sm->enabled)
3007     return;
3008
3009   for (i = 0; i < vec_len (sm->to_resolve); i++)
3010     {
3011       rp = sm->to_resolve + i;
3012       if (rp->addr_only == 0)
3013         continue;
3014       if (rp->sw_if_index == sw_if_index)
3015         goto match;
3016     }
3017
3018   return;
3019
3020 match:
3021   init_nat_k (&kv, *address, rp->addr_only ? 0 : rp->e_port,
3022               sm->outside_fib_index, rp->addr_only ? 0 : rp->proto);
3023   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
3024     m = 0;
3025   else
3026     m = pool_elt_at_index (sm->static_mappings, value.value);
3027
3028   if (!is_delete)
3029     {
3030       /* Don't trip over lease renewal, static config */
3031       if (m)
3032         return;
3033     }
3034   else
3035     {
3036       if (!m)
3037         return;
3038     }
3039
3040   /* Indetity mapping? */
3041   if (rp->l_addr.as_u32 == 0)
3042     l_addr.as_u32 = address[0].as_u32;
3043   else
3044     l_addr.as_u32 = rp->l_addr.as_u32;
3045   /* Add the static mapping */
3046   rv = snat_add_static_mapping (l_addr,
3047                                 address[0],
3048                                 rp->l_port,
3049                                 rp->e_port,
3050                                 rp->vrf_id,
3051                                 rp->addr_only, ~0 /* sw_if_index */ ,
3052                                 rp->proto, !is_delete, rp->twice_nat,
3053                                 rp->out2in_only, rp->tag, rp->identity_nat,
3054                                 rp->pool_addr, rp->exact);
3055   if (rv)
3056     nat_elog_notice_X1 (sm, "snat_add_static_mapping returned %d", "i4", rv);
3057 }
3058
3059 static void
3060 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
3061                                        uword opaque,
3062                                        u32 sw_if_index,
3063                                        ip4_address_t * address,
3064                                        u32 address_length,
3065                                        u32 if_address_index, u32 is_delete)
3066 {
3067   snat_main_t *sm = &snat_main;
3068   snat_static_map_resolve_t *rp;
3069   ip4_address_t l_addr;
3070   int i, j;
3071   int rv;
3072   u8 twice_nat = 0;
3073   snat_address_t *addresses = sm->addresses;
3074
3075   if (!sm->enabled)
3076     return;
3077
3078   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices); i++)
3079     {
3080       if (sw_if_index == sm->auto_add_sw_if_indices[i])
3081         goto match;
3082     }
3083
3084   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices_twice_nat); i++)
3085     {
3086       twice_nat = 1;
3087       addresses = sm->twice_nat_addresses;
3088       if (sw_if_index == sm->auto_add_sw_if_indices_twice_nat[i])
3089         goto match;
3090     }
3091
3092   return;
3093
3094 match:
3095   if (!is_delete)
3096     {
3097       /* Don't trip over lease renewal, static config */
3098       for (j = 0; j < vec_len (addresses); j++)
3099         if (addresses[j].addr.as_u32 == address->as_u32)
3100           return;
3101
3102       (void) snat_add_address (sm, address, ~0, twice_nat);
3103       /* Scan static map resolution vector */
3104       for (j = 0; j < vec_len (sm->to_resolve); j++)
3105         {
3106           rp = sm->to_resolve + j;
3107           if (rp->addr_only)
3108             continue;
3109           /* On this interface? */
3110           if (rp->sw_if_index == sw_if_index)
3111             {
3112               /* Indetity mapping? */
3113               if (rp->l_addr.as_u32 == 0)
3114                 l_addr.as_u32 = address[0].as_u32;
3115               else
3116                 l_addr.as_u32 = rp->l_addr.as_u32;
3117               /* Add the static mapping */
3118               rv = snat_add_static_mapping (
3119                 l_addr, address[0], rp->l_port, rp->e_port, rp->vrf_id,
3120                 rp->addr_only, ~0 /* sw_if_index */, rp->proto, 1,
3121                 rp->twice_nat, rp->out2in_only, rp->tag, rp->identity_nat,
3122                 rp->pool_addr, rp->exact);
3123               if (rv)
3124                 nat_elog_notice_X1 (sm, "snat_add_static_mapping returned %d",
3125                                     "i4", rv);
3126             }
3127         }
3128       return;
3129     }
3130   else
3131     {
3132       (void) snat_del_address (sm, address[0], 1, twice_nat);
3133       return;
3134     }
3135 }
3136
3137 int
3138 snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del,
3139                             u8 twice_nat)
3140 {
3141   ip4_main_t *ip4_main = sm->ip4_main;
3142   ip4_address_t *first_int_addr;
3143   snat_static_map_resolve_t *rp;
3144   u32 *indices_to_delete = 0;
3145   int i, j;
3146   u32 *auto_add_sw_if_indices =
3147     twice_nat ? sm->
3148     auto_add_sw_if_indices_twice_nat : sm->auto_add_sw_if_indices;
3149
3150   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0        /* just want the address */
3151     );
3152
3153   for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
3154     {
3155       if (auto_add_sw_if_indices[i] == sw_if_index)
3156         {
3157           if (is_del)
3158             {
3159               /* if have address remove it */
3160               if (first_int_addr)
3161                 (void) snat_del_address (sm, first_int_addr[0], 1, twice_nat);
3162               else
3163                 {
3164                   for (j = 0; j < vec_len (sm->to_resolve); j++)
3165                     {
3166                       rp = sm->to_resolve + j;
3167                       if (rp->sw_if_index == sw_if_index)
3168                         vec_add1 (indices_to_delete, j);
3169                     }
3170                   if (vec_len (indices_to_delete))
3171                     {
3172                       for (j = vec_len (indices_to_delete) - 1; j >= 0; j--)
3173                         vec_del1 (sm->to_resolve, j);
3174                       vec_free (indices_to_delete);
3175                     }
3176                 }
3177               if (twice_nat)
3178                 vec_del1 (sm->auto_add_sw_if_indices_twice_nat, i);
3179               else
3180                 vec_del1 (sm->auto_add_sw_if_indices, i);
3181             }
3182           else
3183             return VNET_API_ERROR_VALUE_EXIST;
3184
3185           return 0;
3186         }
3187     }
3188
3189   if (is_del)
3190     return VNET_API_ERROR_NO_SUCH_ENTRY;
3191
3192   /* add to the auto-address list */
3193   if (twice_nat)
3194     vec_add1 (sm->auto_add_sw_if_indices_twice_nat, sw_if_index);
3195   else
3196     vec_add1 (sm->auto_add_sw_if_indices, sw_if_index);
3197
3198   /* If the address is already bound - or static - add it now */
3199   if (first_int_addr)
3200     (void) snat_add_address (sm, first_int_addr, ~0, twice_nat);
3201
3202   return 0;
3203 }
3204
3205 int
3206 nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
3207                       ip4_address_t * eh_addr, u16 eh_port, u8 proto,
3208                       u32 vrf_id, int is_in)
3209 {
3210   ip4_header_t ip;
3211   clib_bihash_kv_16_8_t kv, value;
3212   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
3213   snat_session_t *s;
3214   snat_main_per_thread_data_t *tsm;
3215
3216   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
3217   if (sm->num_workers > 1)
3218     tsm = vec_elt_at_index (sm->per_thread_data,
3219                             sm->worker_in2out_cb (0, &ip, fib_index, 0));
3220   else
3221     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
3222
3223   init_ed_k (&kv, *addr, port, *eh_addr, eh_port, fib_index, proto);
3224   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
3225     {
3226       return VNET_API_ERROR_NO_SUCH_ENTRY;
3227     }
3228
3229   if (pool_is_free_index (tsm->sessions, ed_value_get_session_index (&value)))
3230     return VNET_API_ERROR_UNSPECIFIED;
3231   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
3232   nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
3233   nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
3234   return 0;
3235 }
3236
/*
 * Node function of "nat-default". It does not touch the frame and always
 * reports zero processed packets.
 */
VLIB_NODE_FN (nat_default_node) (vlib_main_t * vm,
                                 vlib_node_runtime_t * node,
                                 vlib_frame_t * frame)
{
  return 0;
}
3243
/*
 * Registration of the "nat-default" internal node. It declares no errors
 * and no trace formatter; its next-node table maps every NAT_NEXT_* index
 * to the graph node of that name.
 */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat_default_node) = {
  .name = "nat-default",
  .vector_size = sizeof (u32),
  .format_trace = 0,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = 0,
  .n_next_nodes = NAT_N_NEXT,
  .next_nodes = {
    [NAT_NEXT_DROP] = "error-drop",
    [NAT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    [NAT_NEXT_IN2OUT_ED_FAST_PATH] = "nat44-ed-in2out",
    [NAT_NEXT_IN2OUT_ED_SLOW_PATH] = "nat44-ed-in2out-slowpath",
    [NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH] = "nat44-ed-in2out-output",
    [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
    [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in",
    [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath",
    [NAT_NEXT_IN2OUT_CLASSIFY] = "nat44-in2out-worker-handoff",
    [NAT_NEXT_OUT2IN_CLASSIFY] = "nat44-out2in-worker-handoff",
  },
};
/* *INDENT-ON* */
3266
3267 void
3268 nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f)
3269 {
3270   f->l3_csum_delta = 0;
3271   f->l4_csum_delta = 0;
3272   if (f->ops & NAT_FLOW_OP_SADDR_REWRITE &&
3273       f->rewrite.saddr.as_u32 != f->match.saddr.as_u32)
3274     {
3275       f->l3_csum_delta =
3276         ip_csum_add_even (f->l3_csum_delta, f->rewrite.saddr.as_u32);
3277       f->l3_csum_delta =
3278         ip_csum_sub_even (f->l3_csum_delta, f->match.saddr.as_u32);
3279     }
3280   else
3281     {
3282       f->rewrite.saddr.as_u32 = f->match.saddr.as_u32;
3283     }
3284   if (f->ops & NAT_FLOW_OP_DADDR_REWRITE &&
3285       f->rewrite.daddr.as_u32 != f->match.daddr.as_u32)
3286     {
3287       f->l3_csum_delta =
3288         ip_csum_add_even (f->l3_csum_delta, f->rewrite.daddr.as_u32);
3289       f->l3_csum_delta =
3290         ip_csum_sub_even (f->l3_csum_delta, f->match.daddr.as_u32);
3291     }
3292   else
3293     {
3294       f->rewrite.daddr.as_u32 = f->match.daddr.as_u32;
3295     }
3296   if (f->ops & NAT_FLOW_OP_SPORT_REWRITE && f->rewrite.sport != f->match.sport)
3297     {
3298       f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.sport);
3299       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport);
3300     }
3301   else
3302     {
3303       f->rewrite.sport = f->match.sport;
3304     }
3305   if (f->ops & NAT_FLOW_OP_DPORT_REWRITE && f->rewrite.dport != f->match.dport)
3306     {
3307       f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.dport);
3308       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.dport);
3309     }
3310   else
3311     {
3312       f->rewrite.dport = f->match.dport;
3313     }
3314   if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE &&
3315       f->rewrite.icmp_id != f->match.sport)
3316     {
3317       f->l4_csum_delta =
3318         ip_csum_add_even (f->l4_csum_delta, f->rewrite.icmp_id);
3319       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport);
3320     }
3321   else
3322     {
3323       f->rewrite.icmp_id = f->match.sport;
3324     }
3325   if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3326     {
3327     }
3328   else
3329     {
3330       f->rewrite.fib_index = f->match.fib_index;
3331     }
3332 }
3333
3334 static_always_inline int nat_6t_flow_icmp_translate (snat_main_t *sm,
3335                                                      vlib_buffer_t *b,
3336                                                      ip4_header_t *ip,
3337                                                      nat_6t_flow_t *f);
3338
3339 static_always_inline void
3340 nat_6t_flow_ip4_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
3341                            nat_6t_flow_t *f, nat_protocol_t proto,
3342                            int is_icmp_inner_ip4)
3343 {
3344   udp_header_t *udp = ip4_next_header (ip);
3345   tcp_header_t *tcp = (tcp_header_t *) udp;
3346
3347   if ((NAT_PROTOCOL_TCP == proto || NAT_PROTOCOL_UDP == proto) &&
3348       !vnet_buffer (b)->ip.reass.is_non_first_fragment)
3349     {
3350       if (!is_icmp_inner_ip4)
3351         { // regular case
3352           ip->src_address = f->rewrite.saddr;
3353           ip->dst_address = f->rewrite.daddr;
3354           udp->src_port = f->rewrite.sport;
3355           udp->dst_port = f->rewrite.dport;
3356         }
3357       else
3358         { // icmp inner ip4 - reversed saddr/daddr
3359           ip->src_address = f->rewrite.daddr;
3360           ip->dst_address = f->rewrite.saddr;
3361           udp->src_port = f->rewrite.dport;
3362           udp->dst_port = f->rewrite.sport;
3363         }
3364
3365       if (NAT_PROTOCOL_TCP == proto)
3366         {
3367           ip_csum_t tcp_sum = tcp->checksum;
3368           tcp_sum = ip_csum_sub_even (tcp_sum, f->l3_csum_delta);
3369           tcp_sum = ip_csum_sub_even (tcp_sum, f->l4_csum_delta);
3370           mss_clamping (sm->mss_clamping, tcp, &tcp_sum);
3371           tcp->checksum = ip_csum_fold (tcp_sum);
3372         }
3373       else if (proto == NAT_PROTOCOL_UDP && udp->checksum)
3374         {
3375           ip_csum_t udp_sum = udp->checksum;
3376           udp_sum = ip_csum_sub_even (udp_sum, f->l3_csum_delta);
3377           udp_sum = ip_csum_sub_even (udp_sum, f->l4_csum_delta);
3378           udp->checksum = ip_csum_fold (udp_sum);
3379         }
3380     }
3381   else
3382     {
3383       if (!is_icmp_inner_ip4)
3384         { // regular case
3385           ip->src_address = f->rewrite.saddr;
3386           ip->dst_address = f->rewrite.daddr;
3387         }
3388       else
3389         { // icmp inner ip4 - reversed saddr/daddr
3390           ip->src_address = f->rewrite.daddr;
3391           ip->dst_address = f->rewrite.saddr;
3392         }
3393     }
3394
3395   ip_csum_t ip_sum = ip->checksum;
3396   ip_sum = ip_csum_sub_even (ip_sum, f->l3_csum_delta);
3397   ip->checksum = ip_csum_fold (ip_sum);
3398   ASSERT (ip->checksum == ip4_header_checksum (ip));
3399 }
3400
3401 static_always_inline int
3402 nat_6t_flow_icmp_translate (snat_main_t *sm, vlib_buffer_t *b,
3403                             ip4_header_t *ip, nat_6t_flow_t *f)
3404 {
3405   if (IP_PROTOCOL_ICMP != ip->protocol)
3406     return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3407
3408   icmp46_header_t *icmp = ip4_next_header (ip);
3409   icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3410
3411   if ((!vnet_buffer (b)->ip.reass.is_non_first_fragment))
3412     {
3413       if (icmp->checksum == 0)
3414         icmp->checksum = 0xffff;
3415
3416       if (!icmp_type_is_error_message (icmp->type))
3417         {
3418           if ((f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE) &&
3419               (f->rewrite.icmp_id != echo->identifier))
3420             {
3421               ip_csum_t sum = icmp->checksum;
3422               sum = ip_csum_update (sum, echo->identifier, f->rewrite.icmp_id,
3423                                     icmp_echo_header_t,
3424                                     identifier /* changed member */);
3425               echo->identifier = f->rewrite.icmp_id;
3426               icmp->checksum = ip_csum_fold (sum);
3427             }
3428         }
3429       else
3430         {
3431           // errors are not fragmented
3432           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3433
3434           if (!ip4_header_checksum_is_valid (inner_ip))
3435             {
3436               return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3437             }
3438
3439           nat_protocol_t inner_proto =
3440             ip_proto_to_nat_proto (inner_ip->protocol);
3441
3442           ip_csum_t icmp_sum = icmp->checksum;
3443
3444           switch (inner_proto)
3445             {
3446             case NAT_PROTOCOL_UDP:
3447             case NAT_PROTOCOL_TCP:
3448               nat_6t_flow_ip4_translate (sm, b, inner_ip, f, inner_proto,
3449                                          1 /* is_icmp_inner_ip4 */);
3450               icmp_sum = ip_csum_sub_even (icmp_sum, f->l3_csum_delta);
3451               icmp->checksum = ip_csum_fold (icmp_sum);
3452               break;
3453             case NAT_PROTOCOL_ICMP:
3454               if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
3455                 {
3456                   icmp46_header_t *inner_icmp = ip4_next_header (inner_ip);
3457                   icmp_echo_header_t *inner_echo =
3458                     (icmp_echo_header_t *) (inner_icmp + 1);
3459                   if (f->rewrite.icmp_id != inner_echo->identifier)
3460                     {
3461                       ip_csum_t sum = icmp->checksum;
3462                       sum = ip_csum_update (
3463                         sum, inner_echo->identifier, f->rewrite.icmp_id,
3464                         icmp_echo_header_t, identifier /* changed member */);
3465                       icmp->checksum = ip_csum_fold (sum);
3466                       ip_csum_t inner_sum = inner_icmp->checksum;
3467                       inner_sum = ip_csum_update (
3468                         sum, inner_echo->identifier, f->rewrite.icmp_id,
3469                         icmp_echo_header_t, identifier /* changed member */);
3470                       inner_icmp->checksum = ip_csum_fold (inner_sum);
3471                       inner_echo->identifier = f->rewrite.icmp_id;
3472                     }
3473                 }
3474               break;
3475             default:
3476               clib_warning ("unexpected NAT protocol value `%d'", inner_proto);
3477               return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3478             }
3479         }
3480     }
3481   return NAT_ED_TRNSL_ERR_SUCCESS;
3482 }
3483
3484 nat_translation_error_e
3485 nat_6t_flow_buf_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
3486                            nat_6t_flow_t *f, nat_protocol_t proto,
3487                            int is_output_feature)
3488 {
3489   if (!is_output_feature && f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3490     {
3491       vnet_buffer (b)->sw_if_index[VLIB_TX] = f->rewrite.fib_index;
3492     }
3493
3494   nat_6t_flow_ip4_translate (sm, b, ip, f, proto, 0 /* is_icmp_inner_ip4 */);
3495
3496   if (NAT_PROTOCOL_ICMP == proto)
3497     {
3498       return nat_6t_flow_icmp_translate (sm, b, ip, f);
3499     }
3500
3501   return NAT_ED_TRNSL_ERR_SUCCESS;
3502 }
3503
3504 u8 *
3505 format_nat_6t (u8 *s, va_list *args)
3506 {
3507   nat_6t_t *t = va_arg (*args, nat_6t_t *);
3508
3509   s = format (s, "saddr %U sport %u daddr %U dport %u proto %U fib_idx %u",
3510               format_ip4_address, t->saddr.as_u8,
3511               clib_net_to_host_u16 (t->sport), format_ip4_address,
3512               t->daddr.as_u8, clib_net_to_host_u16 (t->dport),
3513               format_ip_protocol, t->proto, t->fib_index);
3514   return s;
3515 }
3516
3517 u8 *
3518 format_nat_ed_translation_error (u8 *s, va_list *args)
3519 {
3520   nat_translation_error_e e = va_arg (*args, nat_translation_error_e);
3521
3522   switch (e)
3523     {
3524     case NAT_ED_TRNSL_ERR_SUCCESS:
3525       s = format (s, "success");
3526       break;
3527     case NAT_ED_TRNSL_ERR_TRANSLATION_FAILED:
3528       s = format (s, "translation-failed");
3529       break;
3530     case NAT_ED_TRNSL_ERR_FLOW_MISMATCH:
3531       s = format (s, "flow-mismatch");
3532       break;
3533     }
3534   return s;
3535 }
3536
3537 u8 *
3538 format_nat_6t_flow (u8 *s, va_list *args)
3539 {
3540   nat_6t_flow_t *f = va_arg (*args, nat_6t_flow_t *);
3541
3542   s = format (s, "match: %U ", format_nat_6t, &f->match);
3543   int r = 0;
3544   if (f->ops & NAT_FLOW_OP_SADDR_REWRITE)
3545     {
3546       s = format (s, "rewrite: saddr %U ", format_ip4_address,
3547                   f->rewrite.saddr.as_u8);
3548       r = 1;
3549     }
3550   if (f->ops & NAT_FLOW_OP_SPORT_REWRITE)
3551     {
3552       if (!r)
3553         {
3554           s = format (s, "rewrite: ");
3555           r = 1;
3556         }
3557       s = format (s, "sport %u ", clib_net_to_host_u16 (f->rewrite.sport));
3558     }
3559   if (f->ops & NAT_FLOW_OP_DADDR_REWRITE)
3560     {
3561       if (!r)
3562         {
3563           s = format (s, "rewrite: ");
3564           r = 1;
3565         }
3566       s = format (s, "daddr %U ", format_ip4_address, f->rewrite.daddr.as_u8);
3567     }
3568   if (f->ops & NAT_FLOW_OP_DPORT_REWRITE)
3569     {
3570       if (!r)
3571         {
3572           s = format (s, "rewrite: ");
3573           r = 1;
3574         }
3575       s = format (s, "dport %u ", clib_net_to_host_u16 (f->rewrite.dport));
3576     }
3577   if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
3578     {
3579       if (!r)
3580         {
3581           s = format (s, "rewrite: ");
3582           r = 1;
3583         }
3584       s = format (s, "icmp-id %u ", clib_net_to_host_u16 (f->rewrite.icmp_id));
3585     }
3586   if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3587     {
3588       if (!r)
3589         {
3590           s = format (s, "rewrite: ");
3591           r = 1;
3592         }
3593       s = format (s, "txfib %u ", f->rewrite.fib_index);
3594     }
3595   return s;
3596 }
3597
3598 /*
3599  * fd.io coding-style-patch-verification: ON
3600  *
3601  * Local Variables:
3602  * eval: (c-set-style "gnu")
3603  * End:
3604  */