nat: NAT44-ED unused value removal
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed.c
1 /*
 * nat44_ed.c - simple nat plugin
3  *
4  * Copyright (c) 2016 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vpp/app/version.h>
19
20 #include <vnet/vnet.h>
21 #include <vnet/ip/ip.h>
22 #include <vnet/ip/ip4.h>
23 #include <vnet/ip/ip_table.h>
24 #include <vnet/ip/reass/ip4_sv_reass.h>
25 #include <vnet/fib/fib_table.h>
26 #include <vnet/fib/ip4_fib.h>
27 #include <vnet/plugin/plugin.h>
28 #include <vppinfra/bihash_16_8.h>
29
30 #include <nat/lib/log.h>
31 #include <nat/lib/nat_syslog.h>
32 #include <nat/lib/nat_inlines.h>
33 #include <nat/lib/ipfix_logging.h>
34
35 #include <nat/nat44-ed/nat44_ed.h>
36 #include <nat/nat44-ed/nat44_ed_affinity.h>
37 #include <nat/nat44-ed/nat44_ed_inlines.h>
38
/* Global NAT plugin state; the code in this file accesses it via &snat_main. */
snat_main_t snat_main;

/* Forward declaration; the definition is not in this part of the file. */
static_always_inline void nat_validate_interface_counters (snat_main_t *sm,
                                                           u32 sw_if_index);
43
/*
 * Guard for functions returning void: expands to a bare `return;` out of the
 * enclosing function when snat_main.enabled is zero.
 */
#define skip_if_disabled()                                                    \
  do                                                                          \
    {                                                                         \
      snat_main_t *sm = &snat_main;                                           \
      if (PREDICT_FALSE (!sm->enabled))                                       \
        return;                                                               \
    }                                                                         \
  while (0)
52
/*
 * Guard for functions returning an integer status: when snat_main.enabled is
 * non-zero, logs "plugin enabled" and makes the enclosing function return 1.
 */
#define fail_if_enabled()                                                     \
  do                                                                          \
    {                                                                         \
      snat_main_t *sm = &snat_main;                                           \
      if (PREDICT_FALSE (sm->enabled))                                        \
        {                                                                     \
          nat_log_err ("plugin enabled");                                     \
          return 1;                                                           \
        }                                                                     \
    }                                                                         \
  while (0)
64
/*
 * Guard for functions returning an integer status: when snat_main.enabled is
 * zero, logs "plugin disabled" and makes the enclosing function return 1.
 */
#define fail_if_disabled()                                                    \
  do                                                                          \
    {                                                                         \
      snat_main_t *sm = &snat_main;                                           \
      if (PREDICT_FALSE (!sm->enabled))                                       \
        {                                                                     \
          nat_log_err ("plugin disabled");                                    \
          return 1;                                                           \
        }                                                                     \
    }                                                                         \
  while (0)
76
77 /* Hook up input features */
78 VNET_FEATURE_INIT (nat_pre_in2out, static) = {
79   .arc_name = "ip4-unicast",
80   .node_name = "nat-pre-in2out",
81   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
82                                "ip4-sv-reassembly-feature"),
83 };
84 VNET_FEATURE_INIT (nat_pre_out2in, static) = {
85   .arc_name = "ip4-unicast",
86   .node_name = "nat-pre-out2in",
87   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
88                                "ip4-dhcp-client-detect",
89                                "ip4-sv-reassembly-feature"),
90 };
91 VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = {
92   .arc_name = "ip4-unicast",
93   .node_name = "nat44-in2out-worker-handoff",
94   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
95 };
96 VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = {
97   .arc_name = "ip4-unicast",
98   .node_name = "nat44-out2in-worker-handoff",
99   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
100                                "ip4-dhcp-client-detect"),
101 };
102 VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
103   .arc_name = "ip4-unicast",
104   .node_name = "nat44-in2out",
105   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
106 };
107 VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
108   .arc_name = "ip4-unicast",
109   .node_name = "nat44-out2in",
110   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
111                                "ip4-dhcp-client-detect"),
112 };
113 VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = {
114   .arc_name = "ip4-unicast",
115   .node_name = "nat44-ed-in2out",
116   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
117 };
118 VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = {
119   .arc_name = "ip4-unicast",
120   .node_name = "nat44-ed-out2in",
121   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
122                                "ip4-dhcp-client-detect"),
123 };
124 VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
125   .arc_name = "ip4-unicast",
126   .node_name = "nat44-ed-classify",
127   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
128 };
129 VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
130   .arc_name = "ip4-unicast",
131   .node_name = "nat44-handoff-classify",
132   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
133 };
134 VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
135   .arc_name = "ip4-unicast",
136   .node_name = "nat44-in2out-fast",
137   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
138 };
139 VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
140   .arc_name = "ip4-unicast",
141   .node_name = "nat44-out2in-fast",
142   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
143                                "ip4-dhcp-client-detect"),
144 };
145
146 /* Hook up output features */
147 VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = {
148   .arc_name = "ip4-output",
149   .node_name = "nat44-in2out-output",
150   .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
151 };
152 VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
153   .arc_name = "ip4-output",
154   .node_name = "nat44-in2out-output-worker-handoff",
155   .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
156 };
157 VNET_FEATURE_INIT (nat_pre_in2out_output, static) = {
158   .arc_name = "ip4-output",
159   .node_name = "nat-pre-in2out-output",
160   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
161   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
162 };
163 VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = {
164   .arc_name = "ip4-output",
165   .node_name = "nat44-ed-in2out-output",
166   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
167   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
168 };
169
170 VLIB_PLUGIN_REGISTER () = {
171     .version = VPP_BUILD_VER,
172     .description = "Network Address Translation (NAT)",
173 };
174
175 static void nat44_ed_db_init (u32 translations, u32 translation_buckets);
176
177 static void nat44_ed_db_free ();
178
179 u32 nat_calc_bihash_buckets (u32 n_elts);
180
181 u8 *
182 format_session_kvp (u8 * s, va_list * args)
183 {
184   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
185
186   s = format (s, "%U thread-index %llu session-index %llu", format_snat_key,
187               v->key, nat_value_get_thread_index (v),
188               nat_value_get_session_index (v));
189
190   return s;
191 }
192
193 u8 *
194 format_static_mapping_kvp (u8 * s, va_list * args)
195 {
196   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
197
198   s = format (s, "%U static-mapping-index %llu",
199               format_snat_key, v->key, v->value);
200
201   return s;
202 }
203
204 u8 *
205 format_ed_session_kvp (u8 * s, va_list * args)
206 {
207   clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
208
209   u8 proto;
210   u16 r_port, l_port;
211   ip4_address_t l_addr, r_addr;
212   u32 fib_index;
213
214   split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port);
215   s = format (s,
216               "local %U:%d remote %U:%d proto %U fib %d thread-index %u "
217               "session-index %u",
218               format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port),
219               format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port),
220               format_ip_protocol, proto, fib_index,
221               ed_value_get_thread_index (v), ed_value_get_session_index (v));
222
223   return s;
224 }
225
226 void
227 nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index,
228                        u8 is_ha)
229 {
230       per_vrf_sessions_unregister_session (s, thread_index);
231
232       if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 0))
233         nat_elog_warn (sm, "flow hash del failed");
234
235       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0))
236         nat_elog_warn (sm, "flow hash del failed");
237
238   if (is_fwd_bypass_session (s))
239     {
240       return;
241     }
242
243       if (is_affinity_sessions (s))
244         nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
245                              s->nat_proto, s->out2in.port);
246
247       if (!is_ha)
248         nat_syslog_nat44_sdel (
249           0, s->in2out.fib_index, &s->in2out.addr, s->in2out.port,
250           &s->ext_host_nat_addr, s->ext_host_nat_port, &s->out2in.addr,
251           s->out2in.port, &s->ext_host_addr, s->ext_host_port, s->nat_proto,
252           is_twice_nat_session (s));
253
254   if (snat_is_unk_proto_session (s))
255     return;
256
257   if (!is_ha)
258     {
259       /* log NAT event */
260       nat_ipfix_logging_nat44_ses_delete (thread_index,
261                                           s->in2out.addr.as_u32,
262                                           s->out2in.addr.as_u32,
263                                           s->nat_proto,
264                                           s->in2out.port,
265                                           s->out2in.port,
266                                           s->in2out.fib_index);
267     }
268
269   /* Twice NAT address and port for external host */
270   if (is_twice_nat_session (s))
271     {
272       snat_free_outside_address_and_port (sm->twice_nat_addresses,
273                                           thread_index,
274                                           &s->ext_host_nat_addr,
275                                           s->ext_host_nat_port, s->nat_proto);
276     }
277
278   if (snat_is_session_static (s))
279     return;
280
281   snat_free_outside_address_and_port (sm->addresses, thread_index,
282                                       &s->out2in.addr, s->out2in.port,
283                                       s->nat_proto);
284 }
285
286 void
287 snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
288                           int is_add)
289 {
290   snat_main_t *sm = &snat_main;
291   fib_prefix_t prefix = {
292     .fp_len = p_len,
293     .fp_proto = FIB_PROTOCOL_IP4,
294     .fp_addr = {
295                 .ip4.as_u32 = addr->as_u32,
296                 },
297   };
298   u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
299
300   if (is_add)
301     fib_table_entry_update_one_path (fib_index,
302                                      &prefix,
303                                      sm->fib_src_low,
304                                      (FIB_ENTRY_FLAG_CONNECTED |
305                                       FIB_ENTRY_FLAG_LOCAL |
306                                       FIB_ENTRY_FLAG_EXCLUSIVE),
307                                      DPO_PROTO_IP4,
308                                      NULL,
309                                      sw_if_index,
310                                      ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
311   else
312     fib_table_entry_delete (fib_index, &prefix, sm->fib_src_low);
313 }
314
315 int
316 snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id,
317                   u8 twice_nat)
318 {
319   snat_address_t *ap;
320   snat_interface_t *i;
321   vlib_thread_main_t *tm = vlib_get_thread_main ();
322
323   /* Check if address already exists */
324   vec_foreach (ap, twice_nat ? sm->twice_nat_addresses : sm->addresses)
325     {
326       if (ap->addr.as_u32 == addr->as_u32)
327         {
328           nat_log_err ("address exist");
329           return VNET_API_ERROR_VALUE_EXIST;
330         }
331     }
332
333   if (twice_nat)
334     vec_add2 (sm->twice_nat_addresses, ap, 1);
335   else
336     vec_add2 (sm->addresses, ap, 1);
337
338   ap->addr = *addr;
339   if (vrf_id != ~0)
340     ap->fib_index =
341       fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
342                                          sm->fib_src_low);
343   else
344     ap->fib_index = ~0;
345
346   #define _(N, i, n, s) \
347     clib_memset(ap->busy_##n##_port_refcounts, 0, sizeof(ap->busy_##n##_port_refcounts));\
348     ap->busy_##n##_ports = 0; \
349     ap->busy_##n##_ports_per_thread = 0;\
350     vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
351     foreach_nat_protocol
352   #undef _
353
354   if (twice_nat)
355     return 0;
356
357   /* Add external address to FIB */
358   pool_foreach (i, sm->interfaces)
359    {
360      if (nat_interface_is_inside (i))
361        continue;
362
363      snat_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
364      break;
365   }
366   pool_foreach (i, sm->output_feature_interfaces)
367    {
368      if (nat_interface_is_inside (i))
369        continue;
370
371      snat_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
372      break;
373   }
374
375   return 0;
376 }
377
378 static int
379 is_snat_address_used_in_static_mapping (snat_main_t * sm, ip4_address_t addr)
380 {
381   snat_static_mapping_t *m;
382   pool_foreach (m, sm->static_mappings)
383    {
384       if (is_addr_only_static_mapping (m) ||
385           is_out2in_only_static_mapping (m) ||
386           is_identity_static_mapping (m))
387         continue;
388       if (m->external_addr.as_u32 == addr.as_u32)
389         return 1;
390   }
391
392   return 0;
393 }
394
395 static void
396 snat_add_static_mapping_when_resolved (snat_main_t *sm, ip4_address_t l_addr,
397                                        u16 l_port, u32 sw_if_index, u16 e_port,
398                                        u32 vrf_id, nat_protocol_t proto,
399                                        int addr_only, u8 *tag, int twice_nat,
400                                        int out2in_only, int identity_nat,
401                                        ip4_address_t pool_addr, int exact)
402 {
403   snat_static_map_resolve_t *rp;
404
405   vec_add2 (sm->to_resolve, rp, 1);
406   rp->l_addr.as_u32 = l_addr.as_u32;
407   rp->l_port = l_port;
408   rp->sw_if_index = sw_if_index;
409   rp->e_port = e_port;
410   rp->vrf_id = vrf_id;
411   rp->proto = proto;
412   rp->addr_only = addr_only;
413   rp->twice_nat = twice_nat;
414   rp->out2in_only = out2in_only;
415   rp->identity_nat = identity_nat;
416   rp->tag = vec_dup (tag);
417   rp->pool_addr = pool_addr;
418   rp->exact = exact;
419 }
420
421 u32
422 get_thread_idx_by_port (u16 e_port)
423 {
424   snat_main_t *sm = &snat_main;
425   u32 thread_idx = sm->num_workers;
426   if (sm->num_workers > 1)
427     {
428       thread_idx =
429         sm->first_worker_index +
430         sm->workers[(e_port - 1024) / sm->port_per_thread];
431     }
432   return thread_idx;
433 }
434
435 void
436 nat_ed_static_mapping_del_sessions (snat_main_t * sm,
437                                     snat_main_per_thread_data_t * tsm,
438                                     ip4_address_t l_addr,
439                                     u16 l_port,
440                                     u8 protocol,
441                                     u32 fib_index, int addr_only,
442                                     ip4_address_t e_addr, u16 e_port)
443 {
444   snat_session_t *s;
445   u32 *indexes_to_free = NULL;
446   pool_foreach (s, tsm->sessions) {
447     if (s->in2out.fib_index != fib_index ||
448         s->in2out.addr.as_u32 != l_addr.as_u32)
449       {
450         continue;
451       }
452     if (!addr_only)
453       {
454         if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
455             s->out2in.port != e_port ||
456             s->in2out.port != l_port ||
457             s->nat_proto != protocol)
458           continue;
459       }
460
461     if (is_lb_session (s))
462       continue;
463     if (!snat_is_session_static (s))
464       continue;
465     nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
466     vec_add1 (indexes_to_free, s - tsm->sessions);
467     if (!addr_only)
468       break;
469   }
470   u32 *ses_index;
471   vec_foreach (ses_index, indexes_to_free)
472   {
473     s = pool_elt_at_index (tsm->sessions, *ses_index);
474     nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
475   }
476   vec_free (indexes_to_free);
477 }
478
479 int
480 snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
481                          u16 l_port, u16 e_port, u32 vrf_id, int addr_only,
482                          u32 sw_if_index, nat_protocol_t proto, int is_add,
483                          twice_nat_type_t twice_nat, u8 out2in_only, u8 *tag,
484                          u8 identity_nat, ip4_address_t pool_addr, int exact)
485 {
486   snat_main_t *sm = &snat_main;
487   snat_static_mapping_t *m;
488   clib_bihash_kv_8_8_t kv, value;
489   snat_address_t *a = 0;
490   u32 fib_index = ~0;
491   snat_interface_t *interface;
492   snat_main_per_thread_data_t *tsm;
493   snat_static_map_resolve_t *rp, *rp_match = 0;
494   nat44_lb_addr_port_t *local;
495   u32 find = ~0;
496   int i;
497
498   /* If the external address is a specific interface address */
499   if (sw_if_index != ~0)
500     {
501       ip4_address_t *first_int_addr;
502
503       for (i = 0; i < vec_len (sm->to_resolve); i++)
504         {
505           rp = sm->to_resolve + i;
506           if (rp->sw_if_index != sw_if_index ||
507               rp->l_addr.as_u32 != l_addr.as_u32 ||
508               rp->vrf_id != vrf_id || rp->addr_only != addr_only)
509             continue;
510
511           if (!addr_only)
512             {
513               if ((rp->l_port != l_port && rp->e_port != e_port)
514                   || rp->proto != proto)
515                 continue;
516             }
517
518           rp_match = rp;
519           break;
520         }
521
522       /* Might be already set... */
523       first_int_addr = ip4_interface_first_address
524         (sm->ip4_main, sw_if_index, 0 /* just want the address */ );
525
526       if (is_add)
527         {
528           if (rp_match)
529             return VNET_API_ERROR_VALUE_EXIST;
530
531           snat_add_static_mapping_when_resolved (
532             sm, l_addr, l_port, sw_if_index, e_port, vrf_id, proto, addr_only,
533             tag, twice_nat, out2in_only, identity_nat, pool_addr, exact);
534
535           /* DHCP resolution required? */
536           if (first_int_addr == 0)
537             {
538               return 0;
539             }
540           else
541             {
542               e_addr.as_u32 = first_int_addr->as_u32;
543               /* Identity mapping? */
544               if (l_addr.as_u32 == 0)
545                 l_addr.as_u32 = e_addr.as_u32;
546             }
547         }
548       else
549         {
550           if (!rp_match)
551             return VNET_API_ERROR_NO_SUCH_ENTRY;
552
553           vec_del1 (sm->to_resolve, i);
554
555           if (first_int_addr)
556             {
557               e_addr.as_u32 = first_int_addr->as_u32;
558               /* Identity mapping? */
559               if (l_addr.as_u32 == 0)
560                 l_addr.as_u32 = e_addr.as_u32;
561             }
562           else
563             return 0;
564         }
565     }
566
567   init_nat_k (&kv, e_addr, addr_only ? 0 : e_port, 0, addr_only ? 0 : proto);
568   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
569     m = 0;
570   else
571     m = pool_elt_at_index (sm->static_mappings, value.value);
572
573   if (is_add)
574     {
575       if (m)
576         {
577           if (is_identity_static_mapping (m))
578             {
579               pool_foreach (local, m->locals)
580                {
581                 if (local->vrf_id == vrf_id)
582                   return VNET_API_ERROR_VALUE_EXIST;
583               }
584               pool_get (m->locals, local);
585               local->vrf_id = vrf_id;
586               local->fib_index =
587                 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
588                                                    sm->fib_src_low);
589               init_nat_kv (&kv, m->local_addr, m->local_port, local->fib_index,
590                            m->proto, 0, m - sm->static_mappings);
591               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
592               return 0;
593             }
594           else
595             return VNET_API_ERROR_VALUE_EXIST;
596         }
597
598       if (twice_nat && addr_only)
599         return VNET_API_ERROR_UNSUPPORTED;
600
601       /* Convert VRF id to FIB index */
602       if (vrf_id != ~0)
603         fib_index =
604           fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
605                                              sm->fib_src_low);
606       /* If not specified use inside VRF id from SNAT plugin startup config */
607       else
608         {
609           fib_index = sm->inside_fib_index;
610           vrf_id = sm->inside_vrf_id;
611           fib_table_lock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
612         }
613
614       if (!(out2in_only || identity_nat))
615         {
616           init_nat_k (&kv, l_addr, addr_only ? 0 : l_port, fib_index,
617                       addr_only ? 0 : proto);
618           if (!clib_bihash_search_8_8
619               (&sm->static_mapping_by_local, &kv, &value))
620             return VNET_API_ERROR_VALUE_EXIST;
621         }
622
623       /* Find external address in allocated addresses and reserve port for
624          address and port pair mapping when dynamic translations enabled */
625       if (!(addr_only || sm->static_mapping_only || out2in_only))
626         {
627           for (i = 0; i < vec_len (sm->addresses); i++)
628             {
629               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
630                 {
631                   a = sm->addresses + i;
632                   /* External port must be unused */
633                   switch (proto)
634                     {
635 #define _(N, j, n, s) \
636                     case NAT_PROTOCOL_##N: \
637                       if (a->busy_##n##_port_refcounts[e_port]) \
638                         return VNET_API_ERROR_INVALID_VALUE; \
639                       ++a->busy_##n##_port_refcounts[e_port]; \
640                       if (e_port > 1024) \
641                         { \
642                           a->busy_##n##_ports++; \
643                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
644                         } \
645                       break;
646                       foreach_nat_protocol
647 #undef _
648                         default : nat_elog_info (sm, "unknown protocol");
649                       return VNET_API_ERROR_INVALID_VALUE_2;
650                     }
651                   break;
652                 }
653             }
654           /* External address must be allocated */
655           if (!a && (l_addr.as_u32 != e_addr.as_u32))
656             {
657               if (sw_if_index != ~0)
658                 {
659                   for (i = 0; i < vec_len (sm->to_resolve); i++)
660                     {
661                       rp = sm->to_resolve + i;
662                       if (rp->addr_only)
663                         continue;
664                       if (rp->sw_if_index != sw_if_index &&
665                           rp->l_addr.as_u32 != l_addr.as_u32 &&
666                           rp->vrf_id != vrf_id && rp->l_port != l_port &&
667                           rp->e_port != e_port && rp->proto != proto)
668                         continue;
669
670                       vec_del1 (sm->to_resolve, i);
671                       break;
672                     }
673                 }
674               return VNET_API_ERROR_NO_SUCH_ENTRY;
675             }
676         }
677
678       pool_get (sm->static_mappings, m);
679       clib_memset (m, 0, sizeof (*m));
680       m->tag = vec_dup (tag);
681       m->local_addr = l_addr;
682       m->external_addr = e_addr;
683       m->twice_nat = twice_nat;
684
685       if (twice_nat == TWICE_NAT && exact)
686         {
687           m->flags |= NAT_STATIC_MAPPING_FLAG_EXACT_ADDRESS;
688           m->pool_addr = pool_addr;
689         }
690
691       if (out2in_only)
692         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
693       if (addr_only)
694         m->flags |= NAT_STATIC_MAPPING_FLAG_ADDR_ONLY;
695       if (identity_nat)
696         {
697           m->flags |= NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT;
698           pool_get (m->locals, local);
699           local->vrf_id = vrf_id;
700           local->fib_index = fib_index;
701         }
702       else
703         {
704           m->vrf_id = vrf_id;
705           m->fib_index = fib_index;
706         }
707       if (!addr_only)
708         {
709           m->local_port = l_port;
710           m->external_port = e_port;
711           m->proto = proto;
712         }
713
714       if (sm->num_workers > 1)
715         {
716           ip4_header_t ip = {
717             .src_address = m->local_addr,
718           };
719           vec_add1 (m->workers, nat44_ed_get_in2out_worker_index (
720                                   0, &ip, m->fib_index, 0));
721           tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
722         }
723       else
724         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
725
726       if (!out2in_only)
727         {
728           init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto,
729                        0, m - sm->static_mappings);
730           clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
731         }
732
733       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
734                    m - sm->static_mappings);
735       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1);
736     }
737   else
738     {
739       if (!m)
740         {
741           if (sw_if_index != ~0)
742             return 0;
743           else
744             return VNET_API_ERROR_NO_SUCH_ENTRY;
745         }
746
747       if (identity_nat)
748         {
749           if (vrf_id == ~0)
750             vrf_id = sm->inside_vrf_id;
751
752           pool_foreach (local, m->locals)
753            {
754             if (local->vrf_id == vrf_id)
755               find = local - m->locals;
756           }
757           if (find == ~0)
758             return VNET_API_ERROR_NO_SUCH_ENTRY;
759
760           local = pool_elt_at_index (m->locals, find);
761           fib_index = local->fib_index;
762           pool_put (m->locals, local);
763         }
764       else
765         fib_index = m->fib_index;
766
767       /* Free external address port */
768       if (!(addr_only || sm->static_mapping_only || out2in_only))
769         {
770           for (i = 0; i < vec_len (sm->addresses); i++)
771             {
772               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
773                 {
774                   a = sm->addresses + i;
775                   switch (proto)
776                     {
777 #define _(N, j, n, s) \
778                     case NAT_PROTOCOL_##N: \
779                       --a->busy_##n##_port_refcounts[e_port]; \
780                       if (e_port > 1024) \
781                         { \
782                           a->busy_##n##_ports--; \
783                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
784                         } \
785                       break;
786                       foreach_nat_protocol
787 #undef _
788                         default : nat_elog_info (sm, "unknown protocol");
789                       return VNET_API_ERROR_INVALID_VALUE_2;
790                     }
791                   break;
792                 }
793             }
794         }
795
796       if (sm->num_workers > 1)
797         tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
798       else
799         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
800
801       init_nat_k (&kv, m->local_addr, m->local_port, fib_index, m->proto);
802       if (!out2in_only)
803         clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0);
804
805       /* Delete session(s) for static mapping if exist */
806       if (!(sm->static_mapping_only) ||
807           (sm->static_mapping_only && sm->static_mapping_connection_tracking))
808         {
809           nat_ed_static_mapping_del_sessions (
810             sm, tsm, m->local_addr, m->local_port, m->proto, fib_index,
811             addr_only, e_addr, e_port);
812         }
813
814       fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
815       if (pool_elts (m->locals))
816         return 0;
817
818       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
819       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0);
820
821       vec_free (m->tag);
822       vec_free (m->workers);
823       /* Delete static mapping from pool */
824       pool_put (sm->static_mappings, m);
825     }
826
827   if (!addr_only || (l_addr.as_u32 == e_addr.as_u32))
828     return 0;
829
830   /* Add/delete external address to FIB */
831   pool_foreach (interface, sm->interfaces)
832    {
833      if (nat_interface_is_inside (interface))
834        continue;
835
836      snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, is_add);
837      break;
838   }
839   pool_foreach (interface, sm->output_feature_interfaces)
840    {
841      if (nat_interface_is_inside (interface))
842        continue;
843
844      snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, is_add);
845      break;
846   }
847
848   return 0;
849 }
850
851 int
852 nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
853                                  nat_protocol_t proto,
854                                  nat44_lb_addr_port_t * locals, u8 is_add,
855                                  twice_nat_type_t twice_nat, u8 out2in_only,
856                                  u8 * tag, u32 affinity)
857 {
858   snat_main_t *sm = &snat_main;
859   snat_static_mapping_t *m;
860   clib_bihash_kv_8_8_t kv, value;
861   snat_address_t *a = 0;
862   int i;
863   nat44_lb_addr_port_t *local;
864   snat_main_per_thread_data_t *tsm;
865   snat_session_t *s;
866   uword *bitmap = 0;
867
868   init_nat_k (&kv, e_addr, e_port, 0, proto);
869   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
870     m = 0;
871   else
872     m = pool_elt_at_index (sm->static_mappings, value.value);
873
874   if (is_add)
875     {
876       if (m)
877         return VNET_API_ERROR_VALUE_EXIST;
878
879       if (vec_len (locals) < 2)
880         return VNET_API_ERROR_INVALID_VALUE;
881
882       /* Find external address in allocated addresses and reserve port for
883          address and port pair mapping when dynamic translations enabled */
884       if (!(sm->static_mapping_only || out2in_only))
885         {
886           for (i = 0; i < vec_len (sm->addresses); i++)
887             {
888               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
889                 {
890                   a = sm->addresses + i;
891                   /* External port must be unused */
892                   switch (proto)
893                     {
894 #define _(N, j, n, s) \
895                     case NAT_PROTOCOL_##N: \
896                       if (a->busy_##n##_port_refcounts[e_port]) \
897                         return VNET_API_ERROR_INVALID_VALUE; \
898                       ++a->busy_##n##_port_refcounts[e_port]; \
899                       if (e_port > 1024) \
900                         { \
901                           a->busy_##n##_ports++; \
902                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
903                         } \
904                       break;
905                       foreach_nat_protocol
906 #undef _
907                         default : nat_elog_info (sm, "unknown protocol");
908                       return VNET_API_ERROR_INVALID_VALUE_2;
909                     }
910                   break;
911                 }
912             }
913           /* External address must be allocated */
914           if (!a)
915             return VNET_API_ERROR_NO_SUCH_ENTRY;
916         }
917
918       pool_get (sm->static_mappings, m);
919       clib_memset (m, 0, sizeof (*m));
920       m->tag = vec_dup (tag);
921       m->external_addr = e_addr;
922       m->external_port = e_port;
923       m->proto = proto;
924       m->twice_nat = twice_nat;
925       m->flags |= NAT_STATIC_MAPPING_FLAG_LB;
926       if (out2in_only)
927         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
928       m->affinity = affinity;
929
930       if (affinity)
931         m->affinity_per_service_list_head_index =
932           nat_affinity_get_per_service_list_head_index ();
933       else
934         m->affinity_per_service_list_head_index = ~0;
935
936       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
937                    m - sm->static_mappings);
938       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1))
939         {
940           nat_elog_err (sm, "static_mapping_by_external key add failed");
941           return VNET_API_ERROR_UNSPECIFIED;
942         }
943
944       for (i = 0; i < vec_len (locals); i++)
945         {
946           locals[i].fib_index =
947             fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
948                                                locals[i].vrf_id,
949                                                sm->fib_src_low);
950           if (!out2in_only)
951             {
952               init_nat_kv (&kv, locals[i].addr, locals[i].port,
953                            locals[i].fib_index, m->proto, 0,
954                            m - sm->static_mappings);
955               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
956             }
957           locals[i].prefix = (i == 0) ? locals[i].probability :
958             (locals[i - 1].prefix + locals[i].probability);
959           pool_get (m->locals, local);
960           *local = locals[i];
961           if (sm->num_workers > 1)
962             {
963               ip4_header_t ip = {
964                 .src_address = locals[i].addr,
965               };
966               bitmap = clib_bitmap_set (
967                 bitmap,
968                 nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index, 0), 1);
969             }
970         }
971
972       /* Assign workers */
973       if (sm->num_workers > 1)
974         {
975           clib_bitmap_foreach (i, bitmap)
976              {
977                vec_add1(m->workers, i);
978             }
979         }
980     }
981   else
982     {
983       if (!m)
984         return VNET_API_ERROR_NO_SUCH_ENTRY;
985
986       if (!is_lb_static_mapping (m))
987         return VNET_API_ERROR_INVALID_VALUE;
988
989       /* Free external address port */
990       if (!(sm->static_mapping_only || out2in_only))
991         {
992           for (i = 0; i < vec_len (sm->addresses); i++)
993             {
994               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
995                 {
996                   a = sm->addresses + i;
997                   switch (proto)
998                     {
999 #define _(N, j, n, s) \
1000                     case NAT_PROTOCOL_##N: \
1001                       --a->busy_##n##_port_refcounts[e_port]; \
1002                       if (e_port > 1024) \
1003                         { \
1004                           a->busy_##n##_ports--; \
1005                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1006                         } \
1007                       break;
1008                       foreach_nat_protocol
1009 #undef _
1010                         default : nat_elog_info (sm, "unknown protocol");
1011                       return VNET_API_ERROR_INVALID_VALUE_2;
1012                     }
1013                   break;
1014                 }
1015             }
1016         }
1017
1018       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
1019       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0))
1020         {
1021           nat_elog_err (sm, "static_mapping_by_external key del failed");
1022           return VNET_API_ERROR_UNSPECIFIED;
1023         }
1024
1025       pool_foreach (local, m->locals)
1026       {
1027           fib_table_unlock (local->fib_index, FIB_PROTOCOL_IP4,
1028                             sm->fib_src_low);
1029           if (!out2in_only)
1030             {
1031               init_nat_k (&kv, local->addr, local->port, local->fib_index,
1032                           m->proto);
1033               if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv,
1034                                            0))
1035                 {
1036                   nat_elog_err (sm, "static_mapping_by_local key del failed");
1037                   return VNET_API_ERROR_UNSPECIFIED;
1038                 }
1039             }
1040
1041           if (sm->num_workers > 1)
1042             {
1043               ip4_header_t ip = {
1044                 .src_address = local->addr,
1045               };
1046               tsm = vec_elt_at_index (
1047                 sm->per_thread_data,
1048                 nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index, 0));
1049             }
1050           else
1051             tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1052
1053           /* Delete sessions */
1054           pool_foreach (s, tsm->sessions)
1055             {
1056               if (!(is_lb_session (s)))
1057                 continue;
1058
1059               if ((s->in2out.addr.as_u32 != local->addr.as_u32) ||
1060                   s->in2out.port != local->port)
1061                 continue;
1062
1063               nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1064               nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1065             }
1066       }
1067       if (m->affinity)
1068         nat_affinity_flush_service (m->affinity_per_service_list_head_index);
1069       pool_free (m->locals);
1070       vec_free (m->tag);
1071       vec_free (m->workers);
1072
1073       pool_put (sm->static_mappings, m);
1074     }
1075
1076   return 0;
1077 }
1078
/*
 * Add a local back-end to, or remove one from, an existing load-balancing
 * static mapping selected by external address, port and protocol.
 *
 * A back-end is identified by (l_addr, l_port, vrf_id).  After the change
 * the cumulative `prefix` of every remaining back-end and the mapping's
 * worker list are rebuilt from scratch.
 *
 * Returns:
 *   VNET_API_ERROR_NO_SUCH_ENTRY  no mapping for the external key, or (on
 *                                 delete) no matching back-end;
 *   VNET_API_ERROR_INVALID_VALUE  the mapping is not a load-balancing one;
 *   VNET_API_ERROR_VALUE_EXIST    (on add) the back-end is already present;
 *   VNET_API_ERROR_UNSPECIFIED    (on delete) fewer than three back-ends are
 *                                 configured, so removal is refused;
 *   0                             otherwise.  A failed bihash add/delete of
 *                                 the local key is only logged.
 */
1079 int
1080 nat44_lb_static_mapping_add_del_local (ip4_address_t e_addr, u16 e_port,
1081                                        ip4_address_t l_addr, u16 l_port,
1082                                        nat_protocol_t proto, u32 vrf_id,
1083                                        u8 probability, u8 is_add)
1084 {
1085   snat_main_t *sm = &snat_main;
1086   snat_static_mapping_t *m = 0;
1087   clib_bihash_kv_8_8_t kv, value;
1088   nat44_lb_addr_port_t *local, *prev_local, *match_local = 0;
1089   snat_main_per_thread_data_t *tsm;
1090   snat_session_t *s;
1091   u32 *locals = 0;
1092   uword *bitmap = 0;
1093   int i;
1094
1095   init_nat_k (&kv, e_addr, e_port, 0, proto);
1096   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1097     m = pool_elt_at_index (sm->static_mappings, value.value);
1098
1099   if (!m)
1100     return VNET_API_ERROR_NO_SUCH_ENTRY;
1101
1102   if (!is_lb_static_mapping (m))
1103     return VNET_API_ERROR_INVALID_VALUE;
1104
       /* Look for a back-end with the same address, port and VRF */
1105   pool_foreach (local, m->locals)
1106    {
1107     if ((local->addr.as_u32 == l_addr.as_u32) && (local->port == l_port) &&
1108         (local->vrf_id == vrf_id))
1109       {
1110         match_local = local;
1111         break;
1112       }
1113   }
1114
1115   if (is_add)
1116     {
1117       if (match_local)
1118         return VNET_API_ERROR_VALUE_EXIST;
1119
1120       pool_get (m->locals, local);
1121       clib_memset (local, 0, sizeof (*local));
1122       local->addr.as_u32 = l_addr.as_u32;
1123       local->port = l_port;
1124       local->probability = probability;
1125       local->vrf_id = vrf_id;
1126       local->fib_index =
1127         fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1128                                            sm->fib_src_low);
1129
1130       if (!is_out2in_only_static_mapping (m))
1131         {
1132           init_nat_kv (&kv, l_addr, l_port, local->fib_index, proto, 0,
1133                        m - sm->static_mappings);
1134           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1))
1135             nat_elog_err (sm, "static_mapping_by_local key add failed");
1136         }
1137     }
1138   else
1139     {
1140       if (!match_local)
1141         return VNET_API_ERROR_NO_SUCH_ENTRY;
1142
           /* Refuse to shrink the mapping below two back-ends */
1143       if (pool_elts (m->locals) < 3)
1144         return VNET_API_ERROR_UNSPECIFIED;
1145
1146       fib_table_unlock (match_local->fib_index, FIB_PROTOCOL_IP4,
1147                         sm->fib_src_low);
1148
1149       if (!is_out2in_only_static_mapping (m))
1150         {
1151           init_nat_k (&kv, l_addr, l_port, match_local->fib_index, proto);
1152           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0))
1153             nat_elog_err (sm, "static_mapping_by_local key del failed");
1154         }
1155
1156       if (sm->num_workers > 1)
1157         {
               /* Use match_local explicitly instead of relying on the
                  search loop's cursor still pointing at the match. */
1158           ip4_header_t ip = {
1159             .src_address = match_local->addr,
1160           };
1161           tsm = vec_elt_at_index (
1162             sm->per_thread_data,
1163             nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index, 0));
1164         }
1165       else
1166         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1167
1168       /* Delete sessions */
1169       pool_foreach (s, tsm->sessions) {
1170         if (!(is_lb_session (s)))
1171           continue;
1172
1173         if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) ||
1174             s->in2out.port != match_local->port)
1175           continue;
1176
1177         nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1178         nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1179       }
1180
1181       pool_put (m->locals, match_local);
1182     }
1183
1184   vec_free (m->workers);
1185
       /* Collect the pool indices of the remaining back-ends and, with
          multiple workers, the set of workers that serve them */
1186   pool_foreach (local, m->locals)
1187    {
1188     vec_add1 (locals, local - m->locals);
1189     if (sm->num_workers > 1)
1190       {
1191         ip4_header_t ip = {
1192           .src_address = local->addr,
1193         };
1194         bitmap = clib_bitmap_set (bitmap,
1195           nat44_ed_get_in2out_worker_index (0, &ip, local->fib_index, 0), 1);
1196       }
1197   }
1198
1199   ASSERT (vec_len (locals) > 1);
1200
       /* Recompute each prefix as the running sum of the probabilities */
1201   local = pool_elt_at_index (m->locals, locals[0]);
1202   local->prefix = local->probability;
1203   for (i = 1; i < vec_len (locals); i++)
1204     {
1205       local = pool_elt_at_index (m->locals, locals[i]);
1206       prev_local = pool_elt_at_index (m->locals, locals[i - 1]);
1207       local->prefix = local->probability + prev_local->prefix;
1208     }
1209   vec_free (locals);

1210   /* Assign workers */
1211   if (sm->num_workers > 1)
1212     {
1213       clib_bitmap_foreach (i, bitmap)  { vec_add1(m->workers, i); }
1214     }
       /* Scratch bitmap is no longer needed; it stays NULL when there is a
          single worker. */
1215   clib_bitmap_free (bitmap);

1216   return 0;
1217 }
1218
/*
 * Remove addr from the NAT address pool: sm->twice_nat_addresses when
 * twice_nat is set, sm->addresses otherwise.
 *
 * delete_sm != 0: static mappings whose external address equals addr are
 *                 deleted first through snat_add_static_mapping().
 * delete_sm == 0: the call fails with VNET_API_ERROR_UNSPECIFIED if
 *                 is_snat_address_used_in_static_mapping() reports a user.
 *
 * Sessions whose out2in address equals addr are deleted on every thread,
 * but only when one of the address's busy TCP/UDP/ICMP port counters is
 * non-zero.  For a non-twice-NAT address, addr is also removed from the FIB
 * via the first interface in sm->interfaces and the first one in
 * sm->output_feature_interfaces that is not flagged inside.
 *
 * Returns 0 on success, VNET_API_ERROR_NO_SUCH_ENTRY if addr is not in the
 * selected pool.
 */
1219 int
1220 snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm,
1221                   u8 twice_nat)
1222 {
1223   snat_address_t *a = 0;
1224   snat_session_t *ses;
1225   u32 *ses_to_be_removed = 0, *ses_index;
1226   snat_main_per_thread_data_t *tsm;
1227   snat_static_mapping_t *m;
1228   snat_interface_t *interface;
1229   int i;
1230   snat_address_t *addresses =
1231     twice_nat ? sm->twice_nat_addresses : sm->addresses;
1232
1233   /* Find SNAT address */
       /* i keeps the matching index; it is reused by vec_del1 further down */
1234   for (i = 0; i < vec_len (addresses); i++)
1235     {
1236       if (addresses[i].addr.as_u32 == addr.as_u32)
1237         {
1238           a = addresses + i;
1239           break;
1240         }
1241     }
1242   if (!a)
1243     {
1244       nat_log_err ("no such address");
1245       return VNET_API_ERROR_NO_SUCH_ENTRY;
1246     }
1247
1248   if (delete_sm)
1249     {
1250       ip4_address_t pool_addr = { 0 };
           /* The return value of each delete is deliberately discarded */
1251       pool_foreach (m, sm->static_mappings)
1252        {
1253           if (m->external_addr.as_u32 == addr.as_u32)
1254             (void) snat_add_static_mapping (m->local_addr, m->external_addr,
1255                                             m->local_port, m->external_port,
1256                                             m->vrf_id,
1257                                             is_addr_only_static_mapping(m), ~0,
1258                                             m->proto, 0 /* is_add */,
1259                                             m->twice_nat,
1260                                             is_out2in_only_static_mapping(m),
1261                                             m->tag,
1262                                             is_identity_static_mapping(m),
1263                                             pool_addr, 0);
1264       }
1265     }
1266   else
1267     {
1268       /* Check if address is used in some static mapping */
1269       if (is_snat_address_used_in_static_mapping (sm, addr))
1270         {
1271           nat_log_err ("address used in static mapping");
1272           return VNET_API_ERROR_UNSPECIFIED;
1273         }
1274     }
1275
1276   if (a->fib_index != ~0)
1277     fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
1278
1279   /* Delete sessions using address */
1280   if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
1281     {
1282       vec_foreach (tsm, sm->per_thread_data)
1283       {
             /* First pass: free session data and remember the pool indices;
                the sessions themselves are deleted after the walk. */
1284         pool_foreach (ses, tsm->sessions)  {
1285           if (ses->out2in.addr.as_u32 == addr.as_u32)
1286             {
1287               nat_free_session_data (sm, ses, tsm - sm->per_thread_data, 0);
1288               vec_add1 (ses_to_be_removed, ses - tsm->sessions);
1289             }
1290         }
1291
1292             vec_foreach (ses_index, ses_to_be_removed)
1293             {
1294               ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1295               nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1);
1296             }
1297
             /* Reset the index list before moving on to the next thread */
1298         vec_free (ses_to_be_removed);
1299       }
1300     }
1301
       /* Free the per-thread busy-port vector of every NAT protocol */
1302 #define _(N, i, n, s) \
1303   vec_free (a->busy_##n##_ports_per_thread);
1304   foreach_nat_protocol
1305 #undef _
1306
       /* Twice-NAT addresses return here, before any FIB change */
1307     if (twice_nat)
1308   {
1309     vec_del1 (sm->twice_nat_addresses, i);
1310     return 0;
1311   }
1312   else vec_del1 (sm->addresses, i);
1313
1314   /* Delete external address from FIB */
1315   pool_foreach (interface, sm->interfaces)
1316     {
1317       if (nat_interface_is_inside (interface))
1318         continue;
1319
1320       snat_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
1321       break;
1322     }
1323   pool_foreach (interface, sm->output_feature_interfaces)
1324    {
1325      if (nat_interface_is_inside (interface))
1326        continue;
1327
1328      snat_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
1329      break;
1330   }
1331
1332   return 0;
1333 }
1334
/*
 * Flag as expired every per-VRF session record, on every thread, whose
 * rx or tx FIB index equals fib_index.
 */
1335 void
1336 expire_per_vrf_sessions (u32 fib_index)
1337 {
1338   snat_main_t *sm = &snat_main;
1339   snat_main_per_thread_data_t *thread_data;
1340   per_vrf_sessions_t *pvs;
1341
1342   vec_foreach (thread_data, sm->per_thread_data)
1343     {
1344       vec_foreach (pvs, thread_data->per_vrf_sessions_vec)
1345         {
1346           /* Leave records alone when neither side uses this FIB */
1347           if (pvs->rx_fib_index != fib_index &&
1348               pvs->tx_fib_index != fib_index)
1349             continue;
1350           pvs->expired = 1;
1351         }
1352     }
1353 }
1354
/*
 * Maintain the per-FIB reference counts kept in sm->fibs.
 *
 * is_del == 0: take a reference on fib_index, creating its entry with a
 *              count of 1 if it is not tracked yet.
 * is_del != 0: drop a reference; when the count reaches zero the entry is
 *              removed and expire_per_vrf_sessions() is called for that FIB.
 *              Nothing happens if fib_index is not tracked.
 *
 * The walk stops at the first matching entry in both cases.  Previously the
 * add path incremented the match and then fell through to append a second
 * entry for the same FIB, leaving stale duplicates in the vector.
 */
1355 void
1356 update_per_vrf_sessions_vec (u32 fib_index, int is_del)
1357 {
1358   snat_main_t *sm = &snat_main;
1359   nat_fib_t *fib;
1360
1361   // we don't care if it is outside/inside fib
1362   // we just care about their ref_count
1363   // if it reaches 0 sessions should expire
1364   // because the fib isn't valid for NAT anymore
1365
1366   vec_foreach (fib, sm->fibs)
1367   {
1368     if (fib->fib_index == fib_index)
1369       {
1370         if (is_del)
1371           {
1372             fib->ref_count--;
1373             if (!fib->ref_count)
1374               {
1375                 vec_del1 (sm->fibs, fib - sm->fibs);
1376                 expire_per_vrf_sessions (fib_index);
1377               }
1378           }
1379         else
1380           fib->ref_count++;
             /* Entry handled: never fall through to the append below */
1381         return;
1382       }
1383   }
       /* Only reached when fib_index has no entry yet */
1384   if (!is_del)
1385     {
1386       vec_add2 (sm->fibs, fib, 1);
1387       fib->ref_count = 1;
1388       fib->fib_index = fib_index;
1389     }
1390 }
1391
/*
 * Configure (is_del == 0) or unconfigure (is_del != 0) sw_if_index as a
 * NAT44-ED inside (is_inside != 0) or outside interface.
 *
 * Returns:
 *   VNET_API_ERROR_UNSUPPORTED    the plugin is not enabled;
 *   VNET_API_ERROR_VALUE_EXIST    sw_if_index is already present in
 *                                 sm->output_feature_interfaces;
 *   VNET_API_ERROR_NO_SUCH_ENTRY  delete of an interface that is not in
 *                                 sm->interfaces;
 *   a non-zero value passed through from
 *   ip4_sv_reass_enable_disable_with_refcnt();
 *   0 otherwise, including a repeated add of a role the interface already
 *   has.
 *
 * NOTE(review): the per-FIB and outside-FIB reference counts are adjusted
 * before sm->interfaces is searched, so they have already changed when the
 * function returns NO_SUCH_ENTRY, a reassembly error, or the early 0 for a
 * repeated add.
 */
1392 int
1393 snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
1394 {
1395   snat_main_t *sm = &snat_main;
1396   snat_interface_t *i;
1397   const char *feature_name, *del_feature_name;
1398   snat_address_t *ap;
1399   snat_static_mapping_t *m;
1400   nat_outside_fib_t *outside_fib;
1401   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1402                                                        sw_if_index);
1403
1404   if (!sm->enabled)
1405     {
1406       nat_log_err ("nat44 is disabled");
1407       return VNET_API_ERROR_UNSUPPORTED;
1408     }
1409
       /* An interface cannot be both a regular and an output-feature one */
1410   pool_foreach (i, sm->output_feature_interfaces)
1411    {
1412     if (i->sw_if_index == sw_if_index)
1413       {
1414         nat_log_err ("error interface already configured");
1415         return VNET_API_ERROR_VALUE_EXIST;
1416       }
1417   }
1418
       /* Feature used when the interface has only the requested role */
1419   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
1420     feature_name = is_inside ? "nat44-in2out-fast" : "nat44-out2in-fast";
1421   else
1422     {
1423       if (sm->num_workers > 1)
1424         feature_name =
1425           is_inside ? "nat44-in2out-worker-handoff" :
1426           "nat44-out2in-worker-handoff";
1427       else
1428         feature_name = is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1429     }
1430
1431   ASSERT (sm->frame_queue_nelts > 0);
1432
       /* Create the hand-off frame queues once, only with multiple workers */
1433   if (sm->fq_in2out_index == ~0 && sm->num_workers > 1)
1434     sm->fq_in2out_index = vlib_frame_queue_main_init (sm->in2out_node_index,
1435                                                       sm->frame_queue_nelts);
1436
1437   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
1438     sm->fq_out2in_index = vlib_frame_queue_main_init (sm->out2in_node_index,
1439                                                       sm->frame_queue_nelts);
1440
1441   update_per_vrf_sessions_vec (fib_index, is_del);
1442
       /* Outside interfaces also hold a reference on their FIB in
          sm->outside_fibs */
1443   if (!is_inside)
1444     {
1445       vec_foreach (outside_fib, sm->outside_fibs)
1446         {
1447           if (outside_fib->fib_index == fib_index)
1448             {
1449               if (is_del)
1450                 {
1451                   outside_fib->refcount--;
1452                   if (!outside_fib->refcount)
1453                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1454                 }
1455               else
1456                 outside_fib->refcount++;
1457               goto feature_set;
1458             }
1459         }
1460       if (!is_del)
1461         {
1462           vec_add2 (sm->outside_fibs, outside_fib, 1);
1463           outside_fib->refcount = 1;
1464           outside_fib->fib_index = fib_index;
1465         }
1466     }
1467
1468 feature_set:
1469   pool_foreach (i, sm->interfaces)
1470    {
1471     if (i->sw_if_index == sw_if_index)
1472       {
1473         if (is_del)
1474           {
1475             if (nat_interface_is_inside(i) && nat_interface_is_outside(i))
1476               {
                     /* Interface keeps one role: clear the requested flag,
                        drop the classify feature and enable the feature of
                        the role that remains. */
1477                 if (is_inside)
1478                   i->flags &= ~NAT_INTERFACE_FLAG_IS_INSIDE;
1479                 else
1480                   i->flags &= ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
1481
1482                 if (sm->num_workers > 1)
1483                   {
1484                     del_feature_name = "nat44-handoff-classify";
1485                     feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
1486                                                  "nat44-out2in-worker-handoff";
1487                   }
1488                 else
1489                   {
1490                     del_feature_name = "nat44-ed-classify";
1491                     feature_name =
1492                       !is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1493                   }
1494
1495                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1496                 if (rv)
1497                   return rv;
1498                 vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1499                                              sw_if_index, 0, 0, 0);
1500                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
1501                                              sw_if_index, 1, 0, 0);
1502               }
1503             else
1504               {
                     /* Single-role interface: disable its feature and give
                        the entry back to the pool. */
1505                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1506                 if (rv)
1507                   return rv;
1508                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
1509                                              sw_if_index, 0, 0, 0);
1510                 pool_put (sm->interfaces, i);
1511               }
1512           }
1513         else
1514           {
                 /* Adding a role the interface already has is a no-op */
1515             if ((nat_interface_is_inside (i) && is_inside) ||
1516                 (nat_interface_is_outside (i) && !is_inside))
1517               return 0;
1518
                 /* Second role on an existing interface: replace the
                    single-role feature with the classify feature. */
1519             if (sm->num_workers > 1)
1520               {
1521                 del_feature_name = !is_inside ? "nat44-in2out-worker-handoff" :
1522                                                 "nat44-out2in-worker-handoff";
1523                 feature_name = "nat44-handoff-classify";
1524               }
1525             else
1526               {
1527                 del_feature_name =
1528                   !is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1529
1530                 feature_name = "nat44-ed-classify";
1531               }
1532
1533             int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1534             if (rv)
1535               return rv;
1536             vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1537                                          sw_if_index, 0, 0, 0);
1538             vnet_feature_enable_disable ("ip4-unicast", feature_name,
1539                                          sw_if_index, 1, 0, 0);
1540             goto set_flags;
1541           }
1542
             /* Both delete variants continue at fib, whatever is_inside is */
1543         goto fib;
1544       }
1545   }
1546
1547   if (is_del)
1548     {
1549       nat_log_err ("error interface couldn't be found");
1550       return VNET_API_ERROR_NO_SUCH_ENTRY;
1551     }
1552
       /* First role on a new interface */
1553   pool_get (sm->interfaces, i);
1554   i->sw_if_index = sw_if_index;
1555   i->flags = 0;
1556   nat_validate_interface_counters (sm, sw_if_index);
1557
1558   vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1, 0,
1559                                0);
1560
       /* NOTE(review): if this fails, the pool entry allocated above stays in
          sm->interfaces with flags == 0 and the feature stays enabled. */
1561   int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1562   if (rv)
1563     return rv;
1564
1565 set_flags:
       /* An inside role returns here without touching the FIB */
1566   if (is_inside)
1567     {
1568       i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
1569       return 0;
1570     }
1571   else
1572     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
1573
1574   /* Add/delete external addresses to FIB */
1575 fib:
1576   vec_foreach (ap, sm->addresses)
1577     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
1578
       /* Only address-only mappings whose external address differs from the
          local one are processed */
1579   pool_foreach (m, sm->static_mappings)
1580    {
1581     if (!(is_addr_only_static_mapping(m)) || (m->local_addr.as_u32 == m->external_addr.as_u32))
1582       continue;
1583
1584     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
1585   }
1586
1587   return 0;
1588 }
1589
/*
 * Configure (is_del == 0) or unconfigure (is_del != 0) sw_if_index as a
 * NAT44-ED output-feature interface, inside or outside.
 *
 * Returns:
 *   VNET_API_ERROR_UNSUPPORTED    the plugin is not enabled, or it runs in
 *                                 static-mapping-only mode without
 *                                 connection tracking;
 *   VNET_API_ERROR_VALUE_EXIST    sw_if_index is already in sm->interfaces,
 *                                 or an add names an interface already in
 *                                 sm->output_feature_interfaces;
 *   VNET_API_ERROR_NO_SUCH_ENTRY  delete of an interface that is not in
 *                                 sm->output_feature_interfaces;
 *   a non-zero value passed through from the ip4_sv_reass_* calls;
 *   0 otherwise.
 *
 * NOTE(review): the reference counts, reassembly state and feature arcs are
 * all changed before sm->output_feature_interfaces is searched, so the
 * VALUE_EXIST and NO_SUCH_ENTRY returns near the end happen after those
 * changes have been made.
 */
1590 int
1591 snat_interface_add_del_output_feature (u32 sw_if_index,
1592                                        u8 is_inside, int is_del)
1593 {
1594   snat_main_t *sm = &snat_main;
1595   snat_interface_t *i;
1596   snat_address_t *ap;
1597   snat_static_mapping_t *m;
1598   nat_outside_fib_t *outside_fib;
1599   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1600                                                        sw_if_index);
1601
1602   if (!sm->enabled)
1603     {
1604       nat_log_err ("nat44 is disabled");
1605       return VNET_API_ERROR_UNSUPPORTED;
1606     }
1607
1608   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
1609     {
1610       nat_log_err ("error unsupported");
1611       return VNET_API_ERROR_UNSUPPORTED;
1612     }
1613
       /* An interface cannot be both a regular and an output-feature one */
1614   pool_foreach (i, sm->interfaces)
1615    {
1616     if (i->sw_if_index == sw_if_index)
1617       {
1618         nat_log_err ("error interface already configured");
1619         return VNET_API_ERROR_VALUE_EXIST;
1620       }
1621   }
1622
1623   update_per_vrf_sessions_vec (fib_index, is_del);
1624
       /* Outside interfaces also hold a reference on their FIB in
          sm->outside_fibs */
1625   if (!is_inside)
1626     {
1627       vec_foreach (outside_fib, sm->outside_fibs)
1628         {
1629           if (outside_fib->fib_index == fib_index)
1630             {
1631               if (is_del)
1632                 {
1633                   outside_fib->refcount--;
1634                   if (!outside_fib->refcount)
1635                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1636                 }
1637               else
1638                 outside_fib->refcount++;
1639               goto feature_set;
1640             }
1641         }
1642       if (!is_del)
1643         {
1644           vec_add2 (sm->outside_fibs, outside_fib, 1);
1645           outside_fib->refcount = 1;
1646           outside_fib->fib_index = fib_index;
1647         }
1648     }
1649
1650 feature_set:
       /* For an inside interface only the reassembly state is toggled here;
          no vnet_feature_enable_disable() call is made in this function. */
1651   if (is_inside)
1652     {
1653           int rv =
1654             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
1655           if (rv)
1656             return rv;
1657           rv =
1658             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
1659                                                             !is_del);
1660           if (rv)
1661             return rv;
1662       goto fq;
1663     }
1664
       /* Outside interface: both branches make the same reassembly calls and
          differ only in the feature names they toggle. */
1665   if (sm->num_workers > 1)
1666     {
1667       int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
1668       if (rv)
1669         return rv;
1670       rv =
1671         ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del);
1672       if (rv)
1673         return rv;
1674       vnet_feature_enable_disable ("ip4-unicast",
1675                                    "nat44-out2in-worker-handoff",
1676                                    sw_if_index, !is_del, 0, 0);
1677       vnet_feature_enable_disable ("ip4-output",
1678                                    "nat44-in2out-output-worker-handoff",
1679                                    sw_if_index, !is_del, 0, 0);
1680     }
1681   else
1682     {
1683           int rv =
1684             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
1685           if (rv)
1686             return rv;
1687           rv =
1688             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
1689                                                             !is_del);
1690           if (rv)
1691             return rv;
1692           vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in",
1693                                        sw_if_index, !is_del, 0, 0);
1694           vnet_feature_enable_disable ("ip4-output", "nat-pre-in2out-output",
1695                                        sw_if_index, !is_del, 0, 0);
1696     }
1697
1698 fq:
       /* NOTE(review): the frame queues are created here with a size argument
          of 0, whereas snat_interface_add_del() passes sm->frame_queue_nelts;
          confirm that the difference is intended. */
1699   if (sm->fq_in2out_output_index == ~0 && sm->num_workers > 1)
1700     sm->fq_in2out_output_index =
1701       vlib_frame_queue_main_init (sm->in2out_output_node_index, 0);
1702
1703   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
1704     sm->fq_out2in_index =
1705       vlib_frame_queue_main_init (sm->out2in_node_index, 0);
1706
1707   pool_foreach (i, sm->output_feature_interfaces)
1708    {
1709     if (i->sw_if_index == sw_if_index)
1710       {
1711         if (is_del)
1712           pool_put (sm->output_feature_interfaces, i);
1713         else
1714           return VNET_API_ERROR_VALUE_EXIST;
1715
1716         goto fib;
1717       }
1718   }
1719
1720   if (is_del)
1721     {
1722       nat_log_err ("error interface couldn't be found");
1723       return VNET_API_ERROR_NO_SUCH_ENTRY;
1724     }
1725
       /* New output-feature interface: record it with its single role flag */
1726   pool_get (sm->output_feature_interfaces, i);
1727   i->sw_if_index = sw_if_index;
1728   i->flags = 0;
1729   nat_validate_interface_counters (sm, sw_if_index);
1730   if (is_inside)
1731     i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
1732   else
1733     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
1734
1735   /* Add/delete external addresses to FIB */
1736 fib:
       /* FIB entries are only managed for outside interfaces */
1737   if (is_inside)
1738     return 0;
1739
1740   vec_foreach (ap, sm->addresses)
1741     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
1742
       /* Only address-only mappings whose external address differs from the
          local one are processed */
1743   pool_foreach (m, sm->static_mappings)
1744    {
1745     if (!((is_addr_only_static_mapping(m)))  || (m->local_addr.as_u32 == m->external_addr.as_u32))
1746       continue;
1747
1748     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
1749   }
1750
1751   return 0;
1752 }
1753
/*
 * Replace the list of worker threads used for NAT with the workers whose
 * bits are set in bitmap.
 *
 * For each selected worker its position in the new list is stored as
 * snat_thread_index and its worker number as thread_index, in the
 * per-thread data slot at sm->first_worker_index + worker.  Finally
 * sm->port_per_thread is recomputed as (0xffff - 1024) divided by the
 * number of selected workers.
 *
 * Returns VNET_API_ERROR_FEATURE_DISABLED when fewer than two workers are
 * configured, VNET_API_ERROR_INVALID_WORKER when the highest bit reported
 * by clib_bitmap_last_set() is not below sm->num_workers, 0 otherwise.
 */
1754 int
1755 snat_set_workers (uword * bitmap)
1756 {
1757   snat_main_t *sm = &snat_main;
1758   snat_main_per_thread_data_t *ptd;
1759   int worker, slot = 0;
1760
1761   if (sm->num_workers < 2)
1762     return VNET_API_ERROR_FEATURE_DISABLED;
1763   if (clib_bitmap_last_set (bitmap) >= sm->num_workers)
1764     return VNET_API_ERROR_INVALID_WORKER;
1765
1766   vec_free (sm->workers);
1767   clib_bitmap_foreach (worker, bitmap)
1768     {
1769       ptd = &sm->per_thread_data[sm->first_worker_index + worker];
1770       vec_add1 (sm->workers, worker);
1771       ptd->snat_thread_index = slot++;
1772       ptd->thread_index = worker;
1773     }
1774
1775   sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
1776
1777   return 0;
1778 }
1779
1780 int
1781 nat44_ed_set_frame_queue_nelts (u32 frame_queue_nelts)
1782 {
1783   fail_if_enabled ();
1784   snat_main_t *sm = &snat_main;
1785   sm->frame_queue_nelts = frame_queue_nelts;
1786   return 0;
1787 }
1788
1789 static void
1790 snat_update_outside_fib (ip4_main_t * im, uword opaque,
1791                          u32 sw_if_index, u32 new_fib_index,
1792                          u32 old_fib_index)
1793 {
1794   snat_main_t *sm = &snat_main;
1795   nat_outside_fib_t *outside_fib;
1796   snat_interface_t *i;
1797   u8 is_add = 1;
1798   u8 match = 0;
1799
1800   if (!sm->enabled || (new_fib_index == old_fib_index)
1801       || (!vec_len (sm->outside_fibs)))
1802     {
1803       return;
1804     }
1805
1806   pool_foreach (i, sm->interfaces)
1807      {
1808       if (i->sw_if_index == sw_if_index)
1809         {
1810           if (!(nat_interface_is_outside (i)))
1811             return;
1812           match = 1;
1813         }
1814     }
1815
1816   pool_foreach (i, sm->output_feature_interfaces)
1817      {
1818       if (i->sw_if_index == sw_if_index)
1819         {
1820           if (!(nat_interface_is_outside (i)))
1821             return;
1822           match = 1;
1823         }
1824     }
1825
1826   if (!match)
1827     return;
1828
1829   vec_foreach (outside_fib, sm->outside_fibs)
1830   {
1831     if (outside_fib->fib_index == old_fib_index)
1832       {
1833         outside_fib->refcount--;
1834         if (!outside_fib->refcount)
1835           vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1836         break;
1837       }
1838   }
1839
1840   vec_foreach (outside_fib, sm->outside_fibs)
1841   {
1842     if (outside_fib->fib_index == new_fib_index)
1843       {
1844         outside_fib->refcount++;
1845         is_add = 0;
1846         break;
1847       }
1848   }
1849
1850   if (is_add)
1851     {
1852       vec_add2 (sm->outside_fibs, outside_fib, 1);
1853       outside_fib->refcount = 1;
1854       outside_fib->fib_index = new_fib_index;
1855     }
1856 }
1857
1858 static void
1859 snat_update_outside_fib (ip4_main_t * im, uword opaque,
1860                          u32 sw_if_index, u32 new_fib_index,
1861                          u32 old_fib_index);
1862
1863 static void
1864 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
1865                                        uword opaque,
1866                                        u32 sw_if_index,
1867                                        ip4_address_t * address,
1868                                        u32 address_length,
1869                                        u32 if_address_index, u32 is_delete);
1870
1871 static void
1872 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
1873                                  uword opaque,
1874                                  u32 sw_if_index,
1875                                  ip4_address_t * address,
1876                                  u32 address_length,
1877                                  u32 if_address_index, u32 is_delete);
1878
1879 void
1880 test_key_calc_split ()
1881 {
1882   ip4_address_t l_addr;
1883   l_addr.as_u8[0] = 1;
1884   l_addr.as_u8[1] = 1;
1885   l_addr.as_u8[2] = 1;
1886   l_addr.as_u8[3] = 1;
1887   ip4_address_t r_addr;
1888   r_addr.as_u8[0] = 2;
1889   r_addr.as_u8[1] = 2;
1890   r_addr.as_u8[2] = 2;
1891   r_addr.as_u8[3] = 2;
1892   u16 l_port = 40001;
1893   u16 r_port = 40301;
1894   u8 proto = 9;
1895   u32 fib_index = 9000001;
1896   u32 thread_index = 3000000001;
1897   u32 session_index = 3000000221;
1898   clib_bihash_kv_16_8_t kv;
1899   init_ed_kv (&kv, l_addr, l_port, r_addr, r_port, fib_index, proto,
1900               thread_index, session_index);
1901   ip4_address_t l_addr2;
1902   ip4_address_t r_addr2;
1903   clib_memset (&l_addr2, 0, sizeof (l_addr2));
1904   clib_memset (&r_addr2, 0, sizeof (r_addr2));
1905   u16 l_port2 = 0;
1906   u16 r_port2 = 0;
1907   u8 proto2 = 0;
1908   u32 fib_index2 = 0;
1909   split_ed_kv (&kv, &l_addr2, &r_addr2, &proto2, &fib_index2, &l_port2,
1910                &r_port2);
1911   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
1912   ASSERT (r_addr.as_u32 == r_addr2.as_u32);
1913   ASSERT (l_port == l_port2);
1914   ASSERT (r_port == r_port2);
1915   ASSERT (proto == proto2);
1916   ASSERT (fib_index == fib_index2);
1917   ASSERT (thread_index == ed_value_get_thread_index (&kv));
1918   ASSERT (session_index == ed_value_get_session_index (&kv));
1919
1920   fib_index = 7001;
1921   proto = 5;
1922   nat_protocol_t proto3 = ~0;
1923   u64 key = calc_nat_key (l_addr, l_port, fib_index, proto);
1924   split_nat_key (key, &l_addr2, &l_port2, &fib_index2, &proto3);
1925   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
1926   ASSERT (l_port == l_port2);
1927   ASSERT (proto == proto3);
1928   ASSERT (fib_index == fib_index2);
1929 }
1930
1931 static clib_error_t *
1932 nat_ip_table_add_del (vnet_main_t * vnm, u32 table_id, u32 is_add)
1933 {
1934   u32 fib_index;
1935
1936       // TODO: consider removing all NAT interfaces
1937       if (!is_add)
1938         {
1939           fib_index = ip4_fib_index_from_table_id (table_id);
1940           if (fib_index != ~0)
1941             expire_per_vrf_sessions (fib_index);
1942         }
1943   return 0;
1944 }
1945
1946 VNET_IP_TABLE_ADD_DEL_FUNCTION (nat_ip_table_add_del);
1947
1948 void
1949 nat44_set_node_indexes (snat_main_t * sm, vlib_main_t * vm)
1950 {
1951   vlib_node_t *node;
1952
1953   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
1954   sm->out2in_node_index = node->index;
1955
1956   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
1957   sm->in2out_node_index = node->index;
1958
1959   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-output");
1960   sm->in2out_output_node_index = node->index;
1961 }
1962
/*
 * Validate simple counter c at index i and reset that index to zero.
 *
 * Expands to exactly one statement with no trailing ';' (the caller writes
 * it), so the macro is safe in an unbraced if/else.  The index argument is
 * evaluated twice.
 */
#define nat_validate_simple_counter(c, i)                                     \
  do                                                                          \
    {                                                                         \
      vlib_validate_simple_counter (&(c), (i));                               \
      vlib_zero_simple_counter (&(c), (i));                                   \
    }                                                                         \
  while (0)

/*
 * Set the name (n) and stat segment name (sn) of simple counter c, then
 * validate and zero its index 0.  Expands to exactly one statement with no
 * trailing ';'.
 */
#define nat_init_simple_counter(c, n, sn)                                     \
  do                                                                          \
    {                                                                         \
      (c).name = (n);                                                         \
      (c).stat_segment_name = (sn);                                           \
      nat_validate_simple_counter (c, 0);                                     \
    }                                                                         \
  while (0)
1979
1980 static_always_inline void
1981 nat_validate_interface_counters (snat_main_t *sm, u32 sw_if_index)
1982 {
1983 #define _(x)                                                                  \
1984   nat_validate_simple_counter (sm->counters.fastpath.in2out.x, sw_if_index);  \
1985   nat_validate_simple_counter (sm->counters.fastpath.out2in.x, sw_if_index);  \
1986   nat_validate_simple_counter (sm->counters.slowpath.in2out.x, sw_if_index);  \
1987   nat_validate_simple_counter (sm->counters.slowpath.out2in.x, sw_if_index);
1988   foreach_nat_counter;
1989 #undef _
1990   nat_validate_simple_counter (sm->counters.hairpinning, sw_if_index);
1991 }
1992
1993 static clib_error_t *
1994 nat_init (vlib_main_t * vm)
1995 {
1996   snat_main_t *sm = &snat_main;
1997   vlib_thread_main_t *tm = vlib_get_thread_main ();
1998   vlib_thread_registration_t *tr;
1999   ip4_add_del_interface_address_callback_t cbi = { 0 };
2000   ip4_table_bind_callback_t cbt = { 0 };
2001   u32 i, num_threads = 0;
2002   uword *p, *bitmap = 0;
2003
2004   clib_memset (sm, 0, sizeof (*sm));
2005
2006   // required
2007   sm->vnet_main = vnet_get_main ();
2008   // convenience
2009   sm->ip4_main = &ip4_main;
2010   sm->api_main = vlibapi_get_main ();
2011   sm->ip4_lookup_main = &ip4_main.lookup_main;
2012
2013   // frame queue indices used for handoff
2014   sm->fq_out2in_index = ~0;
2015   sm->fq_in2out_index = ~0;
2016   sm->fq_in2out_output_index = ~0;
2017
2018   sm->log_level = NAT_LOG_ERROR;
2019
2020   nat44_set_node_indexes (sm, vm);
2021   sm->log_class = vlib_log_register_class ("nat", 0);
2022   nat_ipfix_logging_init (vm);
2023
2024   nat_init_simple_counter (sm->total_sessions, "total-sessions",
2025                            "/nat44-ed/total-sessions");
2026
2027 #define _(x)                                                                  \
2028   nat_init_simple_counter (sm->counters.fastpath.in2out.x, #x,                \
2029                            "/nat44-ed/in2out/fastpath/" #x);                  \
2030   nat_init_simple_counter (sm->counters.fastpath.out2in.x, #x,                \
2031                            "/nat44-ed/out2in/fastpath/" #x);                  \
2032   nat_init_simple_counter (sm->counters.slowpath.in2out.x, #x,                \
2033                            "/nat44-ed/in2out/slowpath/" #x);                  \
2034   nat_init_simple_counter (sm->counters.slowpath.out2in.x, #x,                \
2035                            "/nat44-ed/out2in/slowpath/" #x);
2036   foreach_nat_counter;
2037 #undef _
2038   nat_init_simple_counter (sm->counters.hairpinning, "hairpinning",
2039                            "/nat44-ed/hairpinning");
2040
2041   p = hash_get_mem (tm->thread_registrations_by_name, "workers");
2042   if (p)
2043     {
2044       tr = (vlib_thread_registration_t *) p[0];
2045       if (tr)
2046         {
2047           sm->num_workers = tr->count;
2048           sm->first_worker_index = tr->first_index;
2049         }
2050     }
2051   num_threads = tm->n_vlib_mains - 1;
2052   sm->port_per_thread = 0xffff - 1024;
2053   vec_validate (sm->per_thread_data, num_threads);
2054
2055   /* Use all available workers by default */
2056   if (sm->num_workers > 1)
2057     {
2058
2059       for (i = 0; i < sm->num_workers; i++)
2060         bitmap = clib_bitmap_set (bitmap, i, 1);
2061       snat_set_workers (bitmap);
2062       clib_bitmap_free (bitmap);
2063     }
2064   else
2065     sm->per_thread_data[0].snat_thread_index = 0;
2066
2067   /* callbacks to call when interface address changes. */
2068   cbi.function = snat_ip4_add_del_interface_address_cb;
2069   vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
2070   cbi.function = nat_ip4_add_del_addr_only_sm_cb;
2071   vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
2072
2073   /* callbacks to call when interface to table biding changes */
2074   cbt.function = snat_update_outside_fib;
2075   vec_add1 (sm->ip4_main->table_bind_callbacks, cbt);
2076
2077   sm->fib_src_low =
2078     fib_source_allocate ("nat-low", FIB_SOURCE_PRIORITY_LOW,
2079                          FIB_SOURCE_BH_SIMPLE);
2080   sm->fib_src_hi =
2081     fib_source_allocate ("nat-hi", FIB_SOURCE_PRIORITY_HI,
2082                          FIB_SOURCE_BH_SIMPLE);
2083
2084   nat_affinity_init (vm);
2085   test_key_calc_split ();
2086
2087   return nat44_api_hookup (vm);
2088 }
2089
2090 VLIB_INIT_FUNCTION (nat_init);
2091
2092 int
2093 nat44_plugin_enable (nat44_config_t c)
2094 {
2095   snat_main_t *sm = &snat_main;
2096
2097   fail_if_enabled ();
2098
2099   // UPDATE based on these appropriate API/CLI
2100   // c.static_mapping_only + c.connection_tracking
2101   //  - supported in NAT EI & NAT ED
2102   // c.out2in_dpo, c.static_mapping_only
2103   //  - supported in NAT EI
2104
2105   if (c.static_mapping_only && !c.connection_tracking)
2106     {
2107       nat_log_err ("unsupported combination of configuration");
2108       return 1;
2109     }
2110
2111   // nat44 feature configuration
2112   sm->static_mapping_only = c.static_mapping_only;
2113   sm->static_mapping_connection_tracking = c.connection_tracking;
2114
2115   sm->forwarding_enabled = 0;
2116   sm->mss_clamping = 0;
2117   sm->pat = (!c.static_mapping_only ||
2118              (c.static_mapping_only && c.connection_tracking));
2119
2120   if (!c.sessions)
2121     c.sessions = 63 * 1024;
2122
2123   sm->max_translations_per_thread = c.sessions;
2124   sm->translation_buckets = nat_calc_bihash_buckets (c.sessions);
2125
2126   // ED only feature
2127   vec_add1 (sm->max_translations_per_fib, sm->max_translations_per_thread);
2128
2129   sm->inside_vrf_id = c.inside_vrf;
2130   sm->inside_fib_index =
2131     fib_table_find_or_create_and_lock
2132     (FIB_PROTOCOL_IP4, c.inside_vrf, sm->fib_src_hi);
2133
2134   sm->outside_vrf_id = c.outside_vrf;
2135   sm->outside_fib_index = fib_table_find_or_create_and_lock (
2136     FIB_PROTOCOL_IP4, c.outside_vrf, sm->fib_src_hi);
2137
2138   nat44_ed_db_init (sm->max_translations_per_thread, sm->translation_buckets);
2139
2140   nat_affinity_enable ();
2141
2142   nat_reset_timeouts (&sm->timeouts);
2143
2144   vlib_zero_simple_counter (&sm->total_sessions, 0);
2145
2146   if (!sm->frame_queue_nelts)
2147     sm->frame_queue_nelts = NAT_FQ_NELTS_DEFAULT;
2148
2149   sm->enabled = 1;
2150   sm->rconfig = c;
2151
2152   return 0;
2153 }
2154
2155 void
2156 nat44_addresses_free (snat_address_t ** addresses)
2157 {
2158   snat_address_t *ap;
2159   vec_foreach (ap, *addresses)
2160     {
2161     #define _(N, i, n, s) \
2162       vec_free (ap->busy_##n##_ports_per_thread);
2163       foreach_nat_protocol
2164     #undef _
2165     }
2166   vec_free (*addresses);
2167   *addresses = 0;
2168 }
2169
2170 int
2171 nat44_plugin_disable ()
2172 {
2173   snat_main_t *sm = &snat_main;
2174   snat_interface_t *i, *vec;
2175   int error = 0;
2176
2177   fail_if_disabled ();
2178
2179   // first unregister all nodes from interfaces
2180   vec = vec_dup (sm->interfaces);
2181   vec_foreach (i, vec)
2182     {
2183       if (nat_interface_is_inside(i))
2184         error = snat_interface_add_del (i->sw_if_index, 1, 1);
2185       if (nat_interface_is_outside(i))
2186         error = snat_interface_add_del (i->sw_if_index, 0, 1);
2187
2188       if (error)
2189         {
2190           nat_log_err ("error occurred while removing interface %u",
2191                        i->sw_if_index);
2192         }
2193     }
2194   vec_free (vec);
2195   sm->interfaces = 0;
2196
2197   vec = vec_dup (sm->output_feature_interfaces);
2198   vec_foreach (i, vec)
2199     {
2200       if (nat_interface_is_inside(i))
2201         error = snat_interface_add_del_output_feature (i->sw_if_index, 1, 1);
2202       if (nat_interface_is_outside(i))
2203         error = snat_interface_add_del_output_feature (i->sw_if_index, 0, 1);
2204
2205       if (error)
2206         {
2207           nat_log_err ("error occurred while removing interface %u",
2208                        i->sw_if_index);
2209         }
2210     }
2211   vec_free (vec);
2212   sm->output_feature_interfaces = 0;
2213
2214   vec_free (sm->max_translations_per_fib);
2215
2216   nat44_ed_db_free ();
2217
2218   nat44_addresses_free (&sm->addresses);
2219   nat44_addresses_free (&sm->twice_nat_addresses);
2220
2221   vec_free (sm->to_resolve);
2222   vec_free (sm->auto_add_sw_if_indices);
2223   vec_free (sm->auto_add_sw_if_indices_twice_nat);
2224
2225   sm->to_resolve = 0;
2226   sm->auto_add_sw_if_indices = 0;
2227   sm->auto_add_sw_if_indices_twice_nat = 0;
2228
2229   sm->forwarding_enabled = 0;
2230
2231   sm->enabled = 0;
2232   clib_memset (&sm->rconfig, 0, sizeof (sm->rconfig));
2233
2234   return 0;
2235 }
2236
2237 void
2238 nat44_ed_forwarding_enable_disable (u8 is_enable)
2239 {
2240   snat_main_per_thread_data_t *tsm;
2241   snat_main_t *sm = &snat_main;
2242   snat_session_t *s;
2243
2244   u32 *ses_to_be_removed = 0, *ses_index;
2245
2246   sm->forwarding_enabled = is_enable != 0;
2247
2248   if (is_enable)
2249     return;
2250
2251   vec_foreach (tsm, sm->per_thread_data)
2252     {
2253       pool_foreach (s, tsm->sessions)
2254         {
2255           if (is_fwd_bypass_session (s))
2256             {
2257               vec_add1 (ses_to_be_removed, s - tsm->sessions);
2258             }
2259         }
2260       vec_foreach (ses_index, ses_to_be_removed)
2261         {
2262           s = pool_elt_at_index (tsm->sessions, ses_index[0]);
2263           nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
2264           nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
2265         }
2266
2267       vec_free (ses_to_be_removed);
2268     }
2269 }
2270
2271 void
2272 snat_free_outside_address_and_port (snat_address_t *addresses,
2273                                     u32 thread_index, ip4_address_t *addr,
2274                                     u16 port, nat_protocol_t protocol)
2275 {
2276   snat_main_t *sm = &snat_main;
2277   snat_address_t *a;
2278   u32 address_index;
2279   u16 port_host_byte_order = clib_net_to_host_u16 (port);
2280
2281   for (address_index = 0; address_index < vec_len (addresses);
2282        address_index++)
2283     {
2284       if (addresses[address_index].addr.as_u32 == addr->as_u32)
2285         break;
2286     }
2287
2288   ASSERT (address_index < vec_len (addresses));
2289
2290   a = addresses + address_index;
2291
2292   switch (protocol)
2293     {
2294 #define _(N, i, n, s) \
2295     case NAT_PROTOCOL_##N: \
2296       ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \
2297       --a->busy_##n##_port_refcounts[port_host_byte_order]; \
2298       a->busy_##n##_ports--; \
2299       a->busy_##n##_ports_per_thread[thread_index]--; \
2300       break;
2301       foreach_nat_protocol
2302 #undef _
2303         default : nat_elog_info (sm, "unknown protocol");
2304       return;
2305     }
2306 }
2307
2308 int
2309 nat_set_outside_address_and_port (snat_address_t *addresses, u32 thread_index,
2310                                   ip4_address_t addr, u16 port,
2311                                   nat_protocol_t protocol)
2312 {
2313   snat_main_t *sm = &snat_main;
2314   snat_address_t *a = 0;
2315   u32 address_index;
2316   u16 port_host_byte_order = clib_net_to_host_u16 (port);
2317
2318   for (address_index = 0; address_index < vec_len (addresses);
2319        address_index++)
2320     {
2321       if (addresses[address_index].addr.as_u32 != addr.as_u32)
2322         continue;
2323
2324       a = addresses + address_index;
2325       switch (protocol)
2326         {
2327 #define _(N, j, n, s) \
2328         case NAT_PROTOCOL_##N: \
2329           if (a->busy_##n##_port_refcounts[port_host_byte_order]) \
2330             return VNET_API_ERROR_INSTANCE_IN_USE; \
2331           ++a->busy_##n##_port_refcounts[port_host_byte_order]; \
2332           a->busy_##n##_ports_per_thread[thread_index]++; \
2333           a->busy_##n##_ports++; \
2334           return 0;
2335           foreach_nat_protocol
2336 #undef _
2337             default : nat_elog_info (sm, "unknown protocol");
2338           return 1;
2339         }
2340     }
2341
2342   return VNET_API_ERROR_NO_SUCH_ENTRY;
2343 }
2344
2345 int
2346 snat_static_mapping_match (vlib_main_t *vm, snat_main_t *sm,
2347                            ip4_address_t match_addr, u16 match_port,
2348                            u32 match_fib_index, nat_protocol_t match_protocol,
2349                            ip4_address_t *mapping_addr, u16 *mapping_port,
2350                            u32 *mapping_fib_index, u8 by_external,
2351                            u8 *is_addr_only, twice_nat_type_t *twice_nat,
2352                            lb_nat_type_t *lb, ip4_address_t *ext_host_addr,
2353                            u8 *is_identity_nat, snat_static_mapping_t **out)
2354 {
2355   clib_bihash_kv_8_8_t kv, value;
2356   clib_bihash_8_8_t *mapping_hash;
2357   snat_static_mapping_t *m;
2358   u32 rand, lo = 0, hi, mid, *tmp = 0, i;
2359   nat44_lb_addr_port_t *local;
2360   u8 backend_index;
2361
2362   if (!by_external)
2363     {
2364       mapping_hash = &sm->static_mapping_by_local;
2365       init_nat_k (&kv, match_addr, match_port, match_fib_index,
2366                   match_protocol);
2367       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2368         {
2369           /* Try address only mapping */
2370           init_nat_k (&kv, match_addr, 0, match_fib_index, 0);
2371           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2372             return 1;
2373         }
2374     }
2375   else
2376     {
2377       mapping_hash = &sm->static_mapping_by_external;
2378       init_nat_k (&kv, match_addr, match_port, 0, match_protocol);
2379       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2380         {
2381           /* Try address only mapping */
2382           init_nat_k (&kv, match_addr, 0, 0, 0);
2383           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2384             return 1;
2385         }
2386     }
2387
2388   m = pool_elt_at_index (sm->static_mappings, value.value);
2389
2390   if (by_external)
2391     {
2392       if (is_lb_static_mapping (m))
2393         {
2394           if (PREDICT_FALSE (lb != 0))
2395             *lb = m->affinity ? AFFINITY_LB_NAT : LB_NAT;
2396           if (m->affinity && !nat_affinity_find_and_lock (
2397                                vm, ext_host_addr[0], match_addr,
2398                                match_protocol, match_port, &backend_index))
2399             {
2400               local = pool_elt_at_index (m->locals, backend_index);
2401               *mapping_addr = local->addr;
2402               *mapping_port = local->port;
2403               *mapping_fib_index = local->fib_index;
2404               goto end;
2405             }
2406           // pick locals matching this worker
2407           if (PREDICT_FALSE (sm->num_workers > 1))
2408             {
2409               u32 thread_index = vlib_get_thread_index ();
2410               pool_foreach_index (i, m->locals)
2411                {
2412                 local = pool_elt_at_index (m->locals, i);
2413
2414                 ip4_header_t ip = {
2415                   .src_address = local->addr,
2416                 };
2417
2418                 if (nat44_ed_get_in2out_worker_index (0, &ip, m->fib_index,
2419                                                       0) == thread_index)
2420                   {
2421                     vec_add1 (tmp, i);
2422                   }
2423                }
2424               ASSERT (vec_len (tmp) != 0);
2425             }
2426           else
2427             {
2428               pool_foreach_index (i, m->locals)
2429                {
2430                 vec_add1 (tmp, i);
2431               }
2432             }
2433           hi = vec_len (tmp) - 1;
2434           local = pool_elt_at_index (m->locals, tmp[hi]);
2435           rand = 1 + (random_u32 (&sm->random_seed) % local->prefix);
2436           while (lo < hi)
2437             {
2438               mid = ((hi - lo) >> 1) + lo;
2439               local = pool_elt_at_index (m->locals, tmp[mid]);
2440               (rand > local->prefix) ? (lo = mid + 1) : (hi = mid);
2441             }
2442           local = pool_elt_at_index (m->locals, tmp[lo]);
2443           if (!(local->prefix >= rand))
2444             return 1;
2445           *mapping_addr = local->addr;
2446           *mapping_port = local->port;
2447           *mapping_fib_index = local->fib_index;
2448           if (m->affinity)
2449             {
2450               if (nat_affinity_create_and_lock (ext_host_addr[0], match_addr,
2451                                                 match_protocol, match_port,
2452                                                 tmp[lo], m->affinity,
2453                                                 m->affinity_per_service_list_head_index))
2454                 nat_elog_info (sm, "create affinity record failed");
2455             }
2456           vec_free (tmp);
2457         }
2458       else
2459         {
2460           if (PREDICT_FALSE (lb != 0))
2461             *lb = NO_LB_NAT;
2462           *mapping_fib_index = m->fib_index;
2463           *mapping_addr = m->local_addr;
2464           /* Address only mapping doesn't change port */
2465           *mapping_port = is_addr_only_static_mapping (m) ? match_port
2466             : m->local_port;
2467         }
2468     }
2469   else
2470     {
2471       *mapping_addr = m->external_addr;
2472       /* Address only mapping doesn't change port */
2473       *mapping_port = is_addr_only_static_mapping (m) ? match_port
2474         : m->external_port;
2475       *mapping_fib_index = sm->outside_fib_index;
2476     }
2477
2478 end:
2479   if (PREDICT_FALSE (is_addr_only != 0))
2480     *is_addr_only = is_addr_only_static_mapping (m);
2481
2482   if (PREDICT_FALSE (twice_nat != 0))
2483     *twice_nat = m->twice_nat;
2484
2485   if (PREDICT_FALSE (is_identity_nat != 0))
2486     *is_identity_nat = is_identity_static_mapping (m);
2487
2488   if (out != 0)
2489     *out = m;
2490
2491   return 0;
2492 }
2493
2494 u32
2495 nat44_ed_get_in2out_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
2496                                   u32 rx_fib_index, u8 is_output)
2497 {
2498   snat_main_t *sm = &snat_main;
2499   u32 next_worker_index = sm->first_worker_index;
2500   u32 hash;
2501
2502   clib_bihash_kv_16_8_t kv16, value16;
2503
2504   u32 fib_index = rx_fib_index;
2505   if (b)
2506     {
2507       if (PREDICT_FALSE (is_output))
2508         {
2509           fib_index = sm->outside_fib_index;
2510           nat_outside_fib_t *outside_fib;
2511           fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
2512           fib_prefix_t pfx = {
2513                   .fp_proto = FIB_PROTOCOL_IP4,
2514                   .fp_len = 32,
2515                   .fp_addr = {
2516                           .ip4.as_u32 = ip->dst_address.as_u32,
2517                   } ,
2518           };
2519
2520           switch (vec_len (sm->outside_fibs))
2521             {
2522             case 0:
2523               fib_index = sm->outside_fib_index;
2524               break;
2525             case 1:
2526               fib_index = sm->outside_fibs[0].fib_index;
2527               break;
2528             default:
2529               vec_foreach (outside_fib, sm->outside_fibs)
2530                 {
2531                   fei = fib_table_lookup (outside_fib->fib_index, &pfx);
2532                   if (FIB_NODE_INDEX_INVALID != fei)
2533                     {
2534                       if (fib_entry_get_resolving_interface (fei) != ~0)
2535                         {
2536                           fib_index = outside_fib->fib_index;
2537                           break;
2538                         }
2539                     }
2540                 }
2541               break;
2542             }
2543         }
2544
2545       init_ed_k (&kv16, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
2546                  ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
2547                  fib_index, ip->protocol);
2548
2549       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
2550         {
2551           next_worker_index = ed_value_get_thread_index (&value16);
2552           vnet_buffer2 (b)->nat.cached_session_index =
2553             ed_value_get_session_index (&value16);
2554           goto out;
2555         }
2556
2557       // dst NAT
2558       init_ed_k (&kv16, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
2559                  ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
2560                  rx_fib_index, ip->protocol);
2561       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
2562         {
2563           next_worker_index = ed_value_get_thread_index (&value16);
2564           vnet_buffer2 (b)->nat.cached_dst_nat_session_index =
2565             ed_value_get_session_index (&value16);
2566           goto out;
2567         }
2568     }
2569
2570   hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
2571     (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
2572
2573   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
2574     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
2575   else
2576     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
2577
2578 out:
2579   if (PREDICT_TRUE (!is_output))
2580     {
2581       nat_elog_debug_handoff (sm, "HANDOFF IN2OUT", next_worker_index,
2582                               rx_fib_index,
2583                               clib_net_to_host_u32 (ip->src_address.as_u32),
2584                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2585     }
2586   else
2587     {
2588       nat_elog_debug_handoff (sm, "HANDOFF IN2OUT-OUTPUT-FEATURE",
2589                               next_worker_index, rx_fib_index,
2590                               clib_net_to_host_u32 (ip->src_address.as_u32),
2591                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2592     }
2593
2594   return next_worker_index;
2595 }
2596
2597 u32
2598 nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
2599                                   u32 rx_fib_index, u8 is_output)
2600 {
2601   snat_main_t *sm = &snat_main;
2602   clib_bihash_kv_8_8_t kv, value;
2603   clib_bihash_kv_16_8_t kv16, value16;
2604   snat_main_per_thread_data_t *tsm;
2605
2606   u32 proto, next_worker_index = 0;
2607   u16 port;
2608   snat_static_mapping_t *m;
2609   u32 hash;
2610
2611   proto = ip_proto_to_nat_proto (ip->protocol);
2612
2613   if (PREDICT_TRUE (proto == NAT_PROTOCOL_UDP || proto == NAT_PROTOCOL_TCP))
2614     {
2615       init_ed_k (&kv16, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
2616                  ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
2617                  rx_fib_index, ip->protocol);
2618
2619       if (PREDICT_TRUE (
2620             !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
2621         {
2622           tsm =
2623             vec_elt_at_index (sm->per_thread_data,
2624                               ed_value_get_thread_index (&value16));
2625           vnet_buffer2 (b)->nat.cached_session_index =
2626             ed_value_get_session_index (&value16);
2627           next_worker_index = sm->first_worker_index + tsm->thread_index;
2628           nat_elog_debug_handoff (
2629             sm, "HANDOFF OUT2IN (session)", next_worker_index, rx_fib_index,
2630             clib_net_to_host_u32 (ip->src_address.as_u32),
2631             clib_net_to_host_u32 (ip->dst_address.as_u32));
2632           return next_worker_index;
2633         }
2634     }
2635   else if (proto == NAT_PROTOCOL_ICMP)
2636     {
2637       ip4_address_t lookup_saddr, lookup_daddr;
2638       u16 lookup_sport, lookup_dport;
2639       u8 lookup_protocol;
2640       if (!nat_get_icmp_session_lookup_values (
2641             b, ip, &lookup_saddr, &lookup_sport, &lookup_daddr, &lookup_dport,
2642             &lookup_protocol))
2643         {
2644           init_ed_k (&kv16, lookup_saddr, lookup_sport, lookup_daddr,
2645                      lookup_dport, rx_fib_index, lookup_protocol);
2646           if (PREDICT_TRUE (
2647                 !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
2648             {
2649               tsm =
2650                 vec_elt_at_index (sm->per_thread_data,
2651                                   ed_value_get_thread_index (&value16));
2652               next_worker_index = sm->first_worker_index + tsm->thread_index;
2653               nat_elog_debug_handoff (
2654                 sm, "HANDOFF OUT2IN (session)", next_worker_index,
2655                 rx_fib_index, clib_net_to_host_u32 (ip->src_address.as_u32),
2656                 clib_net_to_host_u32 (ip->dst_address.as_u32));
2657               return next_worker_index;
2658             }
2659         }
2660     }
2661
2662   /* first try static mappings without port */
2663   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
2664     {
2665       init_nat_k (&kv, ip->dst_address, 0, 0, 0);
2666       if (!clib_bihash_search_8_8
2667           (&sm->static_mapping_by_external, &kv, &value))
2668         {
2669           m = pool_elt_at_index (sm->static_mappings, value.value);
2670           next_worker_index = m->workers[0];
2671           goto done;
2672         }
2673     }
2674
2675   /* unknown protocol */
2676   if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
2677     {
2678       /* use current thread */
2679       next_worker_index = vlib_get_thread_index ();
2680       goto done;
2681     }
2682
2683   port = vnet_buffer (b)->ip.reass.l4_dst_port;
2684
2685   if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
2686     {
2687       udp_header_t *udp = ip4_next_header (ip);
2688       icmp46_header_t *icmp = (icmp46_header_t *) udp;
2689       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
2690       if (!icmp_type_is_error_message
2691           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
2692         port = vnet_buffer (b)->ip.reass.l4_src_port;
2693       else
2694         {
2695           /* if error message, then it's not fragmented and we can access it */
2696           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
2697           proto = ip_proto_to_nat_proto (inner_ip->protocol);
2698           void *l4_header = ip4_next_header (inner_ip);
2699           switch (proto)
2700             {
2701             case NAT_PROTOCOL_ICMP:
2702               icmp = (icmp46_header_t *) l4_header;
2703               echo = (icmp_echo_header_t *) (icmp + 1);
2704               port = echo->identifier;
2705               break;
2706             case NAT_PROTOCOL_UDP:
2707             case NAT_PROTOCOL_TCP:
2708               port = ((tcp_udp_header_t *) l4_header)->src_port;
2709               break;
2710             default:
2711               next_worker_index = vlib_get_thread_index ();
2712               goto done;
2713             }
2714         }
2715     }
2716
2717   /* try static mappings with port */
2718   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
2719     {
2720       init_nat_k (&kv, ip->dst_address, port, 0, proto);
2721       if (!clib_bihash_search_8_8
2722           (&sm->static_mapping_by_external, &kv, &value))
2723         {
2724           m = pool_elt_at_index (sm->static_mappings, value.value);
2725           if (!is_lb_static_mapping (m))
2726             {
2727               next_worker_index = m->workers[0];
2728               goto done;
2729             }
2730
2731           hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
2732             (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
2733
2734           if (PREDICT_TRUE (is_pow2 (_vec_len (m->workers))))
2735             next_worker_index =
2736               m->workers[hash & (_vec_len (m->workers) - 1)];
2737           else
2738             next_worker_index = m->workers[hash % _vec_len (m->workers)];
2739           goto done;
2740         }
2741     }
2742
2743   /* worker by outside port */
2744   next_worker_index = sm->first_worker_index;
2745   next_worker_index +=
2746     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
2747
2748 done:
2749   nat_elog_debug_handoff (sm, "HANDOFF OUT2IN", next_worker_index,
2750                           rx_fib_index,
2751                           clib_net_to_host_u32 (ip->src_address.as_u32),
2752                           clib_net_to_host_u32 (ip->dst_address.as_u32));
2753   return next_worker_index;
2754 }
2755
2756 u32
2757 nat44_get_max_session_limit ()
2758 {
2759   snat_main_t *sm = &snat_main;
2760   u32 max_limit = 0, len = 0;
2761
2762   for (; len < vec_len (sm->max_translations_per_fib); len++)
2763     {
2764       if (max_limit < sm->max_translations_per_fib[len])
2765         max_limit = sm->max_translations_per_fib[len];
2766     }
2767   return max_limit;
2768 }
2769
2770 int
2771 nat44_set_session_limit (u32 session_limit, u32 vrf_id)
2772 {
2773   snat_main_t *sm = &snat_main;
2774   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
2775   u32 len = vec_len (sm->max_translations_per_fib);
2776
2777   if (len <= fib_index)
2778     {
2779       vec_validate (sm->max_translations_per_fib, fib_index + 1);
2780
2781       for (; len < vec_len (sm->max_translations_per_fib); len++)
2782         sm->max_translations_per_fib[len] = sm->max_translations_per_thread;
2783     }
2784
2785   sm->max_translations_per_fib[fib_index] = session_limit;
2786   return 0;
2787 }
2788
2789 int
2790 nat44_update_session_limit (u32 session_limit, u32 vrf_id)
2791 {
2792   snat_main_t *sm = &snat_main;
2793
2794   if (nat44_set_session_limit (session_limit, vrf_id))
2795     return 1;
2796   sm->max_translations_per_thread = nat44_get_max_session_limit ();
2797
2798   sm->translation_buckets =
2799     nat_calc_bihash_buckets (sm->max_translations_per_thread);
2800
2801   nat44_ed_sessions_clear ();
2802   return 0;
2803 }
2804
2805 static void
2806 nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations,
2807                          u32 translation_buckets)
2808 {
2809   dlist_elt_t *head;
2810
2811   pool_alloc (tsm->sessions, translations);
2812   pool_alloc (tsm->lru_pool, translations);
2813
2814   pool_get (tsm->lru_pool, head);
2815   tsm->tcp_trans_lru_head_index = head - tsm->lru_pool;
2816   clib_dlist_init (tsm->lru_pool, tsm->tcp_trans_lru_head_index);
2817
2818   pool_get (tsm->lru_pool, head);
2819   tsm->tcp_estab_lru_head_index = head - tsm->lru_pool;
2820   clib_dlist_init (tsm->lru_pool, tsm->tcp_estab_lru_head_index);
2821
2822   pool_get (tsm->lru_pool, head);
2823   tsm->udp_lru_head_index = head - tsm->lru_pool;
2824   clib_dlist_init (tsm->lru_pool, tsm->udp_lru_head_index);
2825
2826   pool_get (tsm->lru_pool, head);
2827   tsm->icmp_lru_head_index = head - tsm->lru_pool;
2828   clib_dlist_init (tsm->lru_pool, tsm->icmp_lru_head_index);
2829
2830   pool_get (tsm->lru_pool, head);
2831   tsm->unk_proto_lru_head_index = head - tsm->lru_pool;
2832   clib_dlist_init (tsm->lru_pool, tsm->unk_proto_lru_head_index);
2833 }
2834
2835 static void
2836 reinit_ed_flow_hash ()
2837 {
2838   snat_main_t *sm = &snat_main;
2839   // we expect 2 flows per session, so multiply translation_buckets by 2
2840   clib_bihash_init_16_8 (
2841     &sm->flow_hash, "ed-flow-hash",
2842     clib_max (1, sm->num_workers) * 2 * sm->translation_buckets, 0);
2843   clib_bihash_set_kvp_format_fn_16_8 (&sm->flow_hash, format_ed_session_kvp);
2844 }
2845
2846 static void
2847 nat44_ed_db_init (u32 translations, u32 translation_buckets)
2848 {
2849   snat_main_t *sm = &snat_main;
2850   snat_main_per_thread_data_t *tsm;
2851   u32 static_mapping_buckets = 1024;
2852   u32 static_mapping_memory_size = 64 << 20;
2853
2854   reinit_ed_flow_hash ();
2855
2856   clib_bihash_init_8_8 (&sm->static_mapping_by_local,
2857                         "static_mapping_by_local", static_mapping_buckets,
2858                         static_mapping_memory_size);
2859   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_local,
2860                                      format_static_mapping_kvp);
2861
2862   clib_bihash_init_8_8 (&sm->static_mapping_by_external,
2863                         "static_mapping_by_external", static_mapping_buckets,
2864                         static_mapping_memory_size);
2865   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_external,
2866                                      format_static_mapping_kvp);
2867
2868   if (sm->pat)
2869     {
2870       vec_foreach (tsm, sm->per_thread_data)
2871         {
2872           nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
2873                                    sm->translation_buckets);
2874         }
2875     }
2876 }
2877
2878 static void
2879 nat44_ed_worker_db_free (snat_main_per_thread_data_t *tsm)
2880 {
2881   pool_free (tsm->lru_pool);
2882   pool_free (tsm->sessions);
2883   vec_free (tsm->per_vrf_sessions_vec);
2884 }
2885
2886 static void
2887 nat44_ed_db_free ()
2888 {
2889   snat_main_t *sm = &snat_main;
2890   snat_main_per_thread_data_t *tsm;
2891
2892   pool_free (sm->static_mappings);
2893   clib_bihash_free_16_8 (&sm->flow_hash);
2894   clib_bihash_free_8_8 (&sm->static_mapping_by_local);
2895   clib_bihash_free_8_8 (&sm->static_mapping_by_external);
2896
2897   if (sm->pat)
2898     {
2899       vec_foreach (tsm, sm->per_thread_data)
2900         {
2901           nat44_ed_worker_db_free (tsm);
2902         }
2903     }
2904 }
2905
2906 void
2907 nat44_ed_sessions_clear ()
2908 {
2909   snat_main_t *sm = &snat_main;
2910   snat_main_per_thread_data_t *tsm;
2911
2912   reinit_ed_flow_hash ();
2913
2914   if (sm->pat)
2915     {
2916       vec_foreach (tsm, sm->per_thread_data)
2917         {
2918
2919           nat44_ed_worker_db_free (tsm);
2920           nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
2921                                    sm->translation_buckets);
2922         }
2923     }
2924   vlib_zero_simple_counter (&sm->total_sessions, 0);
2925 }
2926
2927 static void
2928 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
2929                                  uword opaque,
2930                                  u32 sw_if_index,
2931                                  ip4_address_t * address,
2932                                  u32 address_length,
2933                                  u32 if_address_index, u32 is_delete)
2934 {
2935   snat_main_t *sm = &snat_main;
2936   snat_static_map_resolve_t *rp;
2937   snat_static_mapping_t *m;
2938   clib_bihash_kv_8_8_t kv, value;
2939   int i, rv;
2940   ip4_address_t l_addr;
2941
2942   if (!sm->enabled)
2943     return;
2944
2945   for (i = 0; i < vec_len (sm->to_resolve); i++)
2946     {
2947       rp = sm->to_resolve + i;
2948       if (rp->addr_only == 0)
2949         continue;
2950       if (rp->sw_if_index == sw_if_index)
2951         goto match;
2952     }
2953
2954   return;
2955
2956 match:
2957   init_nat_k (&kv, *address, rp->addr_only ? 0 : rp->e_port,
2958               sm->outside_fib_index, rp->addr_only ? 0 : rp->proto);
2959   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
2960     m = 0;
2961   else
2962     m = pool_elt_at_index (sm->static_mappings, value.value);
2963
2964   if (!is_delete)
2965     {
2966       /* Don't trip over lease renewal, static config */
2967       if (m)
2968         return;
2969     }
2970   else
2971     {
2972       if (!m)
2973         return;
2974     }
2975
2976   /* Indetity mapping? */
2977   if (rp->l_addr.as_u32 == 0)
2978     l_addr.as_u32 = address[0].as_u32;
2979   else
2980     l_addr.as_u32 = rp->l_addr.as_u32;
2981   /* Add the static mapping */
2982   rv = snat_add_static_mapping (l_addr,
2983                                 address[0],
2984                                 rp->l_port,
2985                                 rp->e_port,
2986                                 rp->vrf_id,
2987                                 rp->addr_only, ~0 /* sw_if_index */ ,
2988                                 rp->proto, !is_delete, rp->twice_nat,
2989                                 rp->out2in_only, rp->tag, rp->identity_nat,
2990                                 rp->pool_addr, rp->exact);
2991   if (rv)
2992     nat_elog_notice_X1 (sm, "snat_add_static_mapping returned %d", "i4", rv);
2993 }
2994
2995 static void
2996 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
2997                                        uword opaque,
2998                                        u32 sw_if_index,
2999                                        ip4_address_t * address,
3000                                        u32 address_length,
3001                                        u32 if_address_index, u32 is_delete)
3002 {
3003   snat_main_t *sm = &snat_main;
3004   snat_static_map_resolve_t *rp;
3005   ip4_address_t l_addr;
3006   int i, j;
3007   int rv;
3008   u8 twice_nat = 0;
3009   snat_address_t *addresses = sm->addresses;
3010
3011   if (!sm->enabled)
3012     return;
3013
3014   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices); i++)
3015     {
3016       if (sw_if_index == sm->auto_add_sw_if_indices[i])
3017         goto match;
3018     }
3019
3020   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices_twice_nat); i++)
3021     {
3022       twice_nat = 1;
3023       addresses = sm->twice_nat_addresses;
3024       if (sw_if_index == sm->auto_add_sw_if_indices_twice_nat[i])
3025         goto match;
3026     }
3027
3028   return;
3029
3030 match:
3031   if (!is_delete)
3032     {
3033       /* Don't trip over lease renewal, static config */
3034       for (j = 0; j < vec_len (addresses); j++)
3035         if (addresses[j].addr.as_u32 == address->as_u32)
3036           return;
3037
3038       (void) snat_add_address (sm, address, ~0, twice_nat);
3039       /* Scan static map resolution vector */
3040       for (j = 0; j < vec_len (sm->to_resolve); j++)
3041         {
3042           rp = sm->to_resolve + j;
3043           if (rp->addr_only)
3044             continue;
3045           /* On this interface? */
3046           if (rp->sw_if_index == sw_if_index)
3047             {
3048               /* Indetity mapping? */
3049               if (rp->l_addr.as_u32 == 0)
3050                 l_addr.as_u32 = address[0].as_u32;
3051               else
3052                 l_addr.as_u32 = rp->l_addr.as_u32;
3053               /* Add the static mapping */
3054               rv = snat_add_static_mapping (
3055                 l_addr, address[0], rp->l_port, rp->e_port, rp->vrf_id,
3056                 rp->addr_only, ~0 /* sw_if_index */, rp->proto, 1,
3057                 rp->twice_nat, rp->out2in_only, rp->tag, rp->identity_nat,
3058                 rp->pool_addr, rp->exact);
3059               if (rv)
3060                 nat_elog_notice_X1 (sm, "snat_add_static_mapping returned %d",
3061                                     "i4", rv);
3062             }
3063         }
3064       return;
3065     }
3066   else
3067     {
3068       (void) snat_del_address (sm, address[0], 1, twice_nat);
3069       return;
3070     }
3071 }
3072
3073 int
3074 snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del,
3075                             u8 twice_nat)
3076 {
3077   ip4_main_t *ip4_main = sm->ip4_main;
3078   ip4_address_t *first_int_addr;
3079   snat_static_map_resolve_t *rp;
3080   u32 *indices_to_delete = 0;
3081   int i, j;
3082   u32 *auto_add_sw_if_indices =
3083     twice_nat ? sm->
3084     auto_add_sw_if_indices_twice_nat : sm->auto_add_sw_if_indices;
3085
3086   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0        /* just want the address */
3087     );
3088
3089   for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
3090     {
3091       if (auto_add_sw_if_indices[i] == sw_if_index)
3092         {
3093           if (is_del)
3094             {
3095               /* if have address remove it */
3096               if (first_int_addr)
3097                 (void) snat_del_address (sm, first_int_addr[0], 1, twice_nat);
3098               else
3099                 {
3100                   for (j = 0; j < vec_len (sm->to_resolve); j++)
3101                     {
3102                       rp = sm->to_resolve + j;
3103                       if (rp->sw_if_index == sw_if_index)
3104                         vec_add1 (indices_to_delete, j);
3105                     }
3106                   if (vec_len (indices_to_delete))
3107                     {
3108                       for (j = vec_len (indices_to_delete) - 1; j >= 0; j--)
3109                         vec_del1 (sm->to_resolve, j);
3110                       vec_free (indices_to_delete);
3111                     }
3112                 }
3113               if (twice_nat)
3114                 vec_del1 (sm->auto_add_sw_if_indices_twice_nat, i);
3115               else
3116                 vec_del1 (sm->auto_add_sw_if_indices, i);
3117             }
3118           else
3119             return VNET_API_ERROR_VALUE_EXIST;
3120
3121           return 0;
3122         }
3123     }
3124
3125   if (is_del)
3126     return VNET_API_ERROR_NO_SUCH_ENTRY;
3127
3128   /* add to the auto-address list */
3129   if (twice_nat)
3130     vec_add1 (sm->auto_add_sw_if_indices_twice_nat, sw_if_index);
3131   else
3132     vec_add1 (sm->auto_add_sw_if_indices, sw_if_index);
3133
3134   /* If the address is already bound - or static - add it now */
3135   if (first_int_addr)
3136     (void) snat_add_address (sm, first_int_addr, ~0, twice_nat);
3137
3138   return 0;
3139 }
3140
3141 int
3142 nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
3143                       ip4_address_t * eh_addr, u16 eh_port, u8 proto,
3144                       u32 vrf_id, int is_in)
3145 {
3146   ip4_header_t ip;
3147   clib_bihash_kv_16_8_t kv, value;
3148   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
3149   snat_session_t *s;
3150   snat_main_per_thread_data_t *tsm;
3151
3152   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
3153   if (sm->num_workers > 1)
3154     tsm = vec_elt_at_index (
3155       sm->per_thread_data,
3156       nat44_ed_get_in2out_worker_index (0, &ip, fib_index, 0));
3157   else
3158     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
3159
3160   init_ed_k (&kv, *addr, port, *eh_addr, eh_port, fib_index, proto);
3161   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
3162     {
3163       return VNET_API_ERROR_NO_SUCH_ENTRY;
3164     }
3165
3166   if (pool_is_free_index (tsm->sessions, ed_value_get_session_index (&value)))
3167     return VNET_API_ERROR_UNSPECIFIED;
3168   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
3169   nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
3170   nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
3171   return 0;
3172 }
3173
3174 VLIB_NODE_FN (nat_default_node) (vlib_main_t * vm,
3175                                  vlib_node_runtime_t * node,
3176                                  vlib_frame_t * frame)
3177 {
3178   return 0;
3179 }
3180
3181 VLIB_REGISTER_NODE (nat_default_node) = {
3182   .name = "nat-default",
3183   .vector_size = sizeof (u32),
3184   .format_trace = 0,
3185   .type = VLIB_NODE_TYPE_INTERNAL,
3186   .n_errors = 0,
3187   .n_next_nodes = NAT_N_NEXT,
3188   .next_nodes = {
3189     [NAT_NEXT_DROP] = "error-drop",
3190     [NAT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3191     [NAT_NEXT_IN2OUT_ED_FAST_PATH] = "nat44-ed-in2out",
3192     [NAT_NEXT_IN2OUT_ED_SLOW_PATH] = "nat44-ed-in2out-slowpath",
3193     [NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH] = "nat44-ed-in2out-output",
3194     [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
3195     [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in",
3196     [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath",
3197     [NAT_NEXT_IN2OUT_CLASSIFY] = "nat44-in2out-worker-handoff",
3198     [NAT_NEXT_OUT2IN_CLASSIFY] = "nat44-out2in-worker-handoff",
3199   },
3200 };
3201
3202 void
3203 nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f)
3204 {
3205   f->l3_csum_delta = 0;
3206   f->l4_csum_delta = 0;
3207   if (f->ops & NAT_FLOW_OP_SADDR_REWRITE &&
3208       f->rewrite.saddr.as_u32 != f->match.saddr.as_u32)
3209     {
3210       f->l3_csum_delta =
3211         ip_csum_add_even (f->l3_csum_delta, f->rewrite.saddr.as_u32);
3212       f->l3_csum_delta =
3213         ip_csum_sub_even (f->l3_csum_delta, f->match.saddr.as_u32);
3214     }
3215   else
3216     {
3217       f->rewrite.saddr.as_u32 = f->match.saddr.as_u32;
3218     }
3219   if (f->ops & NAT_FLOW_OP_DADDR_REWRITE &&
3220       f->rewrite.daddr.as_u32 != f->match.daddr.as_u32)
3221     {
3222       f->l3_csum_delta =
3223         ip_csum_add_even (f->l3_csum_delta, f->rewrite.daddr.as_u32);
3224       f->l3_csum_delta =
3225         ip_csum_sub_even (f->l3_csum_delta, f->match.daddr.as_u32);
3226     }
3227   else
3228     {
3229       f->rewrite.daddr.as_u32 = f->match.daddr.as_u32;
3230     }
3231   if (f->ops & NAT_FLOW_OP_SPORT_REWRITE && f->rewrite.sport != f->match.sport)
3232     {
3233       f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.sport);
3234       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport);
3235     }
3236   else
3237     {
3238       f->rewrite.sport = f->match.sport;
3239     }
3240   if (f->ops & NAT_FLOW_OP_DPORT_REWRITE && f->rewrite.dport != f->match.dport)
3241     {
3242       f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.dport);
3243       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.dport);
3244     }
3245   else
3246     {
3247       f->rewrite.dport = f->match.dport;
3248     }
3249   if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE &&
3250       f->rewrite.icmp_id != f->match.sport)
3251     {
3252       f->l4_csum_delta =
3253         ip_csum_add_even (f->l4_csum_delta, f->rewrite.icmp_id);
3254       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport);
3255     }
3256   else
3257     {
3258       f->rewrite.icmp_id = f->match.sport;
3259     }
3260   if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3261     {
3262     }
3263   else
3264     {
3265       f->rewrite.fib_index = f->match.fib_index;
3266     }
3267 }
3268
3269 static_always_inline int nat_6t_flow_icmp_translate (snat_main_t *sm,
3270                                                      vlib_buffer_t *b,
3271                                                      ip4_header_t *ip,
3272                                                      nat_6t_flow_t *f);
3273
3274 static_always_inline void
3275 nat_6t_flow_ip4_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
3276                            nat_6t_flow_t *f, nat_protocol_t proto,
3277                            int is_icmp_inner_ip4)
3278 {
3279   udp_header_t *udp = ip4_next_header (ip);
3280   tcp_header_t *tcp = (tcp_header_t *) udp;
3281
3282   if ((NAT_PROTOCOL_TCP == proto || NAT_PROTOCOL_UDP == proto) &&
3283       !vnet_buffer (b)->ip.reass.is_non_first_fragment)
3284     {
3285       if (!is_icmp_inner_ip4)
3286         { // regular case
3287           ip->src_address = f->rewrite.saddr;
3288           ip->dst_address = f->rewrite.daddr;
3289           udp->src_port = f->rewrite.sport;
3290           udp->dst_port = f->rewrite.dport;
3291         }
3292       else
3293         { // icmp inner ip4 - reversed saddr/daddr
3294           ip->src_address = f->rewrite.daddr;
3295           ip->dst_address = f->rewrite.saddr;
3296           udp->src_port = f->rewrite.dport;
3297           udp->dst_port = f->rewrite.sport;
3298         }
3299
3300       if (NAT_PROTOCOL_TCP == proto)
3301         {
3302           ip_csum_t tcp_sum = tcp->checksum;
3303           tcp_sum = ip_csum_sub_even (tcp_sum, f->l3_csum_delta);
3304           tcp_sum = ip_csum_sub_even (tcp_sum, f->l4_csum_delta);
3305           mss_clamping (sm->mss_clamping, tcp, &tcp_sum);
3306           tcp->checksum = ip_csum_fold (tcp_sum);
3307         }
3308       else if (proto == NAT_PROTOCOL_UDP && udp->checksum)
3309         {
3310           ip_csum_t udp_sum = udp->checksum;
3311           udp_sum = ip_csum_sub_even (udp_sum, f->l3_csum_delta);
3312           udp_sum = ip_csum_sub_even (udp_sum, f->l4_csum_delta);
3313           udp->checksum = ip_csum_fold (udp_sum);
3314         }
3315     }
3316   else
3317     {
3318       if (!is_icmp_inner_ip4)
3319         { // regular case
3320           ip->src_address = f->rewrite.saddr;
3321           ip->dst_address = f->rewrite.daddr;
3322         }
3323       else
3324         { // icmp inner ip4 - reversed saddr/daddr
3325           ip->src_address = f->rewrite.daddr;
3326           ip->dst_address = f->rewrite.saddr;
3327         }
3328     }
3329
3330   ip_csum_t ip_sum = ip->checksum;
3331   ip_sum = ip_csum_sub_even (ip_sum, f->l3_csum_delta);
3332   ip->checksum = ip_csum_fold (ip_sum);
3333   ASSERT (ip->checksum == ip4_header_checksum (ip));
3334 }
3335
3336 static_always_inline int
3337 nat_6t_flow_icmp_translate (snat_main_t *sm, vlib_buffer_t *b,
3338                             ip4_header_t *ip, nat_6t_flow_t *f)
3339 {
3340   if (IP_PROTOCOL_ICMP != ip->protocol)
3341     return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3342
3343   icmp46_header_t *icmp = ip4_next_header (ip);
3344   icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3345
3346   if ((!vnet_buffer (b)->ip.reass.is_non_first_fragment))
3347     {
3348       if (icmp->checksum == 0)
3349         icmp->checksum = 0xffff;
3350
3351       if (!icmp_type_is_error_message (icmp->type))
3352         {
3353           if ((f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE) &&
3354               (f->rewrite.icmp_id != echo->identifier))
3355             {
3356               ip_csum_t sum = icmp->checksum;
3357               sum = ip_csum_update (sum, echo->identifier, f->rewrite.icmp_id,
3358                                     icmp_echo_header_t,
3359                                     identifier /* changed member */);
3360               echo->identifier = f->rewrite.icmp_id;
3361               icmp->checksum = ip_csum_fold (sum);
3362             }
3363         }
3364       else
3365         {
3366           // errors are not fragmented
3367           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3368
3369           if (!ip4_header_checksum_is_valid (inner_ip))
3370             {
3371               return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3372             }
3373
3374           nat_protocol_t inner_proto =
3375             ip_proto_to_nat_proto (inner_ip->protocol);
3376
3377           ip_csum_t icmp_sum = icmp->checksum;
3378
3379           switch (inner_proto)
3380             {
3381             case NAT_PROTOCOL_UDP:
3382             case NAT_PROTOCOL_TCP:
3383               nat_6t_flow_ip4_translate (sm, b, inner_ip, f, inner_proto,
3384                                          1 /* is_icmp_inner_ip4 */);
3385               icmp_sum = ip_csum_sub_even (icmp_sum, f->l3_csum_delta);
3386               icmp->checksum = ip_csum_fold (icmp_sum);
3387               break;
3388             case NAT_PROTOCOL_ICMP:
3389               if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
3390                 {
3391                   icmp46_header_t *inner_icmp = ip4_next_header (inner_ip);
3392                   icmp_echo_header_t *inner_echo =
3393                     (icmp_echo_header_t *) (inner_icmp + 1);
3394                   if (f->rewrite.icmp_id != inner_echo->identifier)
3395                     {
3396                       ip_csum_t sum = icmp->checksum;
3397                       sum = ip_csum_update (
3398                         sum, inner_echo->identifier, f->rewrite.icmp_id,
3399                         icmp_echo_header_t, identifier /* changed member */);
3400                       icmp->checksum = ip_csum_fold (sum);
3401                       ip_csum_t inner_sum = inner_icmp->checksum;
3402                       inner_sum = ip_csum_update (
3403                         sum, inner_echo->identifier, f->rewrite.icmp_id,
3404                         icmp_echo_header_t, identifier /* changed member */);
3405                       inner_icmp->checksum = ip_csum_fold (inner_sum);
3406                       inner_echo->identifier = f->rewrite.icmp_id;
3407                     }
3408                 }
3409               break;
3410             default:
3411               clib_warning ("unexpected NAT protocol value `%d'", inner_proto);
3412               return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3413             }
3414         }
3415     }
3416   return NAT_ED_TRNSL_ERR_SUCCESS;
3417 }
3418
3419 nat_translation_error_e
3420 nat_6t_flow_buf_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
3421                            nat_6t_flow_t *f, nat_protocol_t proto,
3422                            int is_output_feature)
3423 {
3424   if (!is_output_feature && f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3425     {
3426       vnet_buffer (b)->sw_if_index[VLIB_TX] = f->rewrite.fib_index;
3427     }
3428
3429   nat_6t_flow_ip4_translate (sm, b, ip, f, proto, 0 /* is_icmp_inner_ip4 */);
3430
3431   if (NAT_PROTOCOL_ICMP == proto)
3432     {
3433       return nat_6t_flow_icmp_translate (sm, b, ip, f);
3434     }
3435
3436   return NAT_ED_TRNSL_ERR_SUCCESS;
3437 }
3438
3439 u8 *
3440 format_nat_6t (u8 *s, va_list *args)
3441 {
3442   nat_6t_t *t = va_arg (*args, nat_6t_t *);
3443
3444   s = format (s, "saddr %U sport %u daddr %U dport %u proto %U fib_idx %u",
3445               format_ip4_address, t->saddr.as_u8,
3446               clib_net_to_host_u16 (t->sport), format_ip4_address,
3447               t->daddr.as_u8, clib_net_to_host_u16 (t->dport),
3448               format_ip_protocol, t->proto, t->fib_index);
3449   return s;
3450 }
3451
3452 u8 *
3453 format_nat_ed_translation_error (u8 *s, va_list *args)
3454 {
3455   nat_translation_error_e e = va_arg (*args, nat_translation_error_e);
3456
3457   switch (e)
3458     {
3459     case NAT_ED_TRNSL_ERR_SUCCESS:
3460       s = format (s, "success");
3461       break;
3462     case NAT_ED_TRNSL_ERR_TRANSLATION_FAILED:
3463       s = format (s, "translation-failed");
3464       break;
3465     case NAT_ED_TRNSL_ERR_FLOW_MISMATCH:
3466       s = format (s, "flow-mismatch");
3467       break;
3468     }
3469   return s;
3470 }
3471
3472 u8 *
3473 format_nat_6t_flow (u8 *s, va_list *args)
3474 {
3475   nat_6t_flow_t *f = va_arg (*args, nat_6t_flow_t *);
3476
3477   s = format (s, "match: %U ", format_nat_6t, &f->match);
3478   int r = 0;
3479   if (f->ops & NAT_FLOW_OP_SADDR_REWRITE)
3480     {
3481       s = format (s, "rewrite: saddr %U ", format_ip4_address,
3482                   f->rewrite.saddr.as_u8);
3483       r = 1;
3484     }
3485   if (f->ops & NAT_FLOW_OP_SPORT_REWRITE)
3486     {
3487       if (!r)
3488         {
3489           s = format (s, "rewrite: ");
3490           r = 1;
3491         }
3492       s = format (s, "sport %u ", clib_net_to_host_u16 (f->rewrite.sport));
3493     }
3494   if (f->ops & NAT_FLOW_OP_DADDR_REWRITE)
3495     {
3496       if (!r)
3497         {
3498           s = format (s, "rewrite: ");
3499           r = 1;
3500         }
3501       s = format (s, "daddr %U ", format_ip4_address, f->rewrite.daddr.as_u8);
3502     }
3503   if (f->ops & NAT_FLOW_OP_DPORT_REWRITE)
3504     {
3505       if (!r)
3506         {
3507           s = format (s, "rewrite: ");
3508           r = 1;
3509         }
3510       s = format (s, "dport %u ", clib_net_to_host_u16 (f->rewrite.dport));
3511     }
3512   if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
3513     {
3514       if (!r)
3515         {
3516           s = format (s, "rewrite: ");
3517           r = 1;
3518         }
3519       s = format (s, "icmp-id %u ", clib_net_to_host_u16 (f->rewrite.icmp_id));
3520     }
3521   if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3522     {
3523       if (!r)
3524         {
3525           s = format (s, "rewrite: ");
3526           r = 1;
3527         }
3528       s = format (s, "txfib %u ", f->rewrite.fib_index);
3529     }
3530   return s;
3531 }
3532
3533 /*
3534  * fd.io coding-style-patch-verification: ON
3535  *
3536  * Local Variables:
3537  * eval: (c-set-style "gnu")
3538  * End:
3539  */