007296e065fce120aba70fa0be7a6b043c6c3e04
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed.c
1 /*
2  * snat.c - simple nat plugin
3  *
4  * Copyright (c) 2016 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vpp/app/version.h>
19
20 #include <vnet/vnet.h>
21 #include <vnet/ip/ip.h>
22 #include <vnet/ip/ip4.h>
23 #include <vnet/ip/ip_table.h>
24 #include <vnet/ip/reass/ip4_sv_reass.h>
25 #include <vnet/fib/fib_table.h>
26 #include <vnet/fib/ip4_fib.h>
27 #include <vnet/plugin/plugin.h>
28 #include <vppinfra/bihash_16_8.h>
29
30 #include <nat/lib/log.h>
31 #include <nat/lib/nat_syslog.h>
32 #include <nat/lib/nat_inlines.h>
33 #include <nat/lib/ipfix_logging.h>
34
35 #include <nat/nat44-ed/nat44_ed.h>
36 #include <nat/nat44-ed/nat44_ed_affinity.h>
37 #include <nat/nat44-ed/nat44_ed_inlines.h>
38
/* Plugin-wide NAT state; code in this file reaches it through &snat_main. */
snat_main_t snat_main;
40
41 static_always_inline void nat_validate_interface_counters (snat_main_t *sm,
42                                                            u32 sw_if_index);
43
/*
 * Leave the enclosing function when the plugin is not enabled
 * (snat_main.enabled is zero). Expands to a bare `return;`, so it is only
 * usable inside functions returning void. Declares a block-local `sm`.
 */
#define skip_if_disabled()                                                    \
  do                                                                          \
    {                                                                         \
      snat_main_t *sm = &snat_main;                                           \
      if (PREDICT_FALSE (!sm->enabled))                                       \
        return;                                                               \
    }                                                                         \
  while (0)
52
/*
 * Guard for operations that require the plugin to be disabled: when
 * snat_main.enabled is set, log "plugin enabled" and make the enclosing
 * function return 1. Only usable in functions with an integer return type.
 */
#define fail_if_enabled()                                                     \
  do                                                                          \
    {                                                                         \
      snat_main_t *sm = &snat_main;                                           \
      if (PREDICT_FALSE (sm->enabled))                                        \
        {                                                                     \
          nat_log_err ("plugin enabled");                                     \
          return 1;                                                           \
        }                                                                     \
    }                                                                         \
  while (0)
64
/*
 * Guard for operations that require the plugin to be enabled: when
 * snat_main.enabled is zero, log "plugin disabled" and make the enclosing
 * function return 1. Only usable in functions with an integer return type.
 */
#define fail_if_disabled()                                                    \
  do                                                                          \
    {                                                                         \
      snat_main_t *sm = &snat_main;                                           \
      if (PREDICT_FALSE (!sm->enabled))                                       \
        {                                                                     \
          nat_log_err ("plugin disabled");                                    \
          return 1;                                                           \
        }                                                                     \
    }                                                                         \
  while (0)
76
/*
 * Hook up input features.
 *
 * Every registration below places one NAT graph node (.node_name) on the
 * "ip4-unicast" feature arc. .runs_after lists the features each node is
 * ordered after: the inbound ACL plugin node in all cases, plus
 * shallow-virtual reassembly and/or the DHCP client detect node where listed.
 */
VNET_FEATURE_INIT (nat_pre_in2out, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat-pre-in2out",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (nat_pre_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat-pre-out2in",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-dhcp-client-detect",
                               "ip4-sv-reassembly-feature"),
};
/* Worker handoff nodes: ordered after the ACL (and DHCP detect) nodes only */
VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-in2out-worker-handoff",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
};
VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-out2in-worker-handoff",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-dhcp-client-detect"),
};
VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-in2out",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-out2in",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
                               "ip4-dhcp-client-detect"),
};
/* Endpoint-dependent (ED) in2out / out2in nodes */
VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-in2out",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-out2in",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
                               "ip4-dhcp-client-detect"),
};
/* Classifier nodes */
VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-classify",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-handoff-classify",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
/* "fast" in2out / out2in nodes */
VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-in2out-fast",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-out2in-fast",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
                               "ip4-dhcp-client-detect"),
};
145
/*
 * Hook up output features.
 *
 * These registrations place NAT nodes on the "ip4-output" feature arc.
 * The first two are ordered after the outbound ACL plugin node; the last two
 * are ordered after output reassembly but before the outbound ACL node
 * (.runs_before).
 */
VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-in2out-output",
  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
};
VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-in2out-output-worker-handoff",
  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
};
VNET_FEATURE_INIT (nat_pre_in2out_output, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat-pre-in2out-output",
  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
  .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-ed-in2out-output",
  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
  .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
169
/* Plugin registration record: build version string and a short description. */
VLIB_PLUGIN_REGISTER () = {
    .version = VPP_BUILD_VER,
    .description = "Network Address Translation (NAT)",
};
174
175 static void nat44_ed_db_init (u32 translations, u32 translation_buckets);
176
177 static void nat44_ed_db_free ();
178
179 static u32
180 nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip,
181                                u32 rx_fib_index, u8 is_output);
182
183 static u32 nat44_ed_get_worker_in2out_cb (vlib_buffer_t *b, ip4_header_t *ip,
184                                           u32 rx_fib_index, u8 is_output);
185
186 u32 nat_calc_bihash_buckets (u32 n_elts);
187
188 u8 *
189 format_session_kvp (u8 * s, va_list * args)
190 {
191   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
192
193   s = format (s, "%U thread-index %llu session-index %llu", format_snat_key,
194               v->key, nat_value_get_thread_index (v),
195               nat_value_get_session_index (v));
196
197   return s;
198 }
199
200 u8 *
201 format_static_mapping_kvp (u8 * s, va_list * args)
202 {
203   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
204
205   s = format (s, "%U static-mapping-index %llu",
206               format_snat_key, v->key, v->value);
207
208   return s;
209 }
210
211 u8 *
212 format_ed_session_kvp (u8 * s, va_list * args)
213 {
214   clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
215
216   u8 proto;
217   u16 r_port, l_port;
218   ip4_address_t l_addr, r_addr;
219   u32 fib_index;
220
221   split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port);
222   s = format (s,
223               "local %U:%d remote %U:%d proto %U fib %d thread-index %u "
224               "session-index %u",
225               format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port),
226               format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port),
227               format_ip_protocol, proto, fib_index,
228               ed_value_get_thread_index (v), ed_value_get_session_index (v));
229
230   return s;
231 }
232
233 void
234 nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index,
235                        u8 is_ha)
236 {
237       per_vrf_sessions_unregister_session (s, thread_index);
238
239       if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 0))
240         nat_elog_warn (sm, "flow hash del failed");
241
242       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0))
243         nat_elog_warn (sm, "flow hash del failed");
244
245   if (is_fwd_bypass_session (s))
246     {
247       return;
248     }
249
250       if (is_affinity_sessions (s))
251         nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
252                              s->nat_proto, s->out2in.port);
253
254       if (!is_ha)
255         nat_syslog_nat44_sdel (
256           0, s->in2out.fib_index, &s->in2out.addr, s->in2out.port,
257           &s->ext_host_nat_addr, s->ext_host_nat_port, &s->out2in.addr,
258           s->out2in.port, &s->ext_host_addr, s->ext_host_port, s->nat_proto,
259           is_twice_nat_session (s));
260
261   if (snat_is_unk_proto_session (s))
262     return;
263
264   if (!is_ha)
265     {
266       /* log NAT event */
267       nat_ipfix_logging_nat44_ses_delete (thread_index,
268                                           s->in2out.addr.as_u32,
269                                           s->out2in.addr.as_u32,
270                                           s->nat_proto,
271                                           s->in2out.port,
272                                           s->out2in.port,
273                                           s->in2out.fib_index);
274     }
275
276   /* Twice NAT address and port for external host */
277   if (is_twice_nat_session (s))
278     {
279       snat_free_outside_address_and_port (sm->twice_nat_addresses,
280                                           thread_index,
281                                           &s->ext_host_nat_addr,
282                                           s->ext_host_nat_port, s->nat_proto);
283     }
284
285   if (snat_is_session_static (s))
286     return;
287
288   snat_free_outside_address_and_port (sm->addresses, thread_index,
289                                       &s->out2in.addr, s->out2in.port,
290                                       s->nat_proto);
291 }
292
293 void
294 snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
295                           int is_add)
296 {
297   snat_main_t *sm = &snat_main;
298   fib_prefix_t prefix = {
299     .fp_len = p_len,
300     .fp_proto = FIB_PROTOCOL_IP4,
301     .fp_addr = {
302                 .ip4.as_u32 = addr->as_u32,
303                 },
304   };
305   u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
306
307   if (is_add)
308     fib_table_entry_update_one_path (fib_index,
309                                      &prefix,
310                                      sm->fib_src_low,
311                                      (FIB_ENTRY_FLAG_CONNECTED |
312                                       FIB_ENTRY_FLAG_LOCAL |
313                                       FIB_ENTRY_FLAG_EXCLUSIVE),
314                                      DPO_PROTO_IP4,
315                                      NULL,
316                                      sw_if_index,
317                                      ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
318   else
319     fib_table_entry_delete (fib_index, &prefix, sm->fib_src_low);
320 }
321
322 int
323 snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id,
324                   u8 twice_nat)
325 {
326   snat_address_t *ap;
327   snat_interface_t *i;
328   vlib_thread_main_t *tm = vlib_get_thread_main ();
329
330   /* Check if address already exists */
331   vec_foreach (ap, twice_nat ? sm->twice_nat_addresses : sm->addresses)
332     {
333       if (ap->addr.as_u32 == addr->as_u32)
334         {
335           nat_log_err ("address exist");
336           return VNET_API_ERROR_VALUE_EXIST;
337         }
338     }
339
340   if (twice_nat)
341     vec_add2 (sm->twice_nat_addresses, ap, 1);
342   else
343     vec_add2 (sm->addresses, ap, 1);
344
345   ap->addr = *addr;
346   if (vrf_id != ~0)
347     ap->fib_index =
348       fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
349                                          sm->fib_src_low);
350   else
351     ap->fib_index = ~0;
352
353   #define _(N, i, n, s) \
354     clib_memset(ap->busy_##n##_port_refcounts, 0, sizeof(ap->busy_##n##_port_refcounts));\
355     ap->busy_##n##_ports = 0; \
356     ap->busy_##n##_ports_per_thread = 0;\
357     vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
358     foreach_nat_protocol
359   #undef _
360
361   if (twice_nat)
362     return 0;
363
364   /* Add external address to FIB */
365   pool_foreach (i, sm->interfaces)
366    {
367      if (nat_interface_is_inside (i))
368        continue;
369
370      snat_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
371      break;
372   }
373   pool_foreach (i, sm->output_feature_interfaces)
374    {
375      if (nat_interface_is_inside (i))
376        continue;
377
378      snat_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
379      break;
380   }
381
382   return 0;
383 }
384
385 static int
386 is_snat_address_used_in_static_mapping (snat_main_t * sm, ip4_address_t addr)
387 {
388   snat_static_mapping_t *m;
389   pool_foreach (m, sm->static_mappings)
390    {
391       if (is_addr_only_static_mapping (m) ||
392           is_out2in_only_static_mapping (m) ||
393           is_identity_static_mapping (m))
394         continue;
395       if (m->external_addr.as_u32 == addr.as_u32)
396         return 1;
397   }
398
399   return 0;
400 }
401
402 static void
403 snat_add_static_mapping_when_resolved (snat_main_t *sm, ip4_address_t l_addr,
404                                        u16 l_port, u32 sw_if_index, u16 e_port,
405                                        u32 vrf_id, nat_protocol_t proto,
406                                        int addr_only, u8 *tag, int twice_nat,
407                                        int out2in_only, int identity_nat,
408                                        ip4_address_t pool_addr, int exact)
409 {
410   snat_static_map_resolve_t *rp;
411
412   vec_add2 (sm->to_resolve, rp, 1);
413   rp->l_addr.as_u32 = l_addr.as_u32;
414   rp->l_port = l_port;
415   rp->sw_if_index = sw_if_index;
416   rp->e_port = e_port;
417   rp->vrf_id = vrf_id;
418   rp->proto = proto;
419   rp->addr_only = addr_only;
420   rp->twice_nat = twice_nat;
421   rp->out2in_only = out2in_only;
422   rp->identity_nat = identity_nat;
423   rp->tag = vec_dup (tag);
424   rp->pool_addr = pool_addr;
425   rp->exact = exact;
426 }
427
428 u32
429 get_thread_idx_by_port (u16 e_port)
430 {
431   snat_main_t *sm = &snat_main;
432   u32 thread_idx = sm->num_workers;
433   if (sm->num_workers > 1)
434     {
435       thread_idx =
436         sm->first_worker_index +
437         sm->workers[(e_port - 1024) / sm->port_per_thread];
438     }
439   return thread_idx;
440 }
441
442 void
443 nat_ed_static_mapping_del_sessions (snat_main_t * sm,
444                                     snat_main_per_thread_data_t * tsm,
445                                     ip4_address_t l_addr,
446                                     u16 l_port,
447                                     u8 protocol,
448                                     u32 fib_index, int addr_only,
449                                     ip4_address_t e_addr, u16 e_port)
450 {
451   snat_session_t *s;
452   u32 *indexes_to_free = NULL;
453   pool_foreach (s, tsm->sessions) {
454     if (s->in2out.fib_index != fib_index ||
455         s->in2out.addr.as_u32 != l_addr.as_u32)
456       {
457         continue;
458       }
459     if (!addr_only)
460       {
461         if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
462             s->out2in.port != e_port ||
463             s->in2out.port != l_port ||
464             s->nat_proto != protocol)
465           continue;
466       }
467
468     if (is_lb_session (s))
469       continue;
470     if (!snat_is_session_static (s))
471       continue;
472     nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
473     vec_add1 (indexes_to_free, s - tsm->sessions);
474     if (!addr_only)
475       break;
476   }
477   u32 *ses_index;
478   vec_foreach (ses_index, indexes_to_free)
479   {
480     s = pool_elt_at_index (tsm->sessions, *ses_index);
481     nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
482   }
483   vec_free (indexes_to_free);
484 }
485
486 int
487 snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
488                          u16 l_port, u16 e_port, u32 vrf_id, int addr_only,
489                          u32 sw_if_index, nat_protocol_t proto, int is_add,
490                          twice_nat_type_t twice_nat, u8 out2in_only, u8 *tag,
491                          u8 identity_nat, ip4_address_t pool_addr, int exact)
492 {
493   snat_main_t *sm = &snat_main;
494   snat_static_mapping_t *m;
495   clib_bihash_kv_8_8_t kv, value;
496   snat_address_t *a = 0;
497   u32 fib_index = ~0;
498   snat_interface_t *interface;
499   snat_main_per_thread_data_t *tsm;
500   snat_static_map_resolve_t *rp, *rp_match = 0;
501   nat44_lb_addr_port_t *local;
502   u32 find = ~0;
503   int i;
504
505   /* If the external address is a specific interface address */
506   if (sw_if_index != ~0)
507     {
508       ip4_address_t *first_int_addr;
509
510       for (i = 0; i < vec_len (sm->to_resolve); i++)
511         {
512           rp = sm->to_resolve + i;
513           if (rp->sw_if_index != sw_if_index ||
514               rp->l_addr.as_u32 != l_addr.as_u32 ||
515               rp->vrf_id != vrf_id || rp->addr_only != addr_only)
516             continue;
517
518           if (!addr_only)
519             {
520               if ((rp->l_port != l_port && rp->e_port != e_port)
521                   || rp->proto != proto)
522                 continue;
523             }
524
525           rp_match = rp;
526           break;
527         }
528
529       /* Might be already set... */
530       first_int_addr = ip4_interface_first_address
531         (sm->ip4_main, sw_if_index, 0 /* just want the address */ );
532
533       if (is_add)
534         {
535           if (rp_match)
536             return VNET_API_ERROR_VALUE_EXIST;
537
538           snat_add_static_mapping_when_resolved (
539             sm, l_addr, l_port, sw_if_index, e_port, vrf_id, proto, addr_only,
540             tag, twice_nat, out2in_only, identity_nat, pool_addr, exact);
541
542           /* DHCP resolution required? */
543           if (first_int_addr == 0)
544             {
545               return 0;
546             }
547           else
548             {
549               e_addr.as_u32 = first_int_addr->as_u32;
550               /* Identity mapping? */
551               if (l_addr.as_u32 == 0)
552                 l_addr.as_u32 = e_addr.as_u32;
553             }
554         }
555       else
556         {
557           if (!rp_match)
558             return VNET_API_ERROR_NO_SUCH_ENTRY;
559
560           vec_del1 (sm->to_resolve, i);
561
562           if (first_int_addr)
563             {
564               e_addr.as_u32 = first_int_addr->as_u32;
565               /* Identity mapping? */
566               if (l_addr.as_u32 == 0)
567                 l_addr.as_u32 = e_addr.as_u32;
568             }
569           else
570             return 0;
571         }
572     }
573
574   init_nat_k (&kv, e_addr, addr_only ? 0 : e_port, 0, addr_only ? 0 : proto);
575   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
576     m = 0;
577   else
578     m = pool_elt_at_index (sm->static_mappings, value.value);
579
580   if (is_add)
581     {
582       if (m)
583         {
584           if (is_identity_static_mapping (m))
585             {
586               pool_foreach (local, m->locals)
587                {
588                 if (local->vrf_id == vrf_id)
589                   return VNET_API_ERROR_VALUE_EXIST;
590               }
591               pool_get (m->locals, local);
592               local->vrf_id = vrf_id;
593               local->fib_index =
594                 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
595                                                    sm->fib_src_low);
596               init_nat_kv (&kv, m->local_addr, m->local_port, local->fib_index,
597                            m->proto, 0, m - sm->static_mappings);
598               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
599               return 0;
600             }
601           else
602             return VNET_API_ERROR_VALUE_EXIST;
603         }
604
605       if (twice_nat && addr_only)
606         return VNET_API_ERROR_UNSUPPORTED;
607
608       /* Convert VRF id to FIB index */
609       if (vrf_id != ~0)
610         fib_index =
611           fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
612                                              sm->fib_src_low);
613       /* If not specified use inside VRF id from SNAT plugin startup config */
614       else
615         {
616           fib_index = sm->inside_fib_index;
617           vrf_id = sm->inside_vrf_id;
618           fib_table_lock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
619         }
620
621       if (!(out2in_only || identity_nat))
622         {
623           init_nat_k (&kv, l_addr, addr_only ? 0 : l_port, fib_index,
624                       addr_only ? 0 : proto);
625           if (!clib_bihash_search_8_8
626               (&sm->static_mapping_by_local, &kv, &value))
627             return VNET_API_ERROR_VALUE_EXIST;
628         }
629
630       /* Find external address in allocated addresses and reserve port for
631          address and port pair mapping when dynamic translations enabled */
632       if (!(addr_only || sm->static_mapping_only || out2in_only))
633         {
634           for (i = 0; i < vec_len (sm->addresses); i++)
635             {
636               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
637                 {
638                   a = sm->addresses + i;
639                   /* External port must be unused */
640                   switch (proto)
641                     {
642 #define _(N, j, n, s) \
643                     case NAT_PROTOCOL_##N: \
644                       if (a->busy_##n##_port_refcounts[e_port]) \
645                         return VNET_API_ERROR_INVALID_VALUE; \
646                       ++a->busy_##n##_port_refcounts[e_port]; \
647                       if (e_port > 1024) \
648                         { \
649                           a->busy_##n##_ports++; \
650                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
651                         } \
652                       break;
653                       foreach_nat_protocol
654 #undef _
655                         default : nat_elog_info (sm, "unknown protocol");
656                       return VNET_API_ERROR_INVALID_VALUE_2;
657                     }
658                   break;
659                 }
660             }
661           /* External address must be allocated */
662           if (!a && (l_addr.as_u32 != e_addr.as_u32))
663             {
664               if (sw_if_index != ~0)
665                 {
666                   for (i = 0; i < vec_len (sm->to_resolve); i++)
667                     {
668                       rp = sm->to_resolve + i;
669                       if (rp->addr_only)
670                         continue;
671                       if (rp->sw_if_index != sw_if_index &&
672                           rp->l_addr.as_u32 != l_addr.as_u32 &&
673                           rp->vrf_id != vrf_id && rp->l_port != l_port &&
674                           rp->e_port != e_port && rp->proto != proto)
675                         continue;
676
677                       vec_del1 (sm->to_resolve, i);
678                       break;
679                     }
680                 }
681               return VNET_API_ERROR_NO_SUCH_ENTRY;
682             }
683         }
684
685       pool_get (sm->static_mappings, m);
686       clib_memset (m, 0, sizeof (*m));
687       m->tag = vec_dup (tag);
688       m->local_addr = l_addr;
689       m->external_addr = e_addr;
690       m->twice_nat = twice_nat;
691
692       if (twice_nat == TWICE_NAT && exact)
693         {
694           m->flags |= NAT_STATIC_MAPPING_FLAG_EXACT_ADDRESS;
695           m->pool_addr = pool_addr;
696         }
697
698       if (out2in_only)
699         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
700       if (addr_only)
701         m->flags |= NAT_STATIC_MAPPING_FLAG_ADDR_ONLY;
702       if (identity_nat)
703         {
704           m->flags |= NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT;
705           pool_get (m->locals, local);
706           local->vrf_id = vrf_id;
707           local->fib_index = fib_index;
708         }
709       else
710         {
711           m->vrf_id = vrf_id;
712           m->fib_index = fib_index;
713         }
714       if (!addr_only)
715         {
716           m->local_port = l_port;
717           m->external_port = e_port;
718           m->proto = proto;
719         }
720
721       if (sm->num_workers > 1)
722         {
723           ip4_header_t ip = {
724             .src_address = m->local_addr,
725           };
726           vec_add1 (m->workers,
727                     sm->worker_in2out_cb (0, &ip, m->fib_index, 0));
728           tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
729         }
730       else
731         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
732
733       if (!out2in_only)
734         {
735           init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto,
736                        0, m - sm->static_mappings);
737           clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
738         }
739
740       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
741                    m - sm->static_mappings);
742       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1);
743     }
744   else
745     {
746       if (!m)
747         {
748           if (sw_if_index != ~0)
749             return 0;
750           else
751             return VNET_API_ERROR_NO_SUCH_ENTRY;
752         }
753
754       if (identity_nat)
755         {
756           if (vrf_id == ~0)
757             vrf_id = sm->inside_vrf_id;
758
759           pool_foreach (local, m->locals)
760            {
761             if (local->vrf_id == vrf_id)
762               find = local - m->locals;
763           }
764           if (find == ~0)
765             return VNET_API_ERROR_NO_SUCH_ENTRY;
766
767           local = pool_elt_at_index (m->locals, find);
768           fib_index = local->fib_index;
769           pool_put (m->locals, local);
770         }
771       else
772         fib_index = m->fib_index;
773
774       /* Free external address port */
775       if (!(addr_only || sm->static_mapping_only || out2in_only))
776         {
777           for (i = 0; i < vec_len (sm->addresses); i++)
778             {
779               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
780                 {
781                   a = sm->addresses + i;
782                   switch (proto)
783                     {
784 #define _(N, j, n, s) \
785                     case NAT_PROTOCOL_##N: \
786                       --a->busy_##n##_port_refcounts[e_port]; \
787                       if (e_port > 1024) \
788                         { \
789                           a->busy_##n##_ports--; \
790                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
791                         } \
792                       break;
793                       foreach_nat_protocol
794 #undef _
795                         default : nat_elog_info (sm, "unknown protocol");
796                       return VNET_API_ERROR_INVALID_VALUE_2;
797                     }
798                   break;
799                 }
800             }
801         }
802
803       if (sm->num_workers > 1)
804         tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
805       else
806         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
807
808       init_nat_k (&kv, m->local_addr, m->local_port, fib_index, m->proto);
809       if (!out2in_only)
810         clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0);
811
812       /* Delete session(s) for static mapping if exist */
813       if (!(sm->static_mapping_only) ||
814           (sm->static_mapping_only && sm->static_mapping_connection_tracking))
815         {
816           nat_ed_static_mapping_del_sessions (
817             sm, tsm, m->local_addr, m->local_port, m->proto, fib_index,
818             addr_only, e_addr, e_port);
819         }
820
821       fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
822       if (pool_elts (m->locals))
823         return 0;
824
825       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
826       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0);
827
828       vec_free (m->tag);
829       vec_free (m->workers);
830       /* Delete static mapping from pool */
831       pool_put (sm->static_mappings, m);
832     }
833
834   if (!addr_only || (l_addr.as_u32 == e_addr.as_u32))
835     return 0;
836
837   /* Add/delete external address to FIB */
838   pool_foreach (interface, sm->interfaces)
839    {
840      if (nat_interface_is_inside (interface))
841        continue;
842
843      snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, is_add);
844      break;
845   }
846   pool_foreach (interface, sm->output_feature_interfaces)
847    {
848      if (nat_interface_is_inside (interface))
849        continue;
850
851      snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, is_add);
852      break;
853   }
854
855   return 0;
856 }
857
858 int
859 nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
860                                  nat_protocol_t proto,
861                                  nat44_lb_addr_port_t * locals, u8 is_add,
862                                  twice_nat_type_t twice_nat, u8 out2in_only,
863                                  u8 * tag, u32 affinity)
864 {
865   snat_main_t *sm = &snat_main;
866   snat_static_mapping_t *m;
867   clib_bihash_kv_8_8_t kv, value;
868   snat_address_t *a = 0;
869   int i;
870   nat44_lb_addr_port_t *local;
871   snat_main_per_thread_data_t *tsm;
872   snat_session_t *s;
873   uword *bitmap = 0;
874
875   init_nat_k (&kv, e_addr, e_port, 0, proto);
876   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
877     m = 0;
878   else
879     m = pool_elt_at_index (sm->static_mappings, value.value);
880
881   if (is_add)
882     {
883       if (m)
884         return VNET_API_ERROR_VALUE_EXIST;
885
886       if (vec_len (locals) < 2)
887         return VNET_API_ERROR_INVALID_VALUE;
888
889       /* Find external address in allocated addresses and reserve port for
890          address and port pair mapping when dynamic translations enabled */
891       if (!(sm->static_mapping_only || out2in_only))
892         {
893           for (i = 0; i < vec_len (sm->addresses); i++)
894             {
895               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
896                 {
897                   a = sm->addresses + i;
898                   /* External port must be unused */
899                   switch (proto)
900                     {
901 #define _(N, j, n, s) \
902                     case NAT_PROTOCOL_##N: \
903                       if (a->busy_##n##_port_refcounts[e_port]) \
904                         return VNET_API_ERROR_INVALID_VALUE; \
905                       ++a->busy_##n##_port_refcounts[e_port]; \
906                       if (e_port > 1024) \
907                         { \
908                           a->busy_##n##_ports++; \
909                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
910                         } \
911                       break;
912                       foreach_nat_protocol
913 #undef _
914                         default : nat_elog_info (sm, "unknown protocol");
915                       return VNET_API_ERROR_INVALID_VALUE_2;
916                     }
917                   break;
918                 }
919             }
920           /* External address must be allocated */
921           if (!a)
922             return VNET_API_ERROR_NO_SUCH_ENTRY;
923         }
924
925       pool_get (sm->static_mappings, m);
926       clib_memset (m, 0, sizeof (*m));
927       m->tag = vec_dup (tag);
928       m->external_addr = e_addr;
929       m->external_port = e_port;
930       m->proto = proto;
931       m->twice_nat = twice_nat;
932       m->flags |= NAT_STATIC_MAPPING_FLAG_LB;
933       if (out2in_only)
934         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
935       m->affinity = affinity;
936
937       if (affinity)
938         m->affinity_per_service_list_head_index =
939           nat_affinity_get_per_service_list_head_index ();
940       else
941         m->affinity_per_service_list_head_index = ~0;
942
943       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
944                    m - sm->static_mappings);
945       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1))
946         {
947           nat_elog_err (sm, "static_mapping_by_external key add failed");
948           return VNET_API_ERROR_UNSPECIFIED;
949         }
950
951       for (i = 0; i < vec_len (locals); i++)
952         {
953           locals[i].fib_index =
954             fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
955                                                locals[i].vrf_id,
956                                                sm->fib_src_low);
957           if (!out2in_only)
958             {
959               init_nat_kv (&kv, locals[i].addr, locals[i].port,
960                            locals[i].fib_index, m->proto, 0,
961                            m - sm->static_mappings);
962               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
963             }
964           locals[i].prefix = (i == 0) ? locals[i].probability :
965             (locals[i - 1].prefix + locals[i].probability);
966           pool_get (m->locals, local);
967           *local = locals[i];
968           if (sm->num_workers > 1)
969             {
970               ip4_header_t ip = {
971                 .src_address = locals[i].addr,
972               };
973               bitmap = clib_bitmap_set (
974                 bitmap, sm->worker_in2out_cb (0, &ip, m->fib_index, 0), 1);
975             }
976         }
977
978       /* Assign workers */
979       if (sm->num_workers > 1)
980         {
981           clib_bitmap_foreach (i, bitmap)
982              {
983                vec_add1(m->workers, i);
984             }
985         }
986     }
987   else
988     {
989       if (!m)
990         return VNET_API_ERROR_NO_SUCH_ENTRY;
991
992       if (!is_lb_static_mapping (m))
993         return VNET_API_ERROR_INVALID_VALUE;
994
995       /* Free external address port */
996       if (!(sm->static_mapping_only || out2in_only))
997         {
998           for (i = 0; i < vec_len (sm->addresses); i++)
999             {
1000               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1001                 {
1002                   a = sm->addresses + i;
1003                   switch (proto)
1004                     {
1005 #define _(N, j, n, s) \
1006                     case NAT_PROTOCOL_##N: \
1007                       --a->busy_##n##_port_refcounts[e_port]; \
1008                       if (e_port > 1024) \
1009                         { \
1010                           a->busy_##n##_ports--; \
1011                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1012                         } \
1013                       break;
1014                       foreach_nat_protocol
1015 #undef _
1016                         default : nat_elog_info (sm, "unknown protocol");
1017                       return VNET_API_ERROR_INVALID_VALUE_2;
1018                     }
1019                   break;
1020                 }
1021             }
1022         }
1023
1024       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
1025       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0))
1026         {
1027           nat_elog_err (sm, "static_mapping_by_external key del failed");
1028           return VNET_API_ERROR_UNSPECIFIED;
1029         }
1030
1031       pool_foreach (local, m->locals)
1032       {
1033           fib_table_unlock (local->fib_index, FIB_PROTOCOL_IP4,
1034                             sm->fib_src_low);
1035           if (!out2in_only)
1036             {
1037               init_nat_k (&kv, local->addr, local->port, local->fib_index,
1038                           m->proto);
1039               if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv,
1040                                            0))
1041                 {
1042                   nat_elog_err (sm, "static_mapping_by_local key del failed");
1043                   return VNET_API_ERROR_UNSPECIFIED;
1044                 }
1045             }
1046
1047           if (sm->num_workers > 1)
1048             {
1049               ip4_header_t ip = {
1050                 .src_address = local->addr,
1051               };
1052               tsm = vec_elt_at_index (
1053                 sm->per_thread_data,
1054                 sm->worker_in2out_cb (0, &ip, m->fib_index, 0));
1055             }
1056           else
1057             tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1058
1059           /* Delete sessions */
1060           pool_foreach (s, tsm->sessions)
1061             {
1062               if (!(is_lb_session (s)))
1063                 continue;
1064
1065               if ((s->in2out.addr.as_u32 != local->addr.as_u32) ||
1066                   s->in2out.port != local->port)
1067                 continue;
1068
1069               nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1070               nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1071             }
1072       }
1073       if (m->affinity)
1074         nat_affinity_flush_service (m->affinity_per_service_list_head_index);
1075       pool_free (m->locals);
1076       vec_free (m->tag);
1077       vec_free (m->workers);
1078
1079       pool_put (sm->static_mappings, m);
1080     }
1081
1082   return 0;
1083 }
1084
1085 int
1086 nat44_lb_static_mapping_add_del_local (ip4_address_t e_addr, u16 e_port,
1087                                        ip4_address_t l_addr, u16 l_port,
1088                                        nat_protocol_t proto, u32 vrf_id,
1089                                        u8 probability, u8 is_add)
1090 {
1091   snat_main_t *sm = &snat_main;
1092   snat_static_mapping_t *m = 0;
1093   clib_bihash_kv_8_8_t kv, value;
1094   nat44_lb_addr_port_t *local, *prev_local, *match_local = 0;
1095   snat_main_per_thread_data_t *tsm;
1096   snat_session_t *s;
1097   u32 *locals = 0;
1098   uword *bitmap = 0;
1099   int i;
1100
1101   init_nat_k (&kv, e_addr, e_port, 0, proto);
1102   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1103     m = pool_elt_at_index (sm->static_mappings, value.value);
1104
1105   if (!m)
1106     return VNET_API_ERROR_NO_SUCH_ENTRY;
1107
1108   if (!is_lb_static_mapping (m))
1109     return VNET_API_ERROR_INVALID_VALUE;
1110
1111   pool_foreach (local, m->locals)
1112    {
1113     if ((local->addr.as_u32 == l_addr.as_u32) && (local->port == l_port) &&
1114         (local->vrf_id == vrf_id))
1115       {
1116         match_local = local;
1117         break;
1118       }
1119   }
1120
1121   if (is_add)
1122     {
1123       if (match_local)
1124         return VNET_API_ERROR_VALUE_EXIST;
1125
1126       pool_get (m->locals, local);
1127       clib_memset (local, 0, sizeof (*local));
1128       local->addr.as_u32 = l_addr.as_u32;
1129       local->port = l_port;
1130       local->probability = probability;
1131       local->vrf_id = vrf_id;
1132       local->fib_index =
1133         fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1134                                            sm->fib_src_low);
1135
1136       if (!is_out2in_only_static_mapping (m))
1137         {
1138           init_nat_kv (&kv, l_addr, l_port, local->fib_index, proto, 0,
1139                        m - sm->static_mappings);
1140           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1))
1141             nat_elog_err (sm, "static_mapping_by_local key add failed");
1142         }
1143     }
1144   else
1145     {
1146       if (!match_local)
1147         return VNET_API_ERROR_NO_SUCH_ENTRY;
1148
1149       if (pool_elts (m->locals) < 3)
1150         return VNET_API_ERROR_UNSPECIFIED;
1151
1152       fib_table_unlock (match_local->fib_index, FIB_PROTOCOL_IP4,
1153                         sm->fib_src_low);
1154
1155       if (!is_out2in_only_static_mapping (m))
1156         {
1157           init_nat_k (&kv, l_addr, l_port, match_local->fib_index, proto);
1158           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0))
1159             nat_elog_err (sm, "static_mapping_by_local key del failed");
1160         }
1161
1162       if (sm->num_workers > 1)
1163         {
1164           ip4_header_t ip = {
1165             .src_address = local->addr,
1166           };
1167           tsm =
1168             vec_elt_at_index (sm->per_thread_data,
1169                               sm->worker_in2out_cb (0, &ip, m->fib_index, 0));
1170         }
1171       else
1172         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1173
1174       /* Delete sessions */
1175       pool_foreach (s, tsm->sessions) {
1176         if (!(is_lb_session (s)))
1177           continue;
1178
1179         if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) ||
1180             s->in2out.port != match_local->port)
1181           continue;
1182
1183         nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1184         nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1185       }
1186
1187       pool_put (m->locals, match_local);
1188     }
1189
1190   vec_free (m->workers);
1191
1192   pool_foreach (local, m->locals)
1193    {
1194     vec_add1 (locals, local - m->locals);
1195     if (sm->num_workers > 1)
1196       {
1197         ip4_header_t ip;
1198         ip.src_address.as_u32 = local->addr.as_u32,
1199         bitmap = clib_bitmap_set (
1200           bitmap, sm->worker_in2out_cb (0, &ip, local->fib_index, 0), 1);
1201       }
1202   }
1203
1204   ASSERT (vec_len (locals) > 1);
1205
1206   local = pool_elt_at_index (m->locals, locals[0]);
1207   local->prefix = local->probability;
1208   for (i = 1; i < vec_len (locals); i++)
1209     {
1210       local = pool_elt_at_index (m->locals, locals[i]);
1211       prev_local = pool_elt_at_index (m->locals, locals[i - 1]);
1212       local->prefix = local->probability + prev_local->prefix;
1213     }
1214
1215   /* Assign workers */
1216   if (sm->num_workers > 1)
1217     {
1218       clib_bitmap_foreach (i, bitmap)  { vec_add1(m->workers, i); }
1219     }
1220
1221   return 0;
1222 }
1223
1224 int
1225 snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm,
1226                   u8 twice_nat)
1227 {
1228   snat_address_t *a = 0;
1229   snat_session_t *ses;
1230   u32 *ses_to_be_removed = 0, *ses_index;
1231   snat_main_per_thread_data_t *tsm;
1232   snat_static_mapping_t *m;
1233   snat_interface_t *interface;
1234   int i;
1235   snat_address_t *addresses =
1236     twice_nat ? sm->twice_nat_addresses : sm->addresses;
1237
1238   /* Find SNAT address */
1239   for (i = 0; i < vec_len (addresses); i++)
1240     {
1241       if (addresses[i].addr.as_u32 == addr.as_u32)
1242         {
1243           a = addresses + i;
1244           break;
1245         }
1246     }
1247   if (!a)
1248     {
1249       nat_log_err ("no such address");
1250       return VNET_API_ERROR_NO_SUCH_ENTRY;
1251     }
1252
1253   if (delete_sm)
1254     {
1255       ip4_address_t pool_addr = { 0 };
1256       pool_foreach (m, sm->static_mappings)
1257        {
1258           if (m->external_addr.as_u32 == addr.as_u32)
1259             (void) snat_add_static_mapping (m->local_addr, m->external_addr,
1260                                             m->local_port, m->external_port,
1261                                             m->vrf_id,
1262                                             is_addr_only_static_mapping(m), ~0,
1263                                             m->proto, 0 /* is_add */,
1264                                             m->twice_nat,
1265                                             is_out2in_only_static_mapping(m),
1266                                             m->tag,
1267                                             is_identity_static_mapping(m),
1268                                             pool_addr, 0);
1269       }
1270     }
1271   else
1272     {
1273       /* Check if address is used in some static mapping */
1274       if (is_snat_address_used_in_static_mapping (sm, addr))
1275         {
1276           nat_log_err ("address used in static mapping");
1277           return VNET_API_ERROR_UNSPECIFIED;
1278         }
1279     }
1280
1281   if (a->fib_index != ~0)
1282     fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
1283
1284   /* Delete sessions using address */
1285   if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
1286     {
1287       vec_foreach (tsm, sm->per_thread_data)
1288       {
1289         pool_foreach (ses, tsm->sessions)  {
1290           if (ses->out2in.addr.as_u32 == addr.as_u32)
1291             {
1292               nat_free_session_data (sm, ses, tsm - sm->per_thread_data, 0);
1293               vec_add1 (ses_to_be_removed, ses - tsm->sessions);
1294             }
1295         }
1296
1297             vec_foreach (ses_index, ses_to_be_removed)
1298             {
1299               ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1300               nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1);
1301             }
1302
1303         vec_free (ses_to_be_removed);
1304       }
1305     }
1306
1307 #define _(N, i, n, s) \
1308   vec_free (a->busy_##n##_ports_per_thread);
1309   foreach_nat_protocol
1310 #undef _
1311
1312     if (twice_nat)
1313   {
1314     vec_del1 (sm->twice_nat_addresses, i);
1315     return 0;
1316   }
1317   else vec_del1 (sm->addresses, i);
1318
1319   /* Delete external address from FIB */
1320   pool_foreach (interface, sm->interfaces)
1321     {
1322       if (nat_interface_is_inside (interface))
1323         continue;
1324
1325       snat_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
1326       break;
1327     }
1328   pool_foreach (interface, sm->output_feature_interfaces)
1329    {
1330      if (nat_interface_is_inside (interface))
1331        continue;
1332
1333      snat_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
1334      break;
1335   }
1336
1337   return 0;
1338 }
1339
1340 void
1341 expire_per_vrf_sessions (u32 fib_index)
1342 {
1343   per_vrf_sessions_t *per_vrf_sessions;
1344   snat_main_per_thread_data_t *tsm;
1345   snat_main_t *sm = &snat_main;
1346
1347   vec_foreach (tsm, sm->per_thread_data)
1348     {
1349       vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
1350         {
1351           if ((per_vrf_sessions->rx_fib_index == fib_index) ||
1352               (per_vrf_sessions->tx_fib_index == fib_index))
1353             {
1354               per_vrf_sessions->expired = 1;
1355             }
1356         }
1357     }
1358 }
1359
1360 void
1361 update_per_vrf_sessions_vec (u32 fib_index, int is_del)
1362 {
1363   snat_main_t *sm = &snat_main;
1364   nat_fib_t *fib;
1365
1366   // we don't care if it is outside/inside fib
1367   // we just care about their ref_count
1368   // if it reaches 0 sessions should expire
1369   // because the fib isn't valid for NAT anymore
1370
1371   vec_foreach (fib, sm->fibs)
1372   {
1373     if (fib->fib_index == fib_index)
1374       {
1375         if (is_del)
1376           {
1377             fib->ref_count--;
1378             if (!fib->ref_count)
1379               {
1380                 vec_del1 (sm->fibs, fib - sm->fibs);
1381                 expire_per_vrf_sessions (fib_index);
1382               }
1383             return;
1384           }
1385         else
1386           fib->ref_count++;
1387       }
1388   }
1389   if (!is_del)
1390     {
1391       vec_add2 (sm->fibs, fib, 1);
1392       fib->ref_count = 1;
1393       fib->fib_index = fib_index;
1394     }
1395 }
1396
1397 int
1398 snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
1399 {
1400   snat_main_t *sm = &snat_main;
1401   snat_interface_t *i;
1402   const char *feature_name, *del_feature_name;
1403   snat_address_t *ap;
1404   snat_static_mapping_t *m;
1405   nat_outside_fib_t *outside_fib;
1406   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1407                                                        sw_if_index);
1408
1409   if (!sm->enabled)
1410     {
1411       nat_log_err ("nat44 is disabled");
1412       return VNET_API_ERROR_UNSUPPORTED;
1413     }
1414
1415   pool_foreach (i, sm->output_feature_interfaces)
1416    {
1417     if (i->sw_if_index == sw_if_index)
1418       {
1419         nat_log_err ("error interface already configured");
1420         return VNET_API_ERROR_VALUE_EXIST;
1421       }
1422   }
1423
1424   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
1425     feature_name = is_inside ? "nat44-in2out-fast" : "nat44-out2in-fast";
1426   else
1427     {
1428       if (sm->num_workers > 1)
1429         feature_name =
1430           is_inside ? "nat44-in2out-worker-handoff" :
1431           "nat44-out2in-worker-handoff";
1432       else
1433         feature_name = is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1434     }
1435
1436   ASSERT (sm->frame_queue_nelts > 0);
1437
1438   if (sm->fq_in2out_index == ~0 && sm->num_workers > 1)
1439     sm->fq_in2out_index = vlib_frame_queue_main_init (sm->in2out_node_index,
1440                                                       sm->frame_queue_nelts);
1441
1442   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
1443     sm->fq_out2in_index = vlib_frame_queue_main_init (sm->out2in_node_index,
1444                                                       sm->frame_queue_nelts);
1445
1446   update_per_vrf_sessions_vec (fib_index, is_del);
1447
1448   if (!is_inside)
1449     {
1450       vec_foreach (outside_fib, sm->outside_fibs)
1451         {
1452           if (outside_fib->fib_index == fib_index)
1453             {
1454               if (is_del)
1455                 {
1456                   outside_fib->refcount--;
1457                   if (!outside_fib->refcount)
1458                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1459                 }
1460               else
1461                 outside_fib->refcount++;
1462               goto feature_set;
1463             }
1464         }
1465       if (!is_del)
1466         {
1467           vec_add2 (sm->outside_fibs, outside_fib, 1);
1468           outside_fib->refcount = 1;
1469           outside_fib->fib_index = fib_index;
1470         }
1471     }
1472
1473 feature_set:
1474   pool_foreach (i, sm->interfaces)
1475    {
1476     if (i->sw_if_index == sw_if_index)
1477       {
1478         if (is_del)
1479           {
1480             if (nat_interface_is_inside(i) && nat_interface_is_outside(i))
1481               {
1482                 if (is_inside)
1483                   i->flags &= ~NAT_INTERFACE_FLAG_IS_INSIDE;
1484                 else
1485                   i->flags &= ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
1486
1487                 if (sm->num_workers > 1)
1488                   {
1489                     del_feature_name = "nat44-handoff-classify";
1490                     feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
1491                                                  "nat44-out2in-worker-handoff";
1492                   }
1493                 else
1494                   {
1495                     del_feature_name = "nat44-ed-classify";
1496                     feature_name =
1497                       !is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1498                   }
1499
1500                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1501                 if (rv)
1502                   return rv;
1503                 vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1504                                              sw_if_index, 0, 0, 0);
1505                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
1506                                              sw_if_index, 1, 0, 0);
1507               }
1508             else
1509               {
1510                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1511                 if (rv)
1512                   return rv;
1513                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
1514                                              sw_if_index, 0, 0, 0);
1515                 pool_put (sm->interfaces, i);
1516               }
1517           }
1518         else
1519           {
1520             if ((nat_interface_is_inside (i) && is_inside) ||
1521                 (nat_interface_is_outside (i) && !is_inside))
1522               return 0;
1523
1524             if (sm->num_workers > 1)
1525               {
1526                 del_feature_name = !is_inside ? "nat44-in2out-worker-handoff" :
1527                                                 "nat44-out2in-worker-handoff";
1528                 feature_name = "nat44-handoff-classify";
1529               }
1530             else
1531               {
1532                 del_feature_name =
1533                   !is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1534
1535                 feature_name = "nat44-ed-classify";
1536               }
1537
1538             int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1539             if (rv)
1540               return rv;
1541             vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1542                                          sw_if_index, 0, 0, 0);
1543             vnet_feature_enable_disable ("ip4-unicast", feature_name,
1544                                          sw_if_index, 1, 0, 0);
1545             goto set_flags;
1546           }
1547
1548         goto fib;
1549       }
1550   }
1551
1552   if (is_del)
1553     {
1554       nat_log_err ("error interface couldn't be found");
1555       return VNET_API_ERROR_NO_SUCH_ENTRY;
1556     }
1557
1558   pool_get (sm->interfaces, i);
1559   i->sw_if_index = sw_if_index;
1560   i->flags = 0;
1561   nat_validate_interface_counters (sm, sw_if_index);
1562
1563   vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1, 0,
1564                                0);
1565
1566   int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1567   if (rv)
1568     return rv;
1569
1570 set_flags:
1571   if (is_inside)
1572     {
1573       i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
1574       return 0;
1575     }
1576   else
1577     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
1578
1579   /* Add/delete external addresses to FIB */
1580 fib:
1581   vec_foreach (ap, sm->addresses)
1582     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
1583
1584   pool_foreach (m, sm->static_mappings)
1585    {
1586     if (!(is_addr_only_static_mapping(m)) || (m->local_addr.as_u32 == m->external_addr.as_u32))
1587       continue;
1588
1589     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
1590   }
1591
1592   return 0;
1593 }
1594
1595 int
1596 snat_interface_add_del_output_feature (u32 sw_if_index,
1597                                        u8 is_inside, int is_del)
1598 {
1599   snat_main_t *sm = &snat_main;
1600   snat_interface_t *i;
1601   snat_address_t *ap;
1602   snat_static_mapping_t *m;
1603   nat_outside_fib_t *outside_fib;
1604   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1605                                                        sw_if_index);
1606
1607   if (!sm->enabled)
1608     {
1609       nat_log_err ("nat44 is disabled");
1610       return VNET_API_ERROR_UNSUPPORTED;
1611     }
1612
1613   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
1614     {
1615       nat_log_err ("error unsupported");
1616       return VNET_API_ERROR_UNSUPPORTED;
1617     }
1618
1619   pool_foreach (i, sm->interfaces)
1620    {
1621     if (i->sw_if_index == sw_if_index)
1622       {
1623         nat_log_err ("error interface already configured");
1624         return VNET_API_ERROR_VALUE_EXIST;
1625       }
1626   }
1627
1628   update_per_vrf_sessions_vec (fib_index, is_del);
1629
1630   if (!is_inside)
1631     {
1632       vec_foreach (outside_fib, sm->outside_fibs)
1633         {
1634           if (outside_fib->fib_index == fib_index)
1635             {
1636               if (is_del)
1637                 {
1638                   outside_fib->refcount--;
1639                   if (!outside_fib->refcount)
1640                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1641                 }
1642               else
1643                 outside_fib->refcount++;
1644               goto feature_set;
1645             }
1646         }
1647       if (!is_del)
1648         {
1649           vec_add2 (sm->outside_fibs, outside_fib, 1);
1650           outside_fib->refcount = 1;
1651           outside_fib->fib_index = fib_index;
1652         }
1653     }
1654
1655 feature_set:
1656   if (is_inside)
1657     {
1658           int rv =
1659             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
1660           if (rv)
1661             return rv;
1662           rv =
1663             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
1664                                                             !is_del);
1665           if (rv)
1666             return rv;
1667       goto fq;
1668     }
1669
1670   if (sm->num_workers > 1)
1671     {
1672       int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
1673       if (rv)
1674         return rv;
1675       rv =
1676         ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del);
1677       if (rv)
1678         return rv;
1679       vnet_feature_enable_disable ("ip4-unicast",
1680                                    "nat44-out2in-worker-handoff",
1681                                    sw_if_index, !is_del, 0, 0);
1682       vnet_feature_enable_disable ("ip4-output",
1683                                    "nat44-in2out-output-worker-handoff",
1684                                    sw_if_index, !is_del, 0, 0);
1685     }
1686   else
1687     {
1688           int rv =
1689             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
1690           if (rv)
1691             return rv;
1692           rv =
1693             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
1694                                                             !is_del);
1695           if (rv)
1696             return rv;
1697           vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in",
1698                                        sw_if_index, !is_del, 0, 0);
1699           vnet_feature_enable_disable ("ip4-output", "nat-pre-in2out-output",
1700                                        sw_if_index, !is_del, 0, 0);
1701     }
1702
1703 fq:
1704   if (sm->fq_in2out_output_index == ~0 && sm->num_workers > 1)
1705     sm->fq_in2out_output_index =
1706       vlib_frame_queue_main_init (sm->in2out_output_node_index, 0);
1707
1708   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
1709     sm->fq_out2in_index =
1710       vlib_frame_queue_main_init (sm->out2in_node_index, 0);
1711
1712   pool_foreach (i, sm->output_feature_interfaces)
1713    {
1714     if (i->sw_if_index == sw_if_index)
1715       {
1716         if (is_del)
1717           pool_put (sm->output_feature_interfaces, i);
1718         else
1719           return VNET_API_ERROR_VALUE_EXIST;
1720
1721         goto fib;
1722       }
1723   }
1724
1725   if (is_del)
1726     {
1727       nat_log_err ("error interface couldn't be found");
1728       return VNET_API_ERROR_NO_SUCH_ENTRY;
1729     }
1730
1731   pool_get (sm->output_feature_interfaces, i);
1732   i->sw_if_index = sw_if_index;
1733   i->flags = 0;
1734   nat_validate_interface_counters (sm, sw_if_index);
1735   if (is_inside)
1736     i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
1737   else
1738     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
1739
1740   /* Add/delete external addresses to FIB */
1741 fib:
1742   if (is_inside)
1743     return 0;
1744
1745   vec_foreach (ap, sm->addresses)
1746     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
1747
1748   pool_foreach (m, sm->static_mappings)
1749    {
1750     if (!((is_addr_only_static_mapping(m)))  || (m->local_addr.as_u32 == m->external_addr.as_u32))
1751       continue;
1752
1753     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
1754   }
1755
1756   return 0;
1757 }
1758
1759 int
1760 snat_set_workers (uword * bitmap)
1761 {
1762   snat_main_t *sm = &snat_main;
1763   int i, j = 0;
1764
1765   if (sm->num_workers < 2)
1766     return VNET_API_ERROR_FEATURE_DISABLED;
1767
1768   if (clib_bitmap_last_set (bitmap) >= sm->num_workers)
1769     return VNET_API_ERROR_INVALID_WORKER;
1770
1771   vec_free (sm->workers);
1772   clib_bitmap_foreach (i, bitmap)
1773     {
1774       vec_add1(sm->workers, i);
1775       sm->per_thread_data[sm->first_worker_index + i].snat_thread_index = j;
1776       sm->per_thread_data[sm->first_worker_index + i].thread_index = i;
1777       j++;
1778     }
1779
1780   sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
1781
1782   return 0;
1783 }
1784
1785 int
1786 nat44_ed_set_frame_queue_nelts (u32 frame_queue_nelts)
1787 {
1788   fail_if_enabled ();
1789   snat_main_t *sm = &snat_main;
1790   sm->frame_queue_nelts = frame_queue_nelts;
1791   return 0;
1792 }
1793
1794 static void
1795 snat_update_outside_fib (ip4_main_t * im, uword opaque,
1796                          u32 sw_if_index, u32 new_fib_index,
1797                          u32 old_fib_index)
1798 {
1799   snat_main_t *sm = &snat_main;
1800   nat_outside_fib_t *outside_fib;
1801   snat_interface_t *i;
1802   u8 is_add = 1;
1803   u8 match = 0;
1804
1805   if (!sm->enabled || (new_fib_index == old_fib_index)
1806       || (!vec_len (sm->outside_fibs)))
1807     {
1808       return;
1809     }
1810
1811   pool_foreach (i, sm->interfaces)
1812      {
1813       if (i->sw_if_index == sw_if_index)
1814         {
1815           if (!(nat_interface_is_outside (i)))
1816             return;
1817           match = 1;
1818         }
1819     }
1820
1821   pool_foreach (i, sm->output_feature_interfaces)
1822      {
1823       if (i->sw_if_index == sw_if_index)
1824         {
1825           if (!(nat_interface_is_outside (i)))
1826             return;
1827           match = 1;
1828         }
1829     }
1830
1831   if (!match)
1832     return;
1833
1834   vec_foreach (outside_fib, sm->outside_fibs)
1835   {
1836     if (outside_fib->fib_index == old_fib_index)
1837       {
1838         outside_fib->refcount--;
1839         if (!outside_fib->refcount)
1840           vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1841         break;
1842       }
1843   }
1844
1845   vec_foreach (outside_fib, sm->outside_fibs)
1846   {
1847     if (outside_fib->fib_index == new_fib_index)
1848       {
1849         outside_fib->refcount++;
1850         is_add = 0;
1851         break;
1852       }
1853   }
1854
1855   if (is_add)
1856     {
1857       vec_add2 (sm->outside_fibs, outside_fib, 1);
1858       outside_fib->refcount = 1;
1859       outside_fib->fib_index = new_fib_index;
1860     }
1861 }
1862
1863 static void
1864 snat_update_outside_fib (ip4_main_t * im, uword opaque,
1865                          u32 sw_if_index, u32 new_fib_index,
1866                          u32 old_fib_index);
1867
1868 static void
1869 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
1870                                        uword opaque,
1871                                        u32 sw_if_index,
1872                                        ip4_address_t * address,
1873                                        u32 address_length,
1874                                        u32 if_address_index, u32 is_delete);
1875
1876 static void
1877 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
1878                                  uword opaque,
1879                                  u32 sw_if_index,
1880                                  ip4_address_t * address,
1881                                  u32 address_length,
1882                                  u32 if_address_index, u32 is_delete);
1883
1884 void
1885 test_key_calc_split ()
1886 {
1887   ip4_address_t l_addr;
1888   l_addr.as_u8[0] = 1;
1889   l_addr.as_u8[1] = 1;
1890   l_addr.as_u8[2] = 1;
1891   l_addr.as_u8[3] = 1;
1892   ip4_address_t r_addr;
1893   r_addr.as_u8[0] = 2;
1894   r_addr.as_u8[1] = 2;
1895   r_addr.as_u8[2] = 2;
1896   r_addr.as_u8[3] = 2;
1897   u16 l_port = 40001;
1898   u16 r_port = 40301;
1899   u8 proto = 9;
1900   u32 fib_index = 9000001;
1901   u32 thread_index = 3000000001;
1902   u32 session_index = 3000000221;
1903   clib_bihash_kv_16_8_t kv;
1904   init_ed_kv (&kv, l_addr, l_port, r_addr, r_port, fib_index, proto,
1905               thread_index, session_index);
1906   ip4_address_t l_addr2;
1907   ip4_address_t r_addr2;
1908   clib_memset (&l_addr2, 0, sizeof (l_addr2));
1909   clib_memset (&r_addr2, 0, sizeof (r_addr2));
1910   u16 l_port2 = 0;
1911   u16 r_port2 = 0;
1912   u8 proto2 = 0;
1913   u32 fib_index2 = 0;
1914   split_ed_kv (&kv, &l_addr2, &r_addr2, &proto2, &fib_index2, &l_port2,
1915                &r_port2);
1916   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
1917   ASSERT (r_addr.as_u32 == r_addr2.as_u32);
1918   ASSERT (l_port == l_port2);
1919   ASSERT (r_port == r_port2);
1920   ASSERT (proto == proto2);
1921   ASSERT (fib_index == fib_index2);
1922   ASSERT (thread_index == ed_value_get_thread_index (&kv));
1923   ASSERT (session_index == ed_value_get_session_index (&kv));
1924
1925   fib_index = 7001;
1926   proto = 5;
1927   nat_protocol_t proto3 = ~0;
1928   u64 key = calc_nat_key (l_addr, l_port, fib_index, proto);
1929   split_nat_key (key, &l_addr2, &l_port2, &fib_index2, &proto3);
1930   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
1931   ASSERT (l_port == l_port2);
1932   ASSERT (proto == proto3);
1933   ASSERT (fib_index == fib_index2);
1934 }
1935
1936 static clib_error_t *
1937 nat_ip_table_add_del (vnet_main_t * vnm, u32 table_id, u32 is_add)
1938 {
1939   u32 fib_index;
1940
1941       // TODO: consider removing all NAT interfaces
1942       if (!is_add)
1943         {
1944           fib_index = ip4_fib_index_from_table_id (table_id);
1945           if (fib_index != ~0)
1946             expire_per_vrf_sessions (fib_index);
1947         }
1948   return 0;
1949 }
1950
1951 VNET_IP_TABLE_ADD_DEL_FUNCTION (nat_ip_table_add_del);
1952
1953 void
1954 nat44_set_node_indexes (snat_main_t * sm, vlib_main_t * vm)
1955 {
1956   vlib_node_t *node;
1957
1958   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
1959   sm->out2in_node_index = node->index;
1960
1961   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
1962   sm->in2out_node_index = node->index;
1963
1964   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-output");
1965   sm->in2out_output_node_index = node->index;
1966 }
1967
/*
 * Make sure simple counter c has room for index i and reset that entry to
 * zero.  c is the counter object itself (not a pointer).
 *
 * The expansion deliberately has no trailing semicolon: the caller's own
 * ';' terminates the do/while, so the macro behaves as a single statement
 * even as the body of an unbraced if/else.
 */
#define nat_validate_simple_counter(c, i)                                     \
  do                                                                          \
    {                                                                         \
      vlib_validate_simple_counter (&(c), (i));                               \
      vlib_zero_simple_counter (&(c), (i));                                   \
    }                                                                         \
  while (0)

/*
 * Name simple counter c (n is its name, sn its stat segment name) and
 * validate/zero its entry at index 0.  Single-statement macro, see above.
 */
#define nat_init_simple_counter(c, n, sn)                                     \
  do                                                                          \
    {                                                                         \
      (c).name = (n);                                                         \
      (c).stat_segment_name = (sn);                                           \
      nat_validate_simple_counter (c, 0);                                     \
    }                                                                         \
  while (0)
1984
1985 static_always_inline void
1986 nat_validate_interface_counters (snat_main_t *sm, u32 sw_if_index)
1987 {
1988 #define _(x)                                                                  \
1989   nat_validate_simple_counter (sm->counters.fastpath.in2out.x, sw_if_index);  \
1990   nat_validate_simple_counter (sm->counters.fastpath.out2in.x, sw_if_index);  \
1991   nat_validate_simple_counter (sm->counters.slowpath.in2out.x, sw_if_index);  \
1992   nat_validate_simple_counter (sm->counters.slowpath.out2in.x, sw_if_index);
1993   foreach_nat_counter;
1994 #undef _
1995   nat_validate_simple_counter (sm->counters.hairpinning, sw_if_index);
1996 }
1997
1998 static clib_error_t *
1999 nat_init (vlib_main_t * vm)
2000 {
2001   snat_main_t *sm = &snat_main;
2002   vlib_thread_main_t *tm = vlib_get_thread_main ();
2003   vlib_thread_registration_t *tr;
2004   ip4_add_del_interface_address_callback_t cbi = { 0 };
2005   ip4_table_bind_callback_t cbt = { 0 };
2006   u32 i, num_threads = 0;
2007   uword *p, *bitmap = 0;
2008
2009   clib_memset (sm, 0, sizeof (*sm));
2010
2011   // required
2012   sm->vnet_main = vnet_get_main ();
2013   // convenience
2014   sm->ip4_main = &ip4_main;
2015   sm->api_main = vlibapi_get_main ();
2016   sm->ip4_lookup_main = &ip4_main.lookup_main;
2017
2018   // frame queue indices used for handoff
2019   sm->fq_out2in_index = ~0;
2020   sm->fq_in2out_index = ~0;
2021   sm->fq_in2out_output_index = ~0;
2022
2023   sm->log_level = NAT_LOG_ERROR;
2024
2025   nat44_set_node_indexes (sm, vm);
2026   sm->log_class = vlib_log_register_class ("nat", 0);
2027   nat_ipfix_logging_init (vm);
2028
2029   nat_init_simple_counter (sm->total_sessions, "total-sessions",
2030                            "/nat44-ed/total-sessions");
2031
2032 #define _(x)                                                                  \
2033   nat_init_simple_counter (sm->counters.fastpath.in2out.x, #x,                \
2034                            "/nat44-ed/in2out/fastpath/" #x);                  \
2035   nat_init_simple_counter (sm->counters.fastpath.out2in.x, #x,                \
2036                            "/nat44-ed/out2in/fastpath/" #x);                  \
2037   nat_init_simple_counter (sm->counters.slowpath.in2out.x, #x,                \
2038                            "/nat44-ed/in2out/slowpath/" #x);                  \
2039   nat_init_simple_counter (sm->counters.slowpath.out2in.x, #x,                \
2040                            "/nat44-ed/out2in/slowpath/" #x);
2041   foreach_nat_counter;
2042 #undef _
2043   nat_init_simple_counter (sm->counters.hairpinning, "hairpinning",
2044                            "/nat44-ed/hairpinning");
2045
2046   p = hash_get_mem (tm->thread_registrations_by_name, "workers");
2047   if (p)
2048     {
2049       tr = (vlib_thread_registration_t *) p[0];
2050       if (tr)
2051         {
2052           sm->num_workers = tr->count;
2053           sm->first_worker_index = tr->first_index;
2054         }
2055     }
2056   num_threads = tm->n_vlib_mains - 1;
2057   sm->port_per_thread = 0xffff - 1024;
2058   vec_validate (sm->per_thread_data, num_threads);
2059
2060   /* Use all available workers by default */
2061   if (sm->num_workers > 1)
2062     {
2063
2064       for (i = 0; i < sm->num_workers; i++)
2065         bitmap = clib_bitmap_set (bitmap, i, 1);
2066       snat_set_workers (bitmap);
2067       clib_bitmap_free (bitmap);
2068     }
2069   else
2070     sm->per_thread_data[0].snat_thread_index = 0;
2071
2072   /* callbacks to call when interface address changes. */
2073   cbi.function = snat_ip4_add_del_interface_address_cb;
2074   vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
2075   cbi.function = nat_ip4_add_del_addr_only_sm_cb;
2076   vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
2077
2078   /* callbacks to call when interface to table biding changes */
2079   cbt.function = snat_update_outside_fib;
2080   vec_add1 (sm->ip4_main->table_bind_callbacks, cbt);
2081
2082   sm->fib_src_low =
2083     fib_source_allocate ("nat-low", FIB_SOURCE_PRIORITY_LOW,
2084                          FIB_SOURCE_BH_SIMPLE);
2085   sm->fib_src_hi =
2086     fib_source_allocate ("nat-hi", FIB_SOURCE_PRIORITY_HI,
2087                          FIB_SOURCE_BH_SIMPLE);
2088
2089   nat_affinity_init (vm);
2090   test_key_calc_split ();
2091
2092   return nat44_api_hookup (vm);
2093 }
2094
2095 VLIB_INIT_FUNCTION (nat_init);
2096
2097 int
2098 nat44_plugin_enable (nat44_config_t c)
2099 {
2100   snat_main_t *sm = &snat_main;
2101
2102   fail_if_enabled ();
2103
2104   // UPDATE based on these appropriate API/CLI
2105   // c.static_mapping_only + c.connection_tracking
2106   //  - supported in NAT EI & NAT ED
2107   // c.out2in_dpo, c.static_mapping_only
2108   //  - supported in NAT EI
2109
2110   if (c.static_mapping_only && !c.connection_tracking)
2111     {
2112       nat_log_err ("unsupported combination of configuration");
2113       return 1;
2114     }
2115
2116   // nat44 feature configuration
2117   sm->static_mapping_only = c.static_mapping_only;
2118   sm->static_mapping_connection_tracking = c.connection_tracking;
2119
2120   sm->forwarding_enabled = 0;
2121   sm->mss_clamping = 0;
2122   sm->pat = (!c.static_mapping_only ||
2123              (c.static_mapping_only && c.connection_tracking));
2124
2125   if (!c.sessions)
2126     c.sessions = 63 * 1024;
2127
2128   sm->max_translations_per_thread = c.sessions;
2129   sm->translation_buckets = nat_calc_bihash_buckets (c.sessions);
2130
2131   // ED only feature
2132   vec_add1 (sm->max_translations_per_fib, sm->max_translations_per_thread);
2133
2134   sm->inside_vrf_id = c.inside_vrf;
2135   sm->inside_fib_index =
2136     fib_table_find_or_create_and_lock
2137     (FIB_PROTOCOL_IP4, c.inside_vrf, sm->fib_src_hi);
2138
2139   sm->outside_vrf_id = c.outside_vrf;
2140   sm->outside_fib_index = fib_table_find_or_create_and_lock (
2141     FIB_PROTOCOL_IP4, c.outside_vrf, sm->fib_src_hi);
2142
2143   sm->worker_in2out_cb = nat44_ed_get_worker_in2out_cb;
2144   sm->worker_out2in_cb = nat44_ed_get_worker_out2in_cb;
2145
2146   nat44_ed_db_init (sm->max_translations_per_thread, sm->translation_buckets);
2147
2148   nat_affinity_enable ();
2149
2150   nat_reset_timeouts (&sm->timeouts);
2151
2152   vlib_zero_simple_counter (&sm->total_sessions, 0);
2153
2154   if (!sm->frame_queue_nelts)
2155     sm->frame_queue_nelts = NAT_FQ_NELTS_DEFAULT;
2156
2157   sm->enabled = 1;
2158   sm->rconfig = c;
2159
2160   return 0;
2161 }
2162
2163 void
2164 nat44_addresses_free (snat_address_t ** addresses)
2165 {
2166   snat_address_t *ap;
2167   vec_foreach (ap, *addresses)
2168     {
2169     #define _(N, i, n, s) \
2170       vec_free (ap->busy_##n##_ports_per_thread);
2171       foreach_nat_protocol
2172     #undef _
2173     }
2174   vec_free (*addresses);
2175   *addresses = 0;
2176 }
2177
2178 int
2179 nat44_plugin_disable ()
2180 {
2181   snat_main_t *sm = &snat_main;
2182   snat_interface_t *i, *vec;
2183   int error = 0;
2184
2185   fail_if_disabled ();
2186
2187   // first unregister all nodes from interfaces
2188   vec = vec_dup (sm->interfaces);
2189   vec_foreach (i, vec)
2190     {
2191       if (nat_interface_is_inside(i))
2192         error = snat_interface_add_del (i->sw_if_index, 1, 1);
2193       if (nat_interface_is_outside(i))
2194         error = snat_interface_add_del (i->sw_if_index, 0, 1);
2195
2196       if (error)
2197         {
2198           nat_log_err ("error occurred while removing interface %u",
2199                        i->sw_if_index);
2200         }
2201     }
2202   vec_free (vec);
2203   sm->interfaces = 0;
2204
2205   vec = vec_dup (sm->output_feature_interfaces);
2206   vec_foreach (i, vec)
2207     {
2208       if (nat_interface_is_inside(i))
2209         error = snat_interface_add_del_output_feature (i->sw_if_index, 1, 1);
2210       if (nat_interface_is_outside(i))
2211         error = snat_interface_add_del_output_feature (i->sw_if_index, 0, 1);
2212
2213       if (error)
2214         {
2215           nat_log_err ("error occurred while removing interface %u",
2216                        i->sw_if_index);
2217         }
2218     }
2219   vec_free (vec);
2220   sm->output_feature_interfaces = 0;
2221
2222   vec_free (sm->max_translations_per_fib);
2223
2224   nat44_ed_db_free ();
2225
2226   nat44_addresses_free (&sm->addresses);
2227   nat44_addresses_free (&sm->twice_nat_addresses);
2228
2229   vec_free (sm->to_resolve);
2230   vec_free (sm->auto_add_sw_if_indices);
2231   vec_free (sm->auto_add_sw_if_indices_twice_nat);
2232
2233   sm->to_resolve = 0;
2234   sm->auto_add_sw_if_indices = 0;
2235   sm->auto_add_sw_if_indices_twice_nat = 0;
2236
2237   sm->forwarding_enabled = 0;
2238
2239   sm->enabled = 0;
2240   clib_memset (&sm->rconfig, 0, sizeof (sm->rconfig));
2241
2242   return 0;
2243 }
2244
2245 void
2246 nat44_ed_forwarding_enable_disable (u8 is_enable)
2247 {
2248   snat_main_per_thread_data_t *tsm;
2249   snat_main_t *sm = &snat_main;
2250   snat_session_t *s;
2251
2252   u32 *ses_to_be_removed = 0, *ses_index;
2253
2254   sm->forwarding_enabled = is_enable != 0;
2255
2256   if (is_enable)
2257     return;
2258
2259   vec_foreach (tsm, sm->per_thread_data)
2260     {
2261       pool_foreach (s, tsm->sessions)
2262         {
2263           if (is_fwd_bypass_session (s))
2264             {
2265               vec_add1 (ses_to_be_removed, s - tsm->sessions);
2266             }
2267         }
2268       vec_foreach (ses_index, ses_to_be_removed)
2269         {
2270           s = pool_elt_at_index (tsm->sessions, ses_index[0]);
2271           nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
2272           nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
2273         }
2274
2275       vec_free (ses_to_be_removed);
2276     }
2277 }
2278
2279 void
2280 snat_free_outside_address_and_port (snat_address_t *addresses,
2281                                     u32 thread_index, ip4_address_t *addr,
2282                                     u16 port, nat_protocol_t protocol)
2283 {
2284   snat_main_t *sm = &snat_main;
2285   snat_address_t *a;
2286   u32 address_index;
2287   u16 port_host_byte_order = clib_net_to_host_u16 (port);
2288
2289   for (address_index = 0; address_index < vec_len (addresses);
2290        address_index++)
2291     {
2292       if (addresses[address_index].addr.as_u32 == addr->as_u32)
2293         break;
2294     }
2295
2296   ASSERT (address_index < vec_len (addresses));
2297
2298   a = addresses + address_index;
2299
2300   switch (protocol)
2301     {
2302 #define _(N, i, n, s) \
2303     case NAT_PROTOCOL_##N: \
2304       ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \
2305       --a->busy_##n##_port_refcounts[port_host_byte_order]; \
2306       a->busy_##n##_ports--; \
2307       a->busy_##n##_ports_per_thread[thread_index]--; \
2308       break;
2309       foreach_nat_protocol
2310 #undef _
2311         default : nat_elog_info (sm, "unknown protocol");
2312       return;
2313     }
2314 }
2315
2316 int
2317 nat_set_outside_address_and_port (snat_address_t *addresses, u32 thread_index,
2318                                   ip4_address_t addr, u16 port,
2319                                   nat_protocol_t protocol)
2320 {
2321   snat_main_t *sm = &snat_main;
2322   snat_address_t *a = 0;
2323   u32 address_index;
2324   u16 port_host_byte_order = clib_net_to_host_u16 (port);
2325
2326   for (address_index = 0; address_index < vec_len (addresses);
2327        address_index++)
2328     {
2329       if (addresses[address_index].addr.as_u32 != addr.as_u32)
2330         continue;
2331
2332       a = addresses + address_index;
2333       switch (protocol)
2334         {
2335 #define _(N, j, n, s) \
2336         case NAT_PROTOCOL_##N: \
2337           if (a->busy_##n##_port_refcounts[port_host_byte_order]) \
2338             return VNET_API_ERROR_INSTANCE_IN_USE; \
2339           ++a->busy_##n##_port_refcounts[port_host_byte_order]; \
2340           a->busy_##n##_ports_per_thread[thread_index]++; \
2341           a->busy_##n##_ports++; \
2342           return 0;
2343           foreach_nat_protocol
2344 #undef _
2345             default : nat_elog_info (sm, "unknown protocol");
2346           return 1;
2347         }
2348     }
2349
2350   return VNET_API_ERROR_NO_SUCH_ENTRY;
2351 }
2352
2353 int
2354 snat_static_mapping_match (vlib_main_t *vm, snat_main_t *sm,
2355                            ip4_address_t match_addr, u16 match_port,
2356                            u32 match_fib_index, nat_protocol_t match_protocol,
2357                            ip4_address_t *mapping_addr, u16 *mapping_port,
2358                            u32 *mapping_fib_index, u8 by_external,
2359                            u8 *is_addr_only, twice_nat_type_t *twice_nat,
2360                            lb_nat_type_t *lb, ip4_address_t *ext_host_addr,
2361                            u8 *is_identity_nat, snat_static_mapping_t **out)
2362 {
2363   clib_bihash_kv_8_8_t kv, value;
2364   clib_bihash_8_8_t *mapping_hash;
2365   snat_static_mapping_t *m;
2366   u32 rand, lo = 0, hi, mid, *tmp = 0, i;
2367   nat44_lb_addr_port_t *local;
2368   u8 backend_index;
2369
2370   if (!by_external)
2371     {
2372       mapping_hash = &sm->static_mapping_by_local;
2373       init_nat_k (&kv, match_addr, match_port, match_fib_index,
2374                   match_protocol);
2375       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2376         {
2377           /* Try address only mapping */
2378           init_nat_k (&kv, match_addr, 0, match_fib_index, 0);
2379           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2380             return 1;
2381         }
2382     }
2383   else
2384     {
2385       mapping_hash = &sm->static_mapping_by_external;
2386       init_nat_k (&kv, match_addr, match_port, 0, match_protocol);
2387       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2388         {
2389           /* Try address only mapping */
2390           init_nat_k (&kv, match_addr, 0, 0, 0);
2391           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2392             return 1;
2393         }
2394     }
2395
2396   m = pool_elt_at_index (sm->static_mappings, value.value);
2397
2398   if (by_external)
2399     {
2400       if (is_lb_static_mapping (m))
2401         {
2402           if (PREDICT_FALSE (lb != 0))
2403             *lb = m->affinity ? AFFINITY_LB_NAT : LB_NAT;
2404           if (m->affinity && !nat_affinity_find_and_lock (
2405                                vm, ext_host_addr[0], match_addr,
2406                                match_protocol, match_port, &backend_index))
2407             {
2408               local = pool_elt_at_index (m->locals, backend_index);
2409               *mapping_addr = local->addr;
2410               *mapping_port = local->port;
2411               *mapping_fib_index = local->fib_index;
2412               goto end;
2413             }
2414           // pick locals matching this worker
2415           if (PREDICT_FALSE (sm->num_workers > 1))
2416             {
2417               u32 thread_index = vlib_get_thread_index ();
2418               pool_foreach_index (i, m->locals)
2419                {
2420                 local = pool_elt_at_index (m->locals, i);
2421
2422                 ip4_header_t ip = {
2423                   .src_address = local->addr,
2424                 };
2425
2426                 if (sm->worker_in2out_cb (0, &ip, m->fib_index, 0) ==
2427                     thread_index)
2428                   {
2429                     vec_add1 (tmp, i);
2430                   }
2431                }
2432               ASSERT (vec_len (tmp) != 0);
2433             }
2434           else
2435             {
2436               pool_foreach_index (i, m->locals)
2437                {
2438                 vec_add1 (tmp, i);
2439               }
2440             }
2441           hi = vec_len (tmp) - 1;
2442           local = pool_elt_at_index (m->locals, tmp[hi]);
2443           rand = 1 + (random_u32 (&sm->random_seed) % local->prefix);
2444           while (lo < hi)
2445             {
2446               mid = ((hi - lo) >> 1) + lo;
2447               local = pool_elt_at_index (m->locals, tmp[mid]);
2448               (rand > local->prefix) ? (lo = mid + 1) : (hi = mid);
2449             }
2450           local = pool_elt_at_index (m->locals, tmp[lo]);
2451           if (!(local->prefix >= rand))
2452             return 1;
2453           *mapping_addr = local->addr;
2454           *mapping_port = local->port;
2455           *mapping_fib_index = local->fib_index;
2456           if (m->affinity)
2457             {
2458               if (nat_affinity_create_and_lock (ext_host_addr[0], match_addr,
2459                                                 match_protocol, match_port,
2460                                                 tmp[lo], m->affinity,
2461                                                 m->affinity_per_service_list_head_index))
2462                 nat_elog_info (sm, "create affinity record failed");
2463             }
2464           vec_free (tmp);
2465         }
2466       else
2467         {
2468           if (PREDICT_FALSE (lb != 0))
2469             *lb = NO_LB_NAT;
2470           *mapping_fib_index = m->fib_index;
2471           *mapping_addr = m->local_addr;
2472           /* Address only mapping doesn't change port */
2473           *mapping_port = is_addr_only_static_mapping (m) ? match_port
2474             : m->local_port;
2475         }
2476     }
2477   else
2478     {
2479       *mapping_addr = m->external_addr;
2480       /* Address only mapping doesn't change port */
2481       *mapping_port = is_addr_only_static_mapping (m) ? match_port
2482         : m->external_port;
2483       *mapping_fib_index = sm->outside_fib_index;
2484     }
2485
2486 end:
2487   if (PREDICT_FALSE (is_addr_only != 0))
2488     *is_addr_only = is_addr_only_static_mapping (m);
2489
2490   if (PREDICT_FALSE (twice_nat != 0))
2491     *twice_nat = m->twice_nat;
2492
2493   if (PREDICT_FALSE (is_identity_nat != 0))
2494     *is_identity_nat = is_identity_static_mapping (m);
2495
2496   if (out != 0)
2497     *out = m;
2498
2499   return 0;
2500 }
2501
2502 static u32
2503 nat44_ed_get_worker_in2out_cb (vlib_buffer_t *b, ip4_header_t *ip,
2504                                u32 rx_fib_index, u8 is_output)
2505 {
2506   snat_main_t *sm = &snat_main;
2507   u32 next_worker_index = sm->first_worker_index;
2508   u32 hash;
2509
2510   clib_bihash_kv_16_8_t kv16, value16;
2511
2512   u32 fib_index = rx_fib_index;
2513   if (b)
2514     {
2515       if (PREDICT_FALSE (is_output))
2516         {
2517           fib_index = sm->outside_fib_index;
2518           nat_outside_fib_t *outside_fib;
2519           fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
2520           fib_prefix_t pfx = {
2521                   .fp_proto = FIB_PROTOCOL_IP4,
2522                   .fp_len = 32,
2523                   .fp_addr = {
2524                           .ip4.as_u32 = ip->dst_address.as_u32,
2525                   } ,
2526           };
2527
2528           switch (vec_len (sm->outside_fibs))
2529             {
2530             case 0:
2531               fib_index = sm->outside_fib_index;
2532               break;
2533             case 1:
2534               fib_index = sm->outside_fibs[0].fib_index;
2535               break;
2536             default:
2537               vec_foreach (outside_fib, sm->outside_fibs)
2538                 {
2539                   fei = fib_table_lookup (outside_fib->fib_index, &pfx);
2540                   if (FIB_NODE_INDEX_INVALID != fei)
2541                     {
2542                       if (fib_entry_get_resolving_interface (fei) != ~0)
2543                         {
2544                           fib_index = outside_fib->fib_index;
2545                           break;
2546                         }
2547                     }
2548                 }
2549               break;
2550             }
2551         }
2552
2553       init_ed_k (&kv16, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
2554                  ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
2555                  fib_index, ip->protocol);
2556
2557       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
2558         {
2559           next_worker_index = ed_value_get_thread_index (&value16);
2560           vnet_buffer2 (b)->nat.cached_session_index =
2561             ed_value_get_session_index (&value16);
2562           goto out;
2563         }
2564
2565       // dst NAT
2566       init_ed_k (&kv16, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
2567                  ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
2568                  rx_fib_index, ip->protocol);
2569       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
2570         {
2571           next_worker_index = ed_value_get_thread_index (&value16);
2572           vnet_buffer2 (b)->nat.cached_dst_nat_session_index =
2573             ed_value_get_session_index (&value16);
2574           goto out;
2575         }
2576     }
2577
2578   hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
2579     (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
2580
2581   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
2582     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
2583   else
2584     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
2585
2586 out:
2587   if (PREDICT_TRUE (!is_output))
2588     {
2589       nat_elog_debug_handoff (sm, "HANDOFF IN2OUT", next_worker_index,
2590                               rx_fib_index,
2591                               clib_net_to_host_u32 (ip->src_address.as_u32),
2592                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2593     }
2594   else
2595     {
2596       nat_elog_debug_handoff (sm, "HANDOFF IN2OUT-OUTPUT-FEATURE",
2597                               next_worker_index, rx_fib_index,
2598                               clib_net_to_host_u32 (ip->src_address.as_u32),
2599                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2600     }
2601
2602   return next_worker_index;
2603 }
2604
2605 static u32
2606 nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip,
2607                                u32 rx_fib_index, u8 is_output)
2608 {
2609   snat_main_t *sm = &snat_main;
2610   clib_bihash_kv_8_8_t kv, value;
2611   clib_bihash_kv_16_8_t kv16, value16;
2612   snat_main_per_thread_data_t *tsm;
2613
2614   u32 proto, next_worker_index = 0;
2615   udp_header_t *udp;
2616   u16 port;
2617   snat_static_mapping_t *m;
2618   u32 hash;
2619
2620   proto = ip_proto_to_nat_proto (ip->protocol);
2621
2622   if (PREDICT_TRUE (proto == NAT_PROTOCOL_UDP || proto == NAT_PROTOCOL_TCP))
2623     {
2624       udp = ip4_next_header (ip);
2625
2626       init_ed_k (&kv16, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
2627                  ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
2628                  rx_fib_index, ip->protocol);
2629
2630       if (PREDICT_TRUE (
2631             !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
2632         {
2633           tsm =
2634             vec_elt_at_index (sm->per_thread_data,
2635                               ed_value_get_thread_index (&value16));
2636           vnet_buffer2 (b)->nat.cached_session_index =
2637             ed_value_get_session_index (&value16);
2638           next_worker_index = sm->first_worker_index + tsm->thread_index;
2639           nat_elog_debug_handoff (
2640             sm, "HANDOFF OUT2IN (session)", next_worker_index, rx_fib_index,
2641             clib_net_to_host_u32 (ip->src_address.as_u32),
2642             clib_net_to_host_u32 (ip->dst_address.as_u32));
2643           return next_worker_index;
2644         }
2645     }
2646   else if (proto == NAT_PROTOCOL_ICMP)
2647     {
2648       ip4_address_t lookup_saddr, lookup_daddr;
2649       u16 lookup_sport, lookup_dport;
2650       u8 lookup_protocol;
2651       if (!nat_get_icmp_session_lookup_values (
2652             b, ip, &lookup_saddr, &lookup_sport, &lookup_daddr, &lookup_dport,
2653             &lookup_protocol))
2654         {
2655           init_ed_k (&kv16, lookup_saddr, lookup_sport, lookup_daddr,
2656                      lookup_dport, rx_fib_index, lookup_protocol);
2657           if (PREDICT_TRUE (
2658                 !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
2659             {
2660               tsm =
2661                 vec_elt_at_index (sm->per_thread_data,
2662                                   ed_value_get_thread_index (&value16));
2663               next_worker_index = sm->first_worker_index + tsm->thread_index;
2664               nat_elog_debug_handoff (
2665                 sm, "HANDOFF OUT2IN (session)", next_worker_index,
2666                 rx_fib_index, clib_net_to_host_u32 (ip->src_address.as_u32),
2667                 clib_net_to_host_u32 (ip->dst_address.as_u32));
2668               return next_worker_index;
2669             }
2670         }
2671     }
2672
2673   /* first try static mappings without port */
2674   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
2675     {
2676       init_nat_k (&kv, ip->dst_address, 0, 0, 0);
2677       if (!clib_bihash_search_8_8
2678           (&sm->static_mapping_by_external, &kv, &value))
2679         {
2680           m = pool_elt_at_index (sm->static_mappings, value.value);
2681           next_worker_index = m->workers[0];
2682           goto done;
2683         }
2684     }
2685
2686   /* unknown protocol */
2687   if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
2688     {
2689       /* use current thread */
2690       next_worker_index = vlib_get_thread_index ();
2691       goto done;
2692     }
2693
2694   udp = ip4_next_header (ip);
2695   port = vnet_buffer (b)->ip.reass.l4_dst_port;
2696
2697   if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
2698     {
2699       icmp46_header_t *icmp = (icmp46_header_t *) udp;
2700       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
2701       if (!icmp_type_is_error_message
2702           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
2703         port = vnet_buffer (b)->ip.reass.l4_src_port;
2704       else
2705         {
2706           /* if error message, then it's not fragmented and we can access it */
2707           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
2708           proto = ip_proto_to_nat_proto (inner_ip->protocol);
2709           void *l4_header = ip4_next_header (inner_ip);
2710           switch (proto)
2711             {
2712             case NAT_PROTOCOL_ICMP:
2713               icmp = (icmp46_header_t *) l4_header;
2714               echo = (icmp_echo_header_t *) (icmp + 1);
2715               port = echo->identifier;
2716               break;
2717             case NAT_PROTOCOL_UDP:
2718             case NAT_PROTOCOL_TCP:
2719               port = ((tcp_udp_header_t *) l4_header)->src_port;
2720               break;
2721             default:
2722               next_worker_index = vlib_get_thread_index ();
2723               goto done;
2724             }
2725         }
2726     }
2727
2728   /* try static mappings with port */
2729   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
2730     {
2731       init_nat_k (&kv, ip->dst_address, port, 0, proto);
2732       if (!clib_bihash_search_8_8
2733           (&sm->static_mapping_by_external, &kv, &value))
2734         {
2735           m = pool_elt_at_index (sm->static_mappings, value.value);
2736           if (!is_lb_static_mapping (m))
2737             {
2738               next_worker_index = m->workers[0];
2739               goto done;
2740             }
2741
2742           hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
2743             (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
2744
2745           if (PREDICT_TRUE (is_pow2 (_vec_len (m->workers))))
2746             next_worker_index =
2747               m->workers[hash & (_vec_len (m->workers) - 1)];
2748           else
2749             next_worker_index = m->workers[hash % _vec_len (m->workers)];
2750           goto done;
2751         }
2752     }
2753
2754   /* worker by outside port */
2755   next_worker_index = sm->first_worker_index;
2756   next_worker_index +=
2757     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
2758
2759 done:
2760   nat_elog_debug_handoff (sm, "HANDOFF OUT2IN", next_worker_index,
2761                           rx_fib_index,
2762                           clib_net_to_host_u32 (ip->src_address.as_u32),
2763                           clib_net_to_host_u32 (ip->dst_address.as_u32));
2764   return next_worker_index;
2765 }
2766
2767 u32
2768 nat44_get_max_session_limit ()
2769 {
2770   snat_main_t *sm = &snat_main;
2771   u32 max_limit = 0, len = 0;
2772
2773   for (; len < vec_len (sm->max_translations_per_fib); len++)
2774     {
2775       if (max_limit < sm->max_translations_per_fib[len])
2776         max_limit = sm->max_translations_per_fib[len];
2777     }
2778   return max_limit;
2779 }
2780
2781 int
2782 nat44_set_session_limit (u32 session_limit, u32 vrf_id)
2783 {
2784   snat_main_t *sm = &snat_main;
2785   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
2786   u32 len = vec_len (sm->max_translations_per_fib);
2787
2788   if (len <= fib_index)
2789     {
2790       vec_validate (sm->max_translations_per_fib, fib_index + 1);
2791
2792       for (; len < vec_len (sm->max_translations_per_fib); len++)
2793         sm->max_translations_per_fib[len] = sm->max_translations_per_thread;
2794     }
2795
2796   sm->max_translations_per_fib[fib_index] = session_limit;
2797   return 0;
2798 }
2799
2800 int
2801 nat44_update_session_limit (u32 session_limit, u32 vrf_id)
2802 {
2803   snat_main_t *sm = &snat_main;
2804
2805   if (nat44_set_session_limit (session_limit, vrf_id))
2806     return 1;
2807   sm->max_translations_per_thread = nat44_get_max_session_limit ();
2808
2809   sm->translation_buckets =
2810     nat_calc_bihash_buckets (sm->max_translations_per_thread);
2811
2812   nat44_ed_sessions_clear ();
2813   return 0;
2814 }
2815
2816 static void
2817 nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations,
2818                          u32 translation_buckets)
2819 {
2820   dlist_elt_t *head;
2821
2822   pool_alloc (tsm->sessions, translations);
2823   pool_alloc (tsm->lru_pool, translations);
2824
2825   pool_get (tsm->lru_pool, head);
2826   tsm->tcp_trans_lru_head_index = head - tsm->lru_pool;
2827   clib_dlist_init (tsm->lru_pool, tsm->tcp_trans_lru_head_index);
2828
2829   pool_get (tsm->lru_pool, head);
2830   tsm->tcp_estab_lru_head_index = head - tsm->lru_pool;
2831   clib_dlist_init (tsm->lru_pool, tsm->tcp_estab_lru_head_index);
2832
2833   pool_get (tsm->lru_pool, head);
2834   tsm->udp_lru_head_index = head - tsm->lru_pool;
2835   clib_dlist_init (tsm->lru_pool, tsm->udp_lru_head_index);
2836
2837   pool_get (tsm->lru_pool, head);
2838   tsm->icmp_lru_head_index = head - tsm->lru_pool;
2839   clib_dlist_init (tsm->lru_pool, tsm->icmp_lru_head_index);
2840
2841   pool_get (tsm->lru_pool, head);
2842   tsm->unk_proto_lru_head_index = head - tsm->lru_pool;
2843   clib_dlist_init (tsm->lru_pool, tsm->unk_proto_lru_head_index);
2844 }
2845
2846 static void
2847 reinit_ed_flow_hash ()
2848 {
2849   snat_main_t *sm = &snat_main;
2850   // we expect 2 flows per session, so multiply translation_buckets by 2
2851   clib_bihash_init_16_8 (
2852     &sm->flow_hash, "ed-flow-hash",
2853     clib_max (1, sm->num_workers) * 2 * sm->translation_buckets, 0);
2854   clib_bihash_set_kvp_format_fn_16_8 (&sm->flow_hash, format_ed_session_kvp);
2855 }
2856
2857 static void
2858 nat44_ed_db_init (u32 translations, u32 translation_buckets)
2859 {
2860   snat_main_t *sm = &snat_main;
2861   snat_main_per_thread_data_t *tsm;
2862   u32 static_mapping_buckets = 1024;
2863   u32 static_mapping_memory_size = 64 << 20;
2864
2865   reinit_ed_flow_hash ();
2866
2867   clib_bihash_init_8_8 (&sm->static_mapping_by_local,
2868                         "static_mapping_by_local", static_mapping_buckets,
2869                         static_mapping_memory_size);
2870   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_local,
2871                                      format_static_mapping_kvp);
2872
2873   clib_bihash_init_8_8 (&sm->static_mapping_by_external,
2874                         "static_mapping_by_external", static_mapping_buckets,
2875                         static_mapping_memory_size);
2876   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_external,
2877                                      format_static_mapping_kvp);
2878
2879   if (sm->pat)
2880     {
2881       vec_foreach (tsm, sm->per_thread_data)
2882         {
2883           nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
2884                                    sm->translation_buckets);
2885         }
2886     }
2887 }
2888
2889 static void
2890 nat44_ed_worker_db_free (snat_main_per_thread_data_t *tsm)
2891 {
2892   pool_free (tsm->lru_pool);
2893   pool_free (tsm->sessions);
2894   vec_free (tsm->per_vrf_sessions_vec);
2895 }
2896
2897 static void
2898 nat44_ed_db_free ()
2899 {
2900   snat_main_t *sm = &snat_main;
2901   snat_main_per_thread_data_t *tsm;
2902
2903   pool_free (sm->static_mappings);
2904   clib_bihash_free_16_8 (&sm->flow_hash);
2905   clib_bihash_free_8_8 (&sm->static_mapping_by_local);
2906   clib_bihash_free_8_8 (&sm->static_mapping_by_external);
2907
2908   if (sm->pat)
2909     {
2910       vec_foreach (tsm, sm->per_thread_data)
2911         {
2912           nat44_ed_worker_db_free (tsm);
2913         }
2914     }
2915 }
2916
2917 void
2918 nat44_ed_sessions_clear ()
2919 {
2920   snat_main_t *sm = &snat_main;
2921   snat_main_per_thread_data_t *tsm;
2922
2923   reinit_ed_flow_hash ();
2924
2925   if (sm->pat)
2926     {
2927       vec_foreach (tsm, sm->per_thread_data)
2928         {
2929
2930           nat44_ed_worker_db_free (tsm);
2931           nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
2932                                    sm->translation_buckets);
2933         }
2934     }
2935   vlib_zero_simple_counter (&sm->total_sessions, 0);
2936 }
2937
2938 static void
2939 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
2940                                  uword opaque,
2941                                  u32 sw_if_index,
2942                                  ip4_address_t * address,
2943                                  u32 address_length,
2944                                  u32 if_address_index, u32 is_delete)
2945 {
2946   snat_main_t *sm = &snat_main;
2947   snat_static_map_resolve_t *rp;
2948   snat_static_mapping_t *m;
2949   clib_bihash_kv_8_8_t kv, value;
2950   int i, rv;
2951   ip4_address_t l_addr;
2952
2953   if (!sm->enabled)
2954     return;
2955
2956   for (i = 0; i < vec_len (sm->to_resolve); i++)
2957     {
2958       rp = sm->to_resolve + i;
2959       if (rp->addr_only == 0)
2960         continue;
2961       if (rp->sw_if_index == sw_if_index)
2962         goto match;
2963     }
2964
2965   return;
2966
2967 match:
2968   init_nat_k (&kv, *address, rp->addr_only ? 0 : rp->e_port,
2969               sm->outside_fib_index, rp->addr_only ? 0 : rp->proto);
2970   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
2971     m = 0;
2972   else
2973     m = pool_elt_at_index (sm->static_mappings, value.value);
2974
2975   if (!is_delete)
2976     {
2977       /* Don't trip over lease renewal, static config */
2978       if (m)
2979         return;
2980     }
2981   else
2982     {
2983       if (!m)
2984         return;
2985     }
2986
2987   /* Indetity mapping? */
2988   if (rp->l_addr.as_u32 == 0)
2989     l_addr.as_u32 = address[0].as_u32;
2990   else
2991     l_addr.as_u32 = rp->l_addr.as_u32;
2992   /* Add the static mapping */
2993   rv = snat_add_static_mapping (l_addr,
2994                                 address[0],
2995                                 rp->l_port,
2996                                 rp->e_port,
2997                                 rp->vrf_id,
2998                                 rp->addr_only, ~0 /* sw_if_index */ ,
2999                                 rp->proto, !is_delete, rp->twice_nat,
3000                                 rp->out2in_only, rp->tag, rp->identity_nat,
3001                                 rp->pool_addr, rp->exact);
3002   if (rv)
3003     nat_elog_notice_X1 (sm, "snat_add_static_mapping returned %d", "i4", rv);
3004 }
3005
3006 static void
3007 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
3008                                        uword opaque,
3009                                        u32 sw_if_index,
3010                                        ip4_address_t * address,
3011                                        u32 address_length,
3012                                        u32 if_address_index, u32 is_delete)
3013 {
3014   snat_main_t *sm = &snat_main;
3015   snat_static_map_resolve_t *rp;
3016   ip4_address_t l_addr;
3017   int i, j;
3018   int rv;
3019   u8 twice_nat = 0;
3020   snat_address_t *addresses = sm->addresses;
3021
3022   if (!sm->enabled)
3023     return;
3024
3025   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices); i++)
3026     {
3027       if (sw_if_index == sm->auto_add_sw_if_indices[i])
3028         goto match;
3029     }
3030
3031   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices_twice_nat); i++)
3032     {
3033       twice_nat = 1;
3034       addresses = sm->twice_nat_addresses;
3035       if (sw_if_index == sm->auto_add_sw_if_indices_twice_nat[i])
3036         goto match;
3037     }
3038
3039   return;
3040
3041 match:
3042   if (!is_delete)
3043     {
3044       /* Don't trip over lease renewal, static config */
3045       for (j = 0; j < vec_len (addresses); j++)
3046         if (addresses[j].addr.as_u32 == address->as_u32)
3047           return;
3048
3049       (void) snat_add_address (sm, address, ~0, twice_nat);
3050       /* Scan static map resolution vector */
3051       for (j = 0; j < vec_len (sm->to_resolve); j++)
3052         {
3053           rp = sm->to_resolve + j;
3054           if (rp->addr_only)
3055             continue;
3056           /* On this interface? */
3057           if (rp->sw_if_index == sw_if_index)
3058             {
3059               /* Indetity mapping? */
3060               if (rp->l_addr.as_u32 == 0)
3061                 l_addr.as_u32 = address[0].as_u32;
3062               else
3063                 l_addr.as_u32 = rp->l_addr.as_u32;
3064               /* Add the static mapping */
3065               rv = snat_add_static_mapping (
3066                 l_addr, address[0], rp->l_port, rp->e_port, rp->vrf_id,
3067                 rp->addr_only, ~0 /* sw_if_index */, rp->proto, 1,
3068                 rp->twice_nat, rp->out2in_only, rp->tag, rp->identity_nat,
3069                 rp->pool_addr, rp->exact);
3070               if (rv)
3071                 nat_elog_notice_X1 (sm, "snat_add_static_mapping returned %d",
3072                                     "i4", rv);
3073             }
3074         }
3075       return;
3076     }
3077   else
3078     {
3079       (void) snat_del_address (sm, address[0], 1, twice_nat);
3080       return;
3081     }
3082 }
3083
3084 int
3085 snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del,
3086                             u8 twice_nat)
3087 {
3088   ip4_main_t *ip4_main = sm->ip4_main;
3089   ip4_address_t *first_int_addr;
3090   snat_static_map_resolve_t *rp;
3091   u32 *indices_to_delete = 0;
3092   int i, j;
3093   u32 *auto_add_sw_if_indices =
3094     twice_nat ? sm->
3095     auto_add_sw_if_indices_twice_nat : sm->auto_add_sw_if_indices;
3096
3097   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0        /* just want the address */
3098     );
3099
3100   for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
3101     {
3102       if (auto_add_sw_if_indices[i] == sw_if_index)
3103         {
3104           if (is_del)
3105             {
3106               /* if have address remove it */
3107               if (first_int_addr)
3108                 (void) snat_del_address (sm, first_int_addr[0], 1, twice_nat);
3109               else
3110                 {
3111                   for (j = 0; j < vec_len (sm->to_resolve); j++)
3112                     {
3113                       rp = sm->to_resolve + j;
3114                       if (rp->sw_if_index == sw_if_index)
3115                         vec_add1 (indices_to_delete, j);
3116                     }
3117                   if (vec_len (indices_to_delete))
3118                     {
3119                       for (j = vec_len (indices_to_delete) - 1; j >= 0; j--)
3120                         vec_del1 (sm->to_resolve, j);
3121                       vec_free (indices_to_delete);
3122                     }
3123                 }
3124               if (twice_nat)
3125                 vec_del1 (sm->auto_add_sw_if_indices_twice_nat, i);
3126               else
3127                 vec_del1 (sm->auto_add_sw_if_indices, i);
3128             }
3129           else
3130             return VNET_API_ERROR_VALUE_EXIST;
3131
3132           return 0;
3133         }
3134     }
3135
3136   if (is_del)
3137     return VNET_API_ERROR_NO_SUCH_ENTRY;
3138
3139   /* add to the auto-address list */
3140   if (twice_nat)
3141     vec_add1 (sm->auto_add_sw_if_indices_twice_nat, sw_if_index);
3142   else
3143     vec_add1 (sm->auto_add_sw_if_indices, sw_if_index);
3144
3145   /* If the address is already bound - or static - add it now */
3146   if (first_int_addr)
3147     (void) snat_add_address (sm, first_int_addr, ~0, twice_nat);
3148
3149   return 0;
3150 }
3151
3152 int
3153 nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
3154                       ip4_address_t * eh_addr, u16 eh_port, u8 proto,
3155                       u32 vrf_id, int is_in)
3156 {
3157   ip4_header_t ip;
3158   clib_bihash_kv_16_8_t kv, value;
3159   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
3160   snat_session_t *s;
3161   snat_main_per_thread_data_t *tsm;
3162
3163   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
3164   if (sm->num_workers > 1)
3165     tsm = vec_elt_at_index (sm->per_thread_data,
3166                             sm->worker_in2out_cb (0, &ip, fib_index, 0));
3167   else
3168     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
3169
3170   init_ed_k (&kv, *addr, port, *eh_addr, eh_port, fib_index, proto);
3171   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
3172     {
3173       return VNET_API_ERROR_NO_SUCH_ENTRY;
3174     }
3175
3176   if (pool_is_free_index (tsm->sessions, ed_value_get_session_index (&value)))
3177     return VNET_API_ERROR_UNSPECIFIED;
3178   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
3179   nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
3180   nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
3181   return 0;
3182 }
3183
3184 VLIB_NODE_FN (nat_default_node) (vlib_main_t * vm,
3185                                  vlib_node_runtime_t * node,
3186                                  vlib_frame_t * frame)
3187 {
3188   return 0;
3189 }
3190
3191 VLIB_REGISTER_NODE (nat_default_node) = {
3192   .name = "nat-default",
3193   .vector_size = sizeof (u32),
3194   .format_trace = 0,
3195   .type = VLIB_NODE_TYPE_INTERNAL,
3196   .n_errors = 0,
3197   .n_next_nodes = NAT_N_NEXT,
3198   .next_nodes = {
3199     [NAT_NEXT_DROP] = "error-drop",
3200     [NAT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3201     [NAT_NEXT_IN2OUT_ED_FAST_PATH] = "nat44-ed-in2out",
3202     [NAT_NEXT_IN2OUT_ED_SLOW_PATH] = "nat44-ed-in2out-slowpath",
3203     [NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH] = "nat44-ed-in2out-output",
3204     [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
3205     [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in",
3206     [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath",
3207     [NAT_NEXT_IN2OUT_CLASSIFY] = "nat44-in2out-worker-handoff",
3208     [NAT_NEXT_OUT2IN_CLASSIFY] = "nat44-out2in-worker-handoff",
3209   },
3210 };
3211
3212 void
3213 nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f)
3214 {
3215   f->l3_csum_delta = 0;
3216   f->l4_csum_delta = 0;
3217   if (f->ops & NAT_FLOW_OP_SADDR_REWRITE &&
3218       f->rewrite.saddr.as_u32 != f->match.saddr.as_u32)
3219     {
3220       f->l3_csum_delta =
3221         ip_csum_add_even (f->l3_csum_delta, f->rewrite.saddr.as_u32);
3222       f->l3_csum_delta =
3223         ip_csum_sub_even (f->l3_csum_delta, f->match.saddr.as_u32);
3224     }
3225   else
3226     {
3227       f->rewrite.saddr.as_u32 = f->match.saddr.as_u32;
3228     }
3229   if (f->ops & NAT_FLOW_OP_DADDR_REWRITE &&
3230       f->rewrite.daddr.as_u32 != f->match.daddr.as_u32)
3231     {
3232       f->l3_csum_delta =
3233         ip_csum_add_even (f->l3_csum_delta, f->rewrite.daddr.as_u32);
3234       f->l3_csum_delta =
3235         ip_csum_sub_even (f->l3_csum_delta, f->match.daddr.as_u32);
3236     }
3237   else
3238     {
3239       f->rewrite.daddr.as_u32 = f->match.daddr.as_u32;
3240     }
3241   if (f->ops & NAT_FLOW_OP_SPORT_REWRITE && f->rewrite.sport != f->match.sport)
3242     {
3243       f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.sport);
3244       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport);
3245     }
3246   else
3247     {
3248       f->rewrite.sport = f->match.sport;
3249     }
3250   if (f->ops & NAT_FLOW_OP_DPORT_REWRITE && f->rewrite.dport != f->match.dport)
3251     {
3252       f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.dport);
3253       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.dport);
3254     }
3255   else
3256     {
3257       f->rewrite.dport = f->match.dport;
3258     }
3259   if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE &&
3260       f->rewrite.icmp_id != f->match.sport)
3261     {
3262       f->l4_csum_delta =
3263         ip_csum_add_even (f->l4_csum_delta, f->rewrite.icmp_id);
3264       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport);
3265     }
3266   else
3267     {
3268       f->rewrite.icmp_id = f->match.sport;
3269     }
3270   if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3271     {
3272     }
3273   else
3274     {
3275       f->rewrite.fib_index = f->match.fib_index;
3276     }
3277 }
3278
3279 static_always_inline int nat_6t_flow_icmp_translate (snat_main_t *sm,
3280                                                      vlib_buffer_t *b,
3281                                                      ip4_header_t *ip,
3282                                                      nat_6t_flow_t *f);
3283
3284 static_always_inline void
3285 nat_6t_flow_ip4_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
3286                            nat_6t_flow_t *f, nat_protocol_t proto,
3287                            int is_icmp_inner_ip4)
3288 {
3289   udp_header_t *udp = ip4_next_header (ip);
3290   tcp_header_t *tcp = (tcp_header_t *) udp;
3291
3292   if ((NAT_PROTOCOL_TCP == proto || NAT_PROTOCOL_UDP == proto) &&
3293       !vnet_buffer (b)->ip.reass.is_non_first_fragment)
3294     {
3295       if (!is_icmp_inner_ip4)
3296         { // regular case
3297           ip->src_address = f->rewrite.saddr;
3298           ip->dst_address = f->rewrite.daddr;
3299           udp->src_port = f->rewrite.sport;
3300           udp->dst_port = f->rewrite.dport;
3301         }
3302       else
3303         { // icmp inner ip4 - reversed saddr/daddr
3304           ip->src_address = f->rewrite.daddr;
3305           ip->dst_address = f->rewrite.saddr;
3306           udp->src_port = f->rewrite.dport;
3307           udp->dst_port = f->rewrite.sport;
3308         }
3309
3310       if (NAT_PROTOCOL_TCP == proto)
3311         {
3312           ip_csum_t tcp_sum = tcp->checksum;
3313           tcp_sum = ip_csum_sub_even (tcp_sum, f->l3_csum_delta);
3314           tcp_sum = ip_csum_sub_even (tcp_sum, f->l4_csum_delta);
3315           mss_clamping (sm->mss_clamping, tcp, &tcp_sum);
3316           tcp->checksum = ip_csum_fold (tcp_sum);
3317         }
3318       else if (proto == NAT_PROTOCOL_UDP && udp->checksum)
3319         {
3320           ip_csum_t udp_sum = udp->checksum;
3321           udp_sum = ip_csum_sub_even (udp_sum, f->l3_csum_delta);
3322           udp_sum = ip_csum_sub_even (udp_sum, f->l4_csum_delta);
3323           udp->checksum = ip_csum_fold (udp_sum);
3324         }
3325     }
3326   else
3327     {
3328       if (!is_icmp_inner_ip4)
3329         { // regular case
3330           ip->src_address = f->rewrite.saddr;
3331           ip->dst_address = f->rewrite.daddr;
3332         }
3333       else
3334         { // icmp inner ip4 - reversed saddr/daddr
3335           ip->src_address = f->rewrite.daddr;
3336           ip->dst_address = f->rewrite.saddr;
3337         }
3338     }
3339
3340   ip_csum_t ip_sum = ip->checksum;
3341   ip_sum = ip_csum_sub_even (ip_sum, f->l3_csum_delta);
3342   ip->checksum = ip_csum_fold (ip_sum);
3343   ASSERT (ip->checksum == ip4_header_checksum (ip));
3344 }
3345
3346 static_always_inline int
3347 nat_6t_flow_icmp_translate (snat_main_t *sm, vlib_buffer_t *b,
3348                             ip4_header_t *ip, nat_6t_flow_t *f)
3349 {
3350   if (IP_PROTOCOL_ICMP != ip->protocol)
3351     return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3352
3353   icmp46_header_t *icmp = ip4_next_header (ip);
3354   icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3355
3356   if ((!vnet_buffer (b)->ip.reass.is_non_first_fragment))
3357     {
3358       if (icmp->checksum == 0)
3359         icmp->checksum = 0xffff;
3360
3361       if (!icmp_type_is_error_message (icmp->type))
3362         {
3363           if ((f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE) &&
3364               (f->rewrite.icmp_id != echo->identifier))
3365             {
3366               ip_csum_t sum = icmp->checksum;
3367               sum = ip_csum_update (sum, echo->identifier, f->rewrite.icmp_id,
3368                                     icmp_echo_header_t,
3369                                     identifier /* changed member */);
3370               echo->identifier = f->rewrite.icmp_id;
3371               icmp->checksum = ip_csum_fold (sum);
3372             }
3373         }
3374       else
3375         {
3376           // errors are not fragmented
3377           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3378
3379           if (!ip4_header_checksum_is_valid (inner_ip))
3380             {
3381               return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3382             }
3383
3384           nat_protocol_t inner_proto =
3385             ip_proto_to_nat_proto (inner_ip->protocol);
3386
3387           ip_csum_t icmp_sum = icmp->checksum;
3388
3389           switch (inner_proto)
3390             {
3391             case NAT_PROTOCOL_UDP:
3392             case NAT_PROTOCOL_TCP:
3393               nat_6t_flow_ip4_translate (sm, b, inner_ip, f, inner_proto,
3394                                          1 /* is_icmp_inner_ip4 */);
3395               icmp_sum = ip_csum_sub_even (icmp_sum, f->l3_csum_delta);
3396               icmp->checksum = ip_csum_fold (icmp_sum);
3397               break;
3398             case NAT_PROTOCOL_ICMP:
3399               if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
3400                 {
3401                   icmp46_header_t *inner_icmp = ip4_next_header (inner_ip);
3402                   icmp_echo_header_t *inner_echo =
3403                     (icmp_echo_header_t *) (inner_icmp + 1);
3404                   if (f->rewrite.icmp_id != inner_echo->identifier)
3405                     {
3406                       ip_csum_t sum = icmp->checksum;
3407                       sum = ip_csum_update (
3408                         sum, inner_echo->identifier, f->rewrite.icmp_id,
3409                         icmp_echo_header_t, identifier /* changed member */);
3410                       icmp->checksum = ip_csum_fold (sum);
3411                       ip_csum_t inner_sum = inner_icmp->checksum;
3412                       inner_sum = ip_csum_update (
3413                         sum, inner_echo->identifier, f->rewrite.icmp_id,
3414                         icmp_echo_header_t, identifier /* changed member */);
3415                       inner_icmp->checksum = ip_csum_fold (inner_sum);
3416                       inner_echo->identifier = f->rewrite.icmp_id;
3417                     }
3418                 }
3419               break;
3420             default:
3421               clib_warning ("unexpected NAT protocol value `%d'", inner_proto);
3422               return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3423             }
3424         }
3425     }
3426   return NAT_ED_TRNSL_ERR_SUCCESS;
3427 }
3428
3429 nat_translation_error_e
3430 nat_6t_flow_buf_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
3431                            nat_6t_flow_t *f, nat_protocol_t proto,
3432                            int is_output_feature)
3433 {
3434   if (!is_output_feature && f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3435     {
3436       vnet_buffer (b)->sw_if_index[VLIB_TX] = f->rewrite.fib_index;
3437     }
3438
3439   nat_6t_flow_ip4_translate (sm, b, ip, f, proto, 0 /* is_icmp_inner_ip4 */);
3440
3441   if (NAT_PROTOCOL_ICMP == proto)
3442     {
3443       return nat_6t_flow_icmp_translate (sm, b, ip, f);
3444     }
3445
3446   return NAT_ED_TRNSL_ERR_SUCCESS;
3447 }
3448
3449 u8 *
3450 format_nat_6t (u8 *s, va_list *args)
3451 {
3452   nat_6t_t *t = va_arg (*args, nat_6t_t *);
3453
3454   s = format (s, "saddr %U sport %u daddr %U dport %u proto %U fib_idx %u",
3455               format_ip4_address, t->saddr.as_u8,
3456               clib_net_to_host_u16 (t->sport), format_ip4_address,
3457               t->daddr.as_u8, clib_net_to_host_u16 (t->dport),
3458               format_ip_protocol, t->proto, t->fib_index);
3459   return s;
3460 }
3461
3462 u8 *
3463 format_nat_ed_translation_error (u8 *s, va_list *args)
3464 {
3465   nat_translation_error_e e = va_arg (*args, nat_translation_error_e);
3466
3467   switch (e)
3468     {
3469     case NAT_ED_TRNSL_ERR_SUCCESS:
3470       s = format (s, "success");
3471       break;
3472     case NAT_ED_TRNSL_ERR_TRANSLATION_FAILED:
3473       s = format (s, "translation-failed");
3474       break;
3475     case NAT_ED_TRNSL_ERR_FLOW_MISMATCH:
3476       s = format (s, "flow-mismatch");
3477       break;
3478     }
3479   return s;
3480 }
3481
3482 u8 *
3483 format_nat_6t_flow (u8 *s, va_list *args)
3484 {
3485   nat_6t_flow_t *f = va_arg (*args, nat_6t_flow_t *);
3486
3487   s = format (s, "match: %U ", format_nat_6t, &f->match);
3488   int r = 0;
3489   if (f->ops & NAT_FLOW_OP_SADDR_REWRITE)
3490     {
3491       s = format (s, "rewrite: saddr %U ", format_ip4_address,
3492                   f->rewrite.saddr.as_u8);
3493       r = 1;
3494     }
3495   if (f->ops & NAT_FLOW_OP_SPORT_REWRITE)
3496     {
3497       if (!r)
3498         {
3499           s = format (s, "rewrite: ");
3500           r = 1;
3501         }
3502       s = format (s, "sport %u ", clib_net_to_host_u16 (f->rewrite.sport));
3503     }
3504   if (f->ops & NAT_FLOW_OP_DADDR_REWRITE)
3505     {
3506       if (!r)
3507         {
3508           s = format (s, "rewrite: ");
3509           r = 1;
3510         }
3511       s = format (s, "daddr %U ", format_ip4_address, f->rewrite.daddr.as_u8);
3512     }
3513   if (f->ops & NAT_FLOW_OP_DPORT_REWRITE)
3514     {
3515       if (!r)
3516         {
3517           s = format (s, "rewrite: ");
3518           r = 1;
3519         }
3520       s = format (s, "dport %u ", clib_net_to_host_u16 (f->rewrite.dport));
3521     }
3522   if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
3523     {
3524       if (!r)
3525         {
3526           s = format (s, "rewrite: ");
3527           r = 1;
3528         }
3529       s = format (s, "icmp-id %u ", clib_net_to_host_u16 (f->rewrite.icmp_id));
3530     }
3531   if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3532     {
3533       if (!r)
3534         {
3535           s = format (s, "rewrite: ");
3536           r = 1;
3537         }
3538       s = format (s, "txfib %u ", f->rewrite.fib_index);
3539     }
3540   return s;
3541 }
3542
3543 /*
3544  * fd.io coding-style-patch-verification: ON
3545  *
3546  * Local Variables:
3547  * eval: (c-set-style "gnu")
3548  * End:
3549  */