nat: Final NAT44 EI/ED split patch
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed.c
1 /*
2  * snat.c - simple nat plugin
3  *
4  * Copyright (c) 2016 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vpp/app/version.h>
19
20 #include <vnet/vnet.h>
21 #include <vnet/ip/ip.h>
22 #include <vnet/ip/ip4.h>
23 #include <vnet/ip/ip_table.h>
24 #include <vnet/ip/reass/ip4_sv_reass.h>
25 #include <vnet/fib/fib_table.h>
26 #include <vnet/fib/ip4_fib.h>
27 #include <vnet/plugin/plugin.h>
28 #include <vppinfra/bihash_16_8.h>
29
30 #include <nat/lib/log.h>
31 #include <nat/lib/nat_syslog.h>
32 #include <nat/lib/nat_inlines.h>
33 #include <nat/lib/ipfix_logging.h>
34
35 #include <nat/nat44-ed/nat44_ed.h>
36 #include <nat/nat44-ed/nat44_ed_affinity.h>
37 #include <nat/nat44-ed/nat44_ed_inlines.h>
38
39 snat_main_t snat_main;
40
41 static_always_inline void nat_validate_interface_counters (snat_main_t *sm,
42                                                            u32 sw_if_index);
43
/* Leave the enclosing void function immediately when the plugin is not
 * enabled. */
#define skip_if_disabled()                                                    \
  do                                                                          \
    {                                                                         \
      if (PREDICT_FALSE (!snat_main.enabled))                                 \
        return;                                                               \
    }                                                                         \
  while (0)
52
/* Log an error and make the enclosing function return 1 when the plugin is
 * already enabled. */
#define fail_if_enabled()                                                     \
  do                                                                          \
    {                                                                         \
      if (PREDICT_FALSE (snat_main.enabled))                                  \
        {                                                                     \
          nat_log_err ("plugin enabled");                                     \
          return 1;                                                           \
        }                                                                     \
    }                                                                         \
  while (0)
64
/* Log an error and make the enclosing function return 1 when the plugin is
 * not enabled. */
#define fail_if_disabled()                                                    \
  do                                                                          \
    {                                                                         \
      if (PREDICT_FALSE (!snat_main.enabled))                                 \
        {                                                                     \
          nat_log_err ("plugin disabled");                                    \
          return 1;                                                           \
        }                                                                     \
    }                                                                         \
  while (0)
76
77 /* *INDENT-OFF* */
78 /* Hook up input features */
79 VNET_FEATURE_INIT (nat_pre_in2out, static) = {
80   .arc_name = "ip4-unicast",
81   .node_name = "nat-pre-in2out",
82   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
83                                "ip4-sv-reassembly-feature"),
84 };
85 VNET_FEATURE_INIT (nat_pre_out2in, static) = {
86   .arc_name = "ip4-unicast",
87   .node_name = "nat-pre-out2in",
88   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
89                                "ip4-dhcp-client-detect",
90                                "ip4-sv-reassembly-feature"),
91 };
92 VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = {
93   .arc_name = "ip4-unicast",
94   .node_name = "nat44-in2out-worker-handoff",
95   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
96 };
97 VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = {
98   .arc_name = "ip4-unicast",
99   .node_name = "nat44-out2in-worker-handoff",
100   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
101                                "ip4-dhcp-client-detect"),
102 };
103 VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
104   .arc_name = "ip4-unicast",
105   .node_name = "nat44-in2out",
106   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
107 };
108 VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
109   .arc_name = "ip4-unicast",
110   .node_name = "nat44-out2in",
111   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
112                                "ip4-dhcp-client-detect"),
113 };
114 VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = {
115   .arc_name = "ip4-unicast",
116   .node_name = "nat44-ed-in2out",
117   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
118 };
119 VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = {
120   .arc_name = "ip4-unicast",
121   .node_name = "nat44-ed-out2in",
122   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
123                                "ip4-dhcp-client-detect"),
124 };
125 VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
126   .arc_name = "ip4-unicast",
127   .node_name = "nat44-ed-classify",
128   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
129 };
130 VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
131   .arc_name = "ip4-unicast",
132   .node_name = "nat44-handoff-classify",
133   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
134 };
135 VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
136   .arc_name = "ip4-unicast",
137   .node_name = "nat44-in2out-fast",
138   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
139 };
140 VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
141   .arc_name = "ip4-unicast",
142   .node_name = "nat44-out2in-fast",
143   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
144                                "ip4-dhcp-client-detect"),
145 };
146
147 /* Hook up output features */
148 VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = {
149   .arc_name = "ip4-output",
150   .node_name = "nat44-in2out-output",
151   .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
152 };
153 VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
154   .arc_name = "ip4-output",
155   .node_name = "nat44-in2out-output-worker-handoff",
156   .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
157 };
158 VNET_FEATURE_INIT (nat_pre_in2out_output, static) = {
159   .arc_name = "ip4-output",
160   .node_name = "nat-pre-in2out-output",
161   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
162   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
163 };
164 VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = {
165   .arc_name = "ip4-output",
166   .node_name = "nat44-ed-in2out-output",
167   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
168   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
169 };
170
171 VLIB_PLUGIN_REGISTER () = {
172     .version = VPP_BUILD_VER,
173     .description = "Network Address Translation (NAT)",
174 };
175 /* *INDENT-ON* */
176
177 static void nat44_ed_db_init (u32 translations, u32 translation_buckets);
178
179 static void nat44_ed_db_free ();
180
181 static u32
182 nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip,
183                                u32 rx_fib_index, u8 is_output);
184
185 static u32
186 nat44_ed_get_worker_in2out_cb (ip4_header_t * ip, u32 rx_fib_index,
187                                u8 is_output);
188
189 u32 nat_calc_bihash_buckets (u32 n_elts);
190
191 u8 *
192 format_session_kvp (u8 * s, va_list * args)
193 {
194   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
195
196   s = format (s, "%U thread-index %llu session-index %llu", format_snat_key,
197               v->key, nat_value_get_thread_index (v),
198               nat_value_get_session_index (v));
199
200   return s;
201 }
202
203 u8 *
204 format_static_mapping_kvp (u8 * s, va_list * args)
205 {
206   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
207
208   s = format (s, "%U static-mapping-index %llu",
209               format_snat_key, v->key, v->value);
210
211   return s;
212 }
213
214 u8 *
215 format_ed_session_kvp (u8 * s, va_list * args)
216 {
217   clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
218
219   u8 proto;
220   u16 r_port, l_port;
221   ip4_address_t l_addr, r_addr;
222   u32 fib_index;
223
224   split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port);
225   s = format (s,
226               "local %U:%d remote %U:%d proto %U fib %d thread-index %u "
227               "session-index %u",
228               format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port),
229               format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port),
230               format_ip_protocol, proto, fib_index,
231               ed_value_get_thread_index (v), ed_value_get_session_index (v));
232
233   return s;
234 }
235
236 void
237 nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index,
238                        u8 is_ha)
239 {
240       per_vrf_sessions_unregister_session (s, thread_index);
241
242       if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 0))
243         nat_elog_warn (sm, "flow hash del failed");
244
245       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0))
246         nat_elog_warn (sm, "flow hash del failed");
247
248   if (is_fwd_bypass_session (s))
249     {
250       return;
251     }
252
253       if (is_affinity_sessions (s))
254         nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
255                              s->nat_proto, s->out2in.port);
256
257       if (!is_ha)
258         nat_syslog_nat44_sdel (
259           0, s->in2out.fib_index, &s->in2out.addr, s->in2out.port,
260           &s->ext_host_nat_addr, s->ext_host_nat_port, &s->out2in.addr,
261           s->out2in.port, &s->ext_host_addr, s->ext_host_port, s->nat_proto,
262           is_twice_nat_session (s));
263
264   if (snat_is_unk_proto_session (s))
265     return;
266
267   if (!is_ha)
268     {
269       /* log NAT event */
270       nat_ipfix_logging_nat44_ses_delete (thread_index,
271                                           s->in2out.addr.as_u32,
272                                           s->out2in.addr.as_u32,
273                                           s->nat_proto,
274                                           s->in2out.port,
275                                           s->out2in.port,
276                                           s->in2out.fib_index);
277     }
278
279   /* Twice NAT address and port for external host */
280   if (is_twice_nat_session (s))
281     {
282       snat_free_outside_address_and_port (sm->twice_nat_addresses,
283                                           thread_index,
284                                           &s->ext_host_nat_addr,
285                                           s->ext_host_nat_port, s->nat_proto);
286     }
287
288   if (snat_is_session_static (s))
289     return;
290
291   snat_free_outside_address_and_port (sm->addresses, thread_index,
292                                       &s->out2in.addr, s->out2in.port,
293                                       s->nat_proto);
294 }
295
296 void
297 snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
298                           int is_add)
299 {
300   snat_main_t *sm = &snat_main;
301   fib_prefix_t prefix = {
302     .fp_len = p_len,
303     .fp_proto = FIB_PROTOCOL_IP4,
304     .fp_addr = {
305                 .ip4.as_u32 = addr->as_u32,
306                 },
307   };
308   u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
309
310   if (is_add)
311     fib_table_entry_update_one_path (fib_index,
312                                      &prefix,
313                                      sm->fib_src_low,
314                                      (FIB_ENTRY_FLAG_CONNECTED |
315                                       FIB_ENTRY_FLAG_LOCAL |
316                                       FIB_ENTRY_FLAG_EXCLUSIVE),
317                                      DPO_PROTO_IP4,
318                                      NULL,
319                                      sw_if_index,
320                                      ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
321   else
322     fib_table_entry_delete (fib_index, &prefix, sm->fib_src_low);
323 }
324
325 int
326 snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id,
327                   u8 twice_nat)
328 {
329   snat_address_t *ap;
330   snat_interface_t *i;
331   vlib_thread_main_t *tm = vlib_get_thread_main ();
332
333   /* Check if address already exists */
334   /* *INDENT-OFF* */
335   vec_foreach (ap, twice_nat ? sm->twice_nat_addresses : sm->addresses)
336     {
337       if (ap->addr.as_u32 == addr->as_u32)
338         {
339           nat_log_err ("address exist");
340           return VNET_API_ERROR_VALUE_EXIST;
341         }
342     }
343   /* *INDENT-ON* */
344
345   if (twice_nat)
346     vec_add2 (sm->twice_nat_addresses, ap, 1);
347   else
348     vec_add2 (sm->addresses, ap, 1);
349
350   ap->addr = *addr;
351   if (vrf_id != ~0)
352     ap->fib_index =
353       fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
354                                          sm->fib_src_low);
355   else
356     ap->fib_index = ~0;
357
358   /* *INDENT-OFF* */
359   #define _(N, i, n, s) \
360     clib_memset(ap->busy_##n##_port_refcounts, 0, sizeof(ap->busy_##n##_port_refcounts));\
361     ap->busy_##n##_ports = 0; \
362     ap->busy_##n##_ports_per_thread = 0;\
363     vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
364     foreach_nat_protocol
365   #undef _
366   /* *INDENT-ON* */
367
368   if (twice_nat)
369     return 0;
370
371   /* Add external address to FIB */
372   /* *INDENT-OFF* */
373   pool_foreach (i, sm->interfaces)
374    {
375      if (nat_interface_is_inside (i))
376        continue;
377
378      snat_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
379      break;
380   }
381   pool_foreach (i, sm->output_feature_interfaces)
382    {
383      if (nat_interface_is_inside (i))
384        continue;
385
386      snat_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
387      break;
388   }
389   /* *INDENT-ON* */
390
391   return 0;
392 }
393
394 static int
395 is_snat_address_used_in_static_mapping (snat_main_t * sm, ip4_address_t addr)
396 {
397   snat_static_mapping_t *m;
398   /* *INDENT-OFF* */
399   pool_foreach (m, sm->static_mappings)
400    {
401       if (is_addr_only_static_mapping (m) ||
402           is_out2in_only_static_mapping (m) ||
403           is_identity_static_mapping (m))
404         continue;
405       if (m->external_addr.as_u32 == addr.as_u32)
406         return 1;
407   }
408   /* *INDENT-ON* */
409
410   return 0;
411 }
412
413 static void
414 snat_add_static_mapping_when_resolved (snat_main_t *sm, ip4_address_t l_addr,
415                                        u16 l_port, u32 sw_if_index, u16 e_port,
416                                        u32 vrf_id, nat_protocol_t proto,
417                                        int addr_only, u8 *tag, int twice_nat,
418                                        int out2in_only, int identity_nat,
419                                        ip4_address_t pool_addr, int exact)
420 {
421   snat_static_map_resolve_t *rp;
422
423   vec_add2 (sm->to_resolve, rp, 1);
424   rp->l_addr.as_u32 = l_addr.as_u32;
425   rp->l_port = l_port;
426   rp->sw_if_index = sw_if_index;
427   rp->e_port = e_port;
428   rp->vrf_id = vrf_id;
429   rp->proto = proto;
430   rp->addr_only = addr_only;
431   rp->twice_nat = twice_nat;
432   rp->out2in_only = out2in_only;
433   rp->identity_nat = identity_nat;
434   rp->tag = vec_dup (tag);
435   rp->pool_addr = pool_addr;
436   rp->exact = exact;
437 }
438
439 u32
440 get_thread_idx_by_port (u16 e_port)
441 {
442   snat_main_t *sm = &snat_main;
443   u32 thread_idx = sm->num_workers;
444   if (sm->num_workers > 1)
445     {
446       thread_idx =
447         sm->first_worker_index +
448         sm->workers[(e_port - 1024) / sm->port_per_thread];
449     }
450   return thread_idx;
451 }
452
453 void
454 nat_ed_static_mapping_del_sessions (snat_main_t * sm,
455                                     snat_main_per_thread_data_t * tsm,
456                                     ip4_address_t l_addr,
457                                     u16 l_port,
458                                     u8 protocol,
459                                     u32 fib_index, int addr_only,
460                                     ip4_address_t e_addr, u16 e_port)
461 {
462   snat_session_t *s;
463   u32 *indexes_to_free = NULL;
464   /* *INDENT-OFF* */
465   pool_foreach (s, tsm->sessions) {
466     if (s->in2out.fib_index != fib_index ||
467         s->in2out.addr.as_u32 != l_addr.as_u32)
468       {
469         continue;
470       }
471     if (!addr_only)
472       {
473         if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
474             s->out2in.port != e_port ||
475             s->in2out.port != l_port ||
476             s->nat_proto != protocol)
477           continue;
478       }
479
480     if (is_lb_session (s))
481       continue;
482     if (!snat_is_session_static (s))
483       continue;
484     nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
485     vec_add1 (indexes_to_free, s - tsm->sessions);
486     if (!addr_only)
487       break;
488   }
489   /* *INDENT-ON* */
490   u32 *ses_index;
491   vec_foreach (ses_index, indexes_to_free)
492   {
493     s = pool_elt_at_index (tsm->sessions, *ses_index);
494     nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
495   }
496   vec_free (indexes_to_free);
497 }
498
499 int
500 snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
501                          u16 l_port, u16 e_port, u32 vrf_id, int addr_only,
502                          u32 sw_if_index, nat_protocol_t proto, int is_add,
503                          twice_nat_type_t twice_nat, u8 out2in_only, u8 *tag,
504                          u8 identity_nat, ip4_address_t pool_addr, int exact)
505 {
506   snat_main_t *sm = &snat_main;
507   snat_static_mapping_t *m;
508   clib_bihash_kv_8_8_t kv, value;
509   snat_address_t *a = 0;
510   u32 fib_index = ~0;
511   snat_interface_t *interface;
512   snat_main_per_thread_data_t *tsm;
513   snat_static_map_resolve_t *rp, *rp_match = 0;
514   nat44_lb_addr_port_t *local;
515   u32 find = ~0;
516   int i;
517
518   /* If the external address is a specific interface address */
519   if (sw_if_index != ~0)
520     {
521       ip4_address_t *first_int_addr;
522
523       for (i = 0; i < vec_len (sm->to_resolve); i++)
524         {
525           rp = sm->to_resolve + i;
526           if (rp->sw_if_index != sw_if_index ||
527               rp->l_addr.as_u32 != l_addr.as_u32 ||
528               rp->vrf_id != vrf_id || rp->addr_only != addr_only)
529             continue;
530
531           if (!addr_only)
532             {
533               if ((rp->l_port != l_port && rp->e_port != e_port)
534                   || rp->proto != proto)
535                 continue;
536             }
537
538           rp_match = rp;
539           break;
540         }
541
542       /* Might be already set... */
543       first_int_addr = ip4_interface_first_address
544         (sm->ip4_main, sw_if_index, 0 /* just want the address */ );
545
546       if (is_add)
547         {
548           if (rp_match)
549             return VNET_API_ERROR_VALUE_EXIST;
550
551           snat_add_static_mapping_when_resolved (
552             sm, l_addr, l_port, sw_if_index, e_port, vrf_id, proto, addr_only,
553             tag, twice_nat, out2in_only, identity_nat, pool_addr, exact);
554
555           /* DHCP resolution required? */
556           if (first_int_addr == 0)
557             {
558               return 0;
559             }
560           else
561             {
562               e_addr.as_u32 = first_int_addr->as_u32;
563               /* Identity mapping? */
564               if (l_addr.as_u32 == 0)
565                 l_addr.as_u32 = e_addr.as_u32;
566             }
567         }
568       else
569         {
570           if (!rp_match)
571             return VNET_API_ERROR_NO_SUCH_ENTRY;
572
573           vec_del1 (sm->to_resolve, i);
574
575           if (first_int_addr)
576             {
577               e_addr.as_u32 = first_int_addr->as_u32;
578               /* Identity mapping? */
579               if (l_addr.as_u32 == 0)
580                 l_addr.as_u32 = e_addr.as_u32;
581             }
582           else
583             return 0;
584         }
585     }
586
587   init_nat_k (&kv, e_addr, addr_only ? 0 : e_port, 0, addr_only ? 0 : proto);
588   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
589     m = 0;
590   else
591     m = pool_elt_at_index (sm->static_mappings, value.value);
592
593   if (is_add)
594     {
595       if (m)
596         {
597           if (is_identity_static_mapping (m))
598             {
599               /* *INDENT-OFF* */
600               pool_foreach (local, m->locals)
601                {
602                 if (local->vrf_id == vrf_id)
603                   return VNET_API_ERROR_VALUE_EXIST;
604               }
605               /* *INDENT-ON* */
606               pool_get (m->locals, local);
607               local->vrf_id = vrf_id;
608               local->fib_index =
609                 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
610                                                    sm->fib_src_low);
611               init_nat_kv (&kv, m->local_addr, m->local_port, local->fib_index,
612                            m->proto, 0, m - sm->static_mappings);
613               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
614               return 0;
615             }
616           else
617             return VNET_API_ERROR_VALUE_EXIST;
618         }
619
620       if (twice_nat && addr_only)
621         return VNET_API_ERROR_UNSUPPORTED;
622
623       /* Convert VRF id to FIB index */
624       if (vrf_id != ~0)
625         fib_index =
626           fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
627                                              sm->fib_src_low);
628       /* If not specified use inside VRF id from SNAT plugin startup config */
629       else
630         {
631           fib_index = sm->inside_fib_index;
632           vrf_id = sm->inside_vrf_id;
633           fib_table_lock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
634         }
635
636       if (!(out2in_only || identity_nat))
637         {
638           init_nat_k (&kv, l_addr, addr_only ? 0 : l_port, fib_index,
639                       addr_only ? 0 : proto);
640           if (!clib_bihash_search_8_8
641               (&sm->static_mapping_by_local, &kv, &value))
642             return VNET_API_ERROR_VALUE_EXIST;
643         }
644
645       /* Find external address in allocated addresses and reserve port for
646          address and port pair mapping when dynamic translations enabled */
647       if (!(addr_only || sm->static_mapping_only || out2in_only))
648         {
649           for (i = 0; i < vec_len (sm->addresses); i++)
650             {
651               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
652                 {
653                   a = sm->addresses + i;
654                   /* External port must be unused */
655                   switch (proto)
656                     {
657 #define _(N, j, n, s) \
658                     case NAT_PROTOCOL_##N: \
659                       if (a->busy_##n##_port_refcounts[e_port]) \
660                         return VNET_API_ERROR_INVALID_VALUE; \
661                       ++a->busy_##n##_port_refcounts[e_port]; \
662                       if (e_port > 1024) \
663                         { \
664                           a->busy_##n##_ports++; \
665                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
666                         } \
667                       break;
668                       foreach_nat_protocol
669 #undef _
670                         default : nat_elog_info (sm, "unknown protocol");
671                       return VNET_API_ERROR_INVALID_VALUE_2;
672                     }
673                   break;
674                 }
675             }
676           /* External address must be allocated */
677           if (!a && (l_addr.as_u32 != e_addr.as_u32))
678             {
679               if (sw_if_index != ~0)
680                 {
681                   for (i = 0; i < vec_len (sm->to_resolve); i++)
682                     {
683                       rp = sm->to_resolve + i;
684                       if (rp->addr_only)
685                         continue;
686                       if (rp->sw_if_index != sw_if_index &&
687                           rp->l_addr.as_u32 != l_addr.as_u32 &&
688                           rp->vrf_id != vrf_id && rp->l_port != l_port &&
689                           rp->e_port != e_port && rp->proto != proto)
690                         continue;
691
692                       vec_del1 (sm->to_resolve, i);
693                       break;
694                     }
695                 }
696               return VNET_API_ERROR_NO_SUCH_ENTRY;
697             }
698         }
699
700       pool_get (sm->static_mappings, m);
701       clib_memset (m, 0, sizeof (*m));
702       m->tag = vec_dup (tag);
703       m->local_addr = l_addr;
704       m->external_addr = e_addr;
705       m->twice_nat = twice_nat;
706
707       if (twice_nat == TWICE_NAT && exact)
708         {
709           m->flags |= NAT_STATIC_MAPPING_FLAG_EXACT_ADDRESS;
710           m->pool_addr = pool_addr;
711         }
712
713       if (out2in_only)
714         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
715       if (addr_only)
716         m->flags |= NAT_STATIC_MAPPING_FLAG_ADDR_ONLY;
717       if (identity_nat)
718         {
719           m->flags |= NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT;
720           pool_get (m->locals, local);
721           local->vrf_id = vrf_id;
722           local->fib_index = fib_index;
723         }
724       else
725         {
726           m->vrf_id = vrf_id;
727           m->fib_index = fib_index;
728         }
729       if (!addr_only)
730         {
731           m->local_port = l_port;
732           m->external_port = e_port;
733           m->proto = proto;
734         }
735
736       if (sm->num_workers > 1)
737         {
738           ip4_header_t ip = {
739             .src_address = m->local_addr,
740           };
741           vec_add1 (m->workers, sm->worker_in2out_cb (&ip, m->fib_index, 0));
742           tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
743         }
744       else
745         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
746
747       if (!out2in_only)
748         {
749           init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto,
750                        0, m - sm->static_mappings);
751           clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
752         }
753
754       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
755                    m - sm->static_mappings);
756       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1);
757     }
758   else
759     {
760       if (!m)
761         {
762           if (sw_if_index != ~0)
763             return 0;
764           else
765             return VNET_API_ERROR_NO_SUCH_ENTRY;
766         }
767
768       if (identity_nat)
769         {
770           if (vrf_id == ~0)
771             vrf_id = sm->inside_vrf_id;
772
773           /* *INDENT-OFF* */
774           pool_foreach (local, m->locals)
775            {
776             if (local->vrf_id == vrf_id)
777               find = local - m->locals;
778           }
779           /* *INDENT-ON* */
780           if (find == ~0)
781             return VNET_API_ERROR_NO_SUCH_ENTRY;
782
783           local = pool_elt_at_index (m->locals, find);
784           fib_index = local->fib_index;
785           pool_put (m->locals, local);
786         }
787       else
788         fib_index = m->fib_index;
789
790       /* Free external address port */
791       if (!(addr_only || sm->static_mapping_only || out2in_only))
792         {
793           for (i = 0; i < vec_len (sm->addresses); i++)
794             {
795               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
796                 {
797                   a = sm->addresses + i;
798                   switch (proto)
799                     {
800 #define _(N, j, n, s) \
801                     case NAT_PROTOCOL_##N: \
802                       --a->busy_##n##_port_refcounts[e_port]; \
803                       if (e_port > 1024) \
804                         { \
805                           a->busy_##n##_ports--; \
806                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
807                         } \
808                       break;
809                       foreach_nat_protocol
810 #undef _
811                         default : nat_elog_info (sm, "unknown protocol");
812                       return VNET_API_ERROR_INVALID_VALUE_2;
813                     }
814                   break;
815                 }
816             }
817         }
818
819       if (sm->num_workers > 1)
820         tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
821       else
822         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
823
824       init_nat_k (&kv, m->local_addr, m->local_port, fib_index, m->proto);
825       if (!out2in_only)
826         clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0);
827
828       /* Delete session(s) for static mapping if exist */
829       if (!(sm->static_mapping_only) ||
830           (sm->static_mapping_only && sm->static_mapping_connection_tracking))
831         {
832           nat_ed_static_mapping_del_sessions (
833             sm, tsm, m->local_addr, m->local_port, m->proto, fib_index,
834             addr_only, e_addr, e_port);
835         }
836
837       fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
838       if (pool_elts (m->locals))
839         return 0;
840
841       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
842       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0);
843
844       vec_free (m->tag);
845       vec_free (m->workers);
846       /* Delete static mapping from pool */
847       pool_put (sm->static_mappings, m);
848     }
849
850   if (!addr_only || (l_addr.as_u32 == e_addr.as_u32))
851     return 0;
852
853   /* Add/delete external address to FIB */
854   /* *INDENT-OFF* */
855   pool_foreach (interface, sm->interfaces)
856    {
857      if (nat_interface_is_inside (interface))
858        continue;
859
860      snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, is_add);
861      break;
862   }
863   pool_foreach (interface, sm->output_feature_interfaces)
864    {
865      if (nat_interface_is_inside (interface))
866        continue;
867
868      snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, is_add);
869      break;
870   }
871   /* *INDENT-ON* */
872
873   return 0;
874 }
875
876 int
877 nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
878                                  nat_protocol_t proto,
879                                  nat44_lb_addr_port_t * locals, u8 is_add,
880                                  twice_nat_type_t twice_nat, u8 out2in_only,
881                                  u8 * tag, u32 affinity)
882 {
883   snat_main_t *sm = &snat_main;
884   snat_static_mapping_t *m;
885   clib_bihash_kv_8_8_t kv, value;
886   snat_address_t *a = 0;
887   int i;
888   nat44_lb_addr_port_t *local;
889   snat_main_per_thread_data_t *tsm;
890   snat_session_t *s;
891   uword *bitmap = 0;
892
893   init_nat_k (&kv, e_addr, e_port, 0, proto);
894   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
895     m = 0;
896   else
897     m = pool_elt_at_index (sm->static_mappings, value.value);
898
899   if (is_add)
900     {
901       if (m)
902         return VNET_API_ERROR_VALUE_EXIST;
903
904       if (vec_len (locals) < 2)
905         return VNET_API_ERROR_INVALID_VALUE;
906
907       /* Find external address in allocated addresses and reserve port for
908          address and port pair mapping when dynamic translations enabled */
909       if (!(sm->static_mapping_only || out2in_only))
910         {
911           for (i = 0; i < vec_len (sm->addresses); i++)
912             {
913               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
914                 {
915                   a = sm->addresses + i;
916                   /* External port must be unused */
917                   switch (proto)
918                     {
919 #define _(N, j, n, s) \
920                     case NAT_PROTOCOL_##N: \
921                       if (a->busy_##n##_port_refcounts[e_port]) \
922                         return VNET_API_ERROR_INVALID_VALUE; \
923                       ++a->busy_##n##_port_refcounts[e_port]; \
924                       if (e_port > 1024) \
925                         { \
926                           a->busy_##n##_ports++; \
927                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
928                         } \
929                       break;
930                       foreach_nat_protocol
931 #undef _
932                         default : nat_elog_info (sm, "unknown protocol");
933                       return VNET_API_ERROR_INVALID_VALUE_2;
934                     }
935                   break;
936                 }
937             }
938           /* External address must be allocated */
939           if (!a)
940             return VNET_API_ERROR_NO_SUCH_ENTRY;
941         }
942
943       pool_get (sm->static_mappings, m);
944       clib_memset (m, 0, sizeof (*m));
945       m->tag = vec_dup (tag);
946       m->external_addr = e_addr;
947       m->external_port = e_port;
948       m->proto = proto;
949       m->twice_nat = twice_nat;
950       m->flags |= NAT_STATIC_MAPPING_FLAG_LB;
951       if (out2in_only)
952         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
953       m->affinity = affinity;
954
955       if (affinity)
956         m->affinity_per_service_list_head_index =
957           nat_affinity_get_per_service_list_head_index ();
958       else
959         m->affinity_per_service_list_head_index = ~0;
960
961       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
962                    m - sm->static_mappings);
963       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1))
964         {
965           nat_elog_err (sm, "static_mapping_by_external key add failed");
966           return VNET_API_ERROR_UNSPECIFIED;
967         }
968
969       for (i = 0; i < vec_len (locals); i++)
970         {
971           locals[i].fib_index =
972             fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
973                                                locals[i].vrf_id,
974                                                sm->fib_src_low);
975           if (!out2in_only)
976             {
977               init_nat_kv (&kv, locals[i].addr, locals[i].port,
978                            locals[i].fib_index, m->proto, 0,
979                            m - sm->static_mappings);
980               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
981             }
982           locals[i].prefix = (i == 0) ? locals[i].probability :
983             (locals[i - 1].prefix + locals[i].probability);
984           pool_get (m->locals, local);
985           *local = locals[i];
986           if (sm->num_workers > 1)
987             {
988               ip4_header_t ip = {
989                 .src_address = locals[i].addr,
990               };
991               bitmap =
992                 clib_bitmap_set (bitmap,
993                                  sm->worker_in2out_cb (&ip, m->fib_index, 0),
994                                  1);
995             }
996         }
997
998       /* Assign workers */
999       if (sm->num_workers > 1)
1000         {
1001           /* *INDENT-OFF* */
1002           clib_bitmap_foreach (i, bitmap)
1003              {
1004                vec_add1(m->workers, i);
1005             }
1006           /* *INDENT-ON* */
1007         }
1008     }
1009   else
1010     {
1011       if (!m)
1012         return VNET_API_ERROR_NO_SUCH_ENTRY;
1013
1014       if (!is_lb_static_mapping (m))
1015         return VNET_API_ERROR_INVALID_VALUE;
1016
1017       /* Free external address port */
1018       if (!(sm->static_mapping_only || out2in_only))
1019         {
1020           for (i = 0; i < vec_len (sm->addresses); i++)
1021             {
1022               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1023                 {
1024                   a = sm->addresses + i;
1025                   switch (proto)
1026                     {
1027 #define _(N, j, n, s) \
1028                     case NAT_PROTOCOL_##N: \
1029                       --a->busy_##n##_port_refcounts[e_port]; \
1030                       if (e_port > 1024) \
1031                         { \
1032                           a->busy_##n##_ports--; \
1033                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1034                         } \
1035                       break;
1036                       foreach_nat_protocol
1037 #undef _
1038                         default : nat_elog_info (sm, "unknown protocol");
1039                       return VNET_API_ERROR_INVALID_VALUE_2;
1040                     }
1041                   break;
1042                 }
1043             }
1044         }
1045
1046       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
1047       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0))
1048         {
1049           nat_elog_err (sm, "static_mapping_by_external key del failed");
1050           return VNET_API_ERROR_UNSPECIFIED;
1051         }
1052
1053       /* *INDENT-OFF* */
1054       pool_foreach (local, m->locals)
1055       {
1056           fib_table_unlock (local->fib_index, FIB_PROTOCOL_IP4,
1057                             sm->fib_src_low);
1058           if (!out2in_only)
1059             {
1060 init_nat_k(&              kv, local->addr, local->port, local->fib_index, m->proto);
1061               if (clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0))
1062                 {
1063                   nat_elog_err (sm, "static_mapping_by_local key del failed");
1064                   return VNET_API_ERROR_UNSPECIFIED;
1065                 }
1066             }
1067
1068           if (sm->num_workers > 1)
1069             {
1070               ip4_header_t ip = {
1071                 .src_address = local->addr,
1072               };
1073               tsm =
1074                 vec_elt_at_index (sm->per_thread_data,
1075                                   sm->worker_in2out_cb (&ip, m->fib_index, 0));
1076             }
1077           else
1078             tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1079
1080           /* Delete sessions */
1081           pool_foreach (s, tsm->sessions)
1082             {
1083               if (!(is_lb_session (s)))
1084                 continue;
1085
1086               if ((s->in2out.addr.as_u32 != local->addr.as_u32) ||
1087                   s->in2out.port != local->port)
1088                 continue;
1089
1090               nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1091               nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1092             }
1093       }
1094       /* *INDENT-ON* */
1095       if (m->affinity)
1096         nat_affinity_flush_service (m->affinity_per_service_list_head_index);
1097       pool_free (m->locals);
1098       vec_free (m->tag);
1099       vec_free (m->workers);
1100
1101       pool_put (sm->static_mappings, m);
1102     }
1103
1104   return 0;
1105 }
1106
1107 int
1108 nat44_lb_static_mapping_add_del_local (ip4_address_t e_addr, u16 e_port,
1109                                        ip4_address_t l_addr, u16 l_port,
1110                                        nat_protocol_t proto, u32 vrf_id,
1111                                        u8 probability, u8 is_add)
1112 {
1113   snat_main_t *sm = &snat_main;
1114   snat_static_mapping_t *m = 0;
1115   clib_bihash_kv_8_8_t kv, value;
1116   nat44_lb_addr_port_t *local, *prev_local, *match_local = 0;
1117   snat_main_per_thread_data_t *tsm;
1118   snat_session_t *s;
1119   u32 *locals = 0;
1120   uword *bitmap = 0;
1121   int i;
1122
1123   init_nat_k (&kv, e_addr, e_port, 0, proto);
1124   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1125     m = pool_elt_at_index (sm->static_mappings, value.value);
1126
1127   if (!m)
1128     return VNET_API_ERROR_NO_SUCH_ENTRY;
1129
1130   if (!is_lb_static_mapping (m))
1131     return VNET_API_ERROR_INVALID_VALUE;
1132
1133   /* *INDENT-OFF* */
1134   pool_foreach (local, m->locals)
1135    {
1136     if ((local->addr.as_u32 == l_addr.as_u32) && (local->port == l_port) &&
1137         (local->vrf_id == vrf_id))
1138       {
1139         match_local = local;
1140         break;
1141       }
1142   }
1143   /* *INDENT-ON* */
1144
1145   if (is_add)
1146     {
1147       if (match_local)
1148         return VNET_API_ERROR_VALUE_EXIST;
1149
1150       pool_get (m->locals, local);
1151       clib_memset (local, 0, sizeof (*local));
1152       local->addr.as_u32 = l_addr.as_u32;
1153       local->port = l_port;
1154       local->probability = probability;
1155       local->vrf_id = vrf_id;
1156       local->fib_index =
1157         fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1158                                            sm->fib_src_low);
1159
1160       if (!is_out2in_only_static_mapping (m))
1161         {
1162           init_nat_kv (&kv, l_addr, l_port, local->fib_index, proto, 0,
1163                        m - sm->static_mappings);
1164           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1))
1165             nat_elog_err (sm, "static_mapping_by_local key add failed");
1166         }
1167     }
1168   else
1169     {
1170       if (!match_local)
1171         return VNET_API_ERROR_NO_SUCH_ENTRY;
1172
1173       if (pool_elts (m->locals) < 3)
1174         return VNET_API_ERROR_UNSPECIFIED;
1175
1176       fib_table_unlock (match_local->fib_index, FIB_PROTOCOL_IP4,
1177                         sm->fib_src_low);
1178
1179       if (!is_out2in_only_static_mapping (m))
1180         {
1181           init_nat_k (&kv, l_addr, l_port, match_local->fib_index, proto);
1182           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0))
1183             nat_elog_err (sm, "static_mapping_by_local key del failed");
1184         }
1185
1186       if (sm->num_workers > 1)
1187         {
1188           ip4_header_t ip = {
1189             .src_address = local->addr,
1190           };
1191           tsm = vec_elt_at_index (sm->per_thread_data,
1192                                   sm->worker_in2out_cb (&ip, m->fib_index,
1193                                                         0));
1194         }
1195       else
1196         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1197
1198       /* Delete sessions */
1199       /* *INDENT-OFF* */
1200       pool_foreach (s, tsm->sessions) {
1201         if (!(is_lb_session (s)))
1202           continue;
1203
1204         if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) ||
1205             s->in2out.port != match_local->port)
1206           continue;
1207
1208         nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1209         nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1210       }
1211       /* *INDENT-ON* */
1212
1213       pool_put (m->locals, match_local);
1214     }
1215
1216   vec_free (m->workers);
1217
1218   /* *INDENT-OFF* */
1219   pool_foreach (local, m->locals)
1220    {
1221     vec_add1 (locals, local - m->locals);
1222     if (sm->num_workers > 1)
1223       {
1224         ip4_header_t ip;
1225         ip.src_address.as_u32 = local->addr.as_u32,
1226         bitmap = clib_bitmap_set (bitmap,
1227                                   sm->worker_in2out_cb (&ip, local->fib_index, 0),
1228                                   1);
1229       }
1230   }
1231   /* *INDENT-ON* */
1232
1233   ASSERT (vec_len (locals) > 1);
1234
1235   local = pool_elt_at_index (m->locals, locals[0]);
1236   local->prefix = local->probability;
1237   for (i = 1; i < vec_len (locals); i++)
1238     {
1239       local = pool_elt_at_index (m->locals, locals[i]);
1240       prev_local = pool_elt_at_index (m->locals, locals[i - 1]);
1241       local->prefix = local->probability + prev_local->prefix;
1242     }
1243
1244   /* Assign workers */
1245   if (sm->num_workers > 1)
1246     {
1247       /* *INDENT-OFF* */
1248       clib_bitmap_foreach (i, bitmap)  { vec_add1(m->workers, i); }
1249       /* *INDENT-ON* */
1250     }
1251
1252   return 0;
1253 }
1254
1255 int
1256 snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm,
1257                   u8 twice_nat)
1258 {
1259   snat_address_t *a = 0;
1260   snat_session_t *ses;
1261   u32 *ses_to_be_removed = 0, *ses_index;
1262   snat_main_per_thread_data_t *tsm;
1263   snat_static_mapping_t *m;
1264   snat_interface_t *interface;
1265   int i;
1266   snat_address_t *addresses =
1267     twice_nat ? sm->twice_nat_addresses : sm->addresses;
1268
1269   /* Find SNAT address */
1270   for (i = 0; i < vec_len (addresses); i++)
1271     {
1272       if (addresses[i].addr.as_u32 == addr.as_u32)
1273         {
1274           a = addresses + i;
1275           break;
1276         }
1277     }
1278   if (!a)
1279     {
1280       nat_log_err ("no such address");
1281       return VNET_API_ERROR_NO_SUCH_ENTRY;
1282     }
1283
1284   if (delete_sm)
1285     {
1286       ip4_address_t pool_addr = { 0 };
1287       /* *INDENT-OFF* */
1288       pool_foreach (m, sm->static_mappings)
1289        {
1290           if (m->external_addr.as_u32 == addr.as_u32)
1291             (void) snat_add_static_mapping (m->local_addr, m->external_addr,
1292                                             m->local_port, m->external_port,
1293                                             m->vrf_id,
1294                                             is_addr_only_static_mapping(m), ~0,
1295                                             m->proto, 0 /* is_add */,
1296                                             m->twice_nat,
1297                                             is_out2in_only_static_mapping(m),
1298                                             m->tag,
1299                                             is_identity_static_mapping(m),
1300                                             pool_addr, 0);
1301       }
1302       /* *INDENT-ON* */
1303     }
1304   else
1305     {
1306       /* Check if address is used in some static mapping */
1307       if (is_snat_address_used_in_static_mapping (sm, addr))
1308         {
1309           nat_log_err ("address used in static mapping");
1310           return VNET_API_ERROR_UNSPECIFIED;
1311         }
1312     }
1313
1314   if (a->fib_index != ~0)
1315     fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
1316
1317   /* Delete sessions using address */
1318   if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
1319     {
1320       vec_foreach (tsm, sm->per_thread_data)
1321       {
1322         pool_foreach (ses, tsm->sessions)  {
1323           if (ses->out2in.addr.as_u32 == addr.as_u32)
1324             {
1325               nat_free_session_data (sm, ses, tsm - sm->per_thread_data, 0);
1326               vec_add1 (ses_to_be_removed, ses - tsm->sessions);
1327             }
1328         }
1329
1330             vec_foreach (ses_index, ses_to_be_removed)
1331             {
1332               ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1333               nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1);
1334             }
1335
1336         vec_free (ses_to_be_removed);
1337       }
1338     }
1339
1340 #define _(N, i, n, s) \
1341   vec_free (a->busy_##n##_ports_per_thread);
1342   foreach_nat_protocol
1343 #undef _
1344
1345     if (twice_nat)
1346   {
1347     vec_del1 (sm->twice_nat_addresses, i);
1348     return 0;
1349   }
1350   else vec_del1 (sm->addresses, i);
1351
1352   /* Delete external address from FIB */
1353   pool_foreach (interface, sm->interfaces)
1354     {
1355       if (nat_interface_is_inside (interface))
1356         continue;
1357
1358       snat_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
1359       break;
1360     }
1361   pool_foreach (interface, sm->output_feature_interfaces)
1362    {
1363      if (nat_interface_is_inside (interface))
1364        continue;
1365
1366      snat_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
1367      break;
1368   }
1369
1370   return 0;
1371 }
1372
1373 void
1374 expire_per_vrf_sessions (u32 fib_index)
1375 {
1376   per_vrf_sessions_t *per_vrf_sessions;
1377   snat_main_per_thread_data_t *tsm;
1378   snat_main_t *sm = &snat_main;
1379
1380   /* *INDENT-OFF* */
1381   vec_foreach (tsm, sm->per_thread_data)
1382     {
1383       vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
1384         {
1385           if ((per_vrf_sessions->rx_fib_index == fib_index) ||
1386               (per_vrf_sessions->tx_fib_index == fib_index))
1387             {
1388               per_vrf_sessions->expired = 1;
1389             }
1390         }
1391     }
1392   /* *INDENT-ON* */
1393 }
1394
1395 void
1396 update_per_vrf_sessions_vec (u32 fib_index, int is_del)
1397 {
1398   snat_main_t *sm = &snat_main;
1399   nat_fib_t *fib;
1400
1401   // we don't care if it is outside/inside fib
1402   // we just care about their ref_count
1403   // if it reaches 0 sessions should expire
1404   // because the fib isn't valid for NAT anymore
1405
1406   vec_foreach (fib, sm->fibs)
1407   {
1408     if (fib->fib_index == fib_index)
1409       {
1410         if (is_del)
1411           {
1412             fib->ref_count--;
1413             if (!fib->ref_count)
1414               {
1415                 vec_del1 (sm->fibs, fib - sm->fibs);
1416                 expire_per_vrf_sessions (fib_index);
1417               }
1418             return;
1419           }
1420         else
1421           fib->ref_count++;
1422       }
1423   }
1424   if (!is_del)
1425     {
1426       vec_add2 (sm->fibs, fib, 1);
1427       fib->ref_count = 1;
1428       fib->fib_index = fib_index;
1429     }
1430 }
1431
1432 int
1433 snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
1434 {
1435   snat_main_t *sm = &snat_main;
1436   snat_interface_t *i;
1437   const char *feature_name, *del_feature_name;
1438   snat_address_t *ap;
1439   snat_static_mapping_t *m;
1440   nat_outside_fib_t *outside_fib;
1441   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1442                                                        sw_if_index);
1443
1444   if (!sm->enabled)
1445     {
1446       nat_log_err ("nat44 is disabled");
1447       return VNET_API_ERROR_UNSUPPORTED;
1448     }
1449
1450   /* *INDENT-OFF* */
1451   pool_foreach (i, sm->output_feature_interfaces)
1452    {
1453     if (i->sw_if_index == sw_if_index)
1454       {
1455         nat_log_err ("error interface already configured");
1456         return VNET_API_ERROR_VALUE_EXIST;
1457       }
1458   }
1459   /* *INDENT-ON* */
1460
1461   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
1462     feature_name = is_inside ? "nat44-in2out-fast" : "nat44-out2in-fast";
1463   else
1464     {
1465       if (sm->num_workers > 1)
1466         feature_name =
1467           is_inside ? "nat44-in2out-worker-handoff" :
1468           "nat44-out2in-worker-handoff";
1469       else
1470         feature_name = is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1471     }
1472
1473   ASSERT (sm->frame_queue_nelts > 0);
1474
1475   if (sm->fq_in2out_index == ~0 && sm->num_workers > 1)
1476     sm->fq_in2out_index = vlib_frame_queue_main_init (sm->in2out_node_index,
1477                                                       sm->frame_queue_nelts);
1478
1479   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
1480     sm->fq_out2in_index = vlib_frame_queue_main_init (sm->out2in_node_index,
1481                                                       sm->frame_queue_nelts);
1482
1483   update_per_vrf_sessions_vec (fib_index, is_del);
1484
1485   if (!is_inside)
1486     {
1487       /* *INDENT-OFF* */
1488       vec_foreach (outside_fib, sm->outside_fibs)
1489         {
1490           if (outside_fib->fib_index == fib_index)
1491             {
1492               if (is_del)
1493                 {
1494                   outside_fib->refcount--;
1495                   if (!outside_fib->refcount)
1496                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1497                 }
1498               else
1499                 outside_fib->refcount++;
1500               goto feature_set;
1501             }
1502         }
1503       /* *INDENT-ON* */
1504       if (!is_del)
1505         {
1506           vec_add2 (sm->outside_fibs, outside_fib, 1);
1507           outside_fib->refcount = 1;
1508           outside_fib->fib_index = fib_index;
1509         }
1510     }
1511
1512 feature_set:
1513   /* *INDENT-OFF* */
1514   pool_foreach (i, sm->interfaces)
1515    {
1516     if (i->sw_if_index == sw_if_index)
1517       {
1518         if (is_del)
1519           {
1520             if (nat_interface_is_inside(i) && nat_interface_is_outside(i))
1521               {
1522                 if (is_inside)
1523                   i->flags &= ~NAT_INTERFACE_FLAG_IS_INSIDE;
1524                 else
1525                   i->flags &= ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
1526
1527                 if (sm->num_workers > 1)
1528                   {
1529                     del_feature_name = "nat44-handoff-classify";
1530                     feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
1531                                                  "nat44-out2in-worker-handoff";
1532                   }
1533                 else
1534                   {
1535                     del_feature_name = "nat44-ed-classify";
1536                     feature_name =
1537                       !is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1538                   }
1539
1540                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1541                 if (rv)
1542                   return rv;
1543                 vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1544                                              sw_if_index, 0, 0, 0);
1545                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
1546                                              sw_if_index, 1, 0, 0);
1547               }
1548             else
1549               {
1550                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1551                 if (rv)
1552                   return rv;
1553                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
1554                                              sw_if_index, 0, 0, 0);
1555                 pool_put (sm->interfaces, i);
1556               }
1557           }
1558         else
1559           {
1560             if ((nat_interface_is_inside (i) && is_inside) ||
1561                 (nat_interface_is_outside (i) && !is_inside))
1562               return 0;
1563
1564             if (sm->num_workers > 1)
1565               {
1566                 del_feature_name = !is_inside ? "nat44-in2out-worker-handoff" :
1567                                                 "nat44-out2in-worker-handoff";
1568                 feature_name = "nat44-handoff-classify";
1569               }
1570             else
1571               {
1572                 del_feature_name =
1573                   !is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1574
1575                 feature_name = "nat44-ed-classify";
1576               }
1577
1578             int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1579             if (rv)
1580               return rv;
1581             vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1582                                          sw_if_index, 0, 0, 0);
1583             vnet_feature_enable_disable ("ip4-unicast", feature_name,
1584                                          sw_if_index, 1, 0, 0);
1585             goto set_flags;
1586           }
1587
1588         goto fib;
1589       }
1590   }
1591   /* *INDENT-ON* */
1592
1593   if (is_del)
1594     {
1595       nat_log_err ("error interface couldn't be found");
1596       return VNET_API_ERROR_NO_SUCH_ENTRY;
1597     }
1598
1599   pool_get (sm->interfaces, i);
1600   i->sw_if_index = sw_if_index;
1601   i->flags = 0;
1602   nat_validate_interface_counters (sm, sw_if_index);
1603
1604   vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1, 0,
1605                                0);
1606
1607   int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
1608   if (rv)
1609     return rv;
1610
1611 set_flags:
1612   if (is_inside)
1613     {
1614       i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
1615       return 0;
1616     }
1617   else
1618     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
1619
1620   /* Add/delete external addresses to FIB */
1621 fib:
1622   /* *INDENT-OFF* */
1623   vec_foreach (ap, sm->addresses)
1624     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
1625
1626   pool_foreach (m, sm->static_mappings)
1627    {
1628     if (!(is_addr_only_static_mapping(m)) || (m->local_addr.as_u32 == m->external_addr.as_u32))
1629       continue;
1630
1631     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
1632   }
1633   /* *INDENT-ON* */
1634
1635   return 0;
1636 }
1637
1638 int
1639 snat_interface_add_del_output_feature (u32 sw_if_index,
1640                                        u8 is_inside, int is_del)
1641 {
1642   snat_main_t *sm = &snat_main;
1643   snat_interface_t *i;
1644   snat_address_t *ap;
1645   snat_static_mapping_t *m;
1646   nat_outside_fib_t *outside_fib;
1647   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1648                                                        sw_if_index);
1649
1650   if (!sm->enabled)
1651     {
1652       nat_log_err ("nat44 is disabled");
1653       return VNET_API_ERROR_UNSUPPORTED;
1654     }
1655
1656   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
1657     {
1658       nat_log_err ("error unsupported");
1659       return VNET_API_ERROR_UNSUPPORTED;
1660     }
1661
1662   /* *INDENT-OFF* */
1663   pool_foreach (i, sm->interfaces)
1664    {
1665     if (i->sw_if_index == sw_if_index)
1666       {
1667         nat_log_err ("error interface already configured");
1668         return VNET_API_ERROR_VALUE_EXIST;
1669       }
1670   }
1671   /* *INDENT-ON* */
1672
1673   update_per_vrf_sessions_vec (fib_index, is_del);
1674
1675   if (!is_inside)
1676     {
1677       /* *INDENT-OFF* */
1678       vec_foreach (outside_fib, sm->outside_fibs)
1679         {
1680           if (outside_fib->fib_index == fib_index)
1681             {
1682               if (is_del)
1683                 {
1684                   outside_fib->refcount--;
1685                   if (!outside_fib->refcount)
1686                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1687                 }
1688               else
1689                 outside_fib->refcount++;
1690               goto feature_set;
1691             }
1692         }
1693       /* *INDENT-ON* */
1694       if (!is_del)
1695         {
1696           vec_add2 (sm->outside_fibs, outside_fib, 1);
1697           outside_fib->refcount = 1;
1698           outside_fib->fib_index = fib_index;
1699         }
1700     }
1701
1702 feature_set:
1703   if (is_inside)
1704     {
1705           int rv =
1706             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
1707           if (rv)
1708             return rv;
1709           rv =
1710             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
1711                                                             !is_del);
1712           if (rv)
1713             return rv;
1714       goto fq;
1715     }
1716
1717   if (sm->num_workers > 1)
1718     {
1719       int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
1720       if (rv)
1721         return rv;
1722       rv =
1723         ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del);
1724       if (rv)
1725         return rv;
1726       vnet_feature_enable_disable ("ip4-unicast",
1727                                    "nat44-out2in-worker-handoff",
1728                                    sw_if_index, !is_del, 0, 0);
1729       vnet_feature_enable_disable ("ip4-output",
1730                                    "nat44-in2out-output-worker-handoff",
1731                                    sw_if_index, !is_del, 0, 0);
1732     }
1733   else
1734     {
1735           int rv =
1736             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
1737           if (rv)
1738             return rv;
1739           rv =
1740             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
1741                                                             !is_del);
1742           if (rv)
1743             return rv;
1744           vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in",
1745                                        sw_if_index, !is_del, 0, 0);
1746           vnet_feature_enable_disable ("ip4-output", "nat-pre-in2out-output",
1747                                        sw_if_index, !is_del, 0, 0);
1748     }
1749
1750 fq:
1751   if (sm->fq_in2out_output_index == ~0 && sm->num_workers > 1)
1752     sm->fq_in2out_output_index =
1753       vlib_frame_queue_main_init (sm->in2out_output_node_index, 0);
1754
1755   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
1756     sm->fq_out2in_index =
1757       vlib_frame_queue_main_init (sm->out2in_node_index, 0);
1758
1759   /* *INDENT-OFF* */
1760   pool_foreach (i, sm->output_feature_interfaces)
1761    {
1762     if (i->sw_if_index == sw_if_index)
1763       {
1764         if (is_del)
1765           pool_put (sm->output_feature_interfaces, i);
1766         else
1767           return VNET_API_ERROR_VALUE_EXIST;
1768
1769         goto fib;
1770       }
1771   }
1772   /* *INDENT-ON* */
1773
1774   if (is_del)
1775     {
1776       nat_log_err ("error interface couldn't be found");
1777       return VNET_API_ERROR_NO_SUCH_ENTRY;
1778     }
1779
1780   pool_get (sm->output_feature_interfaces, i);
1781   i->sw_if_index = sw_if_index;
1782   i->flags = 0;
1783   nat_validate_interface_counters (sm, sw_if_index);
1784   if (is_inside)
1785     i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
1786   else
1787     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
1788
1789   /* Add/delete external addresses to FIB */
1790 fib:
1791   if (is_inside)
1792     return 0;
1793
1794   /* *INDENT-OFF* */
1795   vec_foreach (ap, sm->addresses)
1796     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
1797
1798   pool_foreach (m, sm->static_mappings)
1799    {
1800     if (!((is_addr_only_static_mapping(m)))  || (m->local_addr.as_u32 == m->external_addr.as_u32))
1801       continue;
1802
1803     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
1804   }
1805   /* *INDENT-ON* */
1806
1807   return 0;
1808 }
1809
1810 int
1811 snat_set_workers (uword * bitmap)
1812 {
1813   snat_main_t *sm = &snat_main;
1814   int i, j = 0;
1815
1816   if (sm->num_workers < 2)
1817     return VNET_API_ERROR_FEATURE_DISABLED;
1818
1819   if (clib_bitmap_last_set (bitmap) >= sm->num_workers)
1820     return VNET_API_ERROR_INVALID_WORKER;
1821
1822   vec_free (sm->workers);
1823   /* *INDENT-OFF* */
1824   clib_bitmap_foreach (i, bitmap)
1825     {
1826       vec_add1(sm->workers, i);
1827       sm->per_thread_data[sm->first_worker_index + i].snat_thread_index = j;
1828       sm->per_thread_data[sm->first_worker_index + i].thread_index = i;
1829       j++;
1830     }
1831   /* *INDENT-ON* */
1832
1833   sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
1834
1835   return 0;
1836 }
1837
1838 int
1839 nat44_ed_set_frame_queue_nelts (u32 frame_queue_nelts)
1840 {
1841   fail_if_enabled ();
1842   snat_main_t *sm = &snat_main;
1843   sm->frame_queue_nelts = frame_queue_nelts;
1844   return 0;
1845 }
1846
1847 static void
1848 snat_update_outside_fib (ip4_main_t * im, uword opaque,
1849                          u32 sw_if_index, u32 new_fib_index,
1850                          u32 old_fib_index)
1851 {
1852   snat_main_t *sm = &snat_main;
1853   nat_outside_fib_t *outside_fib;
1854   snat_interface_t *i;
1855   u8 is_add = 1;
1856   u8 match = 0;
1857
1858   if (!sm->enabled || (new_fib_index == old_fib_index)
1859       || (!vec_len (sm->outside_fibs)))
1860     {
1861       return;
1862     }
1863
1864   /* *INDENT-OFF* */
1865   pool_foreach (i, sm->interfaces)
1866      {
1867       if (i->sw_if_index == sw_if_index)
1868         {
1869           if (!(nat_interface_is_outside (i)))
1870             return;
1871           match = 1;
1872         }
1873     }
1874
1875   pool_foreach (i, sm->output_feature_interfaces)
1876      {
1877       if (i->sw_if_index == sw_if_index)
1878         {
1879           if (!(nat_interface_is_outside (i)))
1880             return;
1881           match = 1;
1882         }
1883     }
1884   /* *INDENT-ON* */
1885
1886   if (!match)
1887     return;
1888
1889   vec_foreach (outside_fib, sm->outside_fibs)
1890   {
1891     if (outside_fib->fib_index == old_fib_index)
1892       {
1893         outside_fib->refcount--;
1894         if (!outside_fib->refcount)
1895           vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1896         break;
1897       }
1898   }
1899
1900   vec_foreach (outside_fib, sm->outside_fibs)
1901   {
1902     if (outside_fib->fib_index == new_fib_index)
1903       {
1904         outside_fib->refcount++;
1905         is_add = 0;
1906         break;
1907       }
1908   }
1909
1910   if (is_add)
1911     {
1912       vec_add2 (sm->outside_fibs, outside_fib, 1);
1913       outside_fib->refcount = 1;
1914       outside_fib->fib_index = new_fib_index;
1915     }
1916 }
1917
1918 static void
1919 snat_update_outside_fib (ip4_main_t * im, uword opaque,
1920                          u32 sw_if_index, u32 new_fib_index,
1921                          u32 old_fib_index);
1922
1923 static void
1924 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
1925                                        uword opaque,
1926                                        u32 sw_if_index,
1927                                        ip4_address_t * address,
1928                                        u32 address_length,
1929                                        u32 if_address_index, u32 is_delete);
1930
1931 static void
1932 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
1933                                  uword opaque,
1934                                  u32 sw_if_index,
1935                                  ip4_address_t * address,
1936                                  u32 address_length,
1937                                  u32 if_address_index, u32 is_delete);
1938
1939 void
1940 test_key_calc_split ()
1941 {
1942   ip4_address_t l_addr;
1943   l_addr.as_u8[0] = 1;
1944   l_addr.as_u8[1] = 1;
1945   l_addr.as_u8[2] = 1;
1946   l_addr.as_u8[3] = 1;
1947   ip4_address_t r_addr;
1948   r_addr.as_u8[0] = 2;
1949   r_addr.as_u8[1] = 2;
1950   r_addr.as_u8[2] = 2;
1951   r_addr.as_u8[3] = 2;
1952   u16 l_port = 40001;
1953   u16 r_port = 40301;
1954   u8 proto = 9;
1955   u32 fib_index = 9000001;
1956   u32 thread_index = 3000000001;
1957   u32 session_index = 3000000221;
1958   clib_bihash_kv_16_8_t kv;
1959   init_ed_kv (&kv, l_addr, l_port, r_addr, r_port, fib_index, proto,
1960               thread_index, session_index);
1961   ip4_address_t l_addr2;
1962   ip4_address_t r_addr2;
1963   clib_memset (&l_addr2, 0, sizeof (l_addr2));
1964   clib_memset (&r_addr2, 0, sizeof (r_addr2));
1965   u16 l_port2 = 0;
1966   u16 r_port2 = 0;
1967   u8 proto2 = 0;
1968   u32 fib_index2 = 0;
1969   split_ed_kv (&kv, &l_addr2, &r_addr2, &proto2, &fib_index2, &l_port2,
1970                &r_port2);
1971   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
1972   ASSERT (r_addr.as_u32 == r_addr2.as_u32);
1973   ASSERT (l_port == l_port2);
1974   ASSERT (r_port == r_port2);
1975   ASSERT (proto == proto2);
1976   ASSERT (fib_index == fib_index2);
1977   ASSERT (thread_index == ed_value_get_thread_index (&kv));
1978   ASSERT (session_index == ed_value_get_session_index (&kv));
1979
1980   fib_index = 7001;
1981   proto = 5;
1982   nat_protocol_t proto3 = ~0;
1983   u64 key = calc_nat_key (l_addr, l_port, fib_index, proto);
1984   split_nat_key (key, &l_addr2, &l_port2, &fib_index2, &proto3);
1985   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
1986   ASSERT (l_port == l_port2);
1987   ASSERT (proto == proto3);
1988   ASSERT (fib_index == fib_index2);
1989 }
1990
1991 static clib_error_t *
1992 nat_ip_table_add_del (vnet_main_t * vnm, u32 table_id, u32 is_add)
1993 {
1994   u32 fib_index;
1995
1996       // TODO: consider removing all NAT interfaces
1997       if (!is_add)
1998         {
1999           fib_index = ip4_fib_index_from_table_id (table_id);
2000           if (fib_index != ~0)
2001             expire_per_vrf_sessions (fib_index);
2002         }
2003   return 0;
2004 }
2005
2006 VNET_IP_TABLE_ADD_DEL_FUNCTION (nat_ip_table_add_del);
2007
2008 void
2009 nat44_set_node_indexes (snat_main_t * sm, vlib_main_t * vm)
2010 {
2011   vlib_node_t *node;
2012
2013   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
2014   sm->in2out_node_index = node->index;
2015
2016   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
2017   sm->out2in_node_index = node->index;
2018
2019   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-output");
2020   sm->in2out_output_node_index = node->index;
2021 }
2022
/*
 * Validate simple counter c for index i and zero that index.
 * Arguments are parenthesized and there is deliberately no semicolon after
 * `while (0)`: the caller supplies it, so the macro expands to exactly one
 * statement and stays safe inside an unbraced if/else.
 */
#define nat_validate_simple_counter(c, i)                                     \
  do                                                                          \
    {                                                                         \
      vlib_validate_simple_counter (&(c), (i));                               \
      vlib_zero_simple_counter (&(c), (i));                                   \
    }                                                                         \
  while (0)
2030
/*
 * Name simple counter c (n = name, sn = stat segment name), then validate
 * and zero its index 0.
 * Arguments are parenthesized and there is deliberately no semicolon after
 * `while (0)`: the caller supplies it, so the macro expands to exactly one
 * statement and stays safe inside an unbraced if/else.
 */
#define nat_init_simple_counter(c, n, sn)                                     \
  do                                                                          \
    {                                                                         \
      (c).name = (n);                                                         \
      (c).stat_segment_name = (sn);                                           \
      nat_validate_simple_counter (c, 0);                                     \
    }                                                                         \
  while (0)
2039
2040 static_always_inline void
2041 nat_validate_interface_counters (snat_main_t *sm, u32 sw_if_index)
2042 {
2043 #define _(x)                                                                  \
2044   nat_validate_simple_counter (sm->counters.fastpath.in2out.x, sw_if_index);  \
2045   nat_validate_simple_counter (sm->counters.fastpath.out2in.x, sw_if_index);  \
2046   nat_validate_simple_counter (sm->counters.slowpath.in2out.x, sw_if_index);  \
2047   nat_validate_simple_counter (sm->counters.slowpath.out2in.x, sw_if_index);
2048   foreach_nat_counter;
2049 #undef _
2050   nat_validate_simple_counter (sm->counters.hairpinning, sw_if_index);
2051 }
2052
2053 static clib_error_t *
2054 nat_init (vlib_main_t * vm)
2055 {
2056   snat_main_t *sm = &snat_main;
2057   vlib_thread_main_t *tm = vlib_get_thread_main ();
2058   vlib_thread_registration_t *tr;
2059   ip4_add_del_interface_address_callback_t cbi = { 0 };
2060   ip4_table_bind_callback_t cbt = { 0 };
2061   u32 i, num_threads = 0;
2062   uword *p, *bitmap = 0;
2063
2064   clib_memset (sm, 0, sizeof (*sm));
2065
2066   // required
2067   sm->vnet_main = vnet_get_main ();
2068   // convenience
2069   sm->ip4_main = &ip4_main;
2070   sm->api_main = vlibapi_get_main ();
2071   sm->ip4_lookup_main = &ip4_main.lookup_main;
2072
2073   // frame queue indices used for handoff
2074   sm->fq_out2in_index = ~0;
2075   sm->fq_in2out_index = ~0;
2076   sm->fq_in2out_output_index = ~0;
2077
2078   sm->log_level = NAT_LOG_ERROR;
2079
2080   nat44_set_node_indexes (sm, vm);
2081   sm->log_class = vlib_log_register_class ("nat", 0);
2082   nat_ipfix_logging_init (vm);
2083
2084   nat_init_simple_counter (sm->total_sessions, "total-sessions",
2085                            "/nat44-ed/total-sessions");
2086
2087 #define _(x)                                                                  \
2088   nat_init_simple_counter (sm->counters.fastpath.in2out.x, #x,                \
2089                            "/nat44-ed/in2out/fastpath/" #x);                  \
2090   nat_init_simple_counter (sm->counters.fastpath.out2in.x, #x,                \
2091                            "/nat44-ed/out2in/fastpath/" #x);                  \
2092   nat_init_simple_counter (sm->counters.slowpath.in2out.x, #x,                \
2093                            "/nat44-ed/in2out/slowpath/" #x);                  \
2094   nat_init_simple_counter (sm->counters.slowpath.out2in.x, #x,                \
2095                            "/nat44-ed/out2in/slowpath/" #x);
2096   foreach_nat_counter;
2097 #undef _
2098   nat_init_simple_counter (sm->counters.hairpinning, "hairpinning",
2099                            "/nat44-ed/hairpinning");
2100
2101   p = hash_get_mem (tm->thread_registrations_by_name, "workers");
2102   if (p)
2103     {
2104       tr = (vlib_thread_registration_t *) p[0];
2105       if (tr)
2106         {
2107           sm->num_workers = tr->count;
2108           sm->first_worker_index = tr->first_index;
2109         }
2110     }
2111   num_threads = tm->n_vlib_mains - 1;
2112   sm->port_per_thread = 0xffff - 1024;
2113   vec_validate (sm->per_thread_data, num_threads);
2114
2115   /* Use all available workers by default */
2116   if (sm->num_workers > 1)
2117     {
2118
2119       for (i = 0; i < sm->num_workers; i++)
2120         bitmap = clib_bitmap_set (bitmap, i, 1);
2121       snat_set_workers (bitmap);
2122       clib_bitmap_free (bitmap);
2123     }
2124   else
2125     sm->per_thread_data[0].snat_thread_index = 0;
2126
2127   /* callbacks to call when interface address changes. */
2128   cbi.function = snat_ip4_add_del_interface_address_cb;
2129   vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
2130   cbi.function = nat_ip4_add_del_addr_only_sm_cb;
2131   vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
2132
2133   /* callbacks to call when interface to table biding changes */
2134   cbt.function = snat_update_outside_fib;
2135   vec_add1 (sm->ip4_main->table_bind_callbacks, cbt);
2136
2137   sm->fib_src_low =
2138     fib_source_allocate ("nat-low", FIB_SOURCE_PRIORITY_LOW,
2139                          FIB_SOURCE_BH_SIMPLE);
2140   sm->fib_src_hi =
2141     fib_source_allocate ("nat-hi", FIB_SOURCE_PRIORITY_HI,
2142                          FIB_SOURCE_BH_SIMPLE);
2143
2144   nat_affinity_init (vm);
2145   test_key_calc_split ();
2146
2147   return nat44_api_hookup (vm);
2148 }
2149
2150 VLIB_INIT_FUNCTION (nat_init);
2151
2152 int
2153 nat44_plugin_enable (nat44_config_t c)
2154 {
2155   snat_main_t *sm = &snat_main;
2156
2157   fail_if_enabled ();
2158
2159   // UPDATE based on these appropriate API/CLI
2160   // c.static_mapping_only + c.connection_tracking
2161   //  - supported in NAT EI & NAT ED
2162   // c.out2in_dpo, c.static_mapping_only
2163   //  - supported in NAT EI
2164
2165   if (c.static_mapping_only && !c.connection_tracking)
2166     {
2167       nat_log_err ("unsupported combination of configuration");
2168       return 1;
2169     }
2170
2171   // nat44 feature configuration
2172   sm->static_mapping_only = c.static_mapping_only;
2173   sm->static_mapping_connection_tracking = c.connection_tracking;
2174
2175   sm->forwarding_enabled = 0;
2176   sm->mss_clamping = 0;
2177   sm->pat = (!c.static_mapping_only ||
2178              (c.static_mapping_only && c.connection_tracking));
2179
2180   if (!c.sessions)
2181     c.sessions = 63 * 1024;
2182
2183   sm->max_translations_per_thread = c.sessions;
2184   sm->translation_buckets = nat_calc_bihash_buckets (c.sessions);
2185
2186   // ED only feature
2187   vec_add1 (sm->max_translations_per_fib, sm->max_translations_per_thread);
2188
2189   sm->inside_vrf_id = c.inside_vrf;
2190   sm->inside_fib_index =
2191     fib_table_find_or_create_and_lock
2192     (FIB_PROTOCOL_IP4, c.inside_vrf, sm->fib_src_hi);
2193
2194   sm->outside_vrf_id = c.outside_vrf;
2195   sm->outside_fib_index = fib_table_find_or_create_and_lock (
2196     FIB_PROTOCOL_IP4, c.outside_vrf, sm->fib_src_hi);
2197
2198   sm->worker_in2out_cb = nat44_ed_get_worker_in2out_cb;
2199   sm->worker_out2in_cb = nat44_ed_get_worker_out2in_cb;
2200
2201   nat44_ed_db_init (sm->max_translations_per_thread, sm->translation_buckets);
2202
2203   nat_affinity_enable ();
2204
2205   nat_reset_timeouts (&sm->timeouts);
2206
2207   vlib_zero_simple_counter (&sm->total_sessions, 0);
2208
2209   if (!sm->frame_queue_nelts)
2210     sm->frame_queue_nelts = NAT_FQ_NELTS_DEFAULT;
2211
2212   sm->enabled = 1;
2213   sm->rconfig = c;
2214
2215   return 0;
2216 }
2217
2218 void
2219 nat44_addresses_free (snat_address_t ** addresses)
2220 {
2221   snat_address_t *ap;
2222   vec_foreach (ap, *addresses)
2223     {
2224     #define _(N, i, n, s) \
2225       vec_free (ap->busy_##n##_ports_per_thread);
2226       foreach_nat_protocol
2227     #undef _
2228     }
2229   vec_free (*addresses);
2230   *addresses = 0;
2231 }
2232
2233 int
2234 nat44_plugin_disable ()
2235 {
2236   snat_main_t *sm = &snat_main;
2237   snat_interface_t *i, *vec;
2238   int error = 0;
2239
2240   fail_if_disabled ();
2241
2242   // first unregister all nodes from interfaces
2243   vec = vec_dup (sm->interfaces);
2244   vec_foreach (i, vec)
2245     {
2246       if (nat_interface_is_inside(i))
2247         error = snat_interface_add_del (i->sw_if_index, 1, 1);
2248       if (nat_interface_is_outside(i))
2249         error = snat_interface_add_del (i->sw_if_index, 0, 1);
2250
2251       if (error)
2252         {
2253           nat_log_err ("error occurred while removing interface %u",
2254                        i->sw_if_index);
2255         }
2256     }
2257   vec_free (vec);
2258   sm->interfaces = 0;
2259
2260   vec = vec_dup (sm->output_feature_interfaces);
2261   vec_foreach (i, vec)
2262     {
2263       if (nat_interface_is_inside(i))
2264         error = snat_interface_add_del_output_feature (i->sw_if_index, 1, 1);
2265       if (nat_interface_is_outside(i))
2266         error = snat_interface_add_del_output_feature (i->sw_if_index, 0, 1);
2267
2268       if (error)
2269         {
2270           nat_log_err ("error occurred while removing interface %u",
2271                        i->sw_if_index);
2272         }
2273     }
2274   vec_free (vec);
2275   sm->output_feature_interfaces = 0;
2276
2277   vec_free (sm->max_translations_per_fib);
2278
2279   nat44_ed_db_free ();
2280
2281   nat44_addresses_free (&sm->addresses);
2282   nat44_addresses_free (&sm->twice_nat_addresses);
2283
2284   vec_free (sm->to_resolve);
2285   vec_free (sm->auto_add_sw_if_indices);
2286   vec_free (sm->auto_add_sw_if_indices_twice_nat);
2287
2288   sm->to_resolve = 0;
2289   sm->auto_add_sw_if_indices = 0;
2290   sm->auto_add_sw_if_indices_twice_nat = 0;
2291
2292   sm->forwarding_enabled = 0;
2293
2294   sm->enabled = 0;
2295   clib_memset (&sm->rconfig, 0, sizeof (sm->rconfig));
2296
2297   return 0;
2298 }
2299
2300 void
2301 nat44_ed_forwarding_enable_disable (u8 is_enable)
2302 {
2303   snat_main_per_thread_data_t *tsm;
2304   snat_main_t *sm = &snat_main;
2305   snat_session_t *s;
2306
2307   u32 *ses_to_be_removed = 0, *ses_index;
2308
2309   sm->forwarding_enabled = is_enable != 0;
2310
2311   if (is_enable)
2312     return;
2313
2314   vec_foreach (tsm, sm->per_thread_data)
2315     {
2316       pool_foreach (s, tsm->sessions)
2317         {
2318           if (is_fwd_bypass_session (s))
2319             {
2320               vec_add1 (ses_to_be_removed, s - tsm->sessions);
2321             }
2322         }
2323       vec_foreach (ses_index, ses_to_be_removed)
2324         {
2325           s = pool_elt_at_index (tsm->sessions, ses_index[0]);
2326           nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
2327           nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
2328         }
2329
2330       vec_free (ses_to_be_removed);
2331     }
2332 }
2333
2334 void
2335 snat_free_outside_address_and_port (snat_address_t *addresses,
2336                                     u32 thread_index, ip4_address_t *addr,
2337                                     u16 port, nat_protocol_t protocol)
2338 {
2339   snat_main_t *sm = &snat_main;
2340   snat_address_t *a;
2341   u32 address_index;
2342   u16 port_host_byte_order = clib_net_to_host_u16 (port);
2343
2344   for (address_index = 0; address_index < vec_len (addresses);
2345        address_index++)
2346     {
2347       if (addresses[address_index].addr.as_u32 == addr->as_u32)
2348         break;
2349     }
2350
2351   ASSERT (address_index < vec_len (addresses));
2352
2353   a = addresses + address_index;
2354
2355   switch (protocol)
2356     {
2357 #define _(N, i, n, s) \
2358     case NAT_PROTOCOL_##N: \
2359       ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \
2360       --a->busy_##n##_port_refcounts[port_host_byte_order]; \
2361       a->busy_##n##_ports--; \
2362       a->busy_##n##_ports_per_thread[thread_index]--; \
2363       break;
2364       foreach_nat_protocol
2365 #undef _
2366         default : nat_elog_info (sm, "unknown protocol");
2367       return;
2368     }
2369 }
2370
2371 int
2372 nat_set_outside_address_and_port (snat_address_t *addresses, u32 thread_index,
2373                                   ip4_address_t addr, u16 port,
2374                                   nat_protocol_t protocol)
2375 {
2376   snat_main_t *sm = &snat_main;
2377   snat_address_t *a = 0;
2378   u32 address_index;
2379   u16 port_host_byte_order = clib_net_to_host_u16 (port);
2380
2381   for (address_index = 0; address_index < vec_len (addresses);
2382        address_index++)
2383     {
2384       if (addresses[address_index].addr.as_u32 != addr.as_u32)
2385         continue;
2386
2387       a = addresses + address_index;
2388       switch (protocol)
2389         {
2390 #define _(N, j, n, s) \
2391         case NAT_PROTOCOL_##N: \
2392           if (a->busy_##n##_port_refcounts[port_host_byte_order]) \
2393             return VNET_API_ERROR_INSTANCE_IN_USE; \
2394           ++a->busy_##n##_port_refcounts[port_host_byte_order]; \
2395           a->busy_##n##_ports_per_thread[thread_index]++; \
2396           a->busy_##n##_ports++; \
2397           return 0;
2398           foreach_nat_protocol
2399 #undef _
2400             default : nat_elog_info (sm, "unknown protocol");
2401           return 1;
2402         }
2403     }
2404
2405   return VNET_API_ERROR_NO_SUCH_ENTRY;
2406 }
2407
2408 int
2409 snat_static_mapping_match (snat_main_t * sm,
2410                            ip4_address_t match_addr,
2411                            u16 match_port,
2412                            u32 match_fib_index,
2413                            nat_protocol_t match_protocol,
2414                            ip4_address_t * mapping_addr,
2415                            u16 * mapping_port,
2416                            u32 * mapping_fib_index,
2417                            u8 by_external,
2418                            u8 * is_addr_only,
2419                            twice_nat_type_t * twice_nat,
2420                            lb_nat_type_t * lb, ip4_address_t * ext_host_addr,
2421                            u8 * is_identity_nat, snat_static_mapping_t ** out)
2422 {
2423   clib_bihash_kv_8_8_t kv, value;
2424   clib_bihash_8_8_t *mapping_hash;
2425   snat_static_mapping_t *m;
2426   u32 rand, lo = 0, hi, mid, *tmp = 0, i;
2427   nat44_lb_addr_port_t *local;
2428   u8 backend_index;
2429
2430   if (!by_external)
2431     {
2432       mapping_hash = &sm->static_mapping_by_local;
2433       init_nat_k (&kv, match_addr, match_port, match_fib_index,
2434                   match_protocol);
2435       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2436         {
2437           /* Try address only mapping */
2438           init_nat_k (&kv, match_addr, 0, match_fib_index, 0);
2439           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2440             return 1;
2441         }
2442     }
2443   else
2444     {
2445       mapping_hash = &sm->static_mapping_by_external;
2446       init_nat_k (&kv, match_addr, match_port, 0, match_protocol);
2447       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2448         {
2449           /* Try address only mapping */
2450           init_nat_k (&kv, match_addr, 0, 0, 0);
2451           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2452             return 1;
2453         }
2454     }
2455
2456   m = pool_elt_at_index (sm->static_mappings, value.value);
2457
2458   if (by_external)
2459     {
2460       if (is_lb_static_mapping (m))
2461         {
2462           if (PREDICT_FALSE (lb != 0))
2463             *lb = m->affinity ? AFFINITY_LB_NAT : LB_NAT;
2464           if (m->affinity && !nat_affinity_find_and_lock (ext_host_addr[0],
2465                                                           match_addr,
2466                                                           match_protocol,
2467                                                           match_port,
2468                                                           &backend_index))
2469             {
2470               local = pool_elt_at_index (m->locals, backend_index);
2471               *mapping_addr = local->addr;
2472               *mapping_port = local->port;
2473               *mapping_fib_index = local->fib_index;
2474               goto end;
2475             }
2476           // pick locals matching this worker
2477           if (PREDICT_FALSE (sm->num_workers > 1))
2478             {
2479               u32 thread_index = vlib_get_thread_index ();
2480               pool_foreach_index (i, m->locals)
2481                {
2482                 local = pool_elt_at_index (m->locals, i);
2483
2484                 ip4_header_t ip = {
2485                   .src_address = local->addr,
2486                 };
2487
2488                 if (sm->worker_in2out_cb (&ip, m->fib_index, 0) ==
2489                     thread_index)
2490                   {
2491                     vec_add1 (tmp, i);
2492                   }
2493               }
2494               ASSERT (vec_len (tmp) != 0);
2495             }
2496           else
2497             {
2498               pool_foreach_index (i, m->locals)
2499                {
2500                 vec_add1 (tmp, i);
2501               }
2502             }
2503           hi = vec_len (tmp) - 1;
2504           local = pool_elt_at_index (m->locals, tmp[hi]);
2505           rand = 1 + (random_u32 (&sm->random_seed) % local->prefix);
2506           while (lo < hi)
2507             {
2508               mid = ((hi - lo) >> 1) + lo;
2509               local = pool_elt_at_index (m->locals, tmp[mid]);
2510               (rand > local->prefix) ? (lo = mid + 1) : (hi = mid);
2511             }
2512           local = pool_elt_at_index (m->locals, tmp[lo]);
2513           if (!(local->prefix >= rand))
2514             return 1;
2515           *mapping_addr = local->addr;
2516           *mapping_port = local->port;
2517           *mapping_fib_index = local->fib_index;
2518           if (m->affinity)
2519             {
2520               if (nat_affinity_create_and_lock (ext_host_addr[0], match_addr,
2521                                                 match_protocol, match_port,
2522                                                 tmp[lo], m->affinity,
2523                                                 m->affinity_per_service_list_head_index))
2524                 nat_elog_info (sm, "create affinity record failed");
2525             }
2526           vec_free (tmp);
2527         }
2528       else
2529         {
2530           if (PREDICT_FALSE (lb != 0))
2531             *lb = NO_LB_NAT;
2532           *mapping_fib_index = m->fib_index;
2533           *mapping_addr = m->local_addr;
2534           /* Address only mapping doesn't change port */
2535           *mapping_port = is_addr_only_static_mapping (m) ? match_port
2536             : m->local_port;
2537         }
2538     }
2539   else
2540     {
2541       *mapping_addr = m->external_addr;
2542       /* Address only mapping doesn't change port */
2543       *mapping_port = is_addr_only_static_mapping (m) ? match_port
2544         : m->external_port;
2545       *mapping_fib_index = sm->outside_fib_index;
2546     }
2547
2548 end:
2549   if (PREDICT_FALSE (is_addr_only != 0))
2550     *is_addr_only = is_addr_only_static_mapping (m);
2551
2552   if (PREDICT_FALSE (twice_nat != 0))
2553     *twice_nat = m->twice_nat;
2554
2555   if (PREDICT_FALSE (is_identity_nat != 0))
2556     *is_identity_nat = is_identity_static_mapping (m);
2557
2558   if (out != 0)
2559     *out = m;
2560
2561   return 0;
2562 }
2563
2564 static u32
2565 nat44_ed_get_worker_in2out_cb (ip4_header_t *ip, u32 rx_fib_index,
2566                                u8 is_output)
2567 {
2568   snat_main_t *sm = &snat_main;
2569   u32 next_worker_index = sm->first_worker_index;
2570   u32 hash;
2571
2572   clib_bihash_kv_16_8_t kv16, value16;
2573   snat_main_per_thread_data_t *tsm;
2574   udp_header_t *udp;
2575
2576   if (PREDICT_FALSE (is_output))
2577     {
2578       u32 fib_index = sm->outside_fib_index;
2579       nat_outside_fib_t *outside_fib;
2580       fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
2581       fib_prefix_t pfx = {
2582         .fp_proto = FIB_PROTOCOL_IP4,
2583         .fp_len = 32,
2584         .fp_addr = {
2585                     .ip4.as_u32 = ip->dst_address.as_u32,
2586                     }
2587         ,
2588       };
2589
2590       udp = ip4_next_header (ip);
2591
2592       switch (vec_len (sm->outside_fibs))
2593         {
2594         case 0:
2595           fib_index = sm->outside_fib_index;
2596           break;
2597         case 1:
2598           fib_index = sm->outside_fibs[0].fib_index;
2599           break;
2600         default:
2601             /* *INDENT-OFF* */
2602             vec_foreach (outside_fib, sm->outside_fibs)
2603               {
2604                 fei = fib_table_lookup (outside_fib->fib_index, &pfx);
2605                 if (FIB_NODE_INDEX_INVALID != fei)
2606                   {
2607                     if (fib_entry_get_resolving_interface (fei) != ~0)
2608                       {
2609                         fib_index = outside_fib->fib_index;
2610                         break;
2611                       }
2612                   }
2613               }
2614             /* *INDENT-ON* */
2615           break;
2616         }
2617
2618       init_ed_k (&kv16, ip->src_address, udp->src_port, ip->dst_address,
2619                  udp->dst_port, fib_index, ip->protocol);
2620
2621       if (PREDICT_TRUE (
2622             !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
2623         {
2624           tsm =
2625             vec_elt_at_index (sm->per_thread_data,
2626                               ed_value_get_thread_index (&value16));
2627           next_worker_index += tsm->thread_index;
2628
2629           nat_elog_debug_handoff (
2630             sm, "HANDOFF IN2OUT-OUTPUT-FEATURE (session)", next_worker_index,
2631             fib_index, clib_net_to_host_u32 (ip->src_address.as_u32),
2632             clib_net_to_host_u32 (ip->dst_address.as_u32));
2633
2634           return next_worker_index;
2635         }
2636     }
2637
2638   hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
2639     (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
2640
2641   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
2642     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
2643   else
2644     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
2645
2646   if (PREDICT_TRUE (!is_output))
2647     {
2648       nat_elog_debug_handoff (sm, "HANDOFF IN2OUT", next_worker_index,
2649                               rx_fib_index,
2650                               clib_net_to_host_u32 (ip->src_address.as_u32),
2651                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2652     }
2653   else
2654     {
2655       nat_elog_debug_handoff (sm, "HANDOFF IN2OUT-OUTPUT-FEATURE",
2656                               next_worker_index, rx_fib_index,
2657                               clib_net_to_host_u32 (ip->src_address.as_u32),
2658                               clib_net_to_host_u32 (ip->dst_address.as_u32));
2659     }
2660
2661   return next_worker_index;
2662 }
2663
2664 static u32
2665 nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip,
2666                                u32 rx_fib_index, u8 is_output)
2667 {
2668   snat_main_t *sm = &snat_main;
2669   clib_bihash_kv_8_8_t kv, value;
2670   clib_bihash_kv_16_8_t kv16, value16;
2671   snat_main_per_thread_data_t *tsm;
2672
2673   u32 proto, next_worker_index = 0;
2674   udp_header_t *udp;
2675   u16 port;
2676   snat_static_mapping_t *m;
2677   u32 hash;
2678
2679   proto = ip_proto_to_nat_proto (ip->protocol);
2680
2681   if (PREDICT_TRUE (proto == NAT_PROTOCOL_UDP || proto == NAT_PROTOCOL_TCP))
2682     {
2683       udp = ip4_next_header (ip);
2684
2685       init_ed_k (&kv16, ip->dst_address, udp->dst_port, ip->src_address,
2686                  udp->src_port, rx_fib_index, ip->protocol);
2687
2688       if (PREDICT_TRUE (
2689             !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
2690         {
2691           tsm =
2692             vec_elt_at_index (sm->per_thread_data,
2693                               ed_value_get_thread_index (&value16));
2694           vnet_buffer2 (b)->nat.cached_session_index =
2695             ed_value_get_session_index (&value16);
2696           next_worker_index = sm->first_worker_index + tsm->thread_index;
2697           nat_elog_debug_handoff (
2698             sm, "HANDOFF OUT2IN (session)", next_worker_index, rx_fib_index,
2699             clib_net_to_host_u32 (ip->src_address.as_u32),
2700             clib_net_to_host_u32 (ip->dst_address.as_u32));
2701           return next_worker_index;
2702         }
2703     }
2704   else if (proto == NAT_PROTOCOL_ICMP)
2705     {
2706       ip4_address_t lookup_saddr, lookup_daddr;
2707       u16 lookup_sport, lookup_dport;
2708       u8 lookup_protocol;
2709       if (!nat_get_icmp_session_lookup_values (
2710             b, ip, &lookup_saddr, &lookup_sport, &lookup_daddr, &lookup_dport,
2711             &lookup_protocol))
2712         {
2713           init_ed_k (&kv16, lookup_saddr, lookup_sport, lookup_daddr,
2714                      lookup_dport, rx_fib_index, lookup_protocol);
2715           if (PREDICT_TRUE (
2716                 !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
2717             {
2718               tsm =
2719                 vec_elt_at_index (sm->per_thread_data,
2720                                   ed_value_get_thread_index (&value16));
2721               next_worker_index = sm->first_worker_index + tsm->thread_index;
2722               nat_elog_debug_handoff (
2723                 sm, "HANDOFF OUT2IN (session)", next_worker_index,
2724                 rx_fib_index, clib_net_to_host_u32 (ip->src_address.as_u32),
2725                 clib_net_to_host_u32 (ip->dst_address.as_u32));
2726               return next_worker_index;
2727             }
2728         }
2729     }
2730
2731   /* first try static mappings without port */
2732   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
2733     {
2734       init_nat_k (&kv, ip->dst_address, 0, 0, 0);
2735       if (!clib_bihash_search_8_8
2736           (&sm->static_mapping_by_external, &kv, &value))
2737         {
2738           m = pool_elt_at_index (sm->static_mappings, value.value);
2739           next_worker_index = m->workers[0];
2740           goto done;
2741         }
2742     }
2743
2744   /* unknown protocol */
2745   if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
2746     {
2747       /* use current thread */
2748       next_worker_index = vlib_get_thread_index ();
2749       goto done;
2750     }
2751
2752   udp = ip4_next_header (ip);
2753   port = udp->dst_port;
2754
2755   if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
2756     {
2757       icmp46_header_t *icmp = (icmp46_header_t *) udp;
2758       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
2759       if (!icmp_type_is_error_message
2760           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
2761         port = vnet_buffer (b)->ip.reass.l4_src_port;
2762       else
2763         {
2764           /* if error message, then it's not fragmented and we can access it */
2765           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
2766           proto = ip_proto_to_nat_proto (inner_ip->protocol);
2767           void *l4_header = ip4_next_header (inner_ip);
2768           switch (proto)
2769             {
2770             case NAT_PROTOCOL_ICMP:
2771               icmp = (icmp46_header_t *) l4_header;
2772               echo = (icmp_echo_header_t *) (icmp + 1);
2773               port = echo->identifier;
2774               break;
2775             case NAT_PROTOCOL_UDP:
2776             case NAT_PROTOCOL_TCP:
2777               port = ((tcp_udp_header_t *) l4_header)->src_port;
2778               break;
2779             default:
2780               next_worker_index = vlib_get_thread_index ();
2781               goto done;
2782             }
2783         }
2784     }
2785
2786   /* try static mappings with port */
2787   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
2788     {
2789       init_nat_k (&kv, ip->dst_address, port, 0, proto);
2790       if (!clib_bihash_search_8_8
2791           (&sm->static_mapping_by_external, &kv, &value))
2792         {
2793           m = pool_elt_at_index (sm->static_mappings, value.value);
2794           if (!is_lb_static_mapping (m))
2795             {
2796               next_worker_index = m->workers[0];
2797               goto done;
2798             }
2799
2800           hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
2801             (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
2802
2803           if (PREDICT_TRUE (is_pow2 (_vec_len (m->workers))))
2804             next_worker_index =
2805               m->workers[hash & (_vec_len (m->workers) - 1)];
2806           else
2807             next_worker_index = m->workers[hash % _vec_len (m->workers)];
2808           goto done;
2809         }
2810     }
2811
2812   /* worker by outside port */
2813   next_worker_index = sm->first_worker_index;
2814   next_worker_index +=
2815     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
2816
2817 done:
2818   nat_elog_debug_handoff (sm, "HANDOFF OUT2IN", next_worker_index,
2819                           rx_fib_index,
2820                           clib_net_to_host_u32 (ip->src_address.as_u32),
2821                           clib_net_to_host_u32 (ip->dst_address.as_u32));
2822   return next_worker_index;
2823 }
2824
2825 u32
2826 nat44_get_max_session_limit ()
2827 {
2828   snat_main_t *sm = &snat_main;
2829   u32 max_limit = 0, len = 0;
2830
2831   for (; len < vec_len (sm->max_translations_per_fib); len++)
2832     {
2833       if (max_limit < sm->max_translations_per_fib[len])
2834         max_limit = sm->max_translations_per_fib[len];
2835     }
2836   return max_limit;
2837 }
2838
2839 int
2840 nat44_set_session_limit (u32 session_limit, u32 vrf_id)
2841 {
2842   snat_main_t *sm = &snat_main;
2843   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
2844   u32 len = vec_len (sm->max_translations_per_fib);
2845
2846   if (len <= fib_index)
2847     {
2848       vec_validate (sm->max_translations_per_fib, fib_index + 1);
2849
2850       for (; len < vec_len (sm->max_translations_per_fib); len++)
2851         sm->max_translations_per_fib[len] = sm->max_translations_per_thread;
2852     }
2853
2854   sm->max_translations_per_fib[fib_index] = session_limit;
2855   return 0;
2856 }
2857
2858 int
2859 nat44_update_session_limit (u32 session_limit, u32 vrf_id)
2860 {
2861   snat_main_t *sm = &snat_main;
2862
2863   if (nat44_set_session_limit (session_limit, vrf_id))
2864     return 1;
2865   sm->max_translations_per_thread = nat44_get_max_session_limit ();
2866
2867   sm->translation_buckets =
2868     nat_calc_bihash_buckets (sm->max_translations_per_thread);
2869
2870   nat44_ed_sessions_clear ();
2871   return 0;
2872 }
2873
2874 static void
2875 nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations,
2876                          u32 translation_buckets)
2877 {
2878   dlist_elt_t *head;
2879
2880   pool_alloc (tsm->sessions, translations);
2881   pool_alloc (tsm->lru_pool, translations);
2882
2883   pool_get (tsm->lru_pool, head);
2884   tsm->tcp_trans_lru_head_index = head - tsm->lru_pool;
2885   clib_dlist_init (tsm->lru_pool, tsm->tcp_trans_lru_head_index);
2886
2887   pool_get (tsm->lru_pool, head);
2888   tsm->tcp_estab_lru_head_index = head - tsm->lru_pool;
2889   clib_dlist_init (tsm->lru_pool, tsm->tcp_estab_lru_head_index);
2890
2891   pool_get (tsm->lru_pool, head);
2892   tsm->udp_lru_head_index = head - tsm->lru_pool;
2893   clib_dlist_init (tsm->lru_pool, tsm->udp_lru_head_index);
2894
2895   pool_get (tsm->lru_pool, head);
2896   tsm->icmp_lru_head_index = head - tsm->lru_pool;
2897   clib_dlist_init (tsm->lru_pool, tsm->icmp_lru_head_index);
2898
2899   pool_get (tsm->lru_pool, head);
2900   tsm->unk_proto_lru_head_index = head - tsm->lru_pool;
2901   clib_dlist_init (tsm->lru_pool, tsm->unk_proto_lru_head_index);
2902 }
2903
2904 static void
2905 reinit_ed_flow_hash ()
2906 {
2907   snat_main_t *sm = &snat_main;
2908   // we expect 2 flows per session, so multiply translation_buckets by 2
2909   clib_bihash_init_16_8 (
2910     &sm->flow_hash, "ed-flow-hash",
2911     clib_max (1, sm->num_workers) * 2 * sm->translation_buckets, 0);
2912   clib_bihash_set_kvp_format_fn_16_8 (&sm->flow_hash, format_ed_session_kvp);
2913 }
2914
2915 static void
2916 nat44_ed_db_init (u32 translations, u32 translation_buckets)
2917 {
2918   snat_main_t *sm = &snat_main;
2919   snat_main_per_thread_data_t *tsm;
2920   u32 static_mapping_buckets = 1024;
2921   u32 static_mapping_memory_size = 64 << 20;
2922
2923   reinit_ed_flow_hash ();
2924
2925   clib_bihash_init_8_8 (&sm->static_mapping_by_local,
2926                         "static_mapping_by_local", static_mapping_buckets,
2927                         static_mapping_memory_size);
2928   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_local,
2929                                      format_static_mapping_kvp);
2930
2931   clib_bihash_init_8_8 (&sm->static_mapping_by_external,
2932                         "static_mapping_by_external", static_mapping_buckets,
2933                         static_mapping_memory_size);
2934   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_external,
2935                                      format_static_mapping_kvp);
2936
2937   if (sm->pat)
2938     {
2939       vec_foreach (tsm, sm->per_thread_data)
2940         {
2941           nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
2942                                    sm->translation_buckets);
2943         }
2944     }
2945 }
2946
2947 static void
2948 nat44_ed_worker_db_free (snat_main_per_thread_data_t *tsm)
2949 {
2950   pool_free (tsm->lru_pool);
2951   pool_free (tsm->sessions);
2952   vec_free (tsm->per_vrf_sessions_vec);
2953 }
2954
2955 static void
2956 nat44_ed_db_free ()
2957 {
2958   snat_main_t *sm = &snat_main;
2959   snat_main_per_thread_data_t *tsm;
2960
2961   pool_free (sm->static_mappings);
2962   clib_bihash_free_16_8 (&sm->flow_hash);
2963   clib_bihash_free_8_8 (&sm->static_mapping_by_local);
2964   clib_bihash_free_8_8 (&sm->static_mapping_by_external);
2965
2966   if (sm->pat)
2967     {
2968       vec_foreach (tsm, sm->per_thread_data)
2969         {
2970           nat44_ed_worker_db_free (tsm);
2971         }
2972     }
2973 }
2974
2975 void
2976 nat44_ed_sessions_clear ()
2977 {
2978   snat_main_t *sm = &snat_main;
2979   snat_main_per_thread_data_t *tsm;
2980
2981   reinit_ed_flow_hash ();
2982
2983   if (sm->pat)
2984     {
2985       vec_foreach (tsm, sm->per_thread_data)
2986         {
2987
2988           nat44_ed_worker_db_free (tsm);
2989           nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
2990                                    sm->translation_buckets);
2991         }
2992     }
2993   vlib_zero_simple_counter (&sm->total_sessions, 0);
2994 }
2995
2996 static void
2997 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
2998                                  uword opaque,
2999                                  u32 sw_if_index,
3000                                  ip4_address_t * address,
3001                                  u32 address_length,
3002                                  u32 if_address_index, u32 is_delete)
3003 {
3004   snat_main_t *sm = &snat_main;
3005   snat_static_map_resolve_t *rp;
3006   snat_static_mapping_t *m;
3007   clib_bihash_kv_8_8_t kv, value;
3008   int i, rv;
3009   ip4_address_t l_addr;
3010
3011   if (!sm->enabled)
3012     return;
3013
3014   for (i = 0; i < vec_len (sm->to_resolve); i++)
3015     {
3016       rp = sm->to_resolve + i;
3017       if (rp->addr_only == 0)
3018         continue;
3019       if (rp->sw_if_index == sw_if_index)
3020         goto match;
3021     }
3022
3023   return;
3024
3025 match:
3026   init_nat_k (&kv, *address, rp->addr_only ? 0 : rp->e_port,
3027               sm->outside_fib_index, rp->addr_only ? 0 : rp->proto);
3028   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
3029     m = 0;
3030   else
3031     m = pool_elt_at_index (sm->static_mappings, value.value);
3032
3033   if (!is_delete)
3034     {
3035       /* Don't trip over lease renewal, static config */
3036       if (m)
3037         return;
3038     }
3039   else
3040     {
3041       if (!m)
3042         return;
3043     }
3044
3045   /* Indetity mapping? */
3046   if (rp->l_addr.as_u32 == 0)
3047     l_addr.as_u32 = address[0].as_u32;
3048   else
3049     l_addr.as_u32 = rp->l_addr.as_u32;
3050   /* Add the static mapping */
3051   rv = snat_add_static_mapping (l_addr,
3052                                 address[0],
3053                                 rp->l_port,
3054                                 rp->e_port,
3055                                 rp->vrf_id,
3056                                 rp->addr_only, ~0 /* sw_if_index */ ,
3057                                 rp->proto, !is_delete, rp->twice_nat,
3058                                 rp->out2in_only, rp->tag, rp->identity_nat,
3059                                 rp->pool_addr, rp->exact);
3060   if (rv)
3061     nat_elog_notice_X1 (sm, "snat_add_static_mapping returned %d", "i4", rv);
3062 }
3063
3064 static void
3065 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
3066                                        uword opaque,
3067                                        u32 sw_if_index,
3068                                        ip4_address_t * address,
3069                                        u32 address_length,
3070                                        u32 if_address_index, u32 is_delete)
3071 {
3072   snat_main_t *sm = &snat_main;
3073   snat_static_map_resolve_t *rp;
3074   ip4_address_t l_addr;
3075   int i, j;
3076   int rv;
3077   u8 twice_nat = 0;
3078   snat_address_t *addresses = sm->addresses;
3079
3080   if (!sm->enabled)
3081     return;
3082
3083   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices); i++)
3084     {
3085       if (sw_if_index == sm->auto_add_sw_if_indices[i])
3086         goto match;
3087     }
3088
3089   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices_twice_nat); i++)
3090     {
3091       twice_nat = 1;
3092       addresses = sm->twice_nat_addresses;
3093       if (sw_if_index == sm->auto_add_sw_if_indices_twice_nat[i])
3094         goto match;
3095     }
3096
3097   return;
3098
3099 match:
3100   if (!is_delete)
3101     {
3102       /* Don't trip over lease renewal, static config */
3103       for (j = 0; j < vec_len (addresses); j++)
3104         if (addresses[j].addr.as_u32 == address->as_u32)
3105           return;
3106
3107       (void) snat_add_address (sm, address, ~0, twice_nat);
3108       /* Scan static map resolution vector */
3109       for (j = 0; j < vec_len (sm->to_resolve); j++)
3110         {
3111           rp = sm->to_resolve + j;
3112           if (rp->addr_only)
3113             continue;
3114           /* On this interface? */
3115           if (rp->sw_if_index == sw_if_index)
3116             {
3117               /* Indetity mapping? */
3118               if (rp->l_addr.as_u32 == 0)
3119                 l_addr.as_u32 = address[0].as_u32;
3120               else
3121                 l_addr.as_u32 = rp->l_addr.as_u32;
3122               /* Add the static mapping */
3123               rv = snat_add_static_mapping (
3124                 l_addr, address[0], rp->l_port, rp->e_port, rp->vrf_id,
3125                 rp->addr_only, ~0 /* sw_if_index */, rp->proto, 1,
3126                 rp->twice_nat, rp->out2in_only, rp->tag, rp->identity_nat,
3127                 rp->pool_addr, rp->exact);
3128               if (rv)
3129                 nat_elog_notice_X1 (sm, "snat_add_static_mapping returned %d",
3130                                     "i4", rv);
3131             }
3132         }
3133       return;
3134     }
3135   else
3136     {
3137       (void) snat_del_address (sm, address[0], 1, twice_nat);
3138       return;
3139     }
3140 }
3141
3142 int
3143 snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del,
3144                             u8 twice_nat)
3145 {
3146   ip4_main_t *ip4_main = sm->ip4_main;
3147   ip4_address_t *first_int_addr;
3148   snat_static_map_resolve_t *rp;
3149   u32 *indices_to_delete = 0;
3150   int i, j;
3151   u32 *auto_add_sw_if_indices =
3152     twice_nat ? sm->
3153     auto_add_sw_if_indices_twice_nat : sm->auto_add_sw_if_indices;
3154
3155   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0        /* just want the address */
3156     );
3157
3158   for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
3159     {
3160       if (auto_add_sw_if_indices[i] == sw_if_index)
3161         {
3162           if (is_del)
3163             {
3164               /* if have address remove it */
3165               if (first_int_addr)
3166                 (void) snat_del_address (sm, first_int_addr[0], 1, twice_nat);
3167               else
3168                 {
3169                   for (j = 0; j < vec_len (sm->to_resolve); j++)
3170                     {
3171                       rp = sm->to_resolve + j;
3172                       if (rp->sw_if_index == sw_if_index)
3173                         vec_add1 (indices_to_delete, j);
3174                     }
3175                   if (vec_len (indices_to_delete))
3176                     {
3177                       for (j = vec_len (indices_to_delete) - 1; j >= 0; j--)
3178                         vec_del1 (sm->to_resolve, j);
3179                       vec_free (indices_to_delete);
3180                     }
3181                 }
3182               if (twice_nat)
3183                 vec_del1 (sm->auto_add_sw_if_indices_twice_nat, i);
3184               else
3185                 vec_del1 (sm->auto_add_sw_if_indices, i);
3186             }
3187           else
3188             return VNET_API_ERROR_VALUE_EXIST;
3189
3190           return 0;
3191         }
3192     }
3193
3194   if (is_del)
3195     return VNET_API_ERROR_NO_SUCH_ENTRY;
3196
3197   /* add to the auto-address list */
3198   if (twice_nat)
3199     vec_add1 (sm->auto_add_sw_if_indices_twice_nat, sw_if_index);
3200   else
3201     vec_add1 (sm->auto_add_sw_if_indices, sw_if_index);
3202
3203   /* If the address is already bound - or static - add it now */
3204   if (first_int_addr)
3205     (void) snat_add_address (sm, first_int_addr, ~0, twice_nat);
3206
3207   return 0;
3208 }
3209
3210 int
3211 nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
3212                       ip4_address_t * eh_addr, u16 eh_port, u8 proto,
3213                       u32 vrf_id, int is_in)
3214 {
3215   ip4_header_t ip;
3216   clib_bihash_kv_16_8_t kv, value;
3217   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
3218   snat_session_t *s;
3219   snat_main_per_thread_data_t *tsm;
3220
3221   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
3222   if (sm->num_workers > 1)
3223     tsm =
3224       vec_elt_at_index (sm->per_thread_data,
3225                         sm->worker_in2out_cb (&ip, fib_index, 0));
3226   else
3227     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
3228
3229   init_ed_k (&kv, *addr, port, *eh_addr, eh_port, fib_index, proto);
3230   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
3231     {
3232       return VNET_API_ERROR_NO_SUCH_ENTRY;
3233     }
3234
3235   if (pool_is_free_index (tsm->sessions, ed_value_get_session_index (&value)))
3236     return VNET_API_ERROR_UNSPECIFIED;
3237   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
3238   nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
3239   nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
3240   return 0;
3241 }
3242
/*
 * Node function of the "nat-default" node: it does no packet processing
 * and always returns 0.
 */
VLIB_NODE_FN (nat_default_node) (vlib_main_t * vm,
                                 vlib_node_runtime_t * node,
                                 vlib_frame_t * frame)
{
  return 0;
}
3249
/*
 * Registration of the internal "nat-default" node.  It declares NAT_N_NEXT
 * next nodes, one per NAT_NEXT_* index: drop, ICMP error, the NAT44-ED
 * in2out / out2in fast and slow paths (including the output-feature
 * variants) and the two worker-handoff classify nodes.
 * NOTE(review): presumably other NAT nodes reuse these next indices -
 * confirm against the nodes that reference NAT_NEXT_*.
 */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat_default_node) = {
  .name = "nat-default",
  .vector_size = sizeof (u32),
  .format_trace = 0,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = 0,
  .n_next_nodes = NAT_N_NEXT,
  .next_nodes = {
    [NAT_NEXT_DROP] = "error-drop",
    [NAT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    [NAT_NEXT_IN2OUT_ED_FAST_PATH] = "nat44-ed-in2out",
    [NAT_NEXT_IN2OUT_ED_SLOW_PATH] = "nat44-ed-in2out-slowpath",
    [NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH] = "nat44-ed-in2out-output",
    [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
    [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in",
    [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath",
    [NAT_NEXT_IN2OUT_CLASSIFY] = "nat44-in2out-worker-handoff",
    [NAT_NEXT_OUT2IN_CLASSIFY] = "nat44-out2in-worker-handoff",
  },
};
/* *INDENT-ON* */
3272
3273 void
3274 nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f)
3275 {
3276   f->l3_csum_delta = 0;
3277   f->l4_csum_delta = 0;
3278   if (f->ops & NAT_FLOW_OP_SADDR_REWRITE &&
3279       f->rewrite.saddr.as_u32 != f->match.saddr.as_u32)
3280     {
3281       f->l3_csum_delta =
3282         ip_csum_add_even (f->l3_csum_delta, f->rewrite.saddr.as_u32);
3283       f->l3_csum_delta =
3284         ip_csum_sub_even (f->l3_csum_delta, f->match.saddr.as_u32);
3285     }
3286   else
3287     {
3288       f->rewrite.saddr.as_u32 = f->match.saddr.as_u32;
3289     }
3290   if (f->ops & NAT_FLOW_OP_DADDR_REWRITE &&
3291       f->rewrite.daddr.as_u32 != f->match.daddr.as_u32)
3292     {
3293       f->l3_csum_delta =
3294         ip_csum_add_even (f->l3_csum_delta, f->rewrite.daddr.as_u32);
3295       f->l3_csum_delta =
3296         ip_csum_sub_even (f->l3_csum_delta, f->match.daddr.as_u32);
3297     }
3298   else
3299     {
3300       f->rewrite.daddr.as_u32 = f->match.daddr.as_u32;
3301     }
3302   if (f->ops & NAT_FLOW_OP_SPORT_REWRITE && f->rewrite.sport != f->match.sport)
3303     {
3304       f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.sport);
3305       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport);
3306     }
3307   else
3308     {
3309       f->rewrite.sport = f->match.sport;
3310     }
3311   if (f->ops & NAT_FLOW_OP_DPORT_REWRITE && f->rewrite.dport != f->match.dport)
3312     {
3313       f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.dport);
3314       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.dport);
3315     }
3316   else
3317     {
3318       f->rewrite.dport = f->match.dport;
3319     }
3320   if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE &&
3321       f->rewrite.icmp_id != f->match.icmp_id)
3322     {
3323       f->l4_csum_delta =
3324         ip_csum_add_even (f->l4_csum_delta, f->rewrite.icmp_id);
3325       f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.icmp_id);
3326     }
3327   else
3328     {
3329       f->rewrite.icmp_id = f->match.icmp_id;
3330     }
3331   if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3332     {
3333     }
3334   else
3335     {
3336       f->rewrite.fib_index = f->match.fib_index;
3337     }
3338 }
3339
/* Forward declaration; the definition follows nat_6t_flow_ip4_translate (). */
static_always_inline int nat_6t_flow_icmp_translate (snat_main_t *sm,
                                                     vlib_buffer_t *b,
                                                     ip4_header_t *ip,
                                                     nat_6t_flow_t *f);
3344
3345 static_always_inline void
3346 nat_6t_flow_ip4_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
3347                            nat_6t_flow_t *f, nat_protocol_t proto,
3348                            int is_icmp_inner_ip4)
3349 {
3350   udp_header_t *udp = ip4_next_header (ip);
3351   tcp_header_t *tcp = (tcp_header_t *) udp;
3352
3353   if ((NAT_PROTOCOL_TCP == proto || NAT_PROTOCOL_UDP == proto) &&
3354       !vnet_buffer (b)->ip.reass.is_non_first_fragment)
3355     {
3356       if (!is_icmp_inner_ip4)
3357         { // regular case
3358           ip->src_address = f->rewrite.saddr;
3359           ip->dst_address = f->rewrite.daddr;
3360           udp->src_port = f->rewrite.sport;
3361           udp->dst_port = f->rewrite.dport;
3362         }
3363       else
3364         { // icmp inner ip4 - reversed saddr/daddr
3365           ip->src_address = f->rewrite.daddr;
3366           ip->dst_address = f->rewrite.saddr;
3367           udp->src_port = f->rewrite.dport;
3368           udp->dst_port = f->rewrite.sport;
3369         }
3370
3371       if (NAT_PROTOCOL_TCP == proto)
3372         {
3373           ip_csum_t tcp_sum = tcp->checksum;
3374           tcp_sum = ip_csum_sub_even (tcp_sum, f->l3_csum_delta);
3375           tcp_sum = ip_csum_sub_even (tcp_sum, f->l4_csum_delta);
3376           mss_clamping (sm->mss_clamping, tcp, &tcp_sum);
3377           tcp->checksum = ip_csum_fold (tcp_sum);
3378         }
3379       else if (proto == NAT_PROTOCOL_UDP && udp->checksum)
3380         {
3381           ip_csum_t udp_sum = udp->checksum;
3382           udp_sum = ip_csum_sub_even (udp_sum, f->l3_csum_delta);
3383           udp_sum = ip_csum_sub_even (udp_sum, f->l4_csum_delta);
3384           udp->checksum = ip_csum_fold (udp_sum);
3385         }
3386     }
3387   else
3388     {
3389       if (!is_icmp_inner_ip4)
3390         { // regular case
3391           ip->src_address = f->rewrite.saddr;
3392           ip->dst_address = f->rewrite.daddr;
3393         }
3394       else
3395         { // icmp inner ip4 - reversed saddr/daddr
3396           ip->src_address = f->rewrite.daddr;
3397           ip->dst_address = f->rewrite.saddr;
3398         }
3399     }
3400
3401   ip_csum_t ip_sum = ip->checksum;
3402   ip_sum = ip_csum_sub_even (ip_sum, f->l3_csum_delta);
3403   ip->checksum = ip_csum_fold (ip_sum);
3404   ASSERT (ip->checksum == ip4_header_checksum (ip));
3405 }
3406
3407 static_always_inline int
3408 nat_6t_flow_icmp_translate (snat_main_t *sm, vlib_buffer_t *b,
3409                             ip4_header_t *ip, nat_6t_flow_t *f)
3410 {
3411   if (IP_PROTOCOL_ICMP != ip->protocol)
3412     return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3413
3414   icmp46_header_t *icmp = ip4_next_header (ip);
3415   icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3416
3417   if ((!vnet_buffer (b)->ip.reass.is_non_first_fragment))
3418     {
3419       if (icmp->checksum == 0)
3420         icmp->checksum = 0xffff;
3421
3422       if (!icmp_type_is_error_message (icmp->type))
3423         {
3424           if ((f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE) &&
3425               (f->rewrite.icmp_id != echo->identifier))
3426             {
3427               ip_csum_t sum = icmp->checksum;
3428               sum = ip_csum_update (sum, echo->identifier, f->rewrite.icmp_id,
3429                                     icmp_echo_header_t,
3430                                     identifier /* changed member */);
3431               echo->identifier = f->rewrite.icmp_id;
3432               icmp->checksum = ip_csum_fold (sum);
3433             }
3434         }
3435       else
3436         {
3437           // errors are not fragmented
3438           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3439
3440           if (!ip4_header_checksum_is_valid (inner_ip))
3441             {
3442               return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3443             }
3444
3445           nat_protocol_t inner_proto =
3446             ip_proto_to_nat_proto (inner_ip->protocol);
3447
3448           ip_csum_t icmp_sum = icmp->checksum;
3449
3450           switch (inner_proto)
3451             {
3452             case NAT_PROTOCOL_UDP:
3453             case NAT_PROTOCOL_TCP:
3454               nat_6t_flow_ip4_translate (sm, b, inner_ip, f, inner_proto,
3455                                          1 /* is_icmp_inner_ip4 */);
3456               icmp_sum = ip_csum_sub_even (icmp_sum, f->l3_csum_delta);
3457               icmp->checksum = ip_csum_fold (icmp_sum);
3458               break;
3459             case NAT_PROTOCOL_ICMP:
3460               if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
3461                 {
3462                   icmp46_header_t *inner_icmp = ip4_next_header (inner_ip);
3463                   icmp_echo_header_t *inner_echo =
3464                     (icmp_echo_header_t *) (inner_icmp + 1);
3465                   if (f->rewrite.icmp_id != inner_echo->identifier)
3466                     {
3467                       ip_csum_t sum = icmp->checksum;
3468                       sum = ip_csum_update (
3469                         sum, inner_echo->identifier, f->rewrite.icmp_id,
3470                         icmp_echo_header_t, identifier /* changed member */);
3471                       icmp->checksum = ip_csum_fold (sum);
3472                       ip_csum_t inner_sum = inner_icmp->checksum;
3473                       inner_sum = ip_csum_update (
3474                         sum, inner_echo->identifier, f->rewrite.icmp_id,
3475                         icmp_echo_header_t, identifier /* changed member */);
3476                       inner_icmp->checksum = ip_csum_fold (inner_sum);
3477                       inner_echo->identifier = f->rewrite.icmp_id;
3478                     }
3479                 }
3480               break;
3481             default:
3482               clib_warning ("unexpected NAT protocol value `%d'", inner_proto);
3483               return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
3484             }
3485         }
3486     }
3487   return NAT_ED_TRNSL_ERR_SUCCESS;
3488 }
3489
3490 nat_translation_error_e
3491 nat_6t_flow_buf_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
3492                            nat_6t_flow_t *f, nat_protocol_t proto,
3493                            int is_output_feature)
3494 {
3495   if (!is_output_feature && f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3496     {
3497       vnet_buffer (b)->sw_if_index[VLIB_TX] = f->rewrite.fib_index;
3498     }
3499
3500   nat_6t_flow_ip4_translate (sm, b, ip, f, proto, 0 /* is_icmp_inner_ip4 */);
3501
3502   if (NAT_PROTOCOL_ICMP == proto)
3503     {
3504       return nat_6t_flow_icmp_translate (sm, b, ip, f);
3505     }
3506
3507   return NAT_ED_TRNSL_ERR_SUCCESS;
3508 }
3509
3510 u8 *
3511 format_nat_6t (u8 *s, va_list *args)
3512 {
3513   nat_6t_t *t = va_arg (*args, nat_6t_t *);
3514
3515   s = format (s, "saddr %U sport %u daddr %U dport %u proto %U fib_idx %u",
3516               format_ip4_address, t->saddr.as_u8,
3517               clib_net_to_host_u16 (t->sport), format_ip4_address,
3518               t->daddr.as_u8, clib_net_to_host_u16 (t->dport),
3519               format_ip_protocol, t->proto, t->fib_index);
3520   return s;
3521 }
3522
3523 u8 *
3524 format_nat_ed_translation_error (u8 *s, va_list *args)
3525 {
3526   nat_translation_error_e e = va_arg (*args, nat_translation_error_e);
3527
3528   switch (e)
3529     {
3530     case NAT_ED_TRNSL_ERR_SUCCESS:
3531       s = format (s, "success");
3532       break;
3533     case NAT_ED_TRNSL_ERR_TRANSLATION_FAILED:
3534       s = format (s, "translation-failed");
3535       break;
3536     case NAT_ED_TRNSL_ERR_FLOW_MISMATCH:
3537       s = format (s, "flow-mismatch");
3538       break;
3539     }
3540   return s;
3541 }
3542
3543 u8 *
3544 format_nat_6t_flow (u8 *s, va_list *args)
3545 {
3546   nat_6t_flow_t *f = va_arg (*args, nat_6t_flow_t *);
3547
3548   s = format (s, "match: %U ", format_nat_6t, &f->match);
3549   int r = 0;
3550   if (f->ops & NAT_FLOW_OP_SADDR_REWRITE)
3551     {
3552       s = format (s, "rewrite: saddr %U ", format_ip4_address,
3553                   f->rewrite.saddr.as_u8);
3554       r = 1;
3555     }
3556   if (f->ops & NAT_FLOW_OP_SPORT_REWRITE)
3557     {
3558       if (!r)
3559         {
3560           s = format (s, "rewrite: ");
3561           r = 1;
3562         }
3563       s = format (s, "sport %u ", clib_net_to_host_u16 (f->rewrite.sport));
3564     }
3565   if (f->ops & NAT_FLOW_OP_DADDR_REWRITE)
3566     {
3567       if (!r)
3568         {
3569           s = format (s, "rewrite: ");
3570           r = 1;
3571         }
3572       s = format (s, "daddr %U ", format_ip4_address, f->rewrite.daddr.as_u8);
3573     }
3574   if (f->ops & NAT_FLOW_OP_DPORT_REWRITE)
3575     {
3576       if (!r)
3577         {
3578           s = format (s, "rewrite: ");
3579           r = 1;
3580         }
3581       s = format (s, "dport %u ", clib_net_to_host_u16 (f->rewrite.dport));
3582     }
3583   if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
3584     {
3585       if (!r)
3586         {
3587           s = format (s, "rewrite: ");
3588           r = 1;
3589         }
3590       s = format (s, "icmp-id %u ", clib_net_to_host_u16 (f->rewrite.icmp_id));
3591     }
3592   if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
3593     {
3594       if (!r)
3595         {
3596           s = format (s, "rewrite: ");
3597           r = 1;
3598         }
3599       s = format (s, "txfib %u ", f->rewrite.fib_index);
3600     }
3601   return s;
3602 }
3603
3604 /*
3605  * fd.io coding-style-patch-verification: ON
3606  *
3607  * Local Variables:
3608  * eval: (c-set-style "gnu")
3609  * End:
3610  */