NAT: TCP MSS clamping
[vpp.git] / src / plugins / nat / nat.c
1 /*
2  * snat.c - simple nat plugin
3  *
4  * Copyright (c) 2016 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/vnet.h>
19 #include <vnet/ip/ip.h>
20 #include <vnet/ip/ip4.h>
21 #include <vnet/plugin/plugin.h>
22 #include <nat/nat.h>
23 #include <nat/nat_dpo.h>
24 #include <nat/nat_ipfix_logging.h>
25 #include <nat/nat_det.h>
26 #include <nat/nat64.h>
27 #include <nat/nat66.h>
28 #include <nat/dslite.h>
29 #include <nat/nat_reass.h>
30 #include <nat/nat_inlines.h>
31 #include <nat/nat_affinity.h>
32 #include <vnet/fib/fib_table.h>
33 #include <vnet/fib/ip4_fib.h>
34
35 #include <vpp/app/version.h>
36
/* Global NAT plugin state; code in this file reaches it through &snat_main. */
snat_main_t snat_main;
38
39
/*
 * Hook up input features.
 *
 * Every entry below names one NAT44 node on the "ip4-unicast" feature arc
 * and lists "acl-plugin-in-ip4-fa" in .runs_after. The out2in entries also
 * list "ip4-dhcp-client-detect" there.
 */

/* Default NAT44 nodes */
VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-in2out",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-out2in",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-dhcp-client-detect"),
};
VNET_FEATURE_INIT (ip4_nat_classify, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-classify",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
};

/* "det" NAT44 nodes */
VNET_FEATURE_INIT (ip4_snat_det_in2out, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-det-in2out",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_snat_det_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-det-out2in",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-dhcp-client-detect"),
};
VNET_FEATURE_INIT (ip4_nat_det_classify, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-det-classify",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
};

/* "ed" NAT44 nodes */
VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-in2out",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-out2in",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-dhcp-client-detect"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-classify",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
};

/* Worker-handoff nodes */
VNET_FEATURE_INIT (ip4_snat_in2out_worker_handoff, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-in2out-worker-handoff",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_snat_out2in_worker_handoff, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-out2in-worker-handoff",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-dhcp-client-detect"),
};
VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-handoff-classify",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
};

/* "fast" nodes */
VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-in2out-fast",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-out2in-fast",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-dhcp-client-detect"),
};

/* Hairpinning (destination side) nodes */
VNET_FEATURE_INIT (ip4_snat_hairpin_dst, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-hairpin-dst",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_dst, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-hairpin-dst",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
};
126
/*
 * Hook up output features.
 *
 * Every entry below names one NAT44 node on the "ip4-output" feature arc
 * and lists "acl-plugin-out-ip4-fa" in .runs_after.
 */
VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-in2out-output",
  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-in2out-output-worker-handoff",
  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_snat_hairpin_src, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-hairpin-src",
  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-ed-in2out-output",
  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_src, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-ed-hairpin-src",
  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
153
/*
 * Hook up ip4-local features.
 *
 * Both hairpinning nodes sit on the "ip4-local" arc and list
 * "ip4-local-end-of-arc" in .runs_before.
 */
VNET_FEATURE_INIT (ip4_nat_hairpinning, static) =
{
  .arc_name = "ip4-local",
  .node_name = "nat44-hairpinning",
  .runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_hairpinning, static) =
{
  .arc_name = "ip4-local",
  .node_name = "nat44-ed-hairpinning",
  .runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
};
167
168
/* Plugin registration record: build version string plus a description. */
/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
    .version = VPP_BUILD_VER,
    .description = "Network Address Translation",
};
/* *INDENT-ON* */
175
/*
 * Registrations of the four classify nodes; each one is filled in by a
 * VLIB_REGISTER_NODE further down in this file.
 */
vlib_node_registration_t nat44_classify_node;
vlib_node_registration_t nat44_ed_classify_node;
vlib_node_registration_t nat44_det_classify_node;
vlib_node_registration_t nat44_handoff_classify_node;
180
/*
 * Next-node slots shared by all classify nodes. Each registration maps the
 * two slots onto its own in2out / out2in node names.
 */
typedef enum {
  NAT44_CLASSIFY_NEXT_IN2OUT,   /* the default chosen by the classifier */
  NAT44_CLASSIFY_NEXT_OUT2IN,
  NAT44_CLASSIFY_N_NEXT,        /* number of slots, not a real next node */
} nat44_classify_next_t;
186
187 void
188 nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index)
189 {
190   snat_session_key_t key;
191   clib_bihash_kv_8_8_t kv;
192   nat_ed_ses_key_t ed_key;
193   clib_bihash_kv_16_8_t ed_kv;
194   snat_main_per_thread_data_t *tsm =
195     vec_elt_at_index (sm->per_thread_data, thread_index);
196
197   if (is_fwd_bypass_session (s))
198     {
199       ed_key.l_addr = s->in2out.addr;
200       ed_key.r_addr = s->ext_host_addr;
201       ed_key.l_port = s->in2out.port;
202       ed_key.r_port = s->ext_host_port;
203       ed_key.proto = snat_proto_to_ip_proto (s->in2out.protocol);
204       ed_key.fib_index = 0;
205       ed_kv.key[0] = ed_key.as_u64[0];
206       ed_kv.key[1] = ed_key.as_u64[1];
207       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
208         nat_log_warn ("in2out_ed key del failed");
209       return;
210     }
211
212   /* session lookup tables */
213   if (is_ed_session (s))
214     {
215       if (is_affinity_sessions (s))
216         nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
217                              s->in2out.protocol, s->out2in.port);
218       ed_key.l_addr = s->out2in.addr;
219       ed_key.r_addr = s->ext_host_addr;
220       ed_key.fib_index = s->out2in.fib_index;
221       if (snat_is_unk_proto_session (s))
222         {
223           ed_key.proto = s->in2out.port;
224           ed_key.r_port = 0;
225           ed_key.l_port = 0;
226         }
227       else
228         {
229           ed_key.proto = snat_proto_to_ip_proto (s->in2out.protocol);
230           ed_key.l_port = s->out2in.port;
231           ed_key.r_port = s->ext_host_port;
232         }
233       ed_kv.key[0] = ed_key.as_u64[0];
234       ed_kv.key[1] = ed_key.as_u64[1];
235       if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &ed_kv, 0))
236         nat_log_warn ("out2in_ed key del failed");
237       ed_key.l_addr = s->in2out.addr;
238       ed_key.fib_index = s->in2out.fib_index;
239       if (!snat_is_unk_proto_session (s))
240         ed_key.l_port = s->in2out.port;
241       if (is_twice_nat_session (s))
242         {
243           ed_key.r_addr = s->ext_host_nat_addr;
244           ed_key.r_port = s->ext_host_nat_port;
245         }
246       ed_kv.key[0] = ed_key.as_u64[0];
247       ed_kv.key[1] = ed_key.as_u64[1];
248       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
249         nat_log_warn ("in2out_ed key del failed");
250     }
251   else
252     {
253       kv.key = s->in2out.as_u64;
254       if (clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 0))
255         nat_log_warn ("in2out key del failed");
256       kv.key = s->out2in.as_u64;
257       if (clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 0))
258         nat_log_warn ("out2in key del failed");
259     }
260
261   if (snat_is_unk_proto_session (s))
262     return;
263
264   /* log NAT event */
265   snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
266                                       s->out2in.addr.as_u32,
267                                       s->in2out.protocol,
268                                       s->in2out.port,
269                                       s->out2in.port,
270                                       s->in2out.fib_index);
271
272   /* Twice NAT address and port for external host */
273   if (is_twice_nat_session (s))
274     {
275       key.protocol = s->in2out.protocol;
276       key.port = s->ext_host_nat_port;
277       key.addr.as_u32 = s->ext_host_nat_addr.as_u32;
278       snat_free_outside_address_and_port (sm->twice_nat_addresses,
279                                           thread_index, &key);
280     }
281
282   if (snat_is_session_static (s))
283     return;
284
285   if (s->outside_address_index != ~0)
286     snat_free_outside_address_and_port (sm->addresses, thread_index,
287                                         &s->out2in);
288 }
289
290 snat_user_t *
291 nat_user_get_or_create (snat_main_t *sm, ip4_address_t *addr, u32 fib_index,
292                         u32 thread_index)
293 {
294   snat_user_t *u = 0;
295   snat_user_key_t user_key;
296   clib_bihash_kv_8_8_t kv, value;
297   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
298   dlist_elt_t * per_user_list_head_elt;
299
300   user_key.addr.as_u32 = addr->as_u32;
301   user_key.fib_index = fib_index;
302   kv.key = user_key.as_u64;
303
304   /* Ever heard of the "user" = src ip4 address before? */
305   if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
306     {
307       /* no, make a new one */
308       pool_get (tsm->users, u);
309       memset (u, 0, sizeof (*u));
310       u->addr.as_u32 = addr->as_u32;
311       u->fib_index = fib_index;
312
313       pool_get (tsm->list_pool, per_user_list_head_elt);
314
315       u->sessions_per_user_list_head_index = per_user_list_head_elt -
316         tsm->list_pool;
317
318       clib_dlist_init (tsm->list_pool, u->sessions_per_user_list_head_index);
319
320       kv.value = u - tsm->users;
321
322       /* add user */
323       if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1))
324         nat_log_warn ("user_hash keay add failed");
325     }
326   else
327     {
328       u = pool_elt_at_index (tsm->users, value.value);
329     }
330
331   return u;
332 }
333
334 snat_session_t *
335 nat_session_alloc_or_recycle (snat_main_t *sm, snat_user_t *u, u32 thread_index)
336 {
337   snat_session_t *s;
338   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
339   u32 oldest_per_user_translation_list_index, session_index;
340   dlist_elt_t * oldest_per_user_translation_list_elt;
341   dlist_elt_t * per_user_translation_list_elt;
342
343   /* Over quota? Recycle the least recently used translation */
344   if ((u->nsessions + u->nstaticsessions) >= sm->max_translations_per_user)
345     {
346       oldest_per_user_translation_list_index =
347         clib_dlist_remove_head (tsm->list_pool,
348                                 u->sessions_per_user_list_head_index);
349
350       ASSERT (oldest_per_user_translation_list_index != ~0);
351
352       /* Add it back to the end of the LRU list */
353       clib_dlist_addtail (tsm->list_pool,
354                           u->sessions_per_user_list_head_index,
355                           oldest_per_user_translation_list_index);
356       /* Get the list element */
357       oldest_per_user_translation_list_elt =
358         pool_elt_at_index (tsm->list_pool,
359                            oldest_per_user_translation_list_index);
360
361       /* Get the session index from the list element */
362       session_index = oldest_per_user_translation_list_elt->value;
363
364       /* Get the session */
365       s = pool_elt_at_index (tsm->sessions, session_index);
366       nat_free_session_data (sm, s, thread_index);
367       if (snat_is_session_static(s))
368         u->nstaticsessions--;
369       else
370         u->nsessions--;
371       s->outside_address_index = ~0;
372       s->flags = 0;
373       s->total_bytes = 0;
374       s->total_pkts = 0;
375       s->state = 0;
376       s->ext_host_addr.as_u32 = 0;
377       s->ext_host_port = 0;
378       s->ext_host_nat_addr.as_u32 = 0;
379       s->ext_host_nat_port = 0;
380     }
381   else
382     {
383       pool_get (tsm->sessions, s);
384       memset (s, 0, sizeof (*s));
385       s->outside_address_index = ~0;
386
387       /* Create list elts */
388       pool_get (tsm->list_pool, per_user_translation_list_elt);
389       clib_dlist_init (tsm->list_pool,
390                        per_user_translation_list_elt - tsm->list_pool);
391
392       per_user_translation_list_elt->value = s - tsm->sessions;
393       s->per_user_index = per_user_translation_list_elt - tsm->list_pool;
394       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
395
396       clib_dlist_addtail (tsm->list_pool,
397                           s->per_user_list_head_index,
398                           per_user_translation_list_elt - tsm->list_pool);
399     }
400
401   return s;
402 }
403
404 snat_session_t *
405 nat_ed_session_alloc (snat_main_t *sm, snat_user_t *u, u32 thread_index)
406 {
407   snat_session_t *s;
408   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
409   dlist_elt_t * per_user_translation_list_elt;
410
411   if ((u->nsessions + u->nstaticsessions) >= sm->max_translations_per_user)
412     {
413       nat_log_warn ("max translations per user %U", format_ip4_address, &u->addr);
414       snat_ipfix_logging_max_entries_per_user (sm->max_translations_per_user,
415                                                u->addr.as_u32);
416       return 0;
417     }
418
419   pool_get (tsm->sessions, s);
420   memset (s, 0, sizeof (*s));
421   s->outside_address_index = ~0;
422
423   /* Create list elts */
424   pool_get (tsm->list_pool, per_user_translation_list_elt);
425   clib_dlist_init (tsm->list_pool,
426                    per_user_translation_list_elt - tsm->list_pool);
427
428   per_user_translation_list_elt->value = s - tsm->sessions;
429   s->per_user_index = per_user_translation_list_elt - tsm->list_pool;
430   s->per_user_list_head_index = u->sessions_per_user_list_head_index;
431
432   clib_dlist_addtail (tsm->list_pool,
433                       s->per_user_list_head_index,
434                       per_user_translation_list_elt - tsm->list_pool);
435
436   return s;
437 }
438
/* Per-packet trace record written by the classify nodes. */
typedef struct {
  u8 next_in2out;   /* 1 when the packet was sent to the IN2OUT next, else 0 */
} nat44_classify_trace_t;
442
443 static u8 * format_nat44_classify_trace (u8 * s, va_list * args)
444 {
445   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
446   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
447   nat44_classify_trace_t *t = va_arg (*args, nat44_classify_trace_t *);
448   char *next;
449
450   next = t->next_in2out ? "nat44-in2out" : "nat44-out2in";
451
452   s = format (s, "nat44-classify: next %s", next);
453
454   return s;
455 }
456
/**
 * @brief Shared worker for the nat44 classify nodes.
 *
 * For every buffer in the frame, pick the IN2OUT or the OUT2IN next slot.
 * A packet goes to OUT2IN when its destination address equals one of
 * sm->addresses, or when it matches a static mapping (first by external
 * address only, then by external address + port + protocol) whose local
 * and external addresses differ. Everything else keeps the default,
 * IN2OUT.
 *
 * @param vm    vlib main.
 * @param node  runtime of the calling node.
 * @param frame frame of buffer indices to classify.
 * @param is_ed when non-zero, the packet is first looked up in the
 *              per-thread in2out_ed table; a hit keeps IN2OUT and skips
 *              all remaining checks.
 *
 * @return number of buffers in the frame.
 */
static inline uword
nat44_classify_node_fn_inline (vlib_main_t * vm,
                               vlib_node_runtime_t * node,
                               vlib_frame_t * frame,
                               int is_ed)
{
  u32 n_left_from, * from, * to_next;
  nat44_classify_next_t next_index;
  snat_main_t *sm = &snat_main;
  snat_static_mapping_t *m;
  u32 thread_index = vm->thread_index;
  snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index,
                           to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;
          vlib_buffer_t *b0;
          /* IN2OUT unless one of the checks below says otherwise */
          u32 next0 = NAT44_CLASSIFY_NEXT_IN2OUT, sw_if_index0, rx_fib_index0;
          ip4_header_t *ip0;
          snat_address_t *ap;
          snat_session_key_t m_key0;
          clib_bihash_kv_8_8_t kv0, value0;
          clib_bihash_kv_16_8_t ed_kv0, ed_value0;
          udp_header_t *udp0;

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          ip0 = vlib_buffer_get_current (b0);
          /* Ports are read through a UDP header overlay whatever
           * ip0->protocol is. NOTE(review): presumably fine for TCP too;
           * confirm what is intended for other protocols. */
          udp0 = ip4_next_header (ip0);

          if (is_ed)
            {
              /* Existing in2out_ed entry for this packet: keep IN2OUT */
              sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
              rx_fib_index0 =
                fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
                                                     sw_if_index0);
              make_ed_kv (&ed_kv0, &ip0->src_address, &ip0->dst_address,
                          ip0->protocol, rx_fib_index0, udp0->src_port,
                          udp0->dst_port);
              if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &ed_kv0, &ed_value0))
                goto enqueue0;
            }

          /* Destination is one of the NAT pool addresses: OUT2IN */
          vec_foreach (ap, sm->addresses)
            {
              if (ip0->dst_address.as_u32 == ap->addr.as_u32)
                {
                  next0 = NAT44_CLASSIFY_NEXT_OUT2IN;
                  goto enqueue0;
                }
            }

          if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
            {
              /* First try: destination address with port, protocol and
               * fib_index all zero. */
              m_key0.addr = ip0->dst_address;
              m_key0.port = 0;
              m_key0.protocol = 0;
              m_key0.fib_index = 0;
              kv0.key = m_key0.as_u64;
              if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv0, &value0))
                {
                  m = pool_elt_at_index (sm->static_mappings, value0.value);
                  /* A mapping whose local and external addresses are
                   * equal leaves the packet on IN2OUT. */
                  if (m->local_addr.as_u32 != m->external_addr.as_u32)
                    next0 = NAT44_CLASSIFY_NEXT_OUT2IN;
                  goto enqueue0;
                }
              /* Second try: same address plus destination port and
               * protocol. */
              m_key0.port = clib_net_to_host_u16 (udp0->dst_port);
              m_key0.protocol = ip_proto_to_snat_proto (ip0->protocol);
              kv0.key = m_key0.as_u64;
              if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv0, &value0))
                {
                  m = pool_elt_at_index (sm->static_mappings, value0.value);
                  if (m->local_addr.as_u32 != m->external_addr.as_u32)
                    next0 = NAT44_CLASSIFY_NEXT_OUT2IN;
                }
            }

        enqueue0:
          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              nat44_classify_trace_t *t =
                  vlib_add_trace (vm, node, b0, sizeof (*t));
              t->next_in2out = next0 == NAT44_CLASSIFY_NEXT_IN2OUT ? 1 : 0;
            }

          /* verify speculative enqueue, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}
572
573 static uword
574 nat44_classify_node_fn (vlib_main_t * vm,
575                         vlib_node_runtime_t * node,
576                         vlib_frame_t * frame)
577 {
578   return nat44_classify_node_fn_inline (vm, node, frame, 0);
579 };
580
581 VLIB_REGISTER_NODE (nat44_classify_node) = {
582   .function = nat44_classify_node_fn,
583   .name = "nat44-classify",
584   .vector_size = sizeof (u32),
585   .format_trace = format_nat44_classify_trace,
586   .type = VLIB_NODE_TYPE_INTERNAL,
587   .n_next_nodes = NAT44_CLASSIFY_N_NEXT,
588   .next_nodes = {
589     [NAT44_CLASSIFY_NEXT_IN2OUT] = "nat44-in2out",
590     [NAT44_CLASSIFY_NEXT_OUT2IN] = "nat44-out2in",
591   },
592 };
593
594 VLIB_NODE_FUNCTION_MULTIARCH (nat44_classify_node,
595                               nat44_classify_node_fn);
596 static uword
597 nat44_ed_classify_node_fn (vlib_main_t * vm,
598                            vlib_node_runtime_t * node,
599                            vlib_frame_t * frame)
600 {
601   return nat44_classify_node_fn_inline (vm, node, frame, 1);
602 };
603
604 VLIB_REGISTER_NODE (nat44_ed_classify_node) = {
605   .function = nat44_ed_classify_node_fn,
606   .name = "nat44-ed-classify",
607   .vector_size = sizeof (u32),
608   .format_trace = format_nat44_classify_trace,
609   .type = VLIB_NODE_TYPE_INTERNAL,
610   .n_next_nodes = NAT44_CLASSIFY_N_NEXT,
611   .next_nodes = {
612     [NAT44_CLASSIFY_NEXT_IN2OUT] = "nat44-ed-in2out",
613     [NAT44_CLASSIFY_NEXT_OUT2IN] = "nat44-ed-out2in",
614   },
615 };
616
617 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_classify_node,
618                               nat44_ed_classify_node_fn);
619
620 static uword
621 nat44_det_classify_node_fn (vlib_main_t * vm,
622                             vlib_node_runtime_t * node,
623                             vlib_frame_t * frame)
624 {
625   return nat44_classify_node_fn_inline (vm, node, frame, 0);
626 };
627
628 VLIB_REGISTER_NODE (nat44_det_classify_node) = {
629   .function = nat44_det_classify_node_fn,
630   .name = "nat44-det-classify",
631   .vector_size = sizeof (u32),
632   .format_trace = format_nat44_classify_trace,
633   .type = VLIB_NODE_TYPE_INTERNAL,
634   .n_next_nodes = NAT44_CLASSIFY_N_NEXT,
635   .next_nodes = {
636     [NAT44_CLASSIFY_NEXT_IN2OUT] = "nat44-det-in2out",
637     [NAT44_CLASSIFY_NEXT_OUT2IN] = "nat44-det-out2in",
638   },
639 };
640
641 VLIB_NODE_FUNCTION_MULTIARCH (nat44_det_classify_node,
642                               nat44_det_classify_node_fn);
643
644 static uword
645 nat44_handoff_classify_node_fn (vlib_main_t * vm,
646                                 vlib_node_runtime_t * node,
647                                 vlib_frame_t * frame)
648 {
649   return nat44_classify_node_fn_inline (vm, node, frame, 0);
650 };
651
652 VLIB_REGISTER_NODE (nat44_handoff_classify_node) = {
653   .function = nat44_handoff_classify_node_fn,
654   .name = "nat44-handoff-classify",
655   .vector_size = sizeof (u32),
656   .format_trace = format_nat44_classify_trace,
657   .type = VLIB_NODE_TYPE_INTERNAL,
658   .n_next_nodes = NAT44_CLASSIFY_N_NEXT,
659   .next_nodes = {
660     [NAT44_CLASSIFY_NEXT_IN2OUT] = "nat44-in2out-worker-handoff",
661     [NAT44_CLASSIFY_NEXT_OUT2IN] = "nat44-out2in-worker-handoff",
662   },
663 };
664
665 VLIB_NODE_FUNCTION_MULTIARCH (nat44_handoff_classify_node,
666                               nat44_handoff_classify_node_fn);
667
668 /**
669  * @brief Add/del NAT address to FIB.
670  *
671  * Add the external NAT address to the FIB as receive entries. This ensures
672  * that VPP will reply to ARP for this address and we don't need to enable
673  * proxy ARP on the outside interface.
674  *
675  * @param addr IPv4 address.
676  * @param plen address prefix length
677  * @param sw_if_index Interface.
678  * @param is_add If 0 delete, otherwise add.
679  */
680 void
681 snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
682                           int is_add)
683 {
684   fib_prefix_t prefix = {
685     .fp_len = p_len,
686     .fp_proto = FIB_PROTOCOL_IP4,
687     .fp_addr = {
688         .ip4.as_u32 = addr->as_u32,
689     },
690   };
691   u32 fib_index = ip4_fib_table_get_index_for_sw_if_index(sw_if_index);
692
693   if (is_add)
694     fib_table_entry_update_one_path(fib_index,
695                                     &prefix,
696                                     FIB_SOURCE_PLUGIN_LOW,
697                                     (FIB_ENTRY_FLAG_CONNECTED |
698                                      FIB_ENTRY_FLAG_LOCAL |
699                                      FIB_ENTRY_FLAG_EXCLUSIVE),
700                                     DPO_PROTO_IP4,
701                                     NULL,
702                                     sw_if_index,
703                                     ~0,
704                                     1,
705                                     NULL,
706                                     FIB_ROUTE_PATH_FLAG_NONE);
707   else
708     fib_table_entry_delete(fib_index,
709                            &prefix,
710                            FIB_SOURCE_PLUGIN_LOW);
711 }
712
713 int snat_add_address (snat_main_t *sm, ip4_address_t *addr, u32 vrf_id,
714                        u8 twice_nat)
715 {
716   snat_address_t * ap;
717   snat_interface_t *i;
718   vlib_thread_main_t *tm = vlib_get_thread_main ();
719
720   if (twice_nat && !sm->endpoint_dependent)
721     return VNET_API_ERROR_FEATURE_DISABLED;
722
723   /* Check if address already exists */
724   vec_foreach (ap, twice_nat ? sm->twice_nat_addresses : sm->addresses)
725     {
726       if (ap->addr.as_u32 == addr->as_u32)
727         return VNET_API_ERROR_VALUE_EXIST;
728     }
729
730   if (twice_nat)
731     vec_add2 (sm->twice_nat_addresses, ap, 1);
732   else
733     vec_add2 (sm->addresses, ap, 1);
734
735   ap->addr = *addr;
736   if (vrf_id != ~0)
737     ap->fib_index =
738       fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
739                                          FIB_SOURCE_PLUGIN_LOW);
740   else
741     ap->fib_index = ~0;
742 #define _(N, i, n, s) \
743   clib_bitmap_alloc (ap->busy_##n##_port_bitmap, 65535); \
744   ap->busy_##n##_ports = 0; \
745   vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
746   foreach_snat_protocol
747 #undef _
748
749   if (twice_nat)
750     return 0;
751
752   /* Add external address to FIB */
753   pool_foreach (i, sm->interfaces,
754   ({
755     if (nat_interface_is_inside(i) || sm->out2in_dpo)
756       continue;
757
758     snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1);
759     break;
760   }));
761   pool_foreach (i, sm->output_feature_interfaces,
762   ({
763     if (nat_interface_is_inside(i) || sm->out2in_dpo)
764       continue;
765
766     snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1);
767     break;
768   }));
769
770   return 0;
771 }
772
773 static int is_snat_address_used_in_static_mapping (snat_main_t *sm,
774                                                    ip4_address_t addr)
775 {
776   snat_static_mapping_t *m;
777   pool_foreach (m, sm->static_mappings,
778   ({
779       if (m->external_addr.as_u32 == addr.as_u32)
780         return 1;
781   }));
782
783   return 0;
784 }
785
786 void increment_v4_address (ip4_address_t * a)
787 {
788   u32 v;
789
790   v = clib_net_to_host_u32(a->as_u32) + 1;
791   a->as_u32 = clib_host_to_net_u32(v);
792 }
793
794 static void
795 snat_add_static_mapping_when_resolved (snat_main_t * sm,
796                                        ip4_address_t l_addr,
797                                        u16 l_port,
798                                        u32 sw_if_index,
799                                        u16 e_port,
800                                        u32 vrf_id,
801                                        snat_protocol_t proto,
802                                        int addr_only,
803                                        int is_add,
804                                        u8 * tag)
805 {
806   snat_static_map_resolve_t *rp;
807
808   vec_add2 (sm->to_resolve, rp, 1);
809   rp->l_addr.as_u32 = l_addr.as_u32;
810   rp->l_port = l_port;
811   rp->sw_if_index = sw_if_index;
812   rp->e_port = e_port;
813   rp->vrf_id = vrf_id;
814   rp->proto = proto;
815   rp->addr_only = addr_only;
816   rp->is_add = is_add;
817   rp->tag = vec_dup (tag);
818 }
819                                        
820 static u32 get_thread_idx_by_port(u16 e_port)
821 {
822     snat_main_t * sm = &snat_main;
823     u32 thread_idx = sm->num_workers;
824     if (sm->num_workers > 1)
825     {
826         thread_idx = sm->first_worker_index + sm->workers[(e_port - 1024) / sm->port_per_thread];
827     }      
828     return thread_idx;
829 }
830
831 /**
832  * @brief Add static mapping.
833  *
834  * Create static mapping between local addr+port and external addr+port.
835  *
836  * @param l_addr Local IPv4 address.
837  * @param e_addr External IPv4 address.
838  * @param l_port Local port number.
839  * @param e_port External port number.
840  * @param vrf_id VRF ID.
 * @param addr_only If 0, address-and-port pair mapping; otherwise address only.
 * @param sw_if_index External interface whose address is used instead of a
 *                    specific IP address (~0 when not used).
843  * @param is_add If 0 delete static mapping, otherwise add.
844  * @param twice_nat If value is TWICE_NAT then translate external host address
845  *                  and port.
846  *                  If value is TWICE_NAT_SELF then translate external host
847  *                  address and port whenever external host address equals
848  *                  local address of internal host.
849  * @param out2in_only If 1 rule match only out2in direction
850  * @param tag - opaque string tag
851  *
852  * @returns
853  */
854 int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
855                             u16 l_port, u16 e_port, u32 vrf_id, int addr_only,
856                             u32 sw_if_index, snat_protocol_t proto, int is_add,
857                             twice_nat_type_t twice_nat, u8 out2in_only,
858                             u8 * tag)
859 {
860   snat_main_t * sm = &snat_main;
861   snat_static_mapping_t *m;
862   snat_session_key_t m_key;
863   clib_bihash_kv_8_8_t kv, value;
864   snat_address_t *a = 0;
865   u32 fib_index = ~0;
866   uword * p;
867   snat_interface_t *interface;
868   int i;
869   snat_main_per_thread_data_t *tsm;
870   snat_user_key_t u_key;
871   snat_user_t *u;
872   dlist_elt_t * head, * elt;
873   u32 elt_index, head_index;
874   u32 ses_index;
875   u64 user_index;
876   snat_session_t * s;
877   snat_static_map_resolve_t *rp, *rp_match = 0;
878
879   if (!sm->endpoint_dependent)
880     {
881       if (twice_nat || out2in_only)
882         return VNET_API_ERROR_FEATURE_DISABLED;
883     }
884
885   /* If the external address is a specific interface address */
886   if (sw_if_index != ~0)
887     {
888       ip4_address_t * first_int_addr;
889
890       for (i = 0; i < vec_len (sm->to_resolve); i++)
891         {
892           rp = sm->to_resolve + i;
893           if (rp->sw_if_index != sw_if_index ||
894               rp->l_addr.as_u32 != l_addr.as_u32 ||
895               rp->vrf_id != vrf_id || rp->addr_only != addr_only)
896             continue;
897
898           if (!addr_only)
899             {
900               if (rp->l_port != l_port || rp->e_port != e_port || rp->proto != proto)
901                 continue;
902             }
903
904           rp_match = rp;
905           break;
906         }
907
908       /* Might be already set... */
909       first_int_addr = ip4_interface_first_address
910         (sm->ip4_main, sw_if_index, 0 /* just want the address*/);
911
912       if (is_add)
913         {
914           if (rp_match)
915             return VNET_API_ERROR_VALUE_EXIST;
916
917           snat_add_static_mapping_when_resolved
918             (sm, l_addr, l_port, sw_if_index, e_port, vrf_id, proto,
919              addr_only,  is_add, tag);
920
921           /* DHCP resolution required? */
922           if (first_int_addr == 0)
923             {
924               return 0;
925             }
926           else
927             {
928               e_addr.as_u32 = first_int_addr->as_u32;
929               /* Identity mapping? */
930               if (l_addr.as_u32 == 0)
931                 l_addr.as_u32 = e_addr.as_u32;
932             }
933         }
934       else
935         {
936           if (!rp_match)
937             return VNET_API_ERROR_NO_SUCH_ENTRY;
938
939           vec_del1 (sm->to_resolve, i);
940
941           if (first_int_addr)
942             {
943               e_addr.as_u32 = first_int_addr->as_u32;
944               /* Identity mapping? */
945               if (l_addr.as_u32 == 0)
946                 l_addr.as_u32 = e_addr.as_u32;
947             }
948           else
949             return 0;
950         }
951     }
952
953   m_key.addr = e_addr;
954   m_key.port = addr_only ? 0 : e_port;
955   m_key.protocol = addr_only ? 0 : proto;
956   m_key.fib_index = 0;
957   kv.key = m_key.as_u64;
958   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
959     m = 0;
960   else
961     m = pool_elt_at_index (sm->static_mappings, value.value);
962
963   if (is_add)
964     {
965       if (m)
966         return VNET_API_ERROR_VALUE_EXIST;
967
968       if (twice_nat && addr_only)
969         return VNET_API_ERROR_UNSUPPORTED;
970
971       /* Convert VRF id to FIB index */
972       if (vrf_id != ~0)
973         {
974           p = hash_get (sm->ip4_main->fib_index_by_table_id, vrf_id);
975           if (!p)
976             return VNET_API_ERROR_NO_SUCH_FIB;
977           fib_index = p[0];
978         }
979       /* If not specified use inside VRF id from SNAT plugin startup config */
980       else
981         {
982           fib_index = sm->inside_fib_index;
983           vrf_id = sm->inside_vrf_id;
984         }
985
986       if (!out2in_only)
987         {
988           m_key.addr = l_addr;
989           m_key.port = addr_only ? 0 : l_port;
990           m_key.protocol = addr_only ? 0 : proto;
991           m_key.fib_index = fib_index;
992           kv.key = m_key.as_u64;
993           if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
994             return VNET_API_ERROR_VALUE_EXIST;
995         }
996
997       /* Find external address in allocated addresses and reserve port for
998          address and port pair mapping when dynamic translations enabled */
999       if (!(addr_only || sm->static_mapping_only || out2in_only))
1000         {
1001           for (i = 0; i < vec_len (sm->addresses); i++)
1002             {
1003               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1004                 {
1005                   a = sm->addresses + i;
1006                   /* External port must be unused */
1007                   switch (proto)
1008                     {
1009 #define _(N, j, n, s) \
1010                     case SNAT_PROTOCOL_##N: \
1011                       if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, e_port)) \
1012                         return VNET_API_ERROR_INVALID_VALUE; \
1013                       clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 1); \
1014                       if (e_port > 1024) \
1015                         { \
1016                           a->busy_##n##_ports++; \
1017                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
1018                         } \
1019                       break;
1020                       foreach_snat_protocol
1021 #undef _
1022                     default:
1023                       nat_log_info ("unknown protocol");
1024                       return VNET_API_ERROR_INVALID_VALUE_2;
1025                     }
1026                   break;
1027                 }
1028             }
1029           /* External address must be allocated */
1030           if (!a && (l_addr.as_u32 != e_addr.as_u32))
1031             {
1032               if (sw_if_index != ~0)
1033                 {
1034                   for (i = 0; i < vec_len (sm->to_resolve); i++)
1035                     {
1036                       rp = sm->to_resolve + i;
1037                       if (rp->addr_only)
1038                          continue;
1039                       if (rp->sw_if_index != sw_if_index &&
1040                           rp->l_addr.as_u32 != l_addr.as_u32 &&
1041                           rp->vrf_id != vrf_id && rp->l_port != l_port &&
1042                           rp->e_port != e_port && rp->proto != proto)
1043                         continue;
1044
1045                       vec_del1 (sm->to_resolve, i);
1046                       break;
1047                     }
1048                 }
1049               return VNET_API_ERROR_NO_SUCH_ENTRY;
1050             }
1051         }
1052
1053       pool_get (sm->static_mappings, m);
1054       memset (m, 0, sizeof (*m));
1055       m->tag = vec_dup (tag);
1056       m->local_addr = l_addr;
1057       m->external_addr = e_addr;
1058       m->addr_only = addr_only;
1059       m->vrf_id = vrf_id;
1060       m->fib_index = fib_index;
1061       m->twice_nat = twice_nat;
1062       m->out2in_only = out2in_only;
1063       if (!addr_only)
1064         {
1065           m->local_port = l_port;
1066           m->external_port = e_port;
1067           m->proto = proto;
1068         }
1069
1070       if (sm->num_workers > 1)
1071         {
1072           ip4_header_t ip = {
1073             .src_address = m->local_addr,
1074           };
1075           vec_add1 (m->workers, sm->worker_in2out_cb (&ip, m->fib_index));
1076           tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
1077         }
1078       else
1079         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1080
1081       m_key.addr = m->local_addr;
1082       m_key.port = m->local_port;
1083       m_key.protocol = m->proto;
1084       m_key.fib_index = m->fib_index;
1085       kv.key = m_key.as_u64;
1086       kv.value = m - sm->static_mappings;
1087       if (!out2in_only)
1088         clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 1);
1089
1090       m_key.addr = m->external_addr;
1091       m_key.port = m->external_port;
1092       m_key.fib_index = 0;
1093       kv.key = m_key.as_u64;
1094       kv.value = m - sm->static_mappings;
1095       clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 1);
1096
1097       /* Delete dynamic sessions matching local address (+ local port) */
1098       if (!(sm->static_mapping_only))
1099         {
1100           u_key.addr = m->local_addr;
1101           u_key.fib_index = m->fib_index;
1102           kv.key = u_key.as_u64;
1103           if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1104             {
1105               user_index = value.value;
1106               u = pool_elt_at_index (tsm->users, user_index);
1107               if (u->nsessions)
1108                 {
1109                   head_index = u->sessions_per_user_list_head_index;
1110                   head = pool_elt_at_index (tsm->list_pool, head_index);
1111                   elt_index = head->next;
1112                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1113                   ses_index = elt->value;
1114                   while (ses_index != ~0)
1115                     {
1116                       s =  pool_elt_at_index (tsm->sessions, ses_index);
1117                       elt = pool_elt_at_index (tsm->list_pool, elt->next);
1118                       ses_index = elt->value;
1119
1120                       if (snat_is_session_static (s))
1121                         continue;
1122
1123                       if (!addr_only && (clib_net_to_host_u16 (s->in2out.port) != m->local_port))
1124                         continue;
1125
1126                       nat_free_session_data (sm, s, tsm - sm->per_thread_data);
1127                       nat44_delete_session (sm, s, tsm - sm->per_thread_data);
1128
1129                       if (!addr_only && !sm->endpoint_dependent)
1130                         break;
1131                     }
1132                 }
1133             }
1134         }
1135     }
1136   else
1137     {
1138       if (!m)
1139         {
1140           if (sw_if_index != ~0)
1141             return 0;
1142           else
1143             return VNET_API_ERROR_NO_SUCH_ENTRY;
1144         }
1145
1146       /* Free external address port */
1147       if (!(addr_only || sm->static_mapping_only || out2in_only))
1148         {
1149           for (i = 0; i < vec_len (sm->addresses); i++)
1150             {
1151               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1152                 {
1153                   a = sm->addresses + i;
1154                   switch (proto)
1155                     {
1156 #define _(N, j, n, s) \
1157                     case SNAT_PROTOCOL_##N: \
1158                       clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 0); \
1159                       if (e_port > 1024) \
1160                         { \
1161                           a->busy_##n##_ports--; \
1162                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1163                         } \
1164                       break;
1165                       foreach_snat_protocol
1166 #undef _
1167                     default:
1168                       nat_log_info ("unknown protocol");
1169                       return VNET_API_ERROR_INVALID_VALUE_2;
1170                     }
1171                   break;
1172                 }
1173             }
1174         }
1175
1176       if (sm->num_workers > 1)
1177         tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
1178       else
1179         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1180
1181       m_key.addr = m->local_addr;
1182       m_key.port = m->local_port;
1183       m_key.protocol = m->proto;
1184       m_key.fib_index = m->fib_index;
1185       kv.key = m_key.as_u64;
1186       if (!out2in_only)
1187         clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0);
1188
1189       m_key.addr = m->external_addr;
1190       m_key.port = m->external_port;
1191       m_key.fib_index = 0;
1192       kv.key = m_key.as_u64;
1193       clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 0);
1194
1195       /* Delete session(s) for static mapping if exist */
1196       if (!(sm->static_mapping_only) ||
1197           (sm->static_mapping_only && sm->static_mapping_connection_tracking))
1198         {
1199           u_key.addr = m->local_addr;
1200           u_key.fib_index = m->fib_index;
1201           kv.key = u_key.as_u64;
1202           if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1203             {
1204               user_index = value.value;
1205               u = pool_elt_at_index (tsm->users, user_index);
1206               if (u->nstaticsessions)
1207                 {
1208                   head_index = u->sessions_per_user_list_head_index;
1209                   head = pool_elt_at_index (tsm->list_pool, head_index);
1210                   elt_index = head->next;
1211                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1212                   ses_index = elt->value;
1213                   while (ses_index != ~0)
1214                     {
1215                       s =  pool_elt_at_index (tsm->sessions, ses_index);
1216                       elt = pool_elt_at_index (tsm->list_pool, elt->next);
1217                       ses_index = elt->value;
1218
1219                       if (!addr_only)
1220                         {
1221                           if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
1222                               (clib_net_to_host_u16 (s->out2in.port) != e_port))
1223                             continue;
1224                         }
1225
1226                       if (is_lb_session (s))
1227                         continue;
1228
1229                       if (!snat_is_session_static (s))
1230                         continue;
1231
1232                       nat_free_session_data (sm, s, tsm - sm->per_thread_data);
1233                       nat44_delete_session (sm, s, tsm - sm->per_thread_data);
1234
1235                       if (!addr_only && !sm->endpoint_dependent)
1236                         break;
1237                     }
1238                 }
1239             }
1240         }
1241
1242       vec_free (m->tag);
1243       vec_free (m->workers);
1244       /* Delete static mapping from pool */
1245       pool_put (sm->static_mappings, m);
1246     }
1247
1248   if (!addr_only || (l_addr.as_u32 == e_addr.as_u32))
1249     return 0;
1250
1251   /* Add/delete external address to FIB */
1252   pool_foreach (interface, sm->interfaces,
1253   ({
1254     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1255       continue;
1256
1257     snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add);
1258     break;
1259   }));
1260   pool_foreach (interface, sm->output_feature_interfaces,
1261   ({
1262     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1263       continue;
1264
1265     snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add);
1266     break;
1267   }));
1268
1269   return 0;
1270 }
1271
1272 int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
1273                                      snat_protocol_t proto,
1274                                      nat44_lb_addr_port_t *locals, u8 is_add,
1275                                      twice_nat_type_t twice_nat, u8 out2in_only,
1276                                      u8 *tag, u32 affinity)
1277 {
1278   snat_main_t * sm = &snat_main;
1279   snat_static_mapping_t *m;
1280   snat_session_key_t m_key;
1281   clib_bihash_kv_8_8_t kv, value;
1282   snat_address_t *a = 0;
1283   int i;
1284   nat44_lb_addr_port_t *local;
1285   u32 elt_index, head_index, ses_index;
1286   snat_main_per_thread_data_t *tsm;
1287   snat_user_key_t u_key;
1288   snat_user_t *u;
1289   snat_session_t * s;
1290   dlist_elt_t * head, * elt;
1291   uword *bitmap = 0;
1292
1293   if (!sm->endpoint_dependent)
1294     return VNET_API_ERROR_FEATURE_DISABLED;
1295
1296   m_key.addr = e_addr;
1297   m_key.port = e_port;
1298   m_key.protocol = proto;
1299   m_key.fib_index = 0;
1300   kv.key = m_key.as_u64;
1301   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1302     m = 0;
1303   else
1304     m = pool_elt_at_index (sm->static_mappings, value.value);
1305
1306   if (is_add)
1307     {
1308       if (m)
1309         return VNET_API_ERROR_VALUE_EXIST;
1310
1311       if (vec_len (locals) < 2)
1312         return VNET_API_ERROR_INVALID_VALUE;
1313
1314       /* Find external address in allocated addresses and reserve port for
1315          address and port pair mapping when dynamic translations enabled */
1316       if (!(sm->static_mapping_only || out2in_only))
1317         {
1318           for (i = 0; i < vec_len (sm->addresses); i++)
1319             {
1320               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1321                 {
1322                   a = sm->addresses + i;
1323                   /* External port must be unused */
1324                   switch (proto)
1325                     {
1326 #define _(N, j, n, s) \
1327                     case SNAT_PROTOCOL_##N: \
1328                       if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, e_port)) \
1329                         return VNET_API_ERROR_INVALID_VALUE; \
1330                       clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 1); \
1331                       if (e_port > 1024) \
1332                         { \
1333                           a->busy_##n##_ports++; \
1334                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
1335                         } \
1336                       break;
1337                       foreach_snat_protocol
1338 #undef _
1339                     default:
1340                       nat_log_info ("unknown protocol");
1341                       return VNET_API_ERROR_INVALID_VALUE_2;
1342                     }
1343                   break;
1344                 }
1345             }
1346           /* External address must be allocated */
1347           if (!a)
1348             return VNET_API_ERROR_NO_SUCH_ENTRY;
1349         }
1350
1351       pool_get (sm->static_mappings, m);
1352       memset (m, 0, sizeof (*m));
1353       m->tag = vec_dup (tag);
1354       m->external_addr = e_addr;
1355       m->addr_only = 0;
1356       m->external_port = e_port;
1357       m->proto = proto;
1358       m->twice_nat = twice_nat;
1359       m->out2in_only = out2in_only;
1360       m->affinity = affinity;
1361
1362       if (affinity)
1363         m->affinity_per_service_list_head_index =
1364           nat_affinity_get_per_service_list_head_index();
1365       else
1366         m->affinity_per_service_list_head_index = ~0;
1367
1368       m_key.addr = m->external_addr;
1369       m_key.port = m->external_port;
1370       m_key.protocol = m->proto;
1371       m_key.fib_index = 0;
1372       kv.key = m_key.as_u64;
1373       kv.value = m - sm->static_mappings;
1374       if (clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 1))
1375         {
1376           nat_log_err ("static_mapping_by_external key add failed");
1377           return VNET_API_ERROR_UNSPECIFIED;
1378         }
1379
1380       m_key.fib_index = m->fib_index;
1381       for (i = 0; i < vec_len (locals); i++)
1382         {
1383           locals[i].fib_index = fib_table_find_or_create_and_lock (
1384             FIB_PROTOCOL_IP4, locals[i].vrf_id, FIB_SOURCE_PLUGIN_LOW);
1385           m_key.addr = locals[i].addr;
1386           m_key.fib_index = locals[i].fib_index;
1387           if (!out2in_only)
1388             {
1389               m_key.port = locals[i].port;
1390               kv.key = m_key.as_u64;
1391               kv.value = m - sm->static_mappings;
1392               clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 1);
1393             }
1394           locals[i].prefix = (i == 0) ? locals[i].probability :\
1395             (locals[i - 1].prefix + locals[i].probability);
1396           vec_add1 (m->locals, locals[i]);
1397           if (sm->num_workers > 1)
1398             {
1399               ip4_header_t ip = {
1400                 .src_address = locals[i].addr,
1401               };
1402               bitmap = clib_bitmap_set (
1403                 bitmap, sm->worker_in2out_cb (&ip, m->fib_index), 1);
1404             }
1405         }
1406
1407       /* Assign workers */
1408       if (sm->num_workers > 1)
1409         {
1410           clib_bitmap_foreach (i, bitmap,
1411             ({
1412                vec_add1(m->workers, i);
1413             }));
1414         }
1415     }
1416   else
1417     {
1418       if (!m)
1419         return VNET_API_ERROR_NO_SUCH_ENTRY;
1420
1421       /* Free external address port */
1422       if (!(sm->static_mapping_only || out2in_only))
1423         {
1424           for (i = 0; i < vec_len (sm->addresses); i++)
1425             {
1426               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1427                 {
1428                   a = sm->addresses + i;
1429                   switch (proto)
1430                     {
1431 #define _(N, j, n, s) \
1432                     case SNAT_PROTOCOL_##N: \
1433                       clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 0); \
1434                       if (e_port > 1024) \
1435                         { \
1436                           a->busy_##n##_ports--; \
1437                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1438                         } \
1439                       break;
1440                       foreach_snat_protocol
1441 #undef _
1442                     default:
1443                       nat_log_info ("unknown protocol");
1444                       return VNET_API_ERROR_INVALID_VALUE_2;
1445                     }
1446                   break;
1447                 }
1448             }
1449         }
1450
1451       m_key.addr = m->external_addr;
1452       m_key.port = m->external_port;
1453       m_key.protocol = m->proto;
1454       m_key.fib_index = 0;
1455       kv.key = m_key.as_u64;
1456       if (clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 0))
1457         {
1458           nat_log_err ("static_mapping_by_external key del failed");
1459           return VNET_API_ERROR_UNSPECIFIED;
1460         }
1461
1462       vec_foreach (local, m->locals)
1463         {
1464           fib_table_unlock (local->fib_index, FIB_PROTOCOL_IP4,
1465                             FIB_SOURCE_PLUGIN_LOW);
1466           m_key.addr = local->addr;
1467           if (!out2in_only)
1468             {
1469               m_key.port = local->port;
1470               m_key.fib_index = local->fib_index;
1471               kv.key = m_key.as_u64;
1472               if (clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0))
1473                 {
1474                   nat_log_err ("static_mapping_by_local key del failed");
1475                   return VNET_API_ERROR_UNSPECIFIED;
1476                 }
1477             }
1478
1479           if (sm->num_workers > 1)
1480             {
1481               ip4_header_t ip = {
1482                 .src_address = local->addr,
1483               };
1484               tsm = vec_elt_at_index (sm->per_thread_data,
1485                                       sm->worker_in2out_cb (&ip, m->fib_index));
1486             }
1487           else
1488             tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1489
1490           /* Delete sessions */
1491           u_key.addr = local->addr;
1492           u_key.fib_index = m->fib_index;
1493           kv.key = u_key.as_u64;
1494           if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1495             {
1496               u = pool_elt_at_index (tsm->users, value.value);
1497               if (u->nstaticsessions)
1498                 {
1499                   head_index = u->sessions_per_user_list_head_index;
1500                   head = pool_elt_at_index (tsm->list_pool, head_index);
1501                   elt_index = head->next;
1502                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1503                   ses_index = elt->value;
1504                   while (ses_index != ~0)
1505                     {
1506                       s =  pool_elt_at_index (tsm->sessions, ses_index);
1507                       elt = pool_elt_at_index (tsm->list_pool, elt->next);
1508                       ses_index = elt->value;
1509
1510                       if (!(is_lb_session (s)))
1511                         continue;
1512
1513                       if ((s->in2out.addr.as_u32 != local->addr.as_u32) ||
1514                           (clib_net_to_host_u16 (s->in2out.port) != local->port))
1515                         continue;
1516
1517                       nat_free_session_data (sm, s, tsm - sm->per_thread_data);
1518                       nat44_delete_session (sm, s, tsm - sm->per_thread_data);
1519                     }
1520                 }
1521             }
1522         }
1523       if (m->affinity)
1524         nat_affinity_flush_service (m->affinity_per_service_list_head_index);
1525       vec_free(m->locals);
1526       vec_free(m->tag);
1527       vec_free(m->workers);
1528
1529       pool_put (sm->static_mappings, m);
1530     }
1531
1532   return 0;
1533 }
1534
/**
 * @brief Delete an address from the NAT44 pool or the twice-NAT pool.
 *
 * @param sm NAT main structure.
 * @param addr IPv4 address to remove.
 * @param delete_sm If set, static mappings whose external address equals
 *                  addr are deleted first; otherwise the call fails when
 *                  is_snat_address_used_in_static_mapping() reports the
 *                  address as in use.
 * @param twice_nat If set operate on sm->twice_nat_addresses, otherwise
 *                  on sm->addresses.
 *
 * @returns 0 on success, VNET_API_ERROR_NO_SUCH_ENTRY if the address is
 *          not in the selected vector, VNET_API_ERROR_UNSPECIFIED if it
 *          is used by a static mapping and delete_sm is 0.
 */
1535 int
1536 snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm,
1537                   u8 twice_nat)
1538 {
1539   snat_address_t *a = 0;
1540   snat_session_t *ses;
1541   u32 *ses_to_be_removed = 0, *ses_index;
1542   snat_main_per_thread_data_t *tsm;
1543   snat_static_mapping_t *m;
1544   snat_interface_t *interface;
1545   int i;
1546   snat_address_t *addresses = twice_nat ? sm->twice_nat_addresses : sm->addresses;
1547
1548   /* Find SNAT address */
1549   for (i=0; i < vec_len (addresses); i++)
1550     {
1551       if (addresses[i].addr.as_u32 == addr.as_u32)
1552         {
1553           a = addresses + i;
1554           break;
1555         }
1556     }
1557   if (!a)
1558     return VNET_API_ERROR_NO_SUCH_ENTRY;
1559   /* From here on i is the index of the matched address in the vector */
1560   if (delete_sm)
1561     { /* NOTE(review): the delete path of snat_add_static_mapping()
            * pool_put()s m while pool_foreach is still iterating over
            * sm->static_mappings - confirm the pool iterator tolerates it. */
1562       pool_foreach (m, sm->static_mappings,
1563       ({
1564           if (m->external_addr.as_u32 == addr.as_u32)
1565             (void) snat_add_static_mapping (m->local_addr, m->external_addr,
1566                                             m->local_port, m->external_port,
1567                                             m->vrf_id, m->addr_only, ~0,
1568                                             m->proto, 0, m->twice_nat,
1569                                             m->out2in_only, m->tag);
1570       }));
1571     }
1572   else
1573     {
1574       /* Check if address is used in some static mapping */
1575       if (is_snat_address_used_in_static_mapping(sm, addr))
1576         {
1577           nat_log_notice ("address used in static mapping");
1578           return VNET_API_ERROR_UNSPECIFIED;
1579         }
1580     }
1581   /* Drop the FIB table lock if this address holds one (fib_index != ~0) */
1582   if (a->fib_index != ~0)
1583     fib_table_unlock(a->fib_index, FIB_PROTOCOL_IP4,
1584                      FIB_SOURCE_PLUGIN_LOW);
1585
1586   /* Delete sessions using address */
1587   if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
1588     {
1589       vec_foreach (tsm, sm->per_thread_data)
1590         {
1591           pool_foreach (ses, tsm->sessions, ({
1592             if (ses->out2in.addr.as_u32 == addr.as_u32)
1593               {
1594                 ses->outside_address_index = ~0;
1595                 nat_free_session_data (sm, ses, tsm - sm->per_thread_data);
1596                 vec_add1 (ses_to_be_removed, ses - tsm->sessions);
1597               }
1598           }));
1599           /* Sessions are only collected above and deleted here, after the walk */
1600           vec_foreach (ses_index, ses_to_be_removed)
1601             {
1602               ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1603               nat44_delete_session (sm, ses, tsm - sm->per_thread_data);
1604             }
1605           /* Release the index list before the next thread's pass */
1606           vec_free (ses_to_be_removed);
1607        }
1608     }
1609   /* Twice-NAT addresses are only removed from their vector; no FIB cleanup */
1610   if (twice_nat)
1611     {
1612       vec_del1 (sm->twice_nat_addresses, i);
1613       return 0;
1614     }
1615   else
1616     vec_del1 (sm->addresses, i);
1617
1618   /* Delete external address from FIB */
1619   pool_foreach (interface, sm->interfaces,
1620   ({
1621     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1622       continue;
1623     /* only the first interface that passes the check above is used */
1624     snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0);
1625     break;
1626   }));
1627   pool_foreach (interface, sm->output_feature_interfaces,
1628   ({
1629     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1630       continue;
1631
1632     snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0);
1633     break;
1634   }));
1635
1636   return 0;
1637 }
1638
1639 int snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
1640 {
1641   snat_main_t *sm = &snat_main;
1642   snat_interface_t *i;
1643   const char * feature_name, *del_feature_name;
1644   snat_address_t * ap;
1645   snat_static_mapping_t * m;
1646   snat_det_map_t * dm;
1647   nat_outside_fib_t *outside_fib;
1648   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1649                                                        sw_if_index);
1650
1651   if (sm->out2in_dpo && !is_inside)
1652     return VNET_API_ERROR_UNSUPPORTED;
1653
1654   pool_foreach (i, sm->output_feature_interfaces,
1655   ({
1656     if (i->sw_if_index == sw_if_index)
1657       return VNET_API_ERROR_VALUE_EXIST;
1658   }));
1659
1660   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
1661     feature_name = is_inside ?  "nat44-in2out-fast" : "nat44-out2in-fast";
1662   else
1663     {
1664       if (sm->num_workers > 1 && !sm->deterministic)
1665         feature_name = is_inside ?  "nat44-in2out-worker-handoff" : "nat44-out2in-worker-handoff";
1666       else if (sm->deterministic)
1667         feature_name = is_inside ?  "nat44-det-in2out" : "nat44-det-out2in";
1668       else if (sm->endpoint_dependent)
1669         feature_name = is_inside ?  "nat44-ed-in2out" : "nat44-ed-out2in";
1670       else
1671         feature_name = is_inside ?  "nat44-in2out" : "nat44-out2in";
1672     }
1673
1674   if (sm->fq_in2out_index == ~0 && !sm->deterministic && sm->num_workers > 1)
1675     sm->fq_in2out_index = vlib_frame_queue_main_init (sm->in2out_node_index,
1676                                                       NAT_FQ_NELTS);
1677
1678   if (sm->fq_out2in_index == ~0 && !sm->deterministic && sm->num_workers > 1)
1679     sm->fq_out2in_index = vlib_frame_queue_main_init (sm->out2in_node_index,
1680                                                       NAT_FQ_NELTS);
1681
1682   if (!is_inside)
1683     {
1684       vec_foreach (outside_fib, sm->outside_fibs)
1685         {
1686           if (outside_fib->fib_index == fib_index)
1687             {
1688               if (is_del)
1689                 {
1690                   outside_fib->refcount--;
1691                   if (!outside_fib->refcount)
1692                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1693                 }
1694               else
1695                 outside_fib->refcount++;
1696               goto feature_set;
1697             }
1698         }
1699       if (!is_del)
1700         {
1701           vec_add2 (sm->outside_fibs, outside_fib, 1);
1702           outside_fib->refcount = 1;
1703           outside_fib->fib_index = fib_index;
1704         }
1705     }
1706 feature_set:
1707   pool_foreach (i, sm->interfaces,
1708   ({
1709     if (i->sw_if_index == sw_if_index)
1710       {
1711         if (is_del)
1712           {
1713             if (nat_interface_is_inside(i) && nat_interface_is_outside(i))
1714               {
1715                 if (is_inside)
1716                   i->flags &= ~NAT_INTERFACE_FLAG_IS_INSIDE;
1717                 else
1718                   i->flags &= ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
1719
1720                 if (sm->num_workers > 1 && !sm->deterministic)
1721                   {
1722                     del_feature_name = "nat44-handoff-classify";
1723                     feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
1724                                                  "nat44-out2in-worker-handoff";
1725                   }
1726                 else if (sm->deterministic)
1727                   {
1728                     del_feature_name = "nat44-det-classify";
1729                     feature_name = !is_inside ?  "nat44-det-in2out" :
1730                                                  "nat44-det-out2in";
1731                   }
1732                 else if (sm->endpoint_dependent)
1733                   {
1734                     del_feature_name = "nat44-ed-classify";
1735                     feature_name = !is_inside ?  "nat44-ed-in2out" :
1736                                                  "nat44-ed-out2in";
1737                   }
1738                 else
1739                   {
1740                     del_feature_name = "nat44-classify";
1741                     feature_name = !is_inside ?  "nat44-in2out" : "nat44-out2in";
1742                   }
1743
1744                 vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1745                                              sw_if_index, 0, 0, 0);
1746                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
1747                                              sw_if_index, 1, 0, 0);
1748                 if (!is_inside)
1749                   {
1750                     if (sm->endpoint_dependent)
1751                       vnet_feature_enable_disable ("ip4-local",
1752                                                    "nat44-ed-hairpinning",
1753                                                    sw_if_index, 1, 0, 0);
1754                     else if (!sm->deterministic)
1755                       vnet_feature_enable_disable ("ip4-local",
1756                                                    "nat44-hairpinning",
1757                                                    sw_if_index, 1, 0, 0);
1758                   }
1759               }
1760             else
1761               {
1762                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
1763                                              sw_if_index, 0, 0, 0);
1764                 pool_put (sm->interfaces, i);
1765                 if (is_inside)
1766                   {
1767                     if (sm->endpoint_dependent)
1768                       vnet_feature_enable_disable ("ip4-local",
1769                                                    "nat44-ed-hairpinning",
1770                                                    sw_if_index, 0, 0, 0);
1771                     else if (!sm->deterministic)
1772                       vnet_feature_enable_disable ("ip4-local",
1773                                                    "nat44-hairpinning",
1774                                                    sw_if_index, 0, 0, 0);
1775                   }
1776               }
1777           }
1778         else
1779           {
1780             if ((nat_interface_is_inside(i) && is_inside) ||
1781                 (nat_interface_is_outside(i) && !is_inside))
1782               return 0;
1783
1784             if (sm->num_workers > 1 && !sm->deterministic)
1785               {
1786                 del_feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
1787                                                  "nat44-out2in-worker-handoff";
1788                 feature_name = "nat44-handoff-classify";
1789               }
1790             else if (sm->deterministic)
1791               {
1792                 del_feature_name = !is_inside ?  "nat44-det-in2out" :
1793                                                  "nat44-det-out2in";
1794                 feature_name = "nat44-det-classify";
1795               }
1796             else if (sm->endpoint_dependent)
1797               {
1798                 del_feature_name = !is_inside ?  "nat44-ed-in2out" :
1799                                                  "nat44-ed-out2in";
1800                 feature_name = "nat44-ed-classify";
1801               }
1802             else
1803               {
1804                 del_feature_name = !is_inside ?  "nat44-in2out" : "nat44-out2in";
1805                 feature_name = "nat44-classify";
1806               }
1807
1808             vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1809                                          sw_if_index, 0, 0, 0);
1810             vnet_feature_enable_disable ("ip4-unicast", feature_name,
1811                                          sw_if_index, 1, 0, 0);
1812             if (!is_inside)
1813               {
1814                 if (sm->endpoint_dependent)
1815                   vnet_feature_enable_disable ("ip4-local", "nat44-ed-hairpinning",
1816                                                sw_if_index, 0, 0, 0);
1817                 else if (!sm->deterministic)
1818                   vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning",
1819                                                sw_if_index, 0, 0, 0);
1820               }
1821             goto set_flags;
1822           }
1823
1824         goto fib;
1825       }
1826   }));
1827
1828   if (is_del)
1829     return VNET_API_ERROR_NO_SUCH_ENTRY;
1830
1831   pool_get (sm->interfaces, i);
1832   i->sw_if_index = sw_if_index;
1833   i->flags = 0;
1834   vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1, 0, 0);
1835
1836   if (is_inside && !sm->out2in_dpo)
1837     {
1838       if (sm->endpoint_dependent)
1839         vnet_feature_enable_disable ("ip4-local", "nat44-ed-hairpinning",
1840                                      sw_if_index, 1, 0, 0);
1841       else if (!sm->deterministic)
1842         vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning",
1843                                      sw_if_index, 1, 0, 0);
1844     }
1845
1846 set_flags:
1847   if (is_inside)
1848     {
1849       i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
1850       return 0;
1851     }
1852   else
1853     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
1854
1855   /* Add/delete external addresses to FIB */
1856 fib:
1857   vec_foreach (ap, sm->addresses)
1858     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
1859
1860   pool_foreach (m, sm->static_mappings,
1861   ({
1862     if (!(m->addr_only) || (m->local_addr.as_u32 == m->external_addr.as_u32))
1863       continue;
1864
1865     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
1866   }));
1867
1868   pool_foreach (dm, sm->det_maps,
1869   ({
1870     snat_add_del_addr_to_fib(&dm->out_addr, dm->out_plen, sw_if_index, !is_del);
1871   }));
1872
1873   return 0;
1874 }
1875
1876 int snat_interface_add_del_output_feature (u32 sw_if_index,
1877                                            u8 is_inside,
1878                                            int is_del)
1879 {
1880   snat_main_t *sm = &snat_main;
1881   snat_interface_t *i;
1882   snat_address_t * ap;
1883   snat_static_mapping_t * m;
1884
1885   if (sm->deterministic ||
1886       (sm->static_mapping_only && !(sm->static_mapping_connection_tracking)))
1887     return VNET_API_ERROR_UNSUPPORTED;
1888
1889   pool_foreach (i, sm->interfaces,
1890   ({
1891     if (i->sw_if_index == sw_if_index)
1892       return VNET_API_ERROR_VALUE_EXIST;
1893   }));
1894
1895   if (is_inside)
1896     {
1897       if (sm->endpoint_dependent)
1898         {
1899           vnet_feature_enable_disable ("ip4-unicast", "nat44-ed-hairpin-dst",
1900                                        sw_if_index, !is_del, 0, 0);
1901           vnet_feature_enable_disable ("ip4-output", "nat44-ed-hairpin-src",
1902                                        sw_if_index, !is_del, 0, 0);
1903         }
1904       else
1905         {
1906           vnet_feature_enable_disable ("ip4-unicast", "nat44-hairpin-dst",
1907                                        sw_if_index, !is_del, 0, 0);
1908           vnet_feature_enable_disable ("ip4-output", "nat44-hairpin-src",
1909                                        sw_if_index, !is_del, 0, 0);
1910         }
1911       goto fq;
1912     }
1913
1914   if (sm->num_workers > 1)
1915     {
1916       vnet_feature_enable_disable ("ip4-unicast",
1917                                    "nat44-out2in-worker-handoff",
1918                                    sw_if_index, !is_del, 0, 0);
1919       vnet_feature_enable_disable ("ip4-output",
1920                                    "nat44-in2out-output-worker-handoff",
1921                                    sw_if_index, !is_del, 0, 0);
1922     }
1923   else
1924     {
1925       if (sm->endpoint_dependent)
1926         {
1927           vnet_feature_enable_disable ("ip4-unicast", "nat44-ed-out2in",
1928                                        sw_if_index, !is_del, 0, 0);
1929           vnet_feature_enable_disable ("ip4-output", "nat44-ed-in2out-output",
1930                                        sw_if_index, !is_del, 0, 0);
1931         }
1932       else
1933         {
1934           vnet_feature_enable_disable ("ip4-unicast", "nat44-out2in",
1935                                        sw_if_index, !is_del, 0, 0);
1936           vnet_feature_enable_disable ("ip4-output", "nat44-in2out-output",
1937                                        sw_if_index, !is_del, 0, 0);
1938         }
1939     }
1940
1941 fq:
1942   if (sm->fq_in2out_output_index == ~0 && sm->num_workers > 1)
1943     sm->fq_in2out_output_index =
1944       vlib_frame_queue_main_init (sm->in2out_output_node_index, 0);
1945
1946   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
1947     sm->fq_out2in_index = vlib_frame_queue_main_init (sm->out2in_node_index, 0);
1948
1949   pool_foreach (i, sm->output_feature_interfaces,
1950   ({
1951     if (i->sw_if_index == sw_if_index)
1952       {
1953         if (is_del)
1954           pool_put (sm->output_feature_interfaces, i);
1955         else
1956           return VNET_API_ERROR_VALUE_EXIST;
1957
1958         goto fib;
1959       }
1960   }));
1961
1962   if (is_del)
1963     return VNET_API_ERROR_NO_SUCH_ENTRY;
1964
1965   pool_get (sm->output_feature_interfaces, i);
1966   i->sw_if_index = sw_if_index;
1967   i->flags = 0;
1968   if (is_inside)
1969     i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
1970   else
1971     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
1972
1973   /* Add/delete external addresses to FIB */
1974 fib:
1975   if (is_inside)
1976     return 0;
1977
1978   vec_foreach (ap, sm->addresses)
1979     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
1980
1981   pool_foreach (m, sm->static_mappings,
1982   ({
1983     if (!(m->addr_only)  || (m->local_addr.as_u32 == m->external_addr.as_u32))
1984       continue;
1985
1986     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
1987   }));
1988
1989   return 0;
1990 }
1991
1992 int snat_set_workers (uword * bitmap)
1993 {
1994   snat_main_t *sm = &snat_main;
1995   int i, j = 0;
1996
1997   if (sm->num_workers < 2)
1998     return VNET_API_ERROR_FEATURE_DISABLED;
1999
2000   if (clib_bitmap_last_set (bitmap) >= sm->num_workers)
2001     return VNET_API_ERROR_INVALID_WORKER;
2002
2003   vec_free (sm->workers);
2004   clib_bitmap_foreach (i, bitmap,
2005     ({
2006       vec_add1(sm->workers, i);
2007       sm->per_thread_data[sm->first_worker_index + i].snat_thread_index = j;
2008       j++;
2009     }));
2010
2011   sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
2012   sm->num_snat_thread = _vec_len (sm->workers);
2013
2014   return 0;
2015 }
2016
2017
2018 static void
2019 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
2020                                        uword opaque,
2021                                        u32 sw_if_index,
2022                                        ip4_address_t * address,
2023                                        u32 address_length,
2024                                        u32 if_address_index,
2025                                        u32 is_delete);
2026
2027 static void
2028 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
2029                                  uword opaque,
2030                                  u32 sw_if_index,
2031                                  ip4_address_t * address,
2032                                  u32 address_length,
2033                                  u32 if_address_index,
2034                                  u32 is_delete);
2035
2036 static int
2037 nat_alloc_addr_and_port_default (snat_address_t * addresses,
2038                                  u32 fib_index,
2039                                  u32 thread_index,
2040                                  snat_session_key_t * k,
2041                                  u32 * address_indexp,
2042                                  u16 port_per_thread,
2043                                  u32 snat_thread_index);
2044
2045 static clib_error_t * snat_init (vlib_main_t * vm)
2046 {
2047   snat_main_t * sm = &snat_main;
2048   clib_error_t * error = 0;
2049   ip4_main_t * im = &ip4_main;
2050   ip_lookup_main_t * lm = &im->lookup_main;
2051   uword *p;
2052   vlib_thread_registration_t *tr;
2053   vlib_thread_main_t *tm = vlib_get_thread_main ();
2054   uword *bitmap = 0;
2055   u32 i;
2056   ip4_add_del_interface_address_callback_t cb4;
2057   vlib_node_t * error_drop_node;
2058
2059   sm->vlib_main = vm;
2060   sm->vnet_main = vnet_get_main();
2061   sm->ip4_main = im;
2062   sm->ip4_lookup_main = lm;
2063   sm->api_main = &api_main;
2064   sm->first_worker_index = 0;
2065   sm->num_workers = 0;
2066   sm->num_snat_thread = 1;
2067   sm->workers = 0;
2068   sm->port_per_thread = 0xffff - 1024;
2069   sm->fq_in2out_index = ~0;
2070   sm->fq_out2in_index = ~0;
2071   sm->udp_timeout = SNAT_UDP_TIMEOUT;
2072   sm->tcp_established_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;
2073   sm->tcp_transitory_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
2074   sm->icmp_timeout = SNAT_ICMP_TIMEOUT;
2075   sm->alloc_addr_and_port = nat_alloc_addr_and_port_default;
2076   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_DEFAULT;
2077   sm->forwarding_enabled = 0;
2078   sm->log_class = vlib_log_register_class ("nat", 0);
2079   error_drop_node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
2080   sm->error_node_index = error_drop_node->index;
2081   sm->mss_clamping = 0;
2082
2083   p = hash_get_mem (tm->thread_registrations_by_name, "workers");
2084   if (p)
2085     {
2086       tr = (vlib_thread_registration_t *) p[0];
2087       if (tr)
2088         {
2089           sm->num_workers = tr->count;
2090           sm->first_worker_index = tr->first_index;
2091         }
2092     }
2093
2094   vec_validate (sm->per_thread_data, tm->n_vlib_mains - 1);
2095
2096   /* Use all available workers by default */
2097   if (sm->num_workers > 1)
2098     {
2099       for (i=0; i < sm->num_workers; i++)
2100         bitmap = clib_bitmap_set (bitmap, i, 1);
2101       snat_set_workers(bitmap);
2102       clib_bitmap_free (bitmap);
2103     }
2104   else
2105     {
2106       sm->per_thread_data[0].snat_thread_index = 0;
2107     }
2108
2109   error = snat_api_init(vm, sm);
2110   if (error)
2111     return error;
2112
2113   /* Set up the interface address add/del callback */
2114   cb4.function = snat_ip4_add_del_interface_address_cb;
2115   cb4.function_opaque = 0;
2116
2117   vec_add1 (im->add_del_interface_address_callbacks, cb4);
2118
2119   cb4.function = nat_ip4_add_del_addr_only_sm_cb;
2120   cb4.function_opaque = 0;
2121
2122   vec_add1 (im->add_del_interface_address_callbacks, cb4);
2123
2124   nat_dpo_module_init ();
2125
2126   /* Init IPFIX logging */
2127   snat_ipfix_logging_init(vm);
2128
2129   /* Init NAT64 */
2130   error = nat64_init(vm);
2131   if (error)
2132     return error;
2133
2134   dslite_init(vm);
2135
2136   nat66_init();
2137
2138   /* Init virtual fragmenentation reassembly */
2139   return nat_reass_init(vm);
2140 }
2141
2142 VLIB_INIT_FUNCTION (snat_init);
2143
2144 void snat_free_outside_address_and_port (snat_address_t * addresses,
2145                                          u32 thread_index,
2146                                          snat_session_key_t * k)
2147 {
2148   snat_address_t *a;
2149   u32 address_index;
2150   u16 port_host_byte_order = clib_net_to_host_u16 (k->port);
2151
2152   for (address_index = 0; address_index < vec_len (addresses); address_index++)
2153     {
2154       if (addresses[address_index].addr.as_u32 == k->addr.as_u32)
2155         break;
2156     }
2157
2158   ASSERT (address_index < vec_len (addresses));
2159
2160   a = addresses + address_index;
2161
2162   switch (k->protocol)
2163     {
2164 #define _(N, i, n, s) \
2165     case SNAT_PROTOCOL_##N: \
2166       ASSERT (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \
2167         port_host_byte_order) == 1); \
2168       clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, \
2169         port_host_byte_order, 0); \
2170       a->busy_##n##_ports--; \
2171       a->busy_##n##_ports_per_thread[thread_index]--; \
2172       break;
2173       foreach_snat_protocol
2174 #undef _
2175     default:
2176       nat_log_info ("unknown protocol");
2177       return;
2178     }
2179 }
2180
2181 /**
2182  * @brief Match NAT44 static mapping.
2183  *
2184  * @param sm          NAT main.
2185  * @param match       Address and port to match.
2186  * @param mapping     External or local address and port of the matched mapping.
2187  * @param by_external If 0 match by local address otherwise match by external
2188  *                    address.
2189  * @param is_addr_only If matched mapping is address only
2190  * @param twice_nat If matched mapping is twice NAT.
2191  * @param lb If matched mapping is load-balanced.
2192  *
2193  * @returns 0 if match found otherwise 1.
2194  */
2195 int snat_static_mapping_match (snat_main_t * sm,
2196                                snat_session_key_t match,
2197                                snat_session_key_t * mapping,
2198                                u8 by_external,
2199                                u8 *is_addr_only,
2200                                twice_nat_type_t *twice_nat,
2201                                lb_nat_type_t *lb,
2202                                ip4_address_t * ext_host_addr)
2203 {
2204   clib_bihash_kv_8_8_t kv, value;
2205   snat_static_mapping_t *m;
2206   snat_session_key_t m_key;
2207   clib_bihash_8_8_t *mapping_hash = &sm->static_mapping_by_local;
2208   u32 rand, lo = 0, hi, mid;
2209   u8 backend_index;
2210
2211   m_key.fib_index = match.fib_index;
2212   if (by_external)
2213     {
2214       mapping_hash = &sm->static_mapping_by_external;
2215       m_key.fib_index = 0;
2216     }
2217
2218   m_key.addr = match.addr;
2219   m_key.port = clib_net_to_host_u16 (match.port);
2220   m_key.protocol = match.protocol;
2221
2222   kv.key = m_key.as_u64;
2223
2224   if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2225     {
2226       /* Try address only mapping */
2227       m_key.port = 0;
2228       m_key.protocol = 0;
2229       kv.key = m_key.as_u64;
2230       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2231         return 1;
2232     }
2233
2234   m = pool_elt_at_index (sm->static_mappings, value.value);
2235
2236   if (by_external)
2237     {
2238       if (vec_len (m->locals))
2239         {
2240           if (PREDICT_FALSE(lb != 0))
2241             *lb = m->affinity ? AFFINITY_LB_NAT : LB_NAT;
2242           if (m->affinity)
2243             {
2244               if (nat_affinity_find_and_lock (ext_host_addr[0], match.addr,
2245                   match.protocol, match.port, &backend_index))
2246                 goto get_local;
2247
2248               mapping->addr = m->locals[backend_index].addr;
2249               mapping->port = clib_host_to_net_u16 (m->locals[backend_index].port);
2250               mapping->fib_index = m->locals[backend_index].fib_index;
2251               goto end;
2252             }
2253 get_local:
2254           hi = vec_len (m->locals) - 1;
2255           rand = 1 + (random_u32 (&sm->random_seed) % m->locals[hi].prefix);
2256           while (lo < hi)
2257             {
2258               mid = ((hi - lo) >> 1) + lo;
2259               (rand > m->locals[mid].prefix) ? (lo = mid + 1) : (hi = mid);
2260             }
2261           if (!(m->locals[lo].prefix >= rand))
2262             return 1;
2263           if (PREDICT_FALSE (sm->num_workers > 1))
2264             {
2265               ip4_header_t ip = {
2266                 .src_address = m->locals[lo].addr,
2267               };
2268               if (sm->worker_in2out_cb (&ip, m->fib_index) != vlib_get_thread_index ())
2269                 goto get_local;
2270             }
2271           mapping->addr = m->locals[lo].addr;
2272           mapping->port = clib_host_to_net_u16 (m->locals[lo].port);
2273           mapping->fib_index = m->locals[lo].fib_index;
2274           if (m->affinity)
2275             {
2276               if (nat_affinity_create_and_lock (ext_host_addr[0], match.addr,
2277                   match.protocol, match.port, lo, m->affinity,
2278                   m->affinity_per_service_list_head_index))
2279                 nat_log_info ("create affinity record failed");
2280             }
2281         }
2282       else
2283         {
2284           if (PREDICT_FALSE(lb != 0))
2285             *lb = NO_LB_NAT;
2286           mapping->fib_index = m->fib_index;
2287           mapping->addr = m->local_addr;
2288           /* Address only mapping doesn't change port */
2289           mapping->port = m->addr_only ? match.port
2290             : clib_host_to_net_u16 (m->local_port);
2291         }
2292       mapping->protocol = m->proto;
2293     }
2294   else
2295     {
2296       mapping->addr = m->external_addr;
2297       /* Address only mapping doesn't change port */
2298       mapping->port = m->addr_only ? match.port
2299         : clib_host_to_net_u16 (m->external_port);
2300       mapping->fib_index = sm->outside_fib_index;
2301     }
2302
2303 end:
2304   if (PREDICT_FALSE(is_addr_only != 0))
2305     *is_addr_only = m->addr_only;
2306
2307   if (PREDICT_FALSE(twice_nat != 0))
2308     *twice_nat = m->twice_nat;
2309
2310   return 0;
2311 }
2312
2313 static_always_inline u16
2314 snat_random_port (u16 min, u16 max)
2315 {
2316   snat_main_t *sm = &snat_main;
2317   return min + random_u32 (&sm->random_seed) /
2318     (random_u32_max() / (max - min + 1) + 1);
2319 }
2320
2321 int
2322 snat_alloc_outside_address_and_port (snat_address_t * addresses,
2323                                      u32 fib_index,
2324                                      u32 thread_index,
2325                                      snat_session_key_t * k,
2326                                      u32 * address_indexp,
2327                                      u16 port_per_thread,
2328                                      u32 snat_thread_index)
2329 {
2330   snat_main_t *sm = &snat_main;
2331
2332   return sm->alloc_addr_and_port(addresses, fib_index, thread_index, k,
2333                                  address_indexp, port_per_thread,
2334                                  snat_thread_index);
2335 }
2336
2337 static int
2338 nat_alloc_addr_and_port_default (snat_address_t * addresses,
2339                                  u32 fib_index,
2340                                  u32 thread_index,
2341                                  snat_session_key_t * k,
2342                                  u32 * address_indexp,
2343                                  u16 port_per_thread,
2344                                  u32 snat_thread_index)
2345 {
2346   int i, gi = 0;
2347   snat_address_t *a, *ga = 0;
2348   u32 portnum;
2349
2350   for (i = 0; i < vec_len (addresses); i++)
2351     {
2352       a = addresses + i;
2353       switch (k->protocol)
2354         {
2355 #define _(N, j, n, s) \
2356         case SNAT_PROTOCOL_##N: \
2357           if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \
2358             { \
2359               if (a->fib_index == fib_index) \
2360                 { \
2361                   while (1) \
2362                     { \
2363                       portnum = (port_per_thread * \
2364                         snat_thread_index) + \
2365                         snat_random_port(1, port_per_thread) + 1024; \
2366                       if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, portnum)) \
2367                         continue; \
2368                       clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, portnum, 1); \
2369                       a->busy_##n##_ports_per_thread[thread_index]++; \
2370                       a->busy_##n##_ports++; \
2371                       k->addr = a->addr; \
2372                       k->port = clib_host_to_net_u16(portnum); \
2373                       *address_indexp = i; \
2374                       return 0; \
2375                     } \
2376                 } \
2377               else if (a->fib_index == ~0) \
2378                 { \
2379                   ga = a; \
2380                   gi = i; \
2381                 } \
2382             } \
2383           break;
2384           foreach_snat_protocol
2385 #undef _
2386         default:
2387           nat_log_info ("unknown protocol");
2388           return 1;
2389         }
2390
2391     }
2392
2393   if (ga)
2394     {
2395       a = ga;
2396       switch (k->protocol)
2397         {
2398 #define _(N, j, n, s) \
2399         case SNAT_PROTOCOL_##N: \
2400           while (1) \
2401             { \
2402               portnum = (port_per_thread * \
2403                 snat_thread_index) + \
2404                 snat_random_port(1, port_per_thread) + 1024; \
2405               if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, portnum)) \
2406                 continue; \
2407               clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, portnum, 1); \
2408               a->busy_##n##_ports_per_thread[thread_index]++; \
2409               a->busy_##n##_ports++; \
2410               k->addr = a->addr; \
2411               k->port = clib_host_to_net_u16(portnum); \
2412               *address_indexp = gi; \
2413               return 0; \
2414             }
2415           break;
2416           foreach_snat_protocol
2417 #undef _
2418         default:
2419           nat_log_info ("unknown protocol");
2420           return 1;
2421         }
2422     }
2423
2424   /* Totally out of translations to use... */
2425   snat_ipfix_logging_addresses_exhausted(0);
2426   return 1;
2427 }
2428
2429 static int
2430 nat_alloc_addr_and_port_mape (snat_address_t * addresses,
2431                               u32 fib_index,
2432                               u32 thread_index,
2433                               snat_session_key_t * k,
2434                               u32 * address_indexp,
2435                               u16 port_per_thread,
2436                               u32 snat_thread_index)
2437 {
2438   snat_main_t *sm = &snat_main;
2439   snat_address_t *a = addresses;
2440   u16 m, ports, portnum, A, j;
2441   m = 16 - (sm->psid_offset + sm->psid_length);
2442   ports = (1 << (16 - sm->psid_length)) - (1 << m);
2443
2444   if (!vec_len (addresses))
2445     goto exhausted;
2446
2447   switch (k->protocol)
2448     {
2449 #define _(N, i, n, s) \
2450     case SNAT_PROTOCOL_##N: \
2451       if (a->busy_##n##_ports < ports) \
2452         { \
2453           while (1) \
2454             { \
2455               A = snat_random_port(1, pow2_mask(sm->psid_offset)); \
2456               j = snat_random_port(0, pow2_mask(m)); \
2457               portnum = A | (sm->psid << sm->psid_offset) | (j << (16 - m)); \
2458               if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, portnum)) \
2459                 continue; \
2460               clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, portnum, 1); \
2461               a->busy_##n##_ports++; \
2462               k->addr = a->addr; \
2463               k->port = clib_host_to_net_u16 (portnum); \
2464               *address_indexp = i; \
2465               return 0; \
2466             } \
2467         } \
2468       break;
2469       foreach_snat_protocol
2470 #undef _
2471     default:
2472       nat_log_info ("unknown protocol");
2473       return 1;
2474     }
2475
2476 exhausted:
2477   /* Totally out of translations to use... */
2478   snat_ipfix_logging_addresses_exhausted(0);
2479   return 1;
2480 }
2481
2482 static int
2483 nat_alloc_addr_and_port_range (snat_address_t * addresses,
2484                                u32 fib_index,
2485                                u32 thread_index,
2486                                snat_session_key_t * k,
2487                                u32 * address_indexp,
2488                                u16 port_per_thread,
2489                                u32 snat_thread_index)
2490 {
2491   snat_main_t *sm = &snat_main;
2492   snat_address_t *a = addresses;
2493   u16 portnum, ports;
2494
2495   ports = sm->end_port - sm->start_port + 1;
2496
2497   if (!vec_len (addresses))
2498     goto exhausted;
2499
2500   switch (k->protocol)
2501     {
2502 #define _(N, i, n, s) \
2503     case SNAT_PROTOCOL_##N: \
2504       if (a->busy_##n##_ports < ports) \
2505         { \
2506           while (1) \
2507             { \
2508               portnum = snat_random_port(sm->start_port, sm->end_port); \
2509               if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, portnum)) \
2510                 continue; \
2511               clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, portnum, 1); \
2512               a->busy_##n##_ports++; \
2513               k->addr = a->addr; \
2514               k->port = clib_host_to_net_u16 (portnum); \
2515               *address_indexp = i; \
2516               return 0; \
2517             } \
2518         } \
2519       break;
2520       foreach_snat_protocol
2521 #undef _
2522     default:
2523       nat_log_info ("unknown protocol");
2524       return 1;
2525     }
2526
2527 exhausted:
2528   /* Totally out of translations to use... */
2529   snat_ipfix_logging_addresses_exhausted(0);
2530   return 1;
2531 }
2532
2533 void
2534 nat44_add_del_address_dpo (ip4_address_t addr, u8 is_add)
2535 {
2536   dpo_id_t dpo_v4 = DPO_INVALID;
2537   fib_prefix_t pfx = {
2538     .fp_proto = FIB_PROTOCOL_IP4,
2539     .fp_len = 32,
2540     .fp_addr.ip4.as_u32 = addr.as_u32,
2541   };
2542
2543   if (is_add)
2544     {
2545       nat_dpo_create (DPO_PROTO_IP4, 0, &dpo_v4);
2546       fib_table_entry_special_dpo_add (0, &pfx, FIB_SOURCE_PLUGIN_HI,
2547                                        FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v4);
2548       dpo_reset (&dpo_v4);
2549     }
2550   else
2551     {
2552       fib_table_entry_special_remove (0, &pfx, FIB_SOURCE_PLUGIN_HI);
2553     }
2554 }
2555
2556 uword
2557 unformat_snat_protocol (unformat_input_t * input, va_list * args)
2558 {
2559   u32 *r = va_arg (*args, u32 *);
2560
2561   if (0);
2562 #define _(N, i, n, s) else if (unformat (input, s)) *r = SNAT_PROTOCOL_##N;
2563   foreach_snat_protocol
2564 #undef _
2565   else
2566     return 0;
2567   return 1;
2568 }
2569
2570 u8 *
2571 format_snat_protocol (u8 * s, va_list * args)
2572 {
2573   u32 i = va_arg (*args, u32);
2574   u8 *t = 0;
2575
2576   switch (i)
2577     {
2578 #define _(N, j, n, str) case SNAT_PROTOCOL_##N: t = (u8 *) str; break;
2579       foreach_snat_protocol
2580 #undef _
2581     default:
2582       s = format (s, "unknown");
2583       return s;
2584     }
2585   s = format (s, "%s", t);
2586   return s;
2587 }
2588
2589 u8 *
2590 format_nat_addr_and_port_alloc_alg (u8 * s, va_list * args)
2591 {
2592   u32 i = va_arg (*args, u32);
2593   u8 *t = 0;
2594
2595   switch (i)
2596     {
2597 #define _(v, N, s) case NAT_ADDR_AND_PORT_ALLOC_ALG_##N: t = (u8 *) s; break;
2598       foreach_nat_addr_and_port_alloc_alg
2599 #undef _
2600     default:
2601       s = format (s, "unknown");
2602       return s;
2603     }
2604   s = format (s, "%s", t);
2605   return s;
2606 }
2607
2608 u8 * format_snat_key (u8 * s, va_list * args);
2609 u8 * format_static_mapping_key (u8 * s, va_list * args);
2610
2611 u8 *
2612 format_session_kvp (u8 * s, va_list * args)
2613 {
2614   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
2615   snat_session_key_t k;
2616
2617   k.as_u64 = v->key;
2618
2619   s = format (s, "%U session-index %llu", format_snat_key, &k, v->value);
2620
2621   return s;
2622 }
2623
2624 u8 *
2625 format_static_mapping_kvp (u8 * s, va_list * args)
2626 {
2627   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
2628   snat_session_key_t k;
2629
2630   k.as_u64 = v->key;
2631
2632   s = format (s, "%U static-mapping-index %llu",
2633               format_static_mapping_key, &k, v->value);
2634
2635   return s;
2636 }
2637
2638 u8 *
2639 format_user_kvp (u8 * s, va_list * args)
2640 {
2641   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
2642   snat_user_key_t k;
2643
2644   k.as_u64 = v->key;
2645
2646   s = format (s, "%U fib %d user-index %llu", format_ip4_address, &k.addr,
2647               k.fib_index, v->value);
2648
2649   return s;
2650 }
2651
2652 u8 *
2653 format_ed_session_kvp (u8 * s, va_list * args)
2654 {
2655   clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
2656   nat_ed_ses_key_t k;
2657
2658   k.as_u64[0] = v->key[0];
2659   k.as_u64[1] = v->key[1];
2660
2661   s = format (s, "local %U:%d remote %U:%d proto %U fib %d session-index %llu",
2662               format_ip4_address, &k.l_addr, clib_net_to_host_u16 (k.l_port),
2663               format_ip4_address, &k.r_addr, clib_net_to_host_u16 (k.r_port),
2664               format_ip_protocol, k.proto, k.fib_index, v->value);
2665
2666   return s;
2667 }
2668
2669 static u32
2670 snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0)
2671 {
2672   snat_main_t *sm = &snat_main;
2673   u32 next_worker_index = 0;
2674   u32 hash;
2675
2676   next_worker_index = sm->first_worker_index;
2677   hash = ip0->src_address.as_u32 + (ip0->src_address.as_u32 >> 8) +
2678          (ip0->src_address.as_u32 >> 16) + (ip0->src_address.as_u32 >>24);
2679
2680   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
2681     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
2682   else
2683     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
2684
2685   return next_worker_index;
2686 }
2687
2688 static u32
2689 snat_get_worker_out2in_cb (ip4_header_t * ip0, u32 rx_fib_index0)
2690 {
2691   snat_main_t *sm = &snat_main;
2692   udp_header_t *udp;
2693   u16 port;
2694   snat_session_key_t m_key;
2695   clib_bihash_kv_8_8_t kv, value;
2696   snat_static_mapping_t *m;
2697   u32 proto;
2698   u32 next_worker_index = 0;
2699
2700   /* first try static mappings without port */
2701   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
2702     {
2703       m_key.addr = ip0->dst_address;
2704       m_key.port = 0;
2705       m_key.protocol = 0;
2706       m_key.fib_index = rx_fib_index0;
2707       kv.key = m_key.as_u64;
2708       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
2709         {
2710           m = pool_elt_at_index (sm->static_mappings, value.value);
2711           return m->workers[0];
2712         }
2713     }
2714
2715   proto = ip_proto_to_snat_proto (ip0->protocol);
2716   udp = ip4_next_header (ip0);
2717   port = udp->dst_port;
2718
2719   if (PREDICT_FALSE (ip4_is_fragment (ip0)))
2720     {
2721       if (PREDICT_FALSE (nat_reass_is_drop_frag (0)))
2722         return vlib_get_thread_index ();
2723
2724       if (PREDICT_TRUE (!ip4_is_first_fragment (ip0)))
2725         {
2726           nat_reass_ip4_t *reass;
2727
2728           reass = nat_ip4_reass_find (ip0->src_address, ip0->dst_address,
2729                                       ip0->fragment_id, ip0->protocol);
2730
2731           if (reass && (reass->thread_index != (u32) ~ 0))
2732             return reass->thread_index;
2733           else
2734             return vlib_get_thread_index ();
2735         }
2736     }
2737
2738   /* unknown protocol */
2739   if (PREDICT_FALSE (proto == ~0))
2740     {
2741       /* use current thread */
2742       return vlib_get_thread_index ();
2743     }
2744
2745   if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_ICMP))
2746     {
2747       icmp46_header_t * icmp = (icmp46_header_t *) udp;
2748       icmp_echo_header_t *echo = (icmp_echo_header_t *)(icmp + 1);
2749       if (!icmp_is_error_message (icmp))
2750         port = echo->identifier;
2751       else
2752         {
2753           ip4_header_t *inner_ip = (ip4_header_t *)(echo + 1);
2754           proto = ip_proto_to_snat_proto (inner_ip->protocol);
2755           void *l4_header = ip4_next_header (inner_ip);
2756           switch (proto)
2757             {
2758             case SNAT_PROTOCOL_ICMP:
2759               icmp = (icmp46_header_t*)l4_header;
2760               echo = (icmp_echo_header_t *)(icmp + 1);
2761               port = echo->identifier;
2762               break;
2763             case SNAT_PROTOCOL_UDP:
2764             case SNAT_PROTOCOL_TCP:
2765               port = ((tcp_udp_header_t*)l4_header)->src_port;
2766               break;
2767             default:
2768               return vlib_get_thread_index ();
2769             }
2770         }
2771     }
2772
2773   /* try static mappings with port */
2774   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
2775     {
2776       m_key.addr = ip0->dst_address;
2777       m_key.port = clib_net_to_host_u16 (port);
2778       m_key.protocol = proto;
2779       m_key.fib_index = rx_fib_index0;
2780       kv.key = m_key.as_u64;
2781       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
2782         {
2783           m = pool_elt_at_index (sm->static_mappings, value.value);
2784           return m->workers[0];
2785         }
2786     }
2787
2788   /* worker by outside port */
2789   next_worker_index = sm->first_worker_index;
2790   next_worker_index +=
2791     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
2792   return next_worker_index;
2793 }
2794
2795 static u32
2796 nat44_ed_get_worker_out2in_cb (ip4_header_t * ip, u32 rx_fib_index)
2797 {
2798   snat_main_t *sm = &snat_main;
2799   clib_bihash_kv_8_8_t kv, value;
2800   u32 proto, next_worker_index = 0;
2801   udp_header_t *udp;
2802   u16 port;
2803   snat_static_mapping_t *m;
2804   u32 hash;
2805
2806   /* first try static mappings without port */
2807   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
2808     {
2809       make_sm_kv (&kv, &ip->dst_address, 0, rx_fib_index, 0);
2810       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
2811         {
2812           m = pool_elt_at_index (sm->static_mappings, value.value);
2813           return m->workers[0];
2814         }
2815     }
2816
2817   proto = ip_proto_to_snat_proto (ip->protocol);
2818
2819   /* unknown protocol */
2820   if (PREDICT_FALSE (proto == ~0))
2821     {
2822       /* use current thread */
2823       return vlib_get_thread_index ();
2824     }
2825
2826   udp = ip4_next_header (ip);
2827   port = udp->dst_port;
2828
2829   if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
2830     {
2831       icmp46_header_t * icmp = (icmp46_header_t *) udp;
2832       icmp_echo_header_t *echo = (icmp_echo_header_t *)(icmp + 1);
2833       if (!icmp_is_error_message (icmp))
2834         port = echo->identifier;
2835       else
2836         {
2837           ip4_header_t *inner_ip = (ip4_header_t *)(echo + 1);
2838           proto = ip_proto_to_snat_proto (inner_ip->protocol);
2839           void *l4_header = ip4_next_header (inner_ip);
2840           switch (proto)
2841             {
2842             case SNAT_PROTOCOL_ICMP:
2843               icmp = (icmp46_header_t*)l4_header;
2844               echo = (icmp_echo_header_t *)(icmp + 1);
2845               port = echo->identifier;
2846               break;
2847             case SNAT_PROTOCOL_UDP:
2848             case SNAT_PROTOCOL_TCP:
2849               port = ((tcp_udp_header_t*)l4_header)->src_port;
2850               break;
2851             default:
2852               return vlib_get_thread_index ();
2853             }
2854         }
2855     }
2856
2857   /* try static mappings with port */
2858   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
2859     {
2860       make_sm_kv (&kv, &ip->dst_address, proto, rx_fib_index,
2861                   clib_net_to_host_u16 (port));
2862       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
2863         {
2864           m = pool_elt_at_index (sm->static_mappings, value.value);
2865           if (!vec_len(m->locals))
2866             return m->workers[0];
2867
2868           hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
2869                  (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >>24);
2870
2871           if (PREDICT_TRUE (is_pow2 (_vec_len (m->workers))))
2872             return m->workers[hash & (_vec_len (m->workers) - 1)];
2873           else
2874             return m->workers[hash % _vec_len (m->workers)];
2875         }
2876     }
2877
2878   /* worker by outside port */
2879   next_worker_index = sm->first_worker_index;
2880   next_worker_index +=
2881     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
2882
2883   return next_worker_index;
2884 }
2885
2886 static clib_error_t *
2887 snat_config (vlib_main_t * vm, unformat_input_t * input)
2888 {
2889   snat_main_t * sm = &snat_main;
2890   nat66_main_t * nm = &nat66_main;
2891   u32 translation_buckets = 1024;
2892   u32 translation_memory_size = 128<<20;
2893   u32 user_buckets = 128;
2894   u32 user_memory_size = 64<<20;
2895   u32 max_translations_per_user = 100;
2896   u32 outside_vrf_id = 0;
2897   u32 outside_ip6_vrf_id = 0;
2898   u32 inside_vrf_id = 0;
2899   u32 static_mapping_buckets = 1024;
2900   u32 static_mapping_memory_size = 64<<20;
2901   u32 nat64_bib_buckets = 1024;
2902   u32 nat64_bib_memory_size = 128 << 20;
2903   u32 nat64_st_buckets = 2048;
2904   u32 nat64_st_memory_size = 256 << 20;
2905   u8 static_mapping_only = 0;
2906   u8 static_mapping_connection_tracking = 0;
2907   snat_main_per_thread_data_t *tsm;
2908   dslite_main_t * dm = &dslite_main;
2909
2910   sm->deterministic = 0;
2911   sm->out2in_dpo = 0;
2912   sm->endpoint_dependent = 0;
2913
2914   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2915     {
2916       if (unformat (input, "translation hash buckets %d", &translation_buckets))
2917         ;
2918       else if (unformat (input, "translation hash memory %d",
2919                          &translation_memory_size));
2920       else if (unformat (input, "user hash buckets %d", &user_buckets))
2921         ;
2922       else if (unformat (input, "user hash memory %d",
2923                          &user_memory_size))
2924         ;
2925       else if (unformat (input, "max translations per user %d",
2926                          &max_translations_per_user))
2927         ;
2928       else if (unformat (input, "outside VRF id %d",
2929                          &outside_vrf_id))
2930         ;
2931       else if (unformat (input, "outside ip6 VRF id %d",
2932                          &outside_ip6_vrf_id))
2933         ;
2934       else if (unformat (input, "inside VRF id %d",
2935                          &inside_vrf_id))
2936         ;
2937       else if (unformat (input, "static mapping only"))
2938         {
2939           static_mapping_only = 1;
2940           if (unformat (input, "connection tracking"))
2941             static_mapping_connection_tracking = 1;
2942         }
2943       else if (unformat (input, "deterministic"))
2944         sm->deterministic = 1;
2945       else if (unformat (input, "nat64 bib hash buckets %d",
2946                          &nat64_bib_buckets))
2947         ;
2948       else if (unformat (input, "nat64 bib hash memory %d",
2949                          &nat64_bib_memory_size))
2950         ;
2951       else if (unformat (input, "nat64 st hash buckets %d", &nat64_st_buckets))
2952         ;
2953       else if (unformat (input, "nat64 st hash memory %d",
2954                          &nat64_st_memory_size))
2955         ;
2956       else if (unformat (input, "out2in dpo"))
2957         sm->out2in_dpo = 1;
2958       else if (unformat (input, "dslite ce"))
2959         dslite_set_ce(dm, 1);
2960       else if (unformat (input, "endpoint-dependent"))
2961         sm->endpoint_dependent = 1;
2962       else
2963         return clib_error_return (0, "unknown input '%U'",
2964                                   format_unformat_error, input);
2965     }
2966
2967   if (sm->deterministic && sm->endpoint_dependent)
2968     return clib_error_return (
2969       0, "deterministic and endpoint-dependent modes are mutually exclusive");
2970
2971   if (static_mapping_only && (sm->deterministic || sm->endpoint_dependent))
2972     return clib_error_return (
2973       0, "static mapping only mode available only for simple nat");
2974
2975   if (sm->out2in_dpo && (sm->deterministic || sm->endpoint_dependent))
2976     return clib_error_return (
2977       0, "out2in dpo mode available only for simple nat");
2978
2979   /* for show commands, etc. */
2980   sm->translation_buckets = translation_buckets;
2981   sm->translation_memory_size = translation_memory_size;
2982   /* do not exceed load factor 10 */
2983   sm->max_translations = 10 * translation_buckets;
2984   sm->user_buckets = user_buckets;
2985   sm->user_memory_size = user_memory_size;
2986   sm->max_translations_per_user = max_translations_per_user;
2987   sm->outside_vrf_id = outside_vrf_id;
2988   sm->outside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2989                                                              outside_vrf_id,
2990                                                              FIB_SOURCE_PLUGIN_HI);
2991   nm->outside_vrf_id = outside_ip6_vrf_id;
2992   nm->outside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6,
2993                                                              outside_ip6_vrf_id,
2994                                                              FIB_SOURCE_PLUGIN_HI);
2995   sm->inside_vrf_id = inside_vrf_id;
2996   sm->inside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2997                                                             inside_vrf_id,
2998                                                             FIB_SOURCE_PLUGIN_HI);
2999   sm->static_mapping_only = static_mapping_only;
3000   sm->static_mapping_connection_tracking = static_mapping_connection_tracking;
3001
3002   nat64_set_hash(nat64_bib_buckets, nat64_bib_memory_size, nat64_st_buckets,
3003                  nat64_st_memory_size);
3004
3005   if (sm->deterministic)
3006     {
3007       sm->in2out_node_index = snat_det_in2out_node.index;
3008       sm->in2out_output_node_index = ~0;
3009       sm->out2in_node_index = snat_det_out2in_node.index;
3010       sm->icmp_match_in2out_cb = icmp_match_in2out_det;
3011       sm->icmp_match_out2in_cb = icmp_match_out2in_det;
3012     }
3013   else
3014     {
3015       if (sm->endpoint_dependent)
3016         {
3017           sm->worker_in2out_cb = snat_get_worker_in2out_cb;
3018           sm->worker_out2in_cb = nat44_ed_get_worker_out2in_cb;
3019           sm->in2out_node_index = nat44_ed_in2out_node.index;
3020           sm->in2out_output_node_index = nat44_ed_in2out_output_node.index;
3021           sm->out2in_node_index = nat44_ed_out2in_node.index;
3022           sm->icmp_match_in2out_cb = icmp_match_in2out_ed;
3023           sm->icmp_match_out2in_cb = icmp_match_out2in_ed;
3024           nat_affinity_init (vm);
3025         }
3026       else
3027         {
3028           sm->worker_in2out_cb = snat_get_worker_in2out_cb;
3029           sm->worker_out2in_cb = snat_get_worker_out2in_cb;
3030           sm->in2out_node_index = snat_in2out_node.index;
3031           sm->in2out_output_node_index = snat_in2out_output_node.index;
3032           sm->out2in_node_index = snat_out2in_node.index;
3033           sm->icmp_match_in2out_cb = icmp_match_in2out_slow;
3034           sm->icmp_match_out2in_cb = icmp_match_out2in_slow;
3035         }
3036       if (!static_mapping_only ||
3037           (static_mapping_only && static_mapping_connection_tracking))
3038         {
3039           vec_foreach (tsm, sm->per_thread_data)
3040             {
3041               if (sm->endpoint_dependent)
3042                 {
3043                   clib_bihash_init_16_8 (&tsm->in2out_ed, "in2out-ed",
3044                                          translation_buckets,
3045                                          translation_memory_size);
3046                   clib_bihash_set_kvp_format_fn_16_8 (&tsm->in2out_ed,
3047                                                       format_ed_session_kvp);
3048
3049                   clib_bihash_init_16_8 (&tsm->out2in_ed, "out2in-ed",
3050                                          translation_buckets,
3051                                          translation_memory_size);
3052                   clib_bihash_set_kvp_format_fn_16_8 (&tsm->out2in_ed,
3053                                                       format_ed_session_kvp);
3054                 }
3055               else
3056                 {
3057                   clib_bihash_init_8_8 (&tsm->in2out, "in2out",
3058                                         translation_buckets,
3059                                         translation_memory_size);
3060                   clib_bihash_set_kvp_format_fn_8_8 (&tsm->in2out,
3061                                                      format_session_kvp);
3062
3063                   clib_bihash_init_8_8 (&tsm->out2in, "out2in",
3064                                         translation_buckets,
3065                                         translation_memory_size);
3066                   clib_bihash_set_kvp_format_fn_8_8 (&tsm->out2in,
3067                                                      format_session_kvp);
3068                 }
3069
3070               clib_bihash_init_8_8 (&tsm->user_hash, "users", user_buckets,
3071                                     user_memory_size);
3072               clib_bihash_set_kvp_format_fn_8_8 (&tsm->user_hash,
3073                                                  format_user_kvp);
3074             }
3075
3076         }
3077       else
3078         {
3079           sm->icmp_match_in2out_cb = icmp_match_in2out_fast;
3080           sm->icmp_match_out2in_cb = icmp_match_out2in_fast;
3081         }
3082       clib_bihash_init_8_8 (&sm->static_mapping_by_local,
3083                             "static_mapping_by_local", static_mapping_buckets,
3084                             static_mapping_memory_size);
3085       clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_local,
3086                                          format_static_mapping_kvp);
3087
3088       clib_bihash_init_8_8 (&sm->static_mapping_by_external,
3089                             "static_mapping_by_external", static_mapping_buckets,
3090                             static_mapping_memory_size);
3091       clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_external,
3092                                          format_static_mapping_kvp);
3093     }
3094
3095   return 0;
3096 }
3097
3098 VLIB_CONFIG_FUNCTION (snat_config, "nat");
3099
3100 u8 * format_snat_session_state (u8 * s, va_list * args)
3101 {
3102   u32 i = va_arg (*args, u32);
3103   u8 *t = 0;
3104
3105   switch (i)
3106     {
3107 #define _(v, N, str) case SNAT_SESSION_##N: t = (u8 *) str; break;
3108     foreach_snat_session_state
3109 #undef _
3110     default:
3111       t = format (t, "unknown");
3112     }
3113   s = format (s, "%s", t);
3114   return s;
3115 }
3116
3117 u8 * format_snat_key (u8 * s, va_list * args)
3118 {
3119   snat_session_key_t * key = va_arg (*args, snat_session_key_t *);
3120
3121   s = format (s, "%U proto %U port %d fib %d",
3122               format_ip4_address, &key->addr,
3123               format_snat_protocol, key->protocol,
3124               clib_net_to_host_u16 (key->port), key->fib_index);
3125   return s;
3126 }
3127
3128 u8 * format_static_mapping_key (u8 * s, va_list * args)
3129 {
3130   snat_session_key_t * key = va_arg (*args, snat_session_key_t *);
3131
3132   s = format (s, "%U proto %U port %d fib %d",
3133               format_ip4_address, &key->addr,
3134               format_snat_protocol, key->protocol,
3135               key->port, key->fib_index);
3136   return s;
3137 }
3138
3139 u8 * format_snat_session (u8 * s, va_list * args)
3140 {
3141   snat_main_per_thread_data_t * sm = va_arg (*args, snat_main_per_thread_data_t *);
3142   snat_session_t * sess = va_arg (*args, snat_session_t *);
3143
3144   if (snat_is_unk_proto_session (sess))
3145     {
3146       s = format (s, "  i2o %U proto %u fib %u\n",
3147                   format_ip4_address, &sess->in2out.addr,
3148                   clib_net_to_host_u16 (sess->in2out.port),
3149                   sess->in2out.fib_index);
3150       s = format (s, "    o2i %U proto %u fib %u\n",
3151                   format_ip4_address, &sess->out2in.addr,
3152                   clib_net_to_host_u16 (sess->out2in.port),
3153                   sess->out2in.fib_index);
3154     }
3155   else
3156     {
3157       s = format (s, "  i2o %U\n", format_snat_key, &sess->in2out);
3158       s = format (s, "    o2i %U\n", format_snat_key, &sess->out2in);
3159     }
3160   if (is_ed_session (sess) || is_fwd_bypass_session (sess))
3161     {
3162       if (is_twice_nat_session (sess))
3163         {
3164           s = format (s, "       external host o2i %U:%d i2o %U:%d\n",
3165                       format_ip4_address, &sess->ext_host_addr,
3166                       clib_net_to_host_u16 (sess->ext_host_port),
3167                       format_ip4_address, &sess->ext_host_nat_addr,
3168                       clib_net_to_host_u16 (sess->ext_host_nat_port));
3169         }
3170       else
3171         {
3172           if (sess->ext_host_addr.as_u32)
3173               s = format (s, "       external host %U:%u\n",
3174                           format_ip4_address, &sess->ext_host_addr,
3175                           clib_net_to_host_u16 (sess->ext_host_port));
3176         }
3177     }
3178   s = format (s, "       index %llu\n", sess - sm->sessions);
3179   s = format (s, "       last heard %.2f\n", sess->last_heard);
3180   s = format (s, "       total pkts %d, total bytes %lld\n",
3181               sess->total_pkts, sess->total_bytes);
3182   if (snat_is_session_static (sess))
3183     s = format (s, "       static translation\n");
3184   else
3185     s = format (s, "       dynamic translation\n");
3186   if (is_fwd_bypass_session (sess))
3187     s = format (s, "       forwarding-bypass\n");
3188   if (is_lb_session (sess))
3189     s = format (s, "       load-balancing\n");
3190   if (is_twice_nat_session (sess))
3191     s = format (s, "       twice-nat\n");
3192
3193   return s;
3194 }
3195
3196 u8 * format_snat_user (u8 * s, va_list * args)
3197 {
3198   snat_main_per_thread_data_t * sm = va_arg (*args, snat_main_per_thread_data_t *);
3199   snat_user_t * u = va_arg (*args, snat_user_t *);
3200   int verbose = va_arg (*args, int);
3201   dlist_elt_t * head, * elt;
3202   u32 elt_index, head_index;
3203   u32 session_index;
3204   snat_session_t * sess;
3205
3206   s = format (s, "%U: %d dynamic translations, %d static translations\n",
3207               format_ip4_address, &u->addr, u->nsessions, u->nstaticsessions);
3208
3209   if (verbose == 0)
3210     return s;
3211
3212   if (u->nsessions || u->nstaticsessions)
3213     {
3214       head_index = u->sessions_per_user_list_head_index;
3215       head = pool_elt_at_index (sm->list_pool, head_index);
3216
3217       elt_index = head->next;
3218       elt = pool_elt_at_index (sm->list_pool, elt_index);
3219       session_index = elt->value;
3220
3221       while (session_index != ~0)
3222         {
3223           sess = pool_elt_at_index (sm->sessions, session_index);
3224
3225           s = format (s, "  %U\n", format_snat_session, sm, sess);
3226
3227           elt_index = elt->next;
3228           elt = pool_elt_at_index (sm->list_pool, elt_index);
3229           session_index = elt->value;
3230         }
3231     }
3232
3233   return s;
3234 }
3235
3236 u8 * format_snat_static_mapping (u8 * s, va_list * args)
3237 {
3238   snat_static_mapping_t *m = va_arg (*args, snat_static_mapping_t *);
3239   nat44_lb_addr_port_t *local;
3240
3241   if (m->addr_only)
3242       s = format (s, "local %U external %U vrf %d %s %s",
3243                   format_ip4_address, &m->local_addr,
3244                   format_ip4_address, &m->external_addr,
3245                   m->vrf_id,
3246                   m->twice_nat == TWICE_NAT ? "twice-nat" :
3247                   m->twice_nat == TWICE_NAT_SELF ? "self-twice-nat" : "",
3248                   m->out2in_only ? "out2in-only" : "");
3249   else
3250    {
3251       if (vec_len (m->locals))
3252         {
3253           s = format (s, "%U external %U:%d %s %s",
3254                       format_snat_protocol, m->proto,
3255                       format_ip4_address, &m->external_addr, m->external_port,
3256                       m->twice_nat == TWICE_NAT ? "twice-nat" :
3257                       m->twice_nat == TWICE_NAT_SELF ? "self-twice-nat" : "",
3258                       m->out2in_only ? "out2in-only" : "");
3259           vec_foreach (local, m->locals)
3260             s = format (s, "\n  local %U:%d vrf %d probability %d\%",
3261                         format_ip4_address, &local->addr, local->port,
3262                         local->vrf_id, local->probability);
3263         }
3264       else
3265         s = format (s, "%U local %U:%d external %U:%d vrf %d %s %s",
3266                     format_snat_protocol, m->proto,
3267                     format_ip4_address, &m->local_addr, m->local_port,
3268                     format_ip4_address, &m->external_addr, m->external_port,
3269                     m->vrf_id,
3270                     m->twice_nat == TWICE_NAT ? "twice-nat" :
3271                     m->twice_nat == TWICE_NAT_SELF ? "self-twice-nat" : "",
3272                     m->out2in_only ? "out2in-only" : "");
3273    }
3274   return s;
3275 }
3276
3277 u8 * format_snat_static_map_to_resolve (u8 * s, va_list * args)
3278 {
3279   snat_static_map_resolve_t *m = va_arg (*args, snat_static_map_resolve_t *);
3280   vnet_main_t *vnm = vnet_get_main();
3281
3282   if (m->addr_only)
3283       s = format (s, "local %U external %U vrf %d",
3284                   format_ip4_address, &m->l_addr,
3285                   format_vnet_sw_if_index_name, vnm, m->sw_if_index,
3286                   m->vrf_id);
3287   else
3288       s = format (s, "%U local %U:%d external %U:%d vrf %d",
3289                   format_snat_protocol, m->proto,
3290                   format_ip4_address, &m->l_addr, m->l_port,
3291                   format_vnet_sw_if_index_name, vnm, m->sw_if_index,
3292                   m->e_port, m->vrf_id);
3293
3294   return s;
3295 }
3296
3297 u8 * format_det_map_ses (u8 * s, va_list * args)
3298 {
3299   snat_det_map_t * det_map = va_arg (*args, snat_det_map_t *);
3300   ip4_address_t in_addr, out_addr;
3301   u32 in_offset, out_offset;
3302   snat_det_session_t * ses = va_arg (*args, snat_det_session_t *);
3303   u32 * i = va_arg (*args, u32 *);
3304
3305   u32 user_index = *i / SNAT_DET_SES_PER_USER;
3306   in_addr.as_u32 = clib_host_to_net_u32 (
3307     clib_net_to_host_u32(det_map->in_addr.as_u32) + user_index);
3308   in_offset = clib_net_to_host_u32(in_addr.as_u32) -
3309     clib_net_to_host_u32(det_map->in_addr.as_u32);
3310   out_offset = in_offset / det_map->sharing_ratio;
3311   out_addr.as_u32 = clib_host_to_net_u32(
3312     clib_net_to_host_u32(det_map->out_addr.as_u32) + out_offset);
3313   s = format (s, "in %U:%d out %U:%d external host %U:%d state: %U expire: %d\n",
3314               format_ip4_address, &in_addr,
3315               clib_net_to_host_u16 (ses->in_port),
3316               format_ip4_address, &out_addr,
3317               clib_net_to_host_u16 (ses->out.out_port),
3318               format_ip4_address, &ses->out.ext_host_addr,
3319               clib_net_to_host_u16 (ses->out.ext_host_port),
3320               format_snat_session_state, ses->state,
3321               ses->expire);
3322
3323   return s;
3324 }
3325
3326 static void
3327 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
3328                                  uword opaque,
3329                                  u32 sw_if_index,
3330                                  ip4_address_t * address,
3331                                  u32 address_length,
3332                                  u32 if_address_index,
3333                                  u32 is_delete)
3334 {
3335   snat_main_t *sm = &snat_main;
3336   snat_static_map_resolve_t *rp;
3337   snat_static_mapping_t *m;
3338   snat_session_key_t m_key;
3339   clib_bihash_kv_8_8_t kv, value;
3340   int i, rv;
3341   ip4_address_t l_addr;
3342
3343   for (i = 0; i < vec_len (sm->to_resolve); i++)
3344     {
3345       rp = sm->to_resolve + i;
3346       if (rp->addr_only == 0)
3347         continue;
3348       if (rp->sw_if_index == sw_if_index)
3349         goto match;
3350     }
3351
3352   return;
3353
3354 match:
3355   m_key.addr.as_u32 = address->as_u32;
3356   m_key.port = rp->addr_only ? 0 : rp->e_port;
3357   m_key.protocol = rp->addr_only ? 0 : rp->proto;
3358   m_key.fib_index = sm->outside_fib_index;
3359   kv.key = m_key.as_u64;
3360   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
3361     m = 0;
3362   else
3363     m = pool_elt_at_index (sm->static_mappings, value.value);
3364
3365   if (!is_delete)
3366     {
3367       /* Don't trip over lease renewal, static config */
3368       if (m)
3369         return;
3370     }
3371   else
3372     {
3373       if (!m)
3374         return;
3375     }
3376
3377   /* Indetity mapping? */
3378   if (rp->l_addr.as_u32 == 0)
3379     l_addr.as_u32 = address[0].as_u32;
3380   else
3381     l_addr.as_u32 = rp->l_addr.as_u32;
3382   /* Add the static mapping */
3383   rv = snat_add_static_mapping (l_addr,
3384                                 address[0],
3385                                 rp->l_port,
3386                                 rp->e_port,
3387                                 rp->vrf_id,
3388                                 rp->addr_only,
3389                                 ~0 /* sw_if_index */,
3390                                 rp->proto,
3391                                 !is_delete,
3392                                 0, 0, rp->tag);
3393   if (rv)
3394     nat_log_notice ("snat_add_static_mapping returned %d", rv);
3395 }
3396
3397 static void
3398 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
3399                                        uword opaque,
3400                                        u32 sw_if_index,
3401                                        ip4_address_t * address,
3402                                        u32 address_length,
3403                                        u32 if_address_index,
3404                                        u32 is_delete)
3405 {
3406   snat_main_t *sm = &snat_main;
3407   snat_static_map_resolve_t *rp;
3408   ip4_address_t l_addr;
3409   int i, j;
3410   int rv;
3411   u8 twice_nat = 0;
3412   snat_address_t *addresses = sm->addresses;
3413
3414   for (i = 0; i < vec_len(sm->auto_add_sw_if_indices); i++)
3415     {
3416       if (sw_if_index == sm->auto_add_sw_if_indices[i])
3417           goto match;
3418     }
3419
3420   for (i = 0; i < vec_len(sm->auto_add_sw_if_indices_twice_nat); i++)
3421     {
3422       twice_nat = 1;
3423       addresses = sm->twice_nat_addresses;
3424       if (sw_if_index == sm->auto_add_sw_if_indices_twice_nat[i])
3425           goto match;
3426     }
3427
3428   return;
3429
3430 match:
3431   if (!is_delete)
3432     {
3433       /* Don't trip over lease renewal, static config */
3434       for (j = 0; j < vec_len(addresses); j++)
3435         if (addresses[j].addr.as_u32 == address->as_u32)
3436           return;
3437
3438       (void) snat_add_address (sm, address, ~0, twice_nat);
3439       /* Scan static map resolution vector */
3440       for (j = 0; j < vec_len (sm->to_resolve); j++)
3441         {
3442           rp = sm->to_resolve + j;
3443           if (rp->addr_only)
3444             continue;
3445           /* On this interface? */
3446           if (rp->sw_if_index == sw_if_index)
3447             {
3448               /* Indetity mapping? */
3449               if (rp->l_addr.as_u32 == 0)
3450                 l_addr.as_u32 = address[0].as_u32;
3451               else
3452                 l_addr.as_u32 = rp->l_addr.as_u32;
3453               /* Add the static mapping */
3454               rv = snat_add_static_mapping (l_addr,
3455                                             address[0],
3456                                             rp->l_port,
3457                                             rp->e_port,
3458                                             rp->vrf_id,
3459                                             rp->addr_only,
3460                                             ~0 /* sw_if_index */,
3461                                             rp->proto,
3462                                             rp->is_add,
3463                                             0, 0, rp->tag);
3464               if (rv)
3465                 nat_log_notice ("snat_add_static_mapping returned %d", rv);
3466             }
3467         }
3468       return;
3469     }
3470   else
3471     {
3472       (void) snat_del_address(sm, address[0], 1, twice_nat);
3473       return;
3474     }
3475 }
3476
3477
3478 int snat_add_interface_address (snat_main_t *sm, u32 sw_if_index, int is_del,
3479                                 u8 twice_nat)
3480 {
3481   ip4_main_t * ip4_main = sm->ip4_main;
3482   ip4_address_t * first_int_addr;
3483   snat_static_map_resolve_t *rp;
3484   u32 *indices_to_delete = 0;
3485   int i, j;
3486   u32 *auto_add_sw_if_indices =
3487     twice_nat ? sm->auto_add_sw_if_indices_twice_nat : sm->auto_add_sw_if_indices;
3488
3489   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index,
3490                                                 0 /* just want the address*/);
3491
3492   for (i = 0; i < vec_len(auto_add_sw_if_indices); i++)
3493     {
3494       if (auto_add_sw_if_indices[i] == sw_if_index)
3495         {
3496           if (is_del)
3497             {
3498               /* if have address remove it */
3499               if (first_int_addr)
3500                   (void) snat_del_address (sm, first_int_addr[0], 1, twice_nat);
3501               else
3502                 {
3503                   for (j = 0; j < vec_len (sm->to_resolve); j++)
3504                     {
3505                       rp = sm->to_resolve + j;
3506                       if (rp->sw_if_index == sw_if_index)
3507                         vec_add1 (indices_to_delete, j);
3508                     }
3509                   if (vec_len(indices_to_delete))
3510                     {
3511                       for (j = vec_len(indices_to_delete)-1; j >= 0; j--)
3512                         vec_del1(sm->to_resolve, j);
3513                       vec_free(indices_to_delete);
3514                     }
3515                 }
3516               if (twice_nat)
3517                 vec_del1(sm->auto_add_sw_if_indices_twice_nat, i);
3518               else
3519                 vec_del1(sm->auto_add_sw_if_indices, i);
3520             }
3521           else
3522             return VNET_API_ERROR_VALUE_EXIST;
3523
3524           return 0;
3525         }
3526     }
3527
3528   if (is_del)
3529     return VNET_API_ERROR_NO_SUCH_ENTRY;
3530
3531   /* add to the auto-address list */
3532   if (twice_nat)
3533     vec_add1(sm->auto_add_sw_if_indices_twice_nat, sw_if_index);
3534   else
3535     vec_add1(sm->auto_add_sw_if_indices, sw_if_index);
3536
3537   /* If the address is already bound - or static - add it now */
3538   if (first_int_addr)
3539       (void) snat_add_address (sm, first_int_addr, ~0, twice_nat);
3540
3541   return 0;
3542 }
3543
3544 int
3545 nat44_del_session (snat_main_t *sm, ip4_address_t *addr, u16 port,
3546                    snat_protocol_t proto, u32 vrf_id, int is_in)
3547 {
3548   snat_main_per_thread_data_t *tsm;
3549   clib_bihash_kv_8_8_t kv, value;
3550   ip4_header_t ip;
3551   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
3552   snat_session_key_t key;
3553   snat_session_t *s;
3554   clib_bihash_8_8_t *t;
3555
3556   if (sm->endpoint_dependent)
3557     return VNET_API_ERROR_UNSUPPORTED;
3558
3559   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
3560   if (sm->num_workers > 1)
3561     tsm =
3562       vec_elt_at_index (sm->per_thread_data,
3563                         sm->worker_in2out_cb (&ip, fib_index));
3564   else
3565     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
3566
3567   key.addr.as_u32 = addr->as_u32;
3568   key.port = clib_host_to_net_u16 (port);
3569   key.protocol = proto;
3570   key.fib_index = fib_index;
3571   kv.key = key.as_u64;
3572   t = is_in ? &tsm->in2out : &tsm->out2in;
3573   if (!clib_bihash_search_8_8 (t, &kv, &value))
3574     {
3575       if (pool_is_free_index (tsm->sessions, value.value))
3576         return VNET_API_ERROR_UNSPECIFIED;
3577
3578       s = pool_elt_at_index (tsm->sessions, value.value);
3579       nat_free_session_data (sm, s, tsm - sm->per_thread_data);
3580       nat44_delete_session (sm, s, tsm - sm->per_thread_data);
3581       return 0;
3582     }
3583
3584   return VNET_API_ERROR_NO_SUCH_ENTRY;
3585 }
3586
3587 int
3588 nat44_del_ed_session (snat_main_t *sm, ip4_address_t *addr, u16 port,
3589                       ip4_address_t *eh_addr, u16 eh_port, u8 proto,
3590                       u32 vrf_id, int is_in)
3591 {
3592   ip4_header_t ip;
3593   clib_bihash_16_8_t *t;
3594   nat_ed_ses_key_t key;
3595   clib_bihash_kv_16_8_t kv, value;
3596   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
3597   snat_session_t *s;
3598   snat_main_per_thread_data_t *tsm;
3599
3600   if (!sm->endpoint_dependent)
3601     return VNET_API_ERROR_FEATURE_DISABLED;
3602
3603   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
3604   if (sm->num_workers > 1)
3605     tsm =
3606       vec_elt_at_index (sm->per_thread_data,
3607                         sm->worker_in2out_cb (&ip, fib_index));
3608   else
3609     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
3610
3611   t = is_in ? &tsm->in2out_ed : &tsm->out2in_ed;
3612   key.l_addr.as_u32 = addr->as_u32;
3613   key.r_addr.as_u32 = eh_addr->as_u32;
3614   key.l_port = clib_host_to_net_u16 (port);
3615   key.r_port = clib_host_to_net_u16 (eh_port);
3616   key.proto = proto;
3617   key.fib_index = clib_host_to_net_u32 (fib_index);
3618   kv.key[0] = key.as_u64[0];
3619   kv.key[1] = key.as_u64[1];
3620   if (clib_bihash_search_16_8 (t, &kv, &value))
3621     return VNET_API_ERROR_NO_SUCH_ENTRY;
3622
3623   if (pool_is_free_index (tsm->sessions, value.value))
3624     return VNET_API_ERROR_UNSPECIFIED;
3625   s = pool_elt_at_index (tsm->sessions, value.value);
3626   nat_free_session_data (sm, s, tsm - sm->per_thread_data);
3627   nat44_delete_session (sm, s, tsm - sm->per_thread_data);
3628   return 0;
3629 }
3630
3631 void
3632 nat_set_alloc_addr_and_port_mape (u16 psid, u16 psid_offset, u16 psid_length)
3633 {
3634   snat_main_t *sm = &snat_main;
3635
3636   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_MAPE;
3637   sm->alloc_addr_and_port = nat_alloc_addr_and_port_mape;
3638   sm->psid = psid;
3639   sm->psid_offset = psid_offset;
3640   sm->psid_length = psid_length;
3641 }
3642
3643 void
3644 nat_set_alloc_addr_and_port_range (u16 start_port, u16 end_port)
3645 {
3646   snat_main_t *sm = &snat_main;
3647
3648   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_RANGE;
3649   sm->alloc_addr_and_port = nat_alloc_addr_and_port_range;
3650   sm->start_port = start_port;
3651   sm->end_port = end_port;
3652 }
3653
3654 void
3655 nat_set_alloc_addr_and_port_default (void)
3656 {
3657   snat_main_t *sm = &snat_main;
3658
3659   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_DEFAULT;
3660   sm->alloc_addr_and_port = nat_alloc_addr_and_port_default;
3661 }
3662