nat: per vrf session limits
[vpp.git] / src / plugins / nat / in2out_ed.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/pg/pg.h>
23 #include <vnet/ip/ip.h>
24 #include <vnet/ethernet/ethernet.h>
25 #include <vnet/fib/ip4_fib.h>
26 #include <vnet/udp/udp.h>
27 #include <vppinfra/error.h>
28 #include <nat/nat.h>
29 #include <nat/nat_ipfix_logging.h>
30 #include <nat/nat_inlines.h>
31 #include <nat/nat44/inlines.h>
32 #include <nat/nat_syslog.h>
33 #include <nat/nat_ha.h>
34
35 static char *nat_in2out_ed_error_strings[] = {
36 #define _(sym,string) string,
37   foreach_nat_in2out_ed_error
38 #undef _
39 };
40
41 typedef struct
42 {
43   u32 sw_if_index;
44   u32 next_index;
45   u32 session_index;
46   u32 is_slow_path;
47 } nat_in2out_ed_trace_t;
48
49 static u8 *
50 format_nat_in2out_ed_trace (u8 * s, va_list * args)
51 {
52   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
53   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
54   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
55   char *tag;
56
57   tag =
58     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
59     "NAT44_IN2OUT_ED_FAST_PATH";
60
61   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
62               t->sw_if_index, t->next_index, t->session_index);
63
64   return s;
65 }
66
67 #ifndef CLIB_MARCH_VARIANT
68 int
69 nat44_i2o_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg)
70 {
71   snat_main_t *sm = &snat_main;
72   nat44_is_idle_session_ctx_t *ctx = arg;
73   snat_session_t *s;
74   u64 sess_timeout_time;
75   u8 proto;
76   u16 r_port, l_port;
77   ip4_address_t *l_addr, *r_addr;
78   u32 fib_index;
79   clib_bihash_kv_16_8_t ed_kv;
80   int i;
81   snat_address_t *a;
82   snat_session_key_t key;
83   snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
84                                                        ctx->thread_index);
85
86   s = pool_elt_at_index (tsm->sessions, kv->value);
87   sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s);
88   if (ctx->now >= sess_timeout_time)
89     {
90       if (is_fwd_bypass_session (s))
91         goto delete;
92
93       l_addr = &s->out2in.addr;
94       r_addr = &s->ext_host_addr;
95       fib_index = s->out2in.fib_index;
96       if (snat_is_unk_proto_session (s))
97         {
98           proto = s->in2out.port;
99           r_port = 0;
100           l_port = 0;
101         }
102       else
103         {
104           proto = snat_proto_to_ip_proto (s->in2out.protocol);
105           l_port = s->out2in.port;
106           r_port = s->ext_host_port;
107         }
108       make_ed_kv (l_addr, r_addr, proto, fib_index, l_port, r_port, ~0ULL,
109                   &ed_kv);
110       if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &ed_kv, 0))
111         nat_elog_warn ("out2in_ed key del failed");
112
113       if (snat_is_unk_proto_session (s))
114         goto delete;
115
116       snat_ipfix_logging_nat44_ses_delete (ctx->thread_index,
117                                            s->in2out.addr.as_u32,
118                                            s->out2in.addr.as_u32,
119                                            s->in2out.protocol,
120                                            s->in2out.port,
121                                            s->out2in.port,
122                                            s->in2out.fib_index);
123
124       nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
125                              &s->in2out.addr, s->in2out.port,
126                              &s->ext_host_nat_addr, s->ext_host_nat_port,
127                              &s->out2in.addr, s->out2in.port,
128                              &s->ext_host_addr, s->ext_host_port,
129                              s->in2out.protocol, is_twice_nat_session (s));
130
131       nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
132                    s->ext_host_port, s->out2in.protocol, s->out2in.fib_index,
133                    ctx->thread_index);
134
135       if (is_twice_nat_session (s))
136         {
137           for (i = 0; i < vec_len (sm->twice_nat_addresses); i++)
138             {
139               key.protocol = s->in2out.protocol;
140               key.port = s->ext_host_nat_port;
141               a = sm->twice_nat_addresses + i;
142               if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32)
143                 {
144                   snat_free_outside_address_and_port (sm->twice_nat_addresses,
145                                                       ctx->thread_index,
146                                                       &key);
147                   break;
148                 }
149             }
150         }
151
152       if (snat_is_session_static (s))
153         goto delete;
154
155       snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
156                                           &s->out2in);
157     delete:
158       nat44_ed_delete_session (sm, s, ctx->thread_index, 1);
159       return 1;
160     }
161
162   return 0;
163 }
164 #endif
165
166 static inline u32
167 icmp_in2out_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0,
168                           ip4_header_t * ip0, icmp46_header_t * icmp0,
169                           u32 sw_if_index0, u32 rx_fib_index0,
170                           vlib_node_runtime_t * node, u32 next0, f64 now,
171                           u32 thread_index, snat_session_t ** p_s0)
172 {
173   next0 = icmp_in2out (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
174                        next0, thread_index, p_s0, 0);
175   snat_session_t *s0 = *p_s0;
176   if (PREDICT_TRUE (next0 != NAT_NEXT_DROP && s0))
177     {
178       /* Accounting */
179       nat44_session_update_counters (s0, now,
180                                      vlib_buffer_length_in_chain
181                                      (sm->vlib_main, b0), thread_index);
182       /* Per-user LRU list maintenance */
183       nat44_session_update_lru (sm, s0, thread_index);
184     }
185   return next0;
186 }
187
188 static_always_inline u16
189 snat_random_port (u16 min, u16 max)
190 {
191   snat_main_t *sm = &snat_main;
192   return min + random_u32 (&sm->random_seed) /
193     (random_u32_max () / (max - min + 1) + 1);
194 }
195
196 static int
197 nat_ed_alloc_addr_and_port (snat_main_t * sm, u32 rx_fib_index,
198                             u32 snat_proto, u32 thread_index,
199                             ip4_address_t r_addr, u16 r_port, u8 proto,
200                             u16 port_per_thread, u32 snat_thread_index,
201                             snat_session_t * s,
202                             ip4_address_t * allocated_addr,
203                             u16 * allocated_port,
204                             clib_bihash_kv_16_8_t * out2in_ed_kv)
205 {
206   int i;
207   snat_address_t *a, *ga = 0;
208   u32 portnum;
209   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
210
211   const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
212
213   for (i = 0; i < vec_len (sm->addresses); i++)
214     {
215       a = sm->addresses + i;
216       switch (snat_proto)
217         {
218 #define _(N, j, n, unused)                                                    \
219   case SNAT_PROTOCOL_##N:                                                     \
220     if (a->fib_index == rx_fib_index)                                         \
221       {                                                                       \
222         u16 port = snat_random_port (1, port_per_thread);                     \
223         u16 attempts = port_per_thread;                                       \
224         while (attempts > 0)                                                  \
225           {                                                                   \
226             --attempts;                                                       \
227             portnum = port_thread_offset + port;                              \
228             make_ed_kv (&a->addr, &r_addr, proto, s->out2in.fib_index,        \
229                         clib_host_to_net_u16 (portnum), r_port,               \
230                         s - tsm->sessions, out2in_ed_kv);                     \
231             int rv = clib_bihash_add_del_16_8 (&tsm->out2in_ed, out2in_ed_kv, \
232                                                2 /* is_add */);               \
233             if (0 == rv)                                                      \
234               {                                                               \
235                 ++a->busy_##n##_port_refcounts[portnum];                      \
236                 a->busy_##n##_ports_per_thread[thread_index]++;               \
237                 a->busy_##n##_ports++;                                        \
238                 *allocated_addr = a->addr;                                    \
239                 *allocated_port = clib_host_to_net_u16 (portnum);             \
240                 return 0;                                                     \
241               }                                                               \
242             port = (port + 1) % port_per_thread;                              \
243           }                                                                   \
244       }                                                                       \
245     else if (a->fib_index == ~0)                                              \
246       {                                                                       \
247         ga = a;                                                               \
248       }                                                                       \
249     break;
250
251           foreach_snat_protocol;
252         default:
253           nat_elog_info ("unknown protocol");
254           return 1;
255         }
256     }
257
258   if (ga)
259     {
260       /* fake fib_index to reuse macro */
261       rx_fib_index = ~0;
262       a = ga;
263       switch (snat_proto)
264         {
265           foreach_snat_protocol;
266         default:
267           nat_elog_info ("unknown protocol");
268           return 1;
269         }
270     }
271
272 #undef _
273
274   /* Totally out of translations to use... */
275   snat_ipfix_logging_addresses_exhausted (thread_index, 0);
276   return 1;
277 }
278
279 static u32
280 slow_path_ed (snat_main_t * sm,
281               vlib_buffer_t * b,
282               ip4_address_t l_addr,
283               ip4_address_t r_addr,
284               u16 l_port,
285               u16 r_port,
286               u8 proto,
287               u32 rx_fib_index,
288               snat_session_t ** sessionp,
289               vlib_node_runtime_t * node, u32 next, u32 thread_index, f64 now)
290 {
291   snat_session_t *s = NULL;
292   snat_session_key_t key0, key1;
293   lb_nat_type_t lb = 0;
294   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
295   u32 snat_proto = ip_proto_to_snat_proto (proto);
296   nat_outside_fib_t *outside_fib;
297   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
298   clib_bihash_kv_16_8_t out2in_ed_kv;
299   bool out2in_ed_inserted = false;
300   ip4_address_t allocated_addr;
301   u16 allocated_port;
302   u8 identity_nat;
303   fib_prefix_t pfx = {
304     .fp_proto = FIB_PROTOCOL_IP4,
305     .fp_len = 32,
306     .fp_addr = {.ip4.as_u32 = r_addr.as_u32,},
307   };
308   nat44_is_idle_session_ctx_t ctx;
309
310   if (PREDICT_TRUE (snat_proto == SNAT_PROTOCOL_TCP))
311     {
312       if (PREDICT_FALSE
313           (!tcp_flags_is_init
314            (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
315         {
316           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
317           return NAT_NEXT_DROP;
318         }
319     }
320
321   // TODO: based on fib index do a lookup
322   if (PREDICT_FALSE
323       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
324     {
325       if (!nat_global_lru_free_one (sm, thread_index, now))
326         {
327           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
328           nat_ipfix_logging_max_sessions (thread_index, sm->max_translations);
329           nat_elog_notice ("maximum sessions exceeded");
330           return NAT_NEXT_DROP;
331         }
332     }
333
334   key0.addr = l_addr;
335   key0.port = l_port;
336   key1.protocol = key0.protocol = snat_proto;
337   key0.fib_index = rx_fib_index;
338   key1.fib_index = sm->outside_fib_index;
339
340   /* First try to match static mapping by local address and port */
341   if (snat_static_mapping_match
342       (sm, key0, &key1, 0, 0, 0, &lb, 0, &identity_nat))
343     {
344       s = nat_ed_session_alloc (sm, thread_index, now);
345       if (!s)
346         {
347           nat_elog_warn ("create NAT session failed");
348           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_USER_SESS_EXCEEDED];
349           goto drop;
350         }
351       switch (vec_len (sm->outside_fibs))
352         {
353         case 0:
354           s->out2in.fib_index = sm->outside_fib_index;
355           break;
356         case 1:
357           s->out2in.fib_index = sm->outside_fibs[0].fib_index;
358           break;
359         default:
360           /* *INDENT-OFF* */
361           vec_foreach (outside_fib, sm->outside_fibs)
362           {
363             fei = fib_table_lookup (outside_fib->fib_index, &pfx);
364             if (FIB_NODE_INDEX_INVALID != fei)
365               {
366                 if (fib_entry_get_resolving_interface (fei) != ~0)
367                   {
368                     s->out2in.fib_index = outside_fib->fib_index;
369                     break;
370                   }
371               }
372           }
373           /* *INDENT-ON* */
374           break;
375         }
376
377       /* Try to create dynamic translation */
378       if (nat_ed_alloc_addr_and_port (sm, rx_fib_index, snat_proto,
379                                       thread_index, r_addr, r_port, proto,
380                                       sm->port_per_thread,
381                                       tsm->snat_thread_index, s,
382                                       &allocated_addr,
383                                       &allocated_port, &out2in_ed_kv))
384         {
385           nat_elog_notice ("addresses exhausted");
386           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
387           goto drop;
388         }
389
390       out2in_ed_inserted = true;
391       key1.addr = allocated_addr;
392       key1.port = allocated_port;
393     }
394   else
395     {
396       if (PREDICT_FALSE (identity_nat))
397         {
398           *sessionp = s;
399           return next;
400         }
401       s = nat_ed_session_alloc (sm, thread_index, now);
402       if (!s)
403         {
404           nat_elog_warn ("create NAT session failed");
405           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_USER_SESS_EXCEEDED];
406           goto drop;
407         }
408       switch (vec_len (sm->outside_fibs))
409         {
410         case 0:
411           s->out2in.fib_index = sm->outside_fib_index;
412           break;
413         case 1:
414           s->out2in.fib_index = sm->outside_fibs[0].fib_index;
415           break;
416         default:
417           /* *INDENT-OFF* */
418           vec_foreach (outside_fib, sm->outside_fibs)
419           {
420             fei = fib_table_lookup (outside_fib->fib_index, &pfx);
421             if (FIB_NODE_INDEX_INVALID != fei)
422               {
423                 if (fib_entry_get_resolving_interface (fei) != ~0)
424                   {
425                     s->out2in.fib_index = outside_fib->fib_index;
426                     break;
427                   }
428               }
429           }
430           /* *INDENT-ON* */
431           break;
432         }
433
434       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
435
436
437       make_ed_kv (&key1.addr, &r_addr, proto,
438                   s->out2in.fib_index, key1.port, r_port, s - tsm->sessions,
439                   &out2in_ed_kv);
440       if (clib_bihash_add_or_overwrite_stale_16_8
441           (&tsm->out2in_ed, &out2in_ed_kv, nat44_o2i_ed_is_idle_session_cb,
442            &ctx))
443         nat_elog_notice ("out2in-ed key add failed");
444       out2in_ed_inserted = true;
445     }
446
447   if (lb)
448     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
449   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
450   s->ext_host_addr = r_addr;
451   s->ext_host_port = r_port;
452   s->in2out = key0;
453   s->out2in = key1;
454   s->out2in.protocol = key0.protocol;
455
456   clib_bihash_kv_16_8_t in2out_ed_kv;
457   make_ed_kv (&l_addr, &r_addr, proto, rx_fib_index, l_port, r_port,
458               s - tsm->sessions, &in2out_ed_kv);
459   ctx.now = now;
460   ctx.thread_index = thread_index;
461   if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, &in2out_ed_kv,
462                                                nat44_i2o_ed_is_idle_session_cb,
463                                                &ctx))
464     nat_elog_notice ("in2out-ed key add failed");
465
466   *sessionp = s;
467
468   /* log NAT event */
469   snat_ipfix_logging_nat44_ses_create (thread_index,
470                                        s->in2out.addr.as_u32,
471                                        s->out2in.addr.as_u32,
472                                        s->in2out.protocol,
473                                        s->in2out.port,
474                                        s->out2in.port, s->in2out.fib_index);
475
476   nat_syslog_nat44_sadd (s->user_index, s->in2out.fib_index,
477                          &s->in2out.addr, s->in2out.port,
478                          &s->ext_host_nat_addr, s->ext_host_nat_port,
479                          &s->out2in.addr, s->out2in.port,
480                          &s->ext_host_addr, s->ext_host_port,
481                          s->in2out.protocol, 0);
482
483   nat_ha_sadd (&s->in2out.addr, s->in2out.port, &s->out2in.addr,
484                s->out2in.port, &s->ext_host_addr, s->ext_host_port,
485                &s->ext_host_nat_addr, s->ext_host_nat_port,
486                s->in2out.protocol, s->in2out.fib_index, s->flags,
487                thread_index, 0);
488
489   return next;
490 drop:
491   if (out2in_ed_inserted)
492     {
493       if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &out2in_ed_kv, 0))
494         nat_elog_notice ("out2in-ed key del failed");
495     }
496   if (s)
497     {
498       nat_free_session_data (sm, s, thread_index, 0);
499       nat44_ed_delete_session (sm, s, thread_index, 1);
500     }
501   return NAT_NEXT_DROP;
502 }
503
504 static_always_inline int
505 nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t * node,
506                         u32 sw_if_index, ip4_header_t * ip, u32 proto,
507                         u32 rx_fib_index, u32 thread_index)
508 {
509   udp_header_t *udp = ip4_next_header (ip);
510   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
511   clib_bihash_kv_16_8_t kv, value;
512   snat_session_key_t key0, key1;
513
514   make_ed_kv (&ip->dst_address, &ip->src_address, ip->protocol,
515               sm->outside_fib_index, udp->dst_port, udp->src_port, ~0ULL,
516               &kv);
517
518   /* NAT packet aimed at external address if has active sessions */
519   if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
520     {
521       key0.addr = ip->dst_address;
522       key0.port = udp->dst_port;
523       key0.protocol = proto;
524       key0.fib_index = sm->outside_fib_index;
525       /* or is static mappings */
526       if (!snat_static_mapping_match (sm, key0, &key1, 1, 0, 0, 0, 0, 0))
527         return 0;
528     }
529   else
530     return 0;
531
532   if (sm->forwarding_enabled)
533     return 1;
534
535   return snat_not_translate_fast (sm, node, sw_if_index, ip, proto,
536                                   rx_fib_index);
537 }
538
539 static_always_inline int
540 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
541                                       u32 thread_index, f64 now,
542                                       vlib_main_t * vm, vlib_buffer_t * b)
543 {
544   clib_bihash_kv_16_8_t kv, value;
545   snat_session_t *s = 0;
546   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
547
548   if (!sm->forwarding_enabled)
549     return 0;
550
551   if (ip->protocol == IP_PROTOCOL_ICMP)
552     {
553       if (get_icmp_i2o_ed_key (b, ip, 0, ~0ULL, 0, 0, 0, &kv))
554         return 0;
555     }
556   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
557     {
558       make_ed_kv (&ip->src_address, &ip->dst_address, ip->protocol, 0,
559                   vnet_buffer (b)->ip.reass.l4_src_port,
560                   vnet_buffer (b)->ip.reass.l4_dst_port, ~0ULL, &kv);
561     }
562   else
563     {
564       make_ed_kv (&ip->src_address, &ip->dst_address, ip->protocol, 0, 0,
565                   0, ~0ULL, &kv);
566     }
567
568   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
569     {
570       s = pool_elt_at_index (tsm->sessions, value.value);
571       if (is_fwd_bypass_session (s))
572         {
573           if (ip->protocol == IP_PROTOCOL_TCP)
574             {
575               if (nat44_set_tcp_session_state_i2o
576                   (sm, now, s, b, thread_index))
577                 return 1;
578             }
579           /* Accounting */
580           nat44_session_update_counters (s, now,
581                                          vlib_buffer_length_in_chain (vm, b),
582                                          thread_index);
583           /* Per-user LRU list maintenance */
584           nat44_session_update_lru (sm, s, thread_index);
585           return 1;
586         }
587       else
588         return 0;
589     }
590
591   return 0;
592 }
593
594 static_always_inline int
595 nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip,
596                                        u16 src_port, u16 dst_port,
597                                        u32 thread_index, u32 rx_sw_if_index,
598                                        u32 tx_sw_if_index)
599 {
600   clib_bihash_kv_16_8_t kv, value;
601   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
602   snat_interface_t *i;
603   snat_session_t *s;
604   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
605   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
606
607   /* src NAT check */
608   make_ed_kv (&ip->src_address, &ip->dst_address, ip->protocol,
609               tx_fib_index, src_port, dst_port, ~0ULL, &kv);
610   if (!clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
611     {
612       s = pool_elt_at_index (tsm->sessions, value.value);
613       if (nat44_is_ses_closed (s))
614         {
615           nat_free_session_data (sm, s, thread_index, 0);
616           nat44_ed_delete_session (sm, s, thread_index, 1);
617         }
618       else
619         s->flags |= SNAT_SESSION_FLAG_OUTPUT_FEATURE;
620       return 1;
621     }
622
623   /* dst NAT check */
624   make_ed_kv (&ip->dst_address, &ip->src_address, ip->protocol,
625               rx_fib_index, dst_port, src_port, ~0ULL, &kv);
626   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
627     {
628       s = pool_elt_at_index (tsm->sessions, value.value);
629       if (is_fwd_bypass_session (s))
630         return 0;
631
632       /* hairpinning */
633       /* *INDENT-OFF* */
634       pool_foreach (i, sm->output_feature_interfaces,
635       ({
636         if ((nat_interface_is_inside (i)) && (rx_sw_if_index == i->sw_if_index))
637            return 0;
638       }));
639       /* *INDENT-ON* */
640       return 1;
641     }
642
643   return 0;
644 }
645
646 #ifndef CLIB_MARCH_VARIANT
647 u32
648 icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node,
649                       u32 thread_index, vlib_buffer_t * b, ip4_header_t * ip,
650                       u8 * p_proto, snat_session_key_t * p_value,
651                       u8 * p_dont_translate, void *d, void *e)
652 {
653   u32 sw_if_index;
654   u32 rx_fib_index;
655   snat_session_t *s = 0;
656   u8 dont_translate = 0;
657   clib_bihash_kv_16_8_t kv, value;
658   u32 next = ~0;
659   int err;
660   u16 l_port = 0, r_port = 0;   // initialize to workaround gcc warning
661   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
662
663   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
664   rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
665
666   err =
667     get_icmp_i2o_ed_key (b, ip, rx_fib_index, ~0ULL, p_proto, &l_port,
668                          &r_port, &kv);
669   if (err != 0)
670     {
671       b->error = node->errors[err];
672       next = NAT_NEXT_DROP;
673       goto out;
674     }
675
676   if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
677     {
678       if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
679         {
680           if (PREDICT_FALSE
681               (nat44_ed_not_translate_output_feature
682                (sm, ip, l_port, r_port, thread_index,
683                 sw_if_index, vnet_buffer (b)->sw_if_index[VLIB_TX])))
684             {
685               dont_translate = 1;
686               goto out;
687             }
688         }
689       else
690         {
691           if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, sw_if_index,
692                                                      ip, SNAT_PROTOCOL_ICMP,
693                                                      rx_fib_index,
694                                                      thread_index)))
695             {
696               dont_translate = 1;
697               goto out;
698             }
699         }
700
701       if (PREDICT_FALSE
702           (icmp_type_is_error_message
703            (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
704         {
705           b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
706           next = NAT_NEXT_DROP;
707           goto out;
708         }
709
710       next =
711         slow_path_ed (sm, b, ip->src_address, ip->dst_address, l_port, r_port,
712                       ip->protocol, rx_fib_index, &s, node, next,
713                       thread_index, vlib_time_now (sm->vlib_main));
714
715       if (PREDICT_FALSE (next == NAT_NEXT_DROP))
716         goto out;
717
718       if (!s)
719         {
720           dont_translate = 1;
721           goto out;
722         }
723     }
724   else
725     {
726       if (PREDICT_FALSE
727           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
728            ICMP4_echo_request
729            && vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
730            ICMP4_echo_reply
731            && !icmp_type_is_error_message (vnet_buffer (b)->ip.
732                                            reass.icmp_type_or_tcp_flags)))
733         {
734           b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
735           next = NAT_NEXT_DROP;
736           goto out;
737         }
738
739       s = pool_elt_at_index (tsm->sessions, value.value);
740     }
741 out:
742   if (s)
743     *p_value = s->out2in;
744   *p_dont_translate = dont_translate;
745   if (d)
746     *(snat_session_t **) d = s;
747   return next;
748 }
749 #endif
750
751 static snat_session_t *
752 nat44_ed_in2out_unknown_proto (snat_main_t * sm,
753                                vlib_buffer_t * b,
754                                ip4_header_t * ip,
755                                u32 rx_fib_index,
756                                u32 thread_index,
757                                f64 now,
758                                vlib_main_t * vm, vlib_node_runtime_t * node)
759 {
760   clib_bihash_kv_8_8_t kv, value;
761   clib_bihash_kv_16_8_t s_kv, s_value;
762   snat_static_mapping_t *m;
763   u32 old_addr, new_addr = 0;
764   ip_csum_t sum;
765   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
766   snat_session_t *s;
767   u32 outside_fib_index = sm->outside_fib_index;
768   int i;
769   u8 is_sm = 0;
770   nat_outside_fib_t *outside_fib;
771   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
772   fib_prefix_t pfx = {
773     .fp_proto = FIB_PROTOCOL_IP4,
774     .fp_len = 32,
775     .fp_addr = {
776                 .ip4.as_u32 = ip->dst_address.as_u32,
777                 },
778   };
779
780   switch (vec_len (sm->outside_fibs))
781     {
782     case 0:
783       outside_fib_index = sm->outside_fib_index;
784       break;
785     case 1:
786       outside_fib_index = sm->outside_fibs[0].fib_index;
787       break;
788     default:
789       /* *INDENT-OFF* */
790       vec_foreach (outside_fib, sm->outside_fibs)
791         {
792           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
793           if (FIB_NODE_INDEX_INVALID != fei)
794             {
795               if (fib_entry_get_resolving_interface (fei) != ~0)
796                 {
797                   outside_fib_index = outside_fib->fib_index;
798                   break;
799                 }
800             }
801         }
802       /* *INDENT-ON* */
803       break;
804     }
805   old_addr = ip->src_address.as_u32;
806
807   make_ed_kv (&ip->src_address, &ip->dst_address, ip->protocol,
808               rx_fib_index, 0, 0, ~0ULL, &s_kv);
809
810   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &s_kv, &s_value))
811     {
812       s = pool_elt_at_index (tsm->sessions, s_value.value);
813       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
814     }
815   else
816     {
817       if (PREDICT_FALSE
818           (nat44_ed_maximum_sessions_exceeded
819            (sm, rx_fib_index, thread_index)))
820         {
821           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
822           nat_ipfix_logging_max_sessions (thread_index, sm->max_translations);
823           nat_elog_notice ("maximum sessions exceeded");
824           return 0;
825         }
826
827       make_sm_kv (&kv, &ip->src_address, 0, rx_fib_index, 0);
828
829       /* Try to find static mapping first */
830       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
831         {
832           m = pool_elt_at_index (sm->static_mappings, value.value);
833           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
834           is_sm = 1;
835           goto create_ses;
836         }
837       else
838         {
839           /* *INDENT-OFF* */
840           pool_foreach (s, tsm->sessions, {
841             if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
842               {
843                 new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
844
845                 make_ed_kv (&s->out2in.addr, &ip->dst_address, ip->protocol,
846                             outside_fib_index, 0, 0, ~0ULL, &s_kv);
847                 if (clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
848                   goto create_ses;
849
850                 break;
851               }
852           });
853           /* *INDENT-ON* */
854
855           for (i = 0; i < vec_len (sm->addresses); i++)
856             {
857               make_ed_kv (&sm->addresses[i].addr, &ip->dst_address,
858                           ip->protocol, outside_fib_index, 0, 0, ~0ULL,
859                           &s_kv);
860               if (clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
861                 {
862                   new_addr = ip->src_address.as_u32 =
863                     sm->addresses[i].addr.as_u32;
864                   goto create_ses;
865                 }
866             }
867           return 0;
868         }
869
870     create_ses:
871       s = nat_ed_session_alloc (sm, thread_index, now);
872       if (!s)
873         {
874           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_USER_SESS_EXCEEDED];
875           nat_elog_warn ("create NAT session failed");
876           return 0;
877         }
878
879       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
880       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
881       s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
882       s->out2in.addr.as_u32 = new_addr;
883       s->out2in.fib_index = outside_fib_index;
884       s->in2out.addr.as_u32 = old_addr;
885       s->in2out.fib_index = rx_fib_index;
886       s->in2out.port = s->out2in.port = ip->protocol;
887       if (is_sm)
888         s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
889
890       /* Add to lookup tables */
891       make_ed_kv (&s->in2out.addr, &ip->dst_address, ip->protocol,
892                   rx_fib_index, 0, 0, s - tsm->sessions, &s_kv);
893       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1))
894         nat_elog_notice ("in2out key add failed");
895
896       make_ed_kv (&s->out2in.addr, &ip->dst_address, ip->protocol,
897                   outside_fib_index, 0, 0, s - tsm->sessions, &s_kv);
898       if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &s_kv, 1))
899         nat_elog_notice ("out2in key add failed");
900     }
901
902   /* Update IP checksum */
903   sum = ip->checksum;
904   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
905   ip->checksum = ip_csum_fold (sum);
906
907   /* Accounting */
908   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
909                                  thread_index);
910   /* Per-user LRU list maintenance */
911   nat44_session_update_lru (sm, s, thread_index);
912
913   /* Hairpinning */
914   if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
915     nat44_ed_hairpinning_unknown_proto (sm, b, ip);
916
917   if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
918     vnet_buffer (b)->sw_if_index[VLIB_TX] = outside_fib_index;
919
920   return s;
921 }
922
923 static inline uword
924 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm,
925                                           vlib_node_runtime_t * node,
926                                           vlib_frame_t * frame,
927                                           int is_output_feature)
928 {
929   u32 n_left_from, *from, *to_next, pkts_processed = 0, stats_node_index;
930   nat_next_t next_index;
931   snat_main_t *sm = &snat_main;
932   f64 now = vlib_time_now (vm);
933   u32 thread_index = vm->thread_index;
934   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
935   u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets =
936     0, def_slow;
937
938   def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH :
939     NAT_NEXT_IN2OUT_ED_SLOW_PATH;
940
941   stats_node_index = sm->ed_in2out_node_index;
942
943   from = vlib_frame_vector_args (frame);
944   n_left_from = frame->n_vectors;
945   next_index = node->cached_next_index;
946
947   while (n_left_from > 0)
948     {
949       u32 n_left_to_next;
950
951       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
952
953       while (n_left_from > 0 && n_left_to_next > 0)
954         {
955           u32 bi0;
956           vlib_buffer_t *b0;
957           u32 next0, sw_if_index0, rx_fib_index0, iph_offset0 = 0, proto0,
958             new_addr0, old_addr0;
959           u16 old_port0, new_port0;
960           ip4_header_t *ip0;
961           udp_header_t *udp0;
962           tcp_header_t *tcp0;
963           snat_session_t *s0 = 0;
964           clib_bihash_kv_16_8_t kv0, value0;
965           ip_csum_t sum0;
966
967           /* speculatively enqueue b0 to the current next frame */
968           bi0 = from[0];
969           to_next[0] = bi0;
970           from += 1;
971           to_next += 1;
972           n_left_from -= 1;
973           n_left_to_next -= 1;
974
975           b0 = vlib_get_buffer (vm, bi0);
976
977           if (is_output_feature)
978             {
979               vnet_feature_next (&vnet_buffer2 (b0)->nat.arc_next, b0);
980               iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
981             }
982
983           next0 = vnet_buffer2 (b0)->nat.arc_next;
984
985           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
986                                   iph_offset0);
987
988           sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
989           rx_fib_index0 =
990             fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
991                                                  sw_if_index0);
992
993           if (PREDICT_FALSE (ip0->ttl == 1))
994             {
995               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
996               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
997                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
998                                            0);
999               next0 = NAT_NEXT_ICMP_ERROR;
1000               goto trace0;
1001             }
1002
1003           udp0 = ip4_next_header (ip0);
1004           tcp0 = (tcp_header_t *) udp0;
1005           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1006
1007           if (PREDICT_FALSE (proto0 == ~0))
1008             {
1009               next0 = def_slow;
1010               goto trace0;
1011             }
1012
1013           if (is_output_feature)
1014             {
1015               if (PREDICT_FALSE (nat_not_translate_output_feature_fwd
1016                                  (sm, ip0, thread_index, now, vm, b0)))
1017                 goto trace0;
1018             }
1019
1020           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1021             {
1022               next0 = def_slow;
1023               goto trace0;
1024             }
1025
1026           make_ed_kv (&ip0->src_address, &ip0->dst_address,
1027                       ip0->protocol, rx_fib_index0,
1028                       vnet_buffer (b0)->ip.reass.l4_src_port,
1029                       vnet_buffer (b0)->ip.reass.l4_dst_port, ~0ULL, &kv0);
1030
1031           // lookup for session
1032           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
1033             {
1034               // session does not exist go slow path
1035               next0 = def_slow;
1036               goto trace0;
1037             }
1038           s0 = pool_elt_at_index (tsm->sessions, value0.value);
1039
1040           if (s0->tcp_close_timestamp)
1041             {
1042               if (now >= s0->tcp_close_timestamp)
1043                 {
1044                   // session is closed, go slow path
1045                   next0 = def_slow;
1046                 }
1047               else
1048                 {
1049                   // session in transitory timeout, drop
1050                   b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
1051                   next0 = NAT_NEXT_DROP;
1052                 }
1053               goto trace0;
1054             }
1055
1056           // drop if session expired
1057           u64 sess_timeout_time;
1058           sess_timeout_time = s0->last_heard +
1059             (f64) nat44_session_get_timeout (sm, s0);
1060           if (now >= sess_timeout_time)
1061             {
1062               nat_free_session_data (sm, s0, thread_index, 0);
1063               nat44_ed_delete_session (sm, s0, thread_index, 1);
1064               // session is closed, go slow path
1065               next0 = def_slow;
1066               goto trace0;
1067             }
1068
1069           b0->flags |= VNET_BUFFER_F_IS_NATED;
1070
1071           if (!is_output_feature)
1072             vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1073
1074           old_addr0 = ip0->src_address.as_u32;
1075           new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
1076           sum0 = ip0->checksum;
1077           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1078                                  src_address);
1079           if (PREDICT_FALSE (is_twice_nat_session (s0)))
1080             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1081                                    s0->ext_host_addr.as_u32, ip4_header_t,
1082                                    dst_address);
1083           ip0->checksum = ip_csum_fold (sum0);
1084
1085           old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port;
1086
1087           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
1088             {
1089               if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1090                 {
1091                   new_port0 = udp0->src_port = s0->out2in.port;
1092                   sum0 = tcp0->checksum;
1093                   sum0 =
1094                     ip_csum_update (sum0, old_addr0, new_addr0,
1095                                     ip4_header_t, dst_address);
1096                   sum0 =
1097                     ip_csum_update (sum0, old_port0, new_port0,
1098                                     ip4_header_t, length);
1099                   if (PREDICT_FALSE (is_twice_nat_session (s0)))
1100                     {
1101                       sum0 =
1102                         ip_csum_update (sum0, ip0->dst_address.as_u32,
1103                                         s0->ext_host_addr.as_u32,
1104                                         ip4_header_t, dst_address);
1105                       sum0 =
1106                         ip_csum_update (sum0,
1107                                         vnet_buffer (b0)->ip.
1108                                         reass.l4_dst_port, s0->ext_host_port,
1109                                         ip4_header_t, length);
1110                       tcp0->dst_port = s0->ext_host_port;
1111                       ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1112                     }
1113                   mss_clamping (sm, tcp0, &sum0);
1114                   tcp0->checksum = ip_csum_fold (sum0);
1115                 }
1116               tcp_packets++;
1117               if (nat44_set_tcp_session_state_i2o
1118                   (sm, now, s0, b0, thread_index))
1119                 goto trace0;
1120             }
1121           else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
1122                    && udp0->checksum)
1123             {
1124               new_port0 = udp0->src_port = s0->out2in.port;
1125               sum0 = udp0->checksum;
1126               sum0 =
1127                 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1128                                 dst_address);
1129               sum0 =
1130                 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
1131                                 length);
1132               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1133                 {
1134                   sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1135                                          s0->ext_host_addr.as_u32,
1136                                          ip4_header_t, dst_address);
1137                   sum0 =
1138                     ip_csum_update (sum0,
1139                                     vnet_buffer (b0)->ip.reass.l4_dst_port,
1140                                     s0->ext_host_port, ip4_header_t, length);
1141                   udp0->dst_port = s0->ext_host_port;
1142                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1143                 }
1144               udp0->checksum = ip_csum_fold (sum0);
1145               udp_packets++;
1146             }
1147           else
1148             {
1149               if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1150                 {
1151                   new_port0 = udp0->src_port = s0->out2in.port;
1152                   if (PREDICT_FALSE (is_twice_nat_session (s0)))
1153                     {
1154                       udp0->dst_port = s0->ext_host_port;
1155                       ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1156                     }
1157                   udp_packets++;
1158                 }
1159             }
1160
1161           /* Accounting */
1162           nat44_session_update_counters (s0, now,
1163                                          vlib_buffer_length_in_chain
1164                                          (vm, b0), thread_index);
1165           /* Per-user LRU list maintenance */
1166           nat44_session_update_lru (sm, s0, thread_index);
1167
1168         trace0:
1169           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1170                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1171             {
1172               nat_in2out_ed_trace_t *t =
1173                 vlib_add_trace (vm, node, b0, sizeof (*t));
1174               t->sw_if_index = sw_if_index0;
1175               t->next_index = next0;
1176               t->is_slow_path = 0;
1177
1178               if (s0)
1179                 t->session_index = s0 - tsm->sessions;
1180               else
1181                 t->session_index = ~0;
1182             }
1183
1184           pkts_processed += next0 == vnet_buffer2 (b0)->nat.arc_next;
1185           /* verify speculative enqueue, maybe switch current next frame */
1186           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1187                                            to_next, n_left_to_next,
1188                                            bi0, next0);
1189         }
1190
1191       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1192     }
1193
1194   vlib_node_increment_counter (vm, stats_node_index,
1195                                NAT_IN2OUT_ED_ERROR_IN2OUT_PACKETS,
1196                                pkts_processed);
1197   vlib_node_increment_counter (vm, stats_node_index,
1198                                NAT_IN2OUT_ED_ERROR_TCP_PACKETS, tcp_packets);
1199   vlib_node_increment_counter (vm, stats_node_index,
1200                                NAT_IN2OUT_ED_ERROR_UDP_PACKETS, udp_packets);
1201   vlib_node_increment_counter (vm, stats_node_index,
1202                                NAT_IN2OUT_ED_ERROR_ICMP_PACKETS,
1203                                icmp_packets);
1204   vlib_node_increment_counter (vm, stats_node_index,
1205                                NAT_IN2OUT_ED_ERROR_OTHER_PACKETS,
1206                                other_packets);
1207   return frame->n_vectors;
1208 }
1209
1210 static inline uword
1211 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm,
1212                                           vlib_node_runtime_t * node,
1213                                           vlib_frame_t * frame,
1214                                           int is_output_feature)
1215 {
1216   u32 n_left_from, *from, *to_next, pkts_processed = 0, stats_node_index;
1217   nat_next_t next_index;
1218   snat_main_t *sm = &snat_main;
1219   f64 now = vlib_time_now (vm);
1220   u32 thread_index = vm->thread_index;
1221   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1222   u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets = 0;
1223
1224   stats_node_index = sm->ed_in2out_slowpath_node_index;
1225
1226   from = vlib_frame_vector_args (frame);
1227   n_left_from = frame->n_vectors;
1228   next_index = node->cached_next_index;
1229
1230   while (n_left_from > 0)
1231     {
1232       u32 n_left_to_next;
1233
1234       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1235
1236       while (n_left_from > 0 && n_left_to_next > 0)
1237         {
1238           u32 bi0;
1239           vlib_buffer_t *b0;
1240           u32 next0, sw_if_index0, rx_fib_index0, iph_offset0 = 0, proto0,
1241             new_addr0, old_addr0;
1242           u16 old_port0, new_port0;
1243           ip4_header_t *ip0;
1244           udp_header_t *udp0;
1245           tcp_header_t *tcp0;
1246           icmp46_header_t *icmp0;
1247           snat_session_t *s0 = 0;
1248           clib_bihash_kv_16_8_t kv0, value0;
1249           ip_csum_t sum0;
1250
1251           /* speculatively enqueue b0 to the current next frame */
1252           bi0 = from[0];
1253           to_next[0] = bi0;
1254           from += 1;
1255           to_next += 1;
1256           n_left_from -= 1;
1257           n_left_to_next -= 1;
1258
1259           b0 = vlib_get_buffer (vm, bi0);
1260
1261           if (is_output_feature)
1262             iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1263
1264           next0 = vnet_buffer2 (b0)->nat.arc_next;
1265
1266           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1267                                   iph_offset0);
1268
1269           sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1270           rx_fib_index0 =
1271             fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1272                                                  sw_if_index0);
1273
1274           if (PREDICT_FALSE (ip0->ttl == 1))
1275             {
1276               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1277               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1278                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1279                                            0);
1280               next0 = NAT_NEXT_ICMP_ERROR;
1281               goto trace0;
1282             }
1283
1284           udp0 = ip4_next_header (ip0);
1285           tcp0 = (tcp_header_t *) udp0;
1286           icmp0 = (icmp46_header_t *) udp0;
1287           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1288
1289           if (PREDICT_FALSE (proto0 == ~0))
1290             {
1291               s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0,
1292                                                   rx_fib_index0,
1293                                                   thread_index, now,
1294                                                   vm, node);
1295               if (!s0)
1296                 next0 = NAT_NEXT_DROP;
1297
1298               other_packets++;
1299               goto trace0;
1300             }
1301
1302           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1303             {
1304               next0 = icmp_in2out_ed_slow_path
1305                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
1306                  node, next0, now, thread_index, &s0);
1307               icmp_packets++;
1308               goto trace0;
1309             }
1310
1311           // move down
1312           make_ed_kv (&ip0->src_address, &ip0->dst_address,
1313                       ip0->protocol, rx_fib_index0,
1314                       vnet_buffer (b0)->ip.reass.l4_src_port,
1315                       vnet_buffer (b0)->ip.reass.l4_dst_port, ~0ULL, &kv0);
1316
1317           if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
1318             {
1319               s0 = pool_elt_at_index (tsm->sessions, value0.value);
1320
1321               if (s0->tcp_close_timestamp && now >= s0->tcp_close_timestamp)
1322                 {
1323                   nat_free_session_data (sm, s0, thread_index, 0);
1324                   nat44_ed_delete_session (sm, s0, thread_index, 1);
1325                   s0 = NULL;
1326                 }
1327             }
1328
1329           if (!s0)
1330             {
1331               if (is_output_feature)
1332                 {
1333                   if (PREDICT_FALSE
1334                       (nat44_ed_not_translate_output_feature
1335                        (sm, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1336                         vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1337                         sw_if_index0,
1338                         vnet_buffer (b0)->sw_if_index[VLIB_TX])))
1339                     goto trace0;
1340
1341                   /*
1342                    * Send DHCP packets to the ipv4 stack, or we won't
1343                    * be able to use dhcp client on the outside interface
1344                    */
1345                   if (PREDICT_FALSE
1346                       (proto0 == SNAT_PROTOCOL_UDP
1347                        && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1348                            clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server))
1349                        && ip0->dst_address.as_u32 == 0xffffffff))
1350                     goto trace0;
1351                 }
1352               else
1353                 {
1354                   if (PREDICT_FALSE
1355                       (nat44_ed_not_translate
1356                        (sm, node, sw_if_index0, ip0, proto0, rx_fib_index0,
1357                         thread_index)))
1358                     goto trace0;
1359                 }
1360
1361               next0 =
1362                 slow_path_ed (sm, b0, ip0->src_address, ip0->dst_address,
1363                               vnet_buffer (b0)->ip.reass.l4_src_port,
1364                               vnet_buffer (b0)->ip.reass.l4_dst_port,
1365                               ip0->protocol, rx_fib_index0, &s0, node, next0,
1366                               thread_index, now);
1367
1368               if (PREDICT_FALSE (next0 == NAT_NEXT_DROP))
1369                 goto trace0;
1370
1371               if (PREDICT_FALSE (!s0))
1372                 goto trace0;
1373
1374             }
1375
1376           b0->flags |= VNET_BUFFER_F_IS_NATED;
1377
1378           if (!is_output_feature)
1379             vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1380
1381           old_addr0 = ip0->src_address.as_u32;
1382           new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
1383           sum0 = ip0->checksum;
1384           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1385                                  src_address);
1386           if (PREDICT_FALSE (is_twice_nat_session (s0)))
1387             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1388                                    s0->ext_host_addr.as_u32, ip4_header_t,
1389                                    dst_address);
1390           ip0->checksum = ip_csum_fold (sum0);
1391
1392           old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port;
1393
1394           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
1395             {
1396               if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1397                 {
1398                   new_port0 = udp0->src_port = s0->out2in.port;
1399                   sum0 = tcp0->checksum;
1400                   sum0 =
1401                     ip_csum_update (sum0, old_addr0, new_addr0,
1402                                     ip4_header_t, dst_address);
1403                   sum0 =
1404                     ip_csum_update (sum0, old_port0, new_port0,
1405                                     ip4_header_t, length);
1406                   if (PREDICT_FALSE (is_twice_nat_session (s0)))
1407                     {
1408                       sum0 =
1409                         ip_csum_update (sum0, ip0->dst_address.as_u32,
1410                                         s0->ext_host_addr.as_u32,
1411                                         ip4_header_t, dst_address);
1412                       sum0 =
1413                         ip_csum_update (sum0,
1414                                         vnet_buffer (b0)->ip.
1415                                         reass.l4_dst_port, s0->ext_host_port,
1416                                         ip4_header_t, length);
1417                       tcp0->dst_port = s0->ext_host_port;
1418                       ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1419                     }
1420                   mss_clamping (sm, tcp0, &sum0);
1421                   tcp0->checksum = ip_csum_fold (sum0);
1422                 }
1423               tcp_packets++;
1424               if (nat44_set_tcp_session_state_i2o
1425                   (sm, now, s0, b0, thread_index))
1426                 goto trace0;
1427             }
1428           else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
1429                    && udp0->checksum)
1430             {
1431               new_port0 = udp0->src_port = s0->out2in.port;
1432               sum0 = udp0->checksum;
1433               sum0 =
1434                 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1435                                 dst_address);
1436               sum0 =
1437                 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
1438                                 length);
1439               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1440                 {
1441                   sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1442                                          s0->ext_host_addr.as_u32,
1443                                          ip4_header_t, dst_address);
1444                   sum0 =
1445                     ip_csum_update (sum0,
1446                                     vnet_buffer (b0)->ip.reass.l4_dst_port,
1447                                     s0->ext_host_port, ip4_header_t, length);
1448                   udp0->dst_port = s0->ext_host_port;
1449                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1450                 }
1451               udp0->checksum = ip_csum_fold (sum0);
1452               udp_packets++;
1453             }
1454           else
1455             {
1456               if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1457                 {
1458                   new_port0 = udp0->src_port = s0->out2in.port;
1459                   if (PREDICT_FALSE (is_twice_nat_session (s0)))
1460                     {
1461                       udp0->dst_port = s0->ext_host_port;
1462                       ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1463                     }
1464                   udp_packets++;
1465                 }
1466             }
1467
1468           /* Accounting */
1469           nat44_session_update_counters (s0, now,
1470                                          vlib_buffer_length_in_chain
1471                                          (vm, b0), thread_index);
1472           /* Per-user LRU list maintenance */
1473           nat44_session_update_lru (sm, s0, thread_index);
1474
1475         trace0:
1476           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1477                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1478             {
1479               nat_in2out_ed_trace_t *t =
1480                 vlib_add_trace (vm, node, b0, sizeof (*t));
1481               t->sw_if_index = sw_if_index0;
1482               t->next_index = next0;
1483               t->is_slow_path = 1;
1484
1485               if (s0)
1486                 t->session_index = s0 - tsm->sessions;
1487               else
1488                 t->session_index = ~0;
1489             }
1490
1491           pkts_processed += next0 == vnet_buffer2 (b0)->nat.arc_next;
1492
1493           /* verify speculative enqueue, maybe switch current next frame */
1494           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1495                                            to_next, n_left_to_next,
1496                                            bi0, next0);
1497         }
1498
1499       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1500     }
1501
1502   vlib_node_increment_counter (vm, stats_node_index,
1503                                NAT_IN2OUT_ED_ERROR_IN2OUT_PACKETS,
1504                                pkts_processed);
1505   vlib_node_increment_counter (vm, stats_node_index,
1506                                NAT_IN2OUT_ED_ERROR_TCP_PACKETS, tcp_packets);
1507   vlib_node_increment_counter (vm, stats_node_index,
1508                                NAT_IN2OUT_ED_ERROR_UDP_PACKETS, udp_packets);
1509   vlib_node_increment_counter (vm, stats_node_index,
1510                                NAT_IN2OUT_ED_ERROR_ICMP_PACKETS,
1511                                icmp_packets);
1512   vlib_node_increment_counter (vm, stats_node_index,
1513                                NAT_IN2OUT_ED_ERROR_OTHER_PACKETS,
1514                                other_packets);
1515   return frame->n_vectors;
1516 }
1517
1518 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1519                                      vlib_node_runtime_t * node,
1520                                      vlib_frame_t * frame)
1521 {
1522   return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0);
1523 }
1524
1525 /* *INDENT-OFF* */
1526 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1527   .name = "nat44-ed-in2out",
1528   .vector_size = sizeof (u32),
1529   .sibling_of = "nat-default",
1530   .format_trace = format_nat_in2out_ed_trace,
1531   .type = VLIB_NODE_TYPE_INTERNAL,
1532   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1533   .error_strings = nat_in2out_ed_error_strings,
1534   .runtime_data_bytes = sizeof (snat_runtime_t),
1535 };
1536 /* *INDENT-ON* */
1537
1538 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1539                                             vlib_node_runtime_t * node,
1540                                             vlib_frame_t * frame)
1541 {
1542   return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1);
1543 }
1544
1545 /* *INDENT-OFF* */
1546 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1547   .name = "nat44-ed-in2out-output",
1548   .vector_size = sizeof (u32),
1549   .sibling_of = "nat-default",
1550   .format_trace = format_nat_in2out_ed_trace,
1551   .type = VLIB_NODE_TYPE_INTERNAL,
1552   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1553   .error_strings = nat_in2out_ed_error_strings,
1554   .runtime_data_bytes = sizeof (snat_runtime_t),
1555 };
1556 /* *INDENT-ON* */
1557
1558 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1559                                               vlib_node_runtime_t *
1560                                               node, vlib_frame_t * frame)
1561 {
1562   return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0);
1563 }
1564
1565 /* *INDENT-OFF* */
1566 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1567   .name = "nat44-ed-in2out-slowpath",
1568   .vector_size = sizeof (u32),
1569   .sibling_of = "nat-default",
1570   .format_trace = format_nat_in2out_ed_trace,
1571   .type = VLIB_NODE_TYPE_INTERNAL,
1572   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1573   .error_strings = nat_in2out_ed_error_strings,
1574   .runtime_data_bytes = sizeof (snat_runtime_t),
1575 };
1576 /* *INDENT-ON* */
1577
1578 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1579                                                      vlib_node_runtime_t
1580                                                      * node,
1581                                                      vlib_frame_t * frame)
1582 {
1583   return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1);
1584 }
1585
1586 /* *INDENT-OFF* */
1587 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1588   .name = "nat44-ed-in2out-output-slowpath",
1589   .vector_size = sizeof (u32),
1590   .sibling_of = "nat-default",
1591   .format_trace = format_nat_in2out_ed_trace,
1592   .type = VLIB_NODE_TYPE_INTERNAL,
1593   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1594   .error_strings = nat_in2out_ed_error_strings,
1595   .runtime_data_bytes = sizeof (snat_runtime_t),
1596 };
1597 /* *INDENT-ON* */
1598
1599 static u8 *
1600 format_nat_pre_trace (u8 * s, va_list * args)
1601 {
1602   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1603   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1604   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1605   return format (s, "in2out next_index %d", t->next_index);
1606 }
1607
1608 VLIB_NODE_FN (nat_pre_in2out_node)
1609   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1610 {
1611   return nat_pre_node_fn_inline (vm, node, frame,
1612                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1613 }
1614
1615 /* *INDENT-OFF* */
1616 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1617   .name = "nat-pre-in2out",
1618   .vector_size = sizeof (u32),
1619   .sibling_of = "nat-default",
1620   .format_trace = format_nat_pre_trace,
1621   .type = VLIB_NODE_TYPE_INTERNAL,
1622   .n_errors = 0,
1623 };
1624 /* *INDENT-ON* */
1625
1626 /*
1627  * fd.io coding-style-patch-verification: ON
1628  *
1629  * Local Variables:
1630  * eval: (c-set-style "gnu")
1631  * End:
1632  */