nat: improve outside port selection & perf
[vpp.git] / src / plugins / nat / in2out_ed.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/pg/pg.h>
23 #include <vnet/ip/ip.h>
24 #include <vnet/ethernet/ethernet.h>
25 #include <vnet/fib/ip4_fib.h>
26 #include <vnet/udp/udp.h>
27 #include <vppinfra/error.h>
28 #include <nat/nat.h>
29 #include <nat/nat_ipfix_logging.h>
30 #include <nat/nat_inlines.h>
31 #include <nat/nat44/inlines.h>
32 #include <nat/nat_syslog.h>
33 #include <nat/nat_ha.h>
34 #include <nat/nat44/ed_inlines.h>
35 #include <nat/lib/nat_inlines.h>
36
37 static char *nat_in2out_ed_error_strings[] = {
38 #define _(sym,string) string,
39   foreach_nat_in2out_ed_error
40 #undef _
41 };
42
43 typedef struct
44 {
45   u32 sw_if_index;
46   u32 next_index;
47   u32 session_index;
48   u32 is_slow_path;
49 } nat_in2out_ed_trace_t;
50
51 static u8 *
52 format_nat_in2out_ed_trace (u8 * s, va_list * args)
53 {
54   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
55   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
56   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
57   char *tag;
58
59   tag =
60     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
61     "NAT44_IN2OUT_ED_FAST_PATH";
62
63   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
64               t->sw_if_index, t->next_index, t->session_index);
65
66   return s;
67 }
68
69 #ifndef CLIB_MARCH_VARIANT
70 int
71 nat44_i2o_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg)
72 {
73   snat_main_t *sm = &snat_main;
74   nat44_is_idle_session_ctx_t *ctx = arg;
75   snat_session_t *s;
76   u64 sess_timeout_time;
77   u8 proto;
78   u16 r_port, l_port;
79   ip4_address_t *l_addr, *r_addr;
80   u32 fib_index;
81   clib_bihash_kv_16_8_t ed_kv;
82   int i;
83   snat_address_t *a;
84   snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
85                                                        ctx->thread_index);
86
87   ASSERT (ctx->thread_index == ed_value_get_thread_index (kv));
88   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (kv));
89   sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s);
90   if (ctx->now >= sess_timeout_time)
91     {
92       if (is_fwd_bypass_session (s))
93         goto delete;
94
95       l_addr = &s->out2in.addr;
96       r_addr = &s->ext_host_addr;
97       fib_index = s->out2in.fib_index;
98       if (snat_is_unk_proto_session (s))
99         {
100           proto = s->in2out.port;
101           r_port = 0;
102           l_port = 0;
103         }
104       else
105         {
106           proto = nat_proto_to_ip_proto (s->nat_proto);
107           l_port = s->out2in.port;
108           r_port = s->ext_host_port;
109         }
110       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
111       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0))
112         nat_elog_warn ("out2in_ed key del failed");
113
114       if (snat_is_unk_proto_session (s))
115         goto delete;
116
117       snat_ipfix_logging_nat44_ses_delete (ctx->thread_index,
118                                            s->in2out.addr.as_u32,
119                                            s->out2in.addr.as_u32,
120                                            s->nat_proto,
121                                            s->in2out.port,
122                                            s->out2in.port,
123                                            s->in2out.fib_index);
124
125       nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
126                              &s->in2out.addr, s->in2out.port,
127                              &s->ext_host_nat_addr, s->ext_host_nat_port,
128                              &s->out2in.addr, s->out2in.port,
129                              &s->ext_host_addr, s->ext_host_port,
130                              s->nat_proto, is_twice_nat_session (s));
131
132       nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
133                    s->ext_host_port, s->nat_proto, s->out2in.fib_index,
134                    ctx->thread_index);
135
136       if (is_twice_nat_session (s))
137         {
138           for (i = 0; i < vec_len (sm->twice_nat_addresses); i++)
139             {
140               // TODO FIXME this is obviously broken - which address should be
141               // freed here?!
142               a = sm->twice_nat_addresses + i;
143               if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32)
144                 {
145                   snat_free_outside_address_and_port (sm->twice_nat_addresses,
146                                                       ctx->thread_index,
147                                                       &s->ext_host_nat_addr,
148                                                       s->ext_host_nat_port,
149                                                       s->nat_proto);
150                   break;
151                 }
152             }
153         }
154
155       if (snat_is_session_static (s))
156         goto delete;
157
158       snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
159                                           &s->out2in.addr, s->out2in.port,
160                                           s->nat_proto);
161     delete:
162       nat_ed_session_delete (sm, s, ctx->thread_index, 1);
163       return 1;
164     }
165
166   return 0;
167 }
168 #endif
169
170 static inline u32
171 icmp_in2out_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0,
172                           ip4_header_t * ip0, icmp46_header_t * icmp0,
173                           u32 sw_if_index0, u32 rx_fib_index0,
174                           vlib_node_runtime_t * node, u32 next0, f64 now,
175                           u32 thread_index, snat_session_t ** p_s0)
176 {
177   vlib_main_t *vm = vlib_get_main ();
178
179   next0 = icmp_in2out (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
180                        next0, thread_index, p_s0, 0);
181   snat_session_t *s0 = *p_s0;
182   if (PREDICT_TRUE (next0 != NAT_NEXT_DROP && s0))
183     {
184       /* Accounting */
185       nat44_session_update_counters (s0, now,
186                                      vlib_buffer_length_in_chain
187                                      (vm, b0), thread_index);
188       /* Per-user LRU list maintenance */
189       nat44_session_update_lru (sm, s0, thread_index);
190     }
191   return next0;
192 }
193
194 static int
195 nat_ed_alloc_addr_and_port (snat_main_t * sm, u32 rx_fib_index,
196                             u32 nat_proto, u32 thread_index,
197                             ip4_address_t r_addr, u16 r_port, u8 proto,
198                             u16 port_per_thread, u32 snat_thread_index,
199                             snat_session_t * s,
200                             ip4_address_t * outside_addr,
201                             u16 * outside_port,
202                             clib_bihash_kv_16_8_t * out2in_ed_kv)
203 {
204   int i;
205   snat_address_t *a, *ga = 0;
206   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
207
208   const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
209
210   for (i = 0; i < vec_len (sm->addresses); i++)
211     {
212       a = sm->addresses + i;
213       switch (nat_proto)
214         {
215 #define _(N, j, n, unused)                                                   \
216   case NAT_PROTOCOL_##N:                                                     \
217     if (a->fib_index == rx_fib_index)                                        \
218       {                                                                      \
219         /* first try port suggested by caller */                             \
220         u16 port = clib_net_to_host_u16 (*outside_port);                   \
221         u16 port_offset = port - port_thread_offset;                         \
222         if (port <= port_thread_offset ||                                    \
223             port > port_thread_offset + port_per_thread)                     \
224           {                                                                  \
225             /* need to pick a different port, suggested port doesn't fit in  \
226              * this thread's port range */                                   \
227             port_offset = snat_random_port (1, port_per_thread);             \
228             port = port_thread_offset + port_offset;                         \
229           }                                                                  \
230         u16 attempts = port_per_thread;                                      \
231         do                                                                   \
232           {                                                                  \
233             init_ed_kv (out2in_ed_kv, a->addr, clib_host_to_net_u16 (port),  \
234                         r_addr, r_port, s->out2in.fib_index, proto,          \
235                         thread_index, s - tsm->sessions);                    \
236             int rv = clib_bihash_add_del_16_8 (&sm->out2in_ed, out2in_ed_kv, \
237                                                2 /* is_add */);              \
238             if (0 == rv)                                                     \
239               {                                                              \
240                 ++a->busy_##n##_port_refcounts[port];                        \
241                 a->busy_##n##_ports_per_thread[thread_index]++;              \
242                 a->busy_##n##_ports++;                                       \
243                 *outside_addr = a->addr;                                   \
244                 *outside_port = clib_host_to_net_u16 (port);               \
245                 return 0;                                                    \
246               }                                                              \
247             port_offset = (port_offset + 1) % port_per_thread;               \
248             port = port_thread_offset + port_offset;                         \
249             --attempts;                                                      \
250           }                                                                  \
251         while (attempts > 0);                                                \
252       }                                                                      \
253     else if (a->fib_index == ~0)                                             \
254       {                                                                      \
255         ga = a;                                                              \
256       }                                                                      \
257     break;
258
259           foreach_nat_protocol;
260         default:
261           nat_elog_info ("unknown protocol");
262           return 1;
263         }
264     }
265
266   if (ga)
267     {
268       /* fake fib_index to reuse macro */
269       rx_fib_index = ~0;
270       a = ga;
271       switch (nat_proto)
272         {
273           foreach_nat_protocol;
274         default:
275           nat_elog_info ("unknown protocol");
276           return 1;
277         }
278     }
279
280 #undef _
281
282   /* Totally out of translations to use... */
283   snat_ipfix_logging_addresses_exhausted (thread_index, 0);
284   return 1;
285 }
286
287 static_always_inline u32
288 nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr)
289 {
290   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
291   nat_outside_fib_t *outside_fib;
292   fib_prefix_t pfx = {
293     .fp_proto = FIB_PROTOCOL_IP4,
294     .fp_len = 32,
295     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
296     ,
297   };
298   // TODO: multiple vrfs none can resolve addr
299   /* *INDENT-OFF* */
300   vec_foreach (outside_fib, sm->outside_fibs)
301     {
302       fei = fib_table_lookup (outside_fib->fib_index, &pfx);
303       if (FIB_NODE_INDEX_INVALID != fei)
304         {
305           if (fib_entry_get_resolving_interface (fei) != ~0)
306             {
307               return outside_fib->fib_index;
308             }
309         }
310     }
311   /* *INDENT-ON* */
312   return ~0;
313 }
314
315 static u32
316 slow_path_ed (snat_main_t * sm,
317               vlib_buffer_t * b,
318               ip4_address_t l_addr,
319               ip4_address_t r_addr,
320               u16 l_port,
321               u16 r_port,
322               u8 proto,
323               u32 rx_fib_index,
324               snat_session_t ** sessionp,
325               vlib_node_runtime_t * node, u32 next, u32 thread_index, f64 now)
326 {
327   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
328   clib_bihash_kv_16_8_t out2in_ed_kv;
329   nat44_is_idle_session_ctx_t ctx;
330   ip4_address_t outside_addr;
331   u16 outside_port;
332   u8 identity_nat;
333
334   u32 nat_proto = ip_proto_to_nat_proto (proto);
335   snat_session_t *s = NULL;
336   lb_nat_type_t lb = 0;
337
338   if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP))
339     {
340       if (PREDICT_FALSE
341           (!tcp_flags_is_init
342            (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
343         {
344           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
345           return NAT_NEXT_DROP;
346         }
347     }
348
349   if (PREDICT_FALSE
350       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
351     {
352       if (!nat_lru_free_one (sm, thread_index, now))
353         {
354           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
355           nat_ipfix_logging_max_sessions (thread_index,
356                                           sm->max_translations_per_thread);
357           nat_elog_notice ("maximum sessions exceeded");
358           return NAT_NEXT_DROP;
359         }
360     }
361
362   ip4_address_t sm_addr;
363   u16 sm_port;
364   u32 sm_fib_index;
365   /* First try to match static mapping by local address and port */
366   if (snat_static_mapping_match
367       (sm, l_addr, l_port, rx_fib_index, nat_proto, &sm_addr, &sm_port,
368        &sm_fib_index, 0, 0, 0, &lb, 0, &identity_nat))
369     {
370       s = nat_ed_session_alloc (sm, thread_index, now, proto);
371       if (!s)
372         {
373           nat_elog_warn ("create NAT session failed");
374           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_USER_SESS_EXCEEDED];
375           return NAT_NEXT_DROP;
376         }
377       s->in2out.addr = l_addr;
378       s->in2out.port = l_port;
379       s->nat_proto = nat_proto;
380       s->in2out.fib_index = rx_fib_index;
381       s->out2in.fib_index = sm->outside_fib_index;
382
383       switch (vec_len (sm->outside_fibs))
384         {
385         case 0:
386           s->out2in.fib_index = sm->outside_fib_index;
387           break;
388         case 1:
389           s->out2in.fib_index = sm->outside_fibs[0].fib_index;
390           break;
391         default:
392           s->out2in.fib_index = nat_outside_fib_index_lookup (sm, r_addr);
393           break;
394         }
395
396       /* Try to create dynamic translation */
397       outside_port = l_port;    // suggest using local port to allocation function
398       if (nat_ed_alloc_addr_and_port (sm, rx_fib_index, nat_proto,
399                                       thread_index, r_addr, r_port, proto,
400                                       sm->port_per_thread,
401                                       tsm->snat_thread_index, s,
402                                       &outside_addr,
403                                       &outside_port, &out2in_ed_kv))
404         {
405           nat_elog_notice ("addresses exhausted");
406           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
407           nat_ed_session_delete (sm, s, thread_index, 1);
408           return NAT_NEXT_DROP;
409         }
410       s->out2in.addr = outside_addr;
411       s->out2in.port = outside_port;
412     }
413   else
414     {
415       if (PREDICT_FALSE (identity_nat))
416         {
417           *sessionp = NULL;
418           return next;
419         }
420       s = nat_ed_session_alloc (sm, thread_index, now, proto);
421       if (!s)
422         {
423           nat_elog_warn ("create NAT session failed");
424           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_USER_SESS_EXCEEDED];
425           return NAT_NEXT_DROP;
426         }
427       s->out2in.addr = sm_addr;
428       s->out2in.port = sm_port;
429       s->in2out.addr = l_addr;
430       s->in2out.port = l_port;
431       s->nat_proto = nat_proto;
432       s->in2out.fib_index = rx_fib_index;
433       s->out2in.fib_index = sm->outside_fib_index;
434       switch (vec_len (sm->outside_fibs))
435         {
436         case 0:
437           s->out2in.fib_index = sm->outside_fib_index;
438           break;
439         case 1:
440           s->out2in.fib_index = sm->outside_fibs[0].fib_index;
441           break;
442         default:
443           s->out2in.fib_index = nat_outside_fib_index_lookup (sm, r_addr);
444           break;
445         }
446
447       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
448
449       init_ed_kv (&out2in_ed_kv, sm_addr, sm_port, r_addr, r_port,
450                   s->out2in.fib_index, proto, thread_index,
451                   s - tsm->sessions);
452       if (clib_bihash_add_or_overwrite_stale_16_8
453           (&sm->out2in_ed, &out2in_ed_kv, nat44_o2i_ed_is_idle_session_cb,
454            &ctx))
455         nat_elog_notice ("out2in-ed key add failed");
456     }
457
458   if (lb)
459     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
460   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
461   s->ext_host_addr = r_addr;
462   s->ext_host_port = r_port;
463
464   clib_bihash_kv_16_8_t in2out_ed_kv;
465   init_ed_kv (&in2out_ed_kv, l_addr, l_port, r_addr, r_port, rx_fib_index,
466               proto, thread_index, s - tsm->sessions);
467   ctx.now = now;
468   ctx.thread_index = thread_index;
469   if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, &in2out_ed_kv,
470                                                nat44_i2o_ed_is_idle_session_cb,
471                                                &ctx))
472     nat_elog_notice ("in2out-ed key add failed");
473
474   *sessionp = s;
475
476   /* log NAT event */
477   snat_ipfix_logging_nat44_ses_create (thread_index,
478                                        s->in2out.addr.as_u32,
479                                        s->out2in.addr.as_u32,
480                                        s->nat_proto,
481                                        s->in2out.port,
482                                        s->out2in.port, s->in2out.fib_index);
483
484   nat_syslog_nat44_sadd (s->user_index, s->in2out.fib_index,
485                          &s->in2out.addr, s->in2out.port,
486                          &s->ext_host_nat_addr, s->ext_host_nat_port,
487                          &s->out2in.addr, s->out2in.port,
488                          &s->ext_host_addr, s->ext_host_port, s->nat_proto,
489                          0);
490
491   nat_ha_sadd (&s->in2out.addr, s->in2out.port, &s->out2in.addr,
492                s->out2in.port, &s->ext_host_addr, s->ext_host_port,
493                &s->ext_host_nat_addr, s->ext_host_nat_port,
494                s->nat_proto, s->in2out.fib_index, s->flags, thread_index, 0);
495
496   return next;
497 }
498
499 static_always_inline int
500 nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t * node,
501                         u32 sw_if_index, ip4_header_t * ip, u32 proto,
502                         u32 rx_fib_index, u32 thread_index)
503 {
504   udp_header_t *udp = ip4_next_header (ip);
505   clib_bihash_kv_16_8_t kv, value;
506
507   init_ed_k (&kv, ip->dst_address, udp->dst_port, ip->src_address,
508              udp->src_port, sm->outside_fib_index, ip->protocol);
509
510   /* NAT packet aimed at external address if has active sessions */
511   if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
512     {
513       /* or is static mappings */
514       ip4_address_t dummy_addr;
515       u16 dummy_port;
516       u32 dummy_fib_index;
517       if (!snat_static_mapping_match
518           (sm, ip->dst_address, udp->dst_port, sm->outside_fib_index, proto,
519            &dummy_addr, &dummy_port, &dummy_fib_index, 1, 0, 0, 0, 0, 0))
520         return 0;
521     }
522   else
523     return 0;
524
525   if (sm->forwarding_enabled)
526     return 1;
527
528   return snat_not_translate_fast (sm, node, sw_if_index, ip, proto,
529                                   rx_fib_index);
530 }
531
532 static_always_inline int
533 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
534                                       u32 thread_index, f64 now,
535                                       vlib_main_t * vm, vlib_buffer_t * b)
536 {
537   clib_bihash_kv_16_8_t kv, value;
538   snat_session_t *s = 0;
539   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
540
541   if (!sm->forwarding_enabled)
542     return 0;
543
544   if (ip->protocol == IP_PROTOCOL_ICMP)
545     {
546       if (get_icmp_i2o_ed_key (b, ip, 0, ~0, ~0, 0, 0, 0, &kv))
547         return 0;
548     }
549   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
550     {
551       init_ed_k (&kv, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
552                  ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port, 0,
553                  ip->protocol);
554     }
555   else
556     {
557       init_ed_k (&kv, ip->src_address, 0, ip->dst_address, 0, 0,
558                  ip->protocol);
559     }
560
561   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
562     {
563       ASSERT (thread_index == ed_value_get_thread_index (&value));
564       s =
565         pool_elt_at_index (tsm->sessions,
566                            ed_value_get_session_index (&value));
567       if (is_fwd_bypass_session (s))
568         {
569           if (ip->protocol == IP_PROTOCOL_TCP)
570             {
571               if (nat44_set_tcp_session_state_i2o
572                   (sm, now, s, b, thread_index))
573                 return 1;
574             }
575           /* Accounting */
576           nat44_session_update_counters (s, now,
577                                          vlib_buffer_length_in_chain (vm, b),
578                                          thread_index);
579           /* Per-user LRU list maintenance */
580           nat44_session_update_lru (sm, s, thread_index);
581           return 1;
582         }
583       else
584         return 0;
585     }
586
587   return 0;
588 }
589
590 static_always_inline int
591 nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip,
592                                        u16 src_port, u16 dst_port,
593                                        u32 thread_index, u32 rx_sw_if_index,
594                                        u32 tx_sw_if_index)
595 {
596   clib_bihash_kv_16_8_t kv, value;
597   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
598   snat_interface_t *i;
599   snat_session_t *s;
600   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
601   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
602
603   /* src NAT check */
604   init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
605              tx_fib_index, ip->protocol);
606   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
607     {
608       ASSERT (thread_index == ed_value_get_thread_index (&value));
609       s =
610         pool_elt_at_index (tsm->sessions,
611                            ed_value_get_session_index (&value));
612       if (nat44_is_ses_closed (s))
613         {
614           nat_free_session_data (sm, s, thread_index, 0);
615           nat_ed_session_delete (sm, s, thread_index, 1);
616         }
617       else
618         s->flags |= SNAT_SESSION_FLAG_OUTPUT_FEATURE;
619       return 1;
620     }
621
622   /* dst NAT check */
623   init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port,
624              rx_fib_index, ip->protocol);
625   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
626     {
627       ASSERT (thread_index == ed_value_get_thread_index (&value));
628       s =
629         pool_elt_at_index (tsm->sessions,
630                            ed_value_get_session_index (&value));
631       if (is_fwd_bypass_session (s))
632         return 0;
633
634       /* hairpinning */
635       /* *INDENT-OFF* */
636       pool_foreach (i, sm->output_feature_interfaces,
637       ({
638         if ((nat_interface_is_inside (i)) && (rx_sw_if_index == i->sw_if_index))
639            return 0;
640       }));
641       /* *INDENT-ON* */
642       return 1;
643     }
644
645   return 0;
646 }
647
648 #ifndef CLIB_MARCH_VARIANT
649 u32
650 icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node,
651                       u32 thread_index, vlib_buffer_t * b,
652                       ip4_header_t * ip, ip4_address_t * addr,
653                       u16 * port, u32 * fib_index, nat_protocol_t * proto,
654                       void *d, void *e, u8 * dont_translate)
655 {
656   u32 sw_if_index;
657   u32 rx_fib_index;
658   clib_bihash_kv_16_8_t kv, value;
659   u32 next = ~0;
660   int err;
661   snat_session_t *s = NULL;
662   u16 l_port = 0, r_port = 0;   // initialize to workaround gcc warning
663   vlib_main_t *vm = vlib_get_main ();
664   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
665   *dont_translate = 0;
666
667   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
668   rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
669
670   err =
671     get_icmp_i2o_ed_key (b, ip, rx_fib_index, ~0, ~0, proto, &l_port,
672                          &r_port, &kv);
673   if (err != 0)
674     {
675       b->error = node->errors[err];
676       next = NAT_NEXT_DROP;
677       goto out;
678     }
679
680   if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
681     {
682       if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
683         {
684           if (PREDICT_FALSE
685               (nat44_ed_not_translate_output_feature
686                (sm, ip, l_port, r_port, thread_index,
687                 sw_if_index, vnet_buffer (b)->sw_if_index[VLIB_TX])))
688             {
689               *dont_translate = 1;
690               goto out;
691             }
692         }
693       else
694         {
695           if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, sw_if_index,
696                                                      ip, NAT_PROTOCOL_ICMP,
697                                                      rx_fib_index,
698                                                      thread_index)))
699             {
700               *dont_translate = 1;
701               goto out;
702             }
703         }
704
705       if (PREDICT_FALSE
706           (icmp_type_is_error_message
707            (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
708         {
709           b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
710           next = NAT_NEXT_DROP;
711           goto out;
712         }
713
714       next =
715         slow_path_ed (sm, b, ip->src_address, ip->dst_address, l_port, r_port,
716                       ip->protocol, rx_fib_index, &s, node, next,
717                       thread_index, vlib_time_now (vm));
718
719       if (PREDICT_FALSE (next == NAT_NEXT_DROP))
720         goto out;
721
722       if (!s)
723         {
724           *dont_translate = 1;
725           goto out;
726         }
727     }
728   else
729     {
730       if (PREDICT_FALSE
731           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
732            ICMP4_echo_request
733            && vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
734            ICMP4_echo_reply
735            && !icmp_type_is_error_message (vnet_buffer (b)->ip.
736                                            reass.icmp_type_or_tcp_flags)))
737         {
738           b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
739           next = NAT_NEXT_DROP;
740           goto out;
741         }
742
743       ASSERT (thread_index == ed_value_get_thread_index (&value));
744       s =
745         pool_elt_at_index (tsm->sessions,
746                            ed_value_get_session_index (&value));
747     }
748 out:
749   if (s)
750     {
751       *addr = s->out2in.addr;
752       *port = s->out2in.port;
753       *fib_index = s->out2in.fib_index;
754     }
755   if (d)
756     {
757       *(snat_session_t **) d = s;
758     }
759   return next;
760 }
761 #endif
762
763 static snat_session_t *
764 nat44_ed_in2out_unknown_proto (snat_main_t * sm,
765                                vlib_buffer_t * b,
766                                ip4_header_t * ip,
767                                u32 rx_fib_index,
768                                u32 thread_index,
769                                f64 now,
770                                vlib_main_t * vm, vlib_node_runtime_t * node)
771 {
772   clib_bihash_kv_8_8_t kv, value;
773   clib_bihash_kv_16_8_t s_kv, s_value;
774   snat_static_mapping_t *m;
775   u32 old_addr, new_addr = 0;
776   ip_csum_t sum;
777   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
778   snat_session_t *s;
779   u32 outside_fib_index = sm->outside_fib_index;
780   int i;
781   u8 is_sm = 0;
782
783   switch (vec_len (sm->outside_fibs))
784     {
785     case 0:
786       outside_fib_index = sm->outside_fib_index;
787       break;
788     case 1:
789       outside_fib_index = sm->outside_fibs[0].fib_index;
790       break;
791     default:
792       outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address);
793       break;
794     }
795   old_addr = ip->src_address.as_u32;
796
797   init_ed_k (&s_kv, ip->src_address, 0, ip->dst_address, 0, rx_fib_index,
798              ip->protocol);
799
800   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &s_kv, &s_value))
801     {
802       ASSERT (thread_index == ed_value_get_thread_index (&s_value));
803       s =
804         pool_elt_at_index (tsm->sessions,
805                            ed_value_get_session_index (&s_value));
806       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
807     }
808   else
809     {
810       if (PREDICT_FALSE
811           (nat44_ed_maximum_sessions_exceeded
812            (sm, rx_fib_index, thread_index)))
813         {
814           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
815           nat_ipfix_logging_max_sessions (thread_index,
816                                           sm->max_translations_per_thread);
817           nat_elog_notice ("maximum sessions exceeded");
818           return 0;
819         }
820
821       init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0);
822
823       /* Try to find static mapping first */
824       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
825         {
826           m = pool_elt_at_index (sm->static_mappings, value.value);
827           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
828           is_sm = 1;
829           goto create_ses;
830         }
831       else
832         {
833           /* *INDENT-OFF* */
834           pool_foreach (s, tsm->sessions, {
835             if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
836               {
837                 new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
838
839                 init_ed_k(&s_kv, s->out2in.addr, 0, ip->dst_address, 0, outside_fib_index, ip->protocol);
840                 if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
841                   goto create_ses;
842
843                 break;
844               }
845           });
846           /* *INDENT-ON* */
847
848           for (i = 0; i < vec_len (sm->addresses); i++)
849             {
850               init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0,
851                          outside_fib_index, ip->protocol);
852               if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
853                 {
854                   new_addr = ip->src_address.as_u32 =
855                     sm->addresses[i].addr.as_u32;
856                   goto create_ses;
857                 }
858             }
859           return 0;
860         }
861
862     create_ses:
863       s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
864       if (!s)
865         {
866           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_USER_SESS_EXCEEDED];
867           nat_elog_warn ("create NAT session failed");
868           return 0;
869         }
870
871       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
872       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
873       s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
874       s->out2in.addr.as_u32 = new_addr;
875       s->out2in.fib_index = outside_fib_index;
876       s->in2out.addr.as_u32 = old_addr;
877       s->in2out.fib_index = rx_fib_index;
878       s->in2out.port = s->out2in.port = ip->protocol;
879       if (is_sm)
880         s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
881
882       /* Add to lookup tables */
883       init_ed_kv (&s_kv, s->in2out.addr, 0, ip->dst_address, 0, rx_fib_index,
884                   ip->protocol, thread_index, s - tsm->sessions);
885       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1))
886         nat_elog_notice ("in2out key add failed");
887
888       init_ed_kv (&s_kv, s->out2in.addr, 0, ip->dst_address, 0,
889                   outside_fib_index, ip->protocol, thread_index,
890                   s - tsm->sessions);
891       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
892         nat_elog_notice ("out2in key add failed");
893     }
894
895   /* Update IP checksum */
896   sum = ip->checksum;
897   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
898   ip->checksum = ip_csum_fold (sum);
899
900   /* Accounting */
901   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
902                                  thread_index);
903   /* Per-user LRU list maintenance */
904   nat44_session_update_lru (sm, s, thread_index);
905
906   /* Hairpinning */
907   if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
908     nat44_ed_hairpinning_unknown_proto (sm, b, ip);
909
910   if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
911     vnet_buffer (b)->sw_if_index[VLIB_TX] = outside_fib_index;
912
913   return s;
914 }
915
916 static inline uword
917 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm,
918                                           vlib_node_runtime_t * node,
919                                           vlib_frame_t * frame,
920                                           int is_output_feature)
921 {
922   u32 n_left_from, *from, *to_next, pkts_processed = 0, stats_node_index;
923   nat_next_t next_index;
924   snat_main_t *sm = &snat_main;
925   f64 now = vlib_time_now (vm);
926   u32 thread_index = vm->thread_index;
927   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
928   u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets =
929     0, def_slow;
930
931   def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH :
932     NAT_NEXT_IN2OUT_ED_SLOW_PATH;
933
934   stats_node_index = sm->ed_in2out_node_index;
935
936   from = vlib_frame_vector_args (frame);
937   n_left_from = frame->n_vectors;
938   next_index = node->cached_next_index;
939
940   while (n_left_from > 0)
941     {
942       u32 n_left_to_next;
943
944       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
945
946       while (n_left_from > 0 && n_left_to_next > 0)
947         {
948           u32 bi0;
949           vlib_buffer_t *b0;
950           u32 next0, sw_if_index0, rx_fib_index0, iph_offset0 = 0, proto0,
951             new_addr0, old_addr0;
952           u16 old_port0, new_port0;
953           ip4_header_t *ip0;
954           udp_header_t *udp0;
955           tcp_header_t *tcp0;
956           snat_session_t *s0 = 0;
957           clib_bihash_kv_16_8_t kv0, value0;
958           ip_csum_t sum0;
959
960           /* speculatively enqueue b0 to the current next frame */
961           bi0 = from[0];
962           to_next[0] = bi0;
963           from += 1;
964           to_next += 1;
965           n_left_from -= 1;
966           n_left_to_next -= 1;
967
968           b0 = vlib_get_buffer (vm, bi0);
969
970           if (is_output_feature)
971             {
972               vnet_feature_next (&vnet_buffer2 (b0)->nat.arc_next, b0);
973               iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
974             }
975
976           next0 = vnet_buffer2 (b0)->nat.arc_next;
977
978           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
979                                   iph_offset0);
980
981           sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
982           rx_fib_index0 =
983             fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
984                                                  sw_if_index0);
985
986           if (PREDICT_FALSE (ip0->ttl == 1))
987             {
988               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
989               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
990                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
991                                            0);
992               next0 = NAT_NEXT_ICMP_ERROR;
993               goto trace0;
994             }
995
996           udp0 = ip4_next_header (ip0);
997           tcp0 = (tcp_header_t *) udp0;
998           proto0 = ip_proto_to_nat_proto (ip0->protocol);
999
1000           if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
1001             {
1002               next0 = def_slow;
1003               goto trace0;
1004             }
1005
1006           if (is_output_feature)
1007             {
1008               if (PREDICT_FALSE (nat_not_translate_output_feature_fwd
1009                                  (sm, ip0, thread_index, now, vm, b0)))
1010                 goto trace0;
1011             }
1012
1013           if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1014             {
1015               next0 = def_slow;
1016               goto trace0;
1017             }
1018
1019           init_ed_k (&kv0, ip0->src_address,
1020                      vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
1021                      vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
1022                      ip0->protocol);
1023
1024           // lookup for session
1025           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
1026             {
1027               // session does not exist go slow path
1028               next0 = def_slow;
1029               goto trace0;
1030             }
1031           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1032           s0 =
1033             pool_elt_at_index (tsm->sessions,
1034                                ed_value_get_session_index (&value0));
1035
1036           if (s0->tcp_closed_timestamp)
1037             {
1038               if (now >= s0->tcp_closed_timestamp)
1039                 {
1040                   // session is closed, go slow path
1041                   next0 = def_slow;
1042                 }
1043               else
1044                 {
1045                   // session in transitory timeout, drop
1046                   b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
1047                   next0 = NAT_NEXT_DROP;
1048                 }
1049               goto trace0;
1050             }
1051
1052           // drop if session expired
1053           u64 sess_timeout_time;
1054           sess_timeout_time = s0->last_heard +
1055             (f64) nat44_session_get_timeout (sm, s0);
1056           if (now >= sess_timeout_time)
1057             {
1058               nat_free_session_data (sm, s0, thread_index, 0);
1059               nat_ed_session_delete (sm, s0, thread_index, 1);
1060               // session is closed, go slow path
1061               next0 = def_slow;
1062               goto trace0;
1063             }
1064
1065           b0->flags |= VNET_BUFFER_F_IS_NATED;
1066
1067           if (!is_output_feature)
1068             vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1069
1070           old_addr0 = ip0->src_address.as_u32;
1071           new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
1072           sum0 = ip0->checksum;
1073           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1074                                  src_address);
1075           if (PREDICT_FALSE (is_twice_nat_session (s0)))
1076             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1077                                    s0->ext_host_addr.as_u32, ip4_header_t,
1078                                    dst_address);
1079           ip0->checksum = ip_csum_fold (sum0);
1080
1081           old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port;
1082
1083           if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
1084             {
1085               if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1086                 {
1087                   new_port0 = udp0->src_port = s0->out2in.port;
1088                   sum0 = tcp0->checksum;
1089                   sum0 =
1090                     ip_csum_update (sum0, old_addr0, new_addr0,
1091                                     ip4_header_t, dst_address);
1092                   sum0 =
1093                     ip_csum_update (sum0, old_port0, new_port0,
1094                                     ip4_header_t, length);
1095                   if (PREDICT_FALSE (is_twice_nat_session (s0)))
1096                     {
1097                       sum0 =
1098                         ip_csum_update (sum0, ip0->dst_address.as_u32,
1099                                         s0->ext_host_addr.as_u32,
1100                                         ip4_header_t, dst_address);
1101                       sum0 =
1102                         ip_csum_update (sum0,
1103                                         vnet_buffer (b0)->ip.
1104                                         reass.l4_dst_port, s0->ext_host_port,
1105                                         ip4_header_t, length);
1106                       tcp0->dst_port = s0->ext_host_port;
1107                       ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1108                     }
1109                   mss_clamping (sm->mss_clamping, tcp0, &sum0);
1110                   tcp0->checksum = ip_csum_fold (sum0);
1111                 }
1112               tcp_packets++;
1113               if (nat44_set_tcp_session_state_i2o
1114                   (sm, now, s0, b0, thread_index))
1115                 goto trace0;
1116             }
1117           else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
1118                    && udp0->checksum)
1119             {
1120               new_port0 = udp0->src_port = s0->out2in.port;
1121               sum0 = udp0->checksum;
1122               sum0 =
1123                 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1124                                 dst_address);
1125               sum0 =
1126                 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
1127                                 length);
1128               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1129                 {
1130                   sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1131                                          s0->ext_host_addr.as_u32,
1132                                          ip4_header_t, dst_address);
1133                   sum0 =
1134                     ip_csum_update (sum0,
1135                                     vnet_buffer (b0)->ip.reass.l4_dst_port,
1136                                     s0->ext_host_port, ip4_header_t, length);
1137                   udp0->dst_port = s0->ext_host_port;
1138                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1139                 }
1140               udp0->checksum = ip_csum_fold (sum0);
1141               udp_packets++;
1142             }
1143           else
1144             {
1145               if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1146                 {
1147                   new_port0 = udp0->src_port = s0->out2in.port;
1148                   if (PREDICT_FALSE (is_twice_nat_session (s0)))
1149                     {
1150                       udp0->dst_port = s0->ext_host_port;
1151                       ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1152                     }
1153                   udp_packets++;
1154                 }
1155             }
1156
1157           /* Accounting */
1158           nat44_session_update_counters (s0, now,
1159                                          vlib_buffer_length_in_chain
1160                                          (vm, b0), thread_index);
1161           /* Per-user LRU list maintenance */
1162           nat44_session_update_lru (sm, s0, thread_index);
1163
1164         trace0:
1165           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1166                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1167             {
1168               nat_in2out_ed_trace_t *t =
1169                 vlib_add_trace (vm, node, b0, sizeof (*t));
1170               t->sw_if_index = sw_if_index0;
1171               t->next_index = next0;
1172               t->is_slow_path = 0;
1173
1174               if (s0)
1175                 t->session_index = s0 - tsm->sessions;
1176               else
1177                 t->session_index = ~0;
1178             }
1179
1180           pkts_processed += next0 == vnet_buffer2 (b0)->nat.arc_next;
1181           /* verify speculative enqueue, maybe switch current next frame */
1182           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1183                                            to_next, n_left_to_next,
1184                                            bi0, next0);
1185         }
1186
1187       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1188     }
1189
1190   vlib_node_increment_counter (vm, stats_node_index,
1191                                NAT_IN2OUT_ED_ERROR_IN2OUT_PACKETS,
1192                                pkts_processed);
1193   vlib_node_increment_counter (vm, stats_node_index,
1194                                NAT_IN2OUT_ED_ERROR_TCP_PACKETS, tcp_packets);
1195   vlib_node_increment_counter (vm, stats_node_index,
1196                                NAT_IN2OUT_ED_ERROR_UDP_PACKETS, udp_packets);
1197   vlib_node_increment_counter (vm, stats_node_index,
1198                                NAT_IN2OUT_ED_ERROR_ICMP_PACKETS,
1199                                icmp_packets);
1200   vlib_node_increment_counter (vm, stats_node_index,
1201                                NAT_IN2OUT_ED_ERROR_OTHER_PACKETS,
1202                                other_packets);
1203   return frame->n_vectors;
1204 }
1205
1206 static inline uword
1207 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm,
1208                                           vlib_node_runtime_t * node,
1209                                           vlib_frame_t * frame,
1210                                           int is_output_feature)
1211 {
1212   u32 n_left_from, *from, *to_next, pkts_processed = 0, stats_node_index;
1213   nat_next_t next_index;
1214   snat_main_t *sm = &snat_main;
1215   f64 now = vlib_time_now (vm);
1216   u32 thread_index = vm->thread_index;
1217   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1218   u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets = 0;
1219
1220   stats_node_index = sm->ed_in2out_slowpath_node_index;
1221
1222   from = vlib_frame_vector_args (frame);
1223   n_left_from = frame->n_vectors;
1224   next_index = node->cached_next_index;
1225
1226   while (n_left_from > 0)
1227     {
1228       u32 n_left_to_next;
1229
1230       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1231
1232       while (n_left_from > 0 && n_left_to_next > 0)
1233         {
1234           u32 bi0;
1235           vlib_buffer_t *b0;
1236           u32 next0, sw_if_index0, rx_fib_index0, iph_offset0 = 0, proto0,
1237             new_addr0, old_addr0;
1238           u16 old_port0, new_port0;
1239           ip4_header_t *ip0;
1240           udp_header_t *udp0;
1241           tcp_header_t *tcp0;
1242           icmp46_header_t *icmp0;
1243           snat_session_t *s0 = 0;
1244           clib_bihash_kv_16_8_t kv0, value0;
1245           ip_csum_t sum0;
1246
1247           /* speculatively enqueue b0 to the current next frame */
1248           bi0 = from[0];
1249           to_next[0] = bi0;
1250           from += 1;
1251           to_next += 1;
1252           n_left_from -= 1;
1253           n_left_to_next -= 1;
1254
1255           b0 = vlib_get_buffer (vm, bi0);
1256
1257           if (is_output_feature)
1258             iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1259
1260           next0 = vnet_buffer2 (b0)->nat.arc_next;
1261
1262           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1263                                   iph_offset0);
1264
1265           sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1266           rx_fib_index0 =
1267             fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1268                                                  sw_if_index0);
1269
1270           if (PREDICT_FALSE (ip0->ttl == 1))
1271             {
1272               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1273               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1274                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1275                                            0);
1276               next0 = NAT_NEXT_ICMP_ERROR;
1277               goto trace0;
1278             }
1279
1280           udp0 = ip4_next_header (ip0);
1281           tcp0 = (tcp_header_t *) udp0;
1282           icmp0 = (icmp46_header_t *) udp0;
1283           proto0 = ip_proto_to_nat_proto (ip0->protocol);
1284
1285           if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
1286             {
1287               s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0,
1288                                                   rx_fib_index0,
1289                                                   thread_index, now,
1290                                                   vm, node);
1291               if (!s0)
1292                 next0 = NAT_NEXT_DROP;
1293
1294               other_packets++;
1295               goto trace0;
1296             }
1297
1298           if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1299             {
1300               next0 = icmp_in2out_ed_slow_path
1301                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
1302                  node, next0, now, thread_index, &s0);
1303               icmp_packets++;
1304               goto trace0;
1305             }
1306
1307           init_ed_k (&kv0, ip0->src_address,
1308                      vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
1309                      vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
1310                      ip0->protocol);
1311           if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
1312             {
1313               ASSERT (thread_index == ed_value_get_thread_index (&value0));
1314               s0 =
1315                 pool_elt_at_index (tsm->sessions,
1316                                    ed_value_get_session_index (&value0));
1317
1318               if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
1319                 {
1320                   nat_free_session_data (sm, s0, thread_index, 0);
1321                   nat_ed_session_delete (sm, s0, thread_index, 1);
1322                   s0 = NULL;
1323                 }
1324             }
1325
1326           if (!s0)
1327             {
1328               if (is_output_feature)
1329                 {
1330                   if (PREDICT_FALSE
1331                       (nat44_ed_not_translate_output_feature
1332                        (sm, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1333                         vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1334                         sw_if_index0,
1335                         vnet_buffer (b0)->sw_if_index[VLIB_TX])))
1336                     goto trace0;
1337
1338                   /*
1339                    * Send DHCP packets to the ipv4 stack, or we won't
1340                    * be able to use dhcp client on the outside interface
1341                    */
1342                   if (PREDICT_FALSE
1343                       (proto0 == NAT_PROTOCOL_UDP
1344                        && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1345                            clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server))
1346                        && ip0->dst_address.as_u32 == 0xffffffff))
1347                     goto trace0;
1348                 }
1349               else
1350                 {
1351                   if (PREDICT_FALSE
1352                       (nat44_ed_not_translate
1353                        (sm, node, sw_if_index0, ip0, proto0, rx_fib_index0,
1354                         thread_index)))
1355                     goto trace0;
1356                 }
1357
1358               next0 =
1359                 slow_path_ed (sm, b0, ip0->src_address, ip0->dst_address,
1360                               vnet_buffer (b0)->ip.reass.l4_src_port,
1361                               vnet_buffer (b0)->ip.reass.l4_dst_port,
1362                               ip0->protocol, rx_fib_index0, &s0, node, next0,
1363                               thread_index, now);
1364
1365               if (PREDICT_FALSE (next0 == NAT_NEXT_DROP))
1366                 goto trace0;
1367
1368               if (PREDICT_FALSE (!s0))
1369                 goto trace0;
1370
1371             }
1372
1373           b0->flags |= VNET_BUFFER_F_IS_NATED;
1374
1375           if (!is_output_feature)
1376             vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1377
1378           old_addr0 = ip0->src_address.as_u32;
1379           new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
1380           sum0 = ip0->checksum;
1381           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1382                                  src_address);
1383           if (PREDICT_FALSE (is_twice_nat_session (s0)))
1384             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1385                                    s0->ext_host_addr.as_u32, ip4_header_t,
1386                                    dst_address);
1387           ip0->checksum = ip_csum_fold (sum0);
1388
1389           old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port;
1390
1391           if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
1392             {
1393               if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1394                 {
1395                   new_port0 = udp0->src_port = s0->out2in.port;
1396                   sum0 = tcp0->checksum;
1397                   sum0 =
1398                     ip_csum_update (sum0, old_addr0, new_addr0,
1399                                     ip4_header_t, dst_address);
1400                   sum0 =
1401                     ip_csum_update (sum0, old_port0, new_port0,
1402                                     ip4_header_t, length);
1403                   if (PREDICT_FALSE (is_twice_nat_session (s0)))
1404                     {
1405                       sum0 =
1406                         ip_csum_update (sum0, ip0->dst_address.as_u32,
1407                                         s0->ext_host_addr.as_u32,
1408                                         ip4_header_t, dst_address);
1409                       sum0 =
1410                         ip_csum_update (sum0,
1411                                         vnet_buffer (b0)->ip.
1412                                         reass.l4_dst_port, s0->ext_host_port,
1413                                         ip4_header_t, length);
1414                       tcp0->dst_port = s0->ext_host_port;
1415                       ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1416                     }
1417                   mss_clamping (sm->mss_clamping, tcp0, &sum0);
1418                   tcp0->checksum = ip_csum_fold (sum0);
1419                 }
1420               tcp_packets++;
1421               if (nat44_set_tcp_session_state_i2o
1422                   (sm, now, s0, b0, thread_index))
1423                 goto trace0;
1424             }
1425           else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
1426                    && udp0->checksum)
1427             {
1428               new_port0 = udp0->src_port = s0->out2in.port;
1429               sum0 = udp0->checksum;
1430               sum0 =
1431                 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1432                                 dst_address);
1433               sum0 =
1434                 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
1435                                 length);
1436               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1437                 {
1438                   sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1439                                          s0->ext_host_addr.as_u32,
1440                                          ip4_header_t, dst_address);
1441                   sum0 =
1442                     ip_csum_update (sum0,
1443                                     vnet_buffer (b0)->ip.reass.l4_dst_port,
1444                                     s0->ext_host_port, ip4_header_t, length);
1445                   udp0->dst_port = s0->ext_host_port;
1446                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1447                 }
1448               udp0->checksum = ip_csum_fold (sum0);
1449               udp_packets++;
1450             }
1451           else
1452             {
1453               if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1454                 {
1455                   new_port0 = udp0->src_port = s0->out2in.port;
1456                   if (PREDICT_FALSE (is_twice_nat_session (s0)))
1457                     {
1458                       udp0->dst_port = s0->ext_host_port;
1459                       ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1460                     }
1461                   udp_packets++;
1462                 }
1463             }
1464
1465           /* Accounting */
1466           nat44_session_update_counters (s0, now,
1467                                          vlib_buffer_length_in_chain
1468                                          (vm, b0), thread_index);
1469           /* Per-user LRU list maintenance */
1470           nat44_session_update_lru (sm, s0, thread_index);
1471
1472         trace0:
1473           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1474                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1475             {
1476               nat_in2out_ed_trace_t *t =
1477                 vlib_add_trace (vm, node, b0, sizeof (*t));
1478               t->sw_if_index = sw_if_index0;
1479               t->next_index = next0;
1480               t->is_slow_path = 1;
1481
1482               if (s0)
1483                 t->session_index = s0 - tsm->sessions;
1484               else
1485                 t->session_index = ~0;
1486             }
1487
1488           pkts_processed += next0 == vnet_buffer2 (b0)->nat.arc_next;
1489
1490           /* verify speculative enqueue, maybe switch current next frame */
1491           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1492                                            to_next, n_left_to_next,
1493                                            bi0, next0);
1494         }
1495
1496       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1497     }
1498
1499   vlib_node_increment_counter (vm, stats_node_index,
1500                                NAT_IN2OUT_ED_ERROR_IN2OUT_PACKETS,
1501                                pkts_processed);
1502   vlib_node_increment_counter (vm, stats_node_index,
1503                                NAT_IN2OUT_ED_ERROR_TCP_PACKETS, tcp_packets);
1504   vlib_node_increment_counter (vm, stats_node_index,
1505                                NAT_IN2OUT_ED_ERROR_UDP_PACKETS, udp_packets);
1506   vlib_node_increment_counter (vm, stats_node_index,
1507                                NAT_IN2OUT_ED_ERROR_ICMP_PACKETS,
1508                                icmp_packets);
1509   vlib_node_increment_counter (vm, stats_node_index,
1510                                NAT_IN2OUT_ED_ERROR_OTHER_PACKETS,
1511                                other_packets);
1512   return frame->n_vectors;
1513 }
1514
1515 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1516                                      vlib_node_runtime_t * node,
1517                                      vlib_frame_t * frame)
1518 {
1519   return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0);
1520 }
1521
1522 /* *INDENT-OFF* */
1523 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1524   .name = "nat44-ed-in2out",
1525   .vector_size = sizeof (u32),
1526   .sibling_of = "nat-default",
1527   .format_trace = format_nat_in2out_ed_trace,
1528   .type = VLIB_NODE_TYPE_INTERNAL,
1529   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1530   .error_strings = nat_in2out_ed_error_strings,
1531   .runtime_data_bytes = sizeof (snat_runtime_t),
1532 };
1533 /* *INDENT-ON* */
1534
1535 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1536                                             vlib_node_runtime_t * node,
1537                                             vlib_frame_t * frame)
1538 {
1539   return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1);
1540 }
1541
1542 /* *INDENT-OFF* */
1543 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1544   .name = "nat44-ed-in2out-output",
1545   .vector_size = sizeof (u32),
1546   .sibling_of = "nat-default",
1547   .format_trace = format_nat_in2out_ed_trace,
1548   .type = VLIB_NODE_TYPE_INTERNAL,
1549   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1550   .error_strings = nat_in2out_ed_error_strings,
1551   .runtime_data_bytes = sizeof (snat_runtime_t),
1552 };
1553 /* *INDENT-ON* */
1554
/**
 * Node function for nat44_ed_in2out_slowpath_node.
 *
 * Hands the frame to nat44_ed_in2out_slow_path_node_fn_inline with a final
 * argument of 0 and returns whatever that call returns.
 *
 * NOTE(review): the final argument presumably selects output-feature
 * handling (nat44_ed_in2out_output_slowpath_node passes 1) -- confirm
 * against the inline's signature.
 */
1555 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1556                                               vlib_node_runtime_t *
1557                                               node, vlib_frame_t * frame)
1558 {
1559   return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0);
1560 }
1561
1562 /* *INDENT-OFF* */
/*
 * Registration of the "nat44-ed-in2out-slowpath" internal node.
 *
 * Declared as a sibling of "nat-default"; traces are rendered by
 * format_nat_in2out_ed_trace, the error table is
 * nat_in2out_ed_error_strings, and the per-node runtime data area is
 * sized to hold one snat_runtime_t.
 */
1563 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1564   .name = "nat44-ed-in2out-slowpath",
1565   .vector_size = sizeof (u32),
1566   .sibling_of = "nat-default",
1567   .format_trace = format_nat_in2out_ed_trace,
1568   .type = VLIB_NODE_TYPE_INTERNAL,
1569   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1570   .error_strings = nat_in2out_ed_error_strings,
1571   .runtime_data_bytes = sizeof (snat_runtime_t),
1572 };
1573 /* *INDENT-ON* */
1574
/**
 * Node function for nat44_ed_in2out_output_slowpath_node.
 *
 * Hands the frame to nat44_ed_in2out_slow_path_node_fn_inline with a final
 * argument of 1 and returns whatever that call returns.
 *
 * NOTE(review): the final argument presumably enables output-feature
 * handling (nat44_ed_in2out_slowpath_node passes 0) -- confirm against the
 * inline's signature.
 */
1575 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1576                                                      vlib_node_runtime_t
1577                                                      * node,
1578                                                      vlib_frame_t * frame)
1579 {
1580   return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1);
1581 }
1582
1583 /* *INDENT-OFF* */
/*
 * Registration of the "nat44-ed-in2out-output-slowpath" internal node.
 *
 * Declared as a sibling of "nat-default"; traces are rendered by
 * format_nat_in2out_ed_trace, the error table is
 * nat_in2out_ed_error_strings, and the per-node runtime data area is
 * sized to hold one snat_runtime_t.
 */
1584 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1585   .name = "nat44-ed-in2out-output-slowpath",
1586   .vector_size = sizeof (u32),
1587   .sibling_of = "nat-default",
1588   .format_trace = format_nat_in2out_ed_trace,
1589   .type = VLIB_NODE_TYPE_INTERNAL,
1590   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1591   .error_strings = nat_in2out_ed_error_strings,
1592   .runtime_data_bytes = sizeof (snat_runtime_t),
1593 };
1594 /* *INDENT-ON* */
1595
/**
 * Trace formatter used by the nat-pre-in2out node.
 *
 * @param s    vector to append the formatted text to.
 * @param args va_list carrying, in order: a vlib_main_t pointer, a
 *             vlib_node_t pointer and a nat_pre_trace_t pointer.
 * @return the result of format(), i.e. @p s extended with
 *         "in2out next_index <n> arc_next_index <m>".
 */
1596 static u8 *
1597 format_nat_pre_trace (u8 * s, va_list * args)
1598 {
/* vm and node are not used, but they still have to be pulled off the
 * va_list so that the next va_arg yields the trace record. */
1599   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1600   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1601   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1602   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1603                  t->arc_next_index);
1604 }
1605
/**
 * Node function for nat_pre_in2out_node.
 *
 * Hands the frame to nat_pre_node_fn_inline together with
 * NAT_NEXT_IN2OUT_ED_FAST_PATH and returns whatever that call returns.
 *
 * NOTE(review): the constant presumably names the next node that buffers
 * are steered to -- confirm against nat_pre_node_fn_inline.
 */
1606 VLIB_NODE_FN (nat_pre_in2out_node)
1607   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1608 {
1609   return nat_pre_node_fn_inline (vm, node, frame,
1610                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1611 }
1612
1613 /* *INDENT-OFF* */
/*
 * Registration of the "nat-pre-in2out" internal node.
 *
 * Declared as a sibling of "nat-default"; traces are rendered by
 * format_nat_pre_trace. Unlike the nat44-ed-in2out* registrations in this
 * file, it sets n_errors to 0 and specifies neither error strings nor
 * runtime data.
 */
1614 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1615   .name = "nat-pre-in2out",
1616   .vector_size = sizeof (u32),
1617   .sibling_of = "nat-default",
1618   .format_trace = format_nat_pre_trace,
1619   .type = VLIB_NODE_TYPE_INTERNAL,
1620   .n_errors = 0,
1621 };
1622 /* *INDENT-ON* */
1623
1624 /*
1625  * fd.io coding-style-patch-verification: ON
1626  *
1627  * Local Variables:
1628  * eval: (c-set-style "gnu")
1629  * End:
1630  */