nat: more long read after short write optimization
[vpp.git] / src / plugins / nat / in2out_ed.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/pg/pg.h>
23 #include <vnet/ip/ip.h>
24 #include <vnet/ethernet/ethernet.h>
25 #include <vnet/fib/ip4_fib.h>
26 #include <vnet/udp/udp.h>
27 #include <vppinfra/error.h>
28 #include <nat/nat.h>
29 #include <nat/nat_ipfix_logging.h>
30 #include <nat/nat_inlines.h>
31 #include <nat/nat44/inlines.h>
32 #include <nat/nat_syslog.h>
33 #include <nat/nat_ha.h>
34 #include <nat/nat44/ed_inlines.h>
35 #include <nat/lib/nat_inlines.h>
36
/*
 * Error strings for this node, generated with the X-macro pattern:
 * foreach_nat_in2out_ed_error expands to a list of _(sym, string) entries
 * and the local definition of _ keeps only the string part, so the array
 * holds one string per entry in the order the list macro emits them.
 */
static char *nat_in2out_ed_error_strings[] = {
#define _(sym,string) string,
  foreach_nat_in2out_ed_error
#undef _
};
42
/*
 * Per-packet trace record; rendered by format_nat_in2out_ed_trace as
 * "<tag>: sw_if_index %d, next index %d, session %d".
 */
typedef struct
{
  u32 sw_if_index;		/* printed as "sw_if_index" */
  u32 next_index;		/* printed as "next index" */
  u32 session_index;		/* printed as "session" */
  u32 is_slow_path;		/* non-zero selects the SLOW_PATH tag, zero the FAST_PATH tag */
} nat_in2out_ed_trace_t;
50
51 static u8 *
52 format_nat_in2out_ed_trace (u8 * s, va_list * args)
53 {
54   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
55   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
56   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
57   char *tag;
58
59   tag =
60     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
61     "NAT44_IN2OUT_ED_FAST_PATH";
62
63   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
64               t->sw_if_index, t->next_index, t->session_index);
65
66   return s;
67 }
68
69 #ifndef CLIB_MARCH_VARIANT
70 int
71 nat44_i2o_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg)
72 {
73   snat_main_t *sm = &snat_main;
74   nat44_is_idle_session_ctx_t *ctx = arg;
75   snat_session_t *s;
76   u64 sess_timeout_time;
77   u8 proto;
78   u16 r_port, l_port;
79   ip4_address_t *l_addr, *r_addr;
80   u32 fib_index;
81   clib_bihash_kv_16_8_t ed_kv;
82   int i;
83   snat_address_t *a;
84   snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
85                                                        ctx->thread_index);
86
87   ASSERT (ctx->thread_index == ed_value_get_thread_index (kv));
88   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (kv));
89   sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s);
90   if (ctx->now >= sess_timeout_time)
91     {
92       if (is_fwd_bypass_session (s))
93         goto delete;
94
95       l_addr = &s->out2in.addr;
96       r_addr = &s->ext_host_addr;
97       fib_index = s->out2in.fib_index;
98       if (snat_is_unk_proto_session (s))
99         {
100           proto = s->in2out.port;
101           r_port = 0;
102           l_port = 0;
103         }
104       else
105         {
106           proto = nat_proto_to_ip_proto (s->nat_proto);
107           l_port = s->out2in.port;
108           r_port = s->ext_host_port;
109         }
110       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
111       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0))
112         nat_elog_warn ("out2in_ed key del failed");
113
114       if (snat_is_unk_proto_session (s))
115         goto delete;
116
117       snat_ipfix_logging_nat44_ses_delete (ctx->thread_index,
118                                            s->in2out.addr.as_u32,
119                                            s->out2in.addr.as_u32,
120                                            s->nat_proto,
121                                            s->in2out.port,
122                                            s->out2in.port,
123                                            s->in2out.fib_index);
124
125       nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
126                              &s->in2out.addr, s->in2out.port,
127                              &s->ext_host_nat_addr, s->ext_host_nat_port,
128                              &s->out2in.addr, s->out2in.port,
129                              &s->ext_host_addr, s->ext_host_port,
130                              s->nat_proto, is_twice_nat_session (s));
131
132       nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
133                    s->ext_host_port, s->nat_proto, s->out2in.fib_index,
134                    ctx->thread_index);
135
136       if (is_twice_nat_session (s))
137         {
138           for (i = 0; i < vec_len (sm->twice_nat_addresses); i++)
139             {
140               // TODO FIXME this is obviously broken - which address should be
141               // freed here?!
142               a = sm->twice_nat_addresses + i;
143               if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32)
144                 {
145                   snat_free_outside_address_and_port (sm->twice_nat_addresses,
146                                                       ctx->thread_index,
147                                                       &s->ext_host_nat_addr,
148                                                       s->ext_host_nat_port,
149                                                       s->nat_proto);
150                   break;
151                 }
152             }
153         }
154
155       if (snat_is_session_static (s))
156         goto delete;
157
158       snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
159                                           &s->out2in.addr, s->out2in.port,
160                                           s->nat_proto);
161     delete:
162       nat_ed_session_delete (sm, s, ctx->thread_index, 1);
163       return 1;
164     }
165
166   return 0;
167 }
168 #endif
169
170 static inline u32
171 icmp_in2out_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0,
172                           ip4_header_t * ip0, icmp46_header_t * icmp0,
173                           u32 sw_if_index0, u32 rx_fib_index0,
174                           vlib_node_runtime_t * node, u32 next0, f64 now,
175                           u32 thread_index, snat_session_t ** p_s0)
176 {
177   vlib_main_t *vm = vlib_get_main ();
178
179   next0 = icmp_in2out (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
180                        next0, thread_index, p_s0, 0);
181   snat_session_t *s0 = *p_s0;
182   if (PREDICT_TRUE (next0 != NAT_NEXT_DROP && s0))
183     {
184       /* Accounting */
185       nat44_session_update_counters (s0, now,
186                                      vlib_buffer_length_in_chain
187                                      (vm, b0), thread_index);
188       /* Per-user LRU list maintenance */
189       nat44_session_update_lru (sm, s0, thread_index);
190     }
191   return next0;
192 }
193
194 static_always_inline u16
195 snat_random_port (u16 min, u16 max)
196 {
197   snat_main_t *sm = &snat_main;
198   return min + random_u32 (&sm->random_seed) /
199     (random_u32_max () / (max - min + 1) + 1);
200 }
201
202 static int
203 nat_ed_alloc_addr_and_port (snat_main_t * sm, u32 rx_fib_index,
204                             u32 nat_proto, u32 thread_index,
205                             ip4_address_t r_addr, u16 r_port, u8 proto,
206                             u16 port_per_thread, u32 snat_thread_index,
207                             snat_session_t * s,
208                             ip4_address_t * allocated_addr,
209                             u16 * allocated_port,
210                             clib_bihash_kv_16_8_t * out2in_ed_kv)
211 {
212   int i;
213   snat_address_t *a, *ga = 0;
214   u32 portnum;
215   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
216
217   const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
218
219   for (i = 0; i < vec_len (sm->addresses); i++)
220     {
221       a = sm->addresses + i;
222       switch (nat_proto)
223         {
224 #define _(N, j, n, unused)                                                   \
225   case NAT_PROTOCOL_##N:                                                     \
226     if (a->fib_index == rx_fib_index)                                        \
227       {                                                                      \
228         u16 port = snat_random_port (1, port_per_thread);                    \
229         u16 attempts = port_per_thread;                                      \
230         while (attempts > 0)                                                 \
231           {                                                                  \
232             --attempts;                                                      \
233             portnum = port_thread_offset + port;                             \
234             init_ed_kv (out2in_ed_kv, a->addr,                               \
235                         clib_host_to_net_u16 (portnum), r_addr, r_port,      \
236                         s->out2in.fib_index, proto, thread_index,            \
237                         s - tsm->sessions);                                  \
238             int rv = clib_bihash_add_del_16_8 (&sm->out2in_ed, out2in_ed_kv, \
239                                                2 /* is_add */);              \
240             if (0 == rv)                                                     \
241               {                                                              \
242                 ++a->busy_##n##_port_refcounts[portnum];                     \
243                 a->busy_##n##_ports_per_thread[thread_index]++;              \
244                 a->busy_##n##_ports++;                                       \
245                 *allocated_addr = a->addr;                                   \
246                 *allocated_port = clib_host_to_net_u16 (portnum);            \
247                 return 0;                                                    \
248               }                                                              \
249             port = (port + 1) % port_per_thread;                             \
250           }                                                                  \
251       }                                                                      \
252     else if (a->fib_index == ~0)                                             \
253       {                                                                      \
254         ga = a;                                                              \
255       }                                                                      \
256     break;
257
258           foreach_nat_protocol;
259         default:
260           nat_elog_info ("unknown protocol");
261           return 1;
262         }
263     }
264
265   if (ga)
266     {
267       /* fake fib_index to reuse macro */
268       rx_fib_index = ~0;
269       a = ga;
270       switch (nat_proto)
271         {
272           foreach_nat_protocol;
273         default:
274           nat_elog_info ("unknown protocol");
275           return 1;
276         }
277     }
278
279 #undef _
280
281   /* Totally out of translations to use... */
282   snat_ipfix_logging_addresses_exhausted (thread_index, 0);
283   return 1;
284 }
285
286 static_always_inline u32
287 nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr)
288 {
289   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
290   nat_outside_fib_t *outside_fib;
291   fib_prefix_t pfx = {
292     .fp_proto = FIB_PROTOCOL_IP4,
293     .fp_len = 32,
294     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
295     ,
296   };
297   // TODO: multiple vrfs none can resolve addr
298   /* *INDENT-OFF* */
299   vec_foreach (outside_fib, sm->outside_fibs)
300     {
301       fei = fib_table_lookup (outside_fib->fib_index, &pfx);
302       if (FIB_NODE_INDEX_INVALID != fei)
303         {
304           if (fib_entry_get_resolving_interface (fei) != ~0)
305             {
306               return outside_fib->fib_index;
307             }
308         }
309     }
310   /* *INDENT-ON* */
311   return ~0;
312 }
313
314 static u32
315 slow_path_ed (snat_main_t * sm,
316               vlib_buffer_t * b,
317               ip4_address_t l_addr,
318               ip4_address_t r_addr,
319               u16 l_port,
320               u16 r_port,
321               u8 proto,
322               u32 rx_fib_index,
323               snat_session_t ** sessionp,
324               vlib_node_runtime_t * node, u32 next, u32 thread_index, f64 now)
325 {
326   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
327   clib_bihash_kv_16_8_t out2in_ed_kv;
328   nat44_is_idle_session_ctx_t ctx;
329   ip4_address_t allocated_addr;
330   u16 allocated_port;
331   u8 identity_nat;
332
333   u32 nat_proto = ip_proto_to_nat_proto (proto);
334   snat_session_t *s = NULL;
335   lb_nat_type_t lb = 0;
336
337   if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP))
338     {
339       if (PREDICT_FALSE
340           (!tcp_flags_is_init
341            (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
342         {
343           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
344           return NAT_NEXT_DROP;
345         }
346     }
347
348   if (PREDICT_FALSE
349       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
350     {
351       if (!nat_lru_free_one (sm, thread_index, now))
352         {
353           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
354           nat_ipfix_logging_max_sessions (thread_index, sm->max_translations);
355           nat_elog_notice ("maximum sessions exceeded");
356           return NAT_NEXT_DROP;
357         }
358     }
359
360   ip4_address_t sm_addr;
361   u16 sm_port;
362   u32 sm_fib_index;
363   /* First try to match static mapping by local address and port */
364   if (snat_static_mapping_match
365       (sm, l_addr, l_port, rx_fib_index, nat_proto, &sm_addr, &sm_port,
366        &sm_fib_index, 0, 0, 0, &lb, 0, &identity_nat))
367     {
368       s = nat_ed_session_alloc (sm, thread_index, now, proto);
369       if (!s)
370         {
371           nat_elog_warn ("create NAT session failed");
372           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_USER_SESS_EXCEEDED];
373           return NAT_NEXT_DROP;
374         }
375       s->in2out.addr = l_addr;
376       s->in2out.port = l_port;
377       s->nat_proto = nat_proto;
378       s->in2out.fib_index = rx_fib_index;
379       s->out2in.fib_index = sm->outside_fib_index;
380
381       switch (vec_len (sm->outside_fibs))
382         {
383         case 0:
384           s->out2in.fib_index = sm->outside_fib_index;
385           break;
386         case 1:
387           s->out2in.fib_index = sm->outside_fibs[0].fib_index;
388           break;
389         default:
390           s->out2in.fib_index = nat_outside_fib_index_lookup (sm, r_addr);
391           break;
392         }
393
394       /* Try to create dynamic translation */
395       if (nat_ed_alloc_addr_and_port (sm, rx_fib_index, nat_proto,
396                                       thread_index, r_addr, r_port, proto,
397                                       sm->port_per_thread,
398                                       tsm->snat_thread_index, s,
399                                       &allocated_addr,
400                                       &allocated_port, &out2in_ed_kv))
401         {
402           nat_elog_notice ("addresses exhausted");
403           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
404           nat_ed_session_delete (sm, s, thread_index, 1);
405           return NAT_NEXT_DROP;
406         }
407       s->out2in.addr = allocated_addr;
408       s->out2in.port = allocated_port;
409     }
410   else
411     {
412       if (PREDICT_FALSE (identity_nat))
413         {
414           *sessionp = NULL;
415           return next;
416         }
417       s = nat_ed_session_alloc (sm, thread_index, now, proto);
418       if (!s)
419         {
420           nat_elog_warn ("create NAT session failed");
421           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_USER_SESS_EXCEEDED];
422           return NAT_NEXT_DROP;
423         }
424       s->out2in.addr = sm_addr;
425       s->out2in.port = sm_port;
426       s->in2out.addr = l_addr;
427       s->in2out.port = l_port;
428       s->nat_proto = nat_proto;
429       s->in2out.fib_index = rx_fib_index;
430       s->out2in.fib_index = sm->outside_fib_index;
431       switch (vec_len (sm->outside_fibs))
432         {
433         case 0:
434           s->out2in.fib_index = sm->outside_fib_index;
435           break;
436         case 1:
437           s->out2in.fib_index = sm->outside_fibs[0].fib_index;
438           break;
439         default:
440           s->out2in.fib_index = nat_outside_fib_index_lookup (sm, r_addr);
441           break;
442         }
443
444       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
445
446       init_ed_kv (&out2in_ed_kv, sm_addr, sm_port, r_addr, r_port,
447                   s->out2in.fib_index, proto, thread_index,
448                   s - tsm->sessions);
449       if (clib_bihash_add_or_overwrite_stale_16_8
450           (&sm->out2in_ed, &out2in_ed_kv, nat44_o2i_ed_is_idle_session_cb,
451            &ctx))
452         nat_elog_notice ("out2in-ed key add failed");
453     }
454
455   if (lb)
456     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
457   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
458   s->ext_host_addr = r_addr;
459   s->ext_host_port = r_port;
460
461   clib_bihash_kv_16_8_t in2out_ed_kv;
462   init_ed_kv (&in2out_ed_kv, l_addr, l_port, r_addr, r_port, rx_fib_index,
463               proto, thread_index, s - tsm->sessions);
464   ctx.now = now;
465   ctx.thread_index = thread_index;
466   if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, &in2out_ed_kv,
467                                                nat44_i2o_ed_is_idle_session_cb,
468                                                &ctx))
469     nat_elog_notice ("in2out-ed key add failed");
470
471   *sessionp = s;
472
473   /* log NAT event */
474   snat_ipfix_logging_nat44_ses_create (thread_index,
475                                        s->in2out.addr.as_u32,
476                                        s->out2in.addr.as_u32,
477                                        s->nat_proto,
478                                        s->in2out.port,
479                                        s->out2in.port, s->in2out.fib_index);
480
481   nat_syslog_nat44_sadd (s->user_index, s->in2out.fib_index,
482                          &s->in2out.addr, s->in2out.port,
483                          &s->ext_host_nat_addr, s->ext_host_nat_port,
484                          &s->out2in.addr, s->out2in.port,
485                          &s->ext_host_addr, s->ext_host_port, s->nat_proto,
486                          0);
487
488   nat_ha_sadd (&s->in2out.addr, s->in2out.port, &s->out2in.addr,
489                s->out2in.port, &s->ext_host_addr, s->ext_host_port,
490                &s->ext_host_nat_addr, s->ext_host_nat_port,
491                s->nat_proto, s->in2out.fib_index, s->flags, thread_index, 0);
492
493   return next;
494 }
495
496 static_always_inline int
497 nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t * node,
498                         u32 sw_if_index, ip4_header_t * ip, u32 proto,
499                         u32 rx_fib_index, u32 thread_index)
500 {
501   udp_header_t *udp = ip4_next_header (ip);
502   clib_bihash_kv_16_8_t kv, value;
503
504   init_ed_k (&kv, ip->dst_address, udp->dst_port, ip->src_address,
505              udp->src_port, sm->outside_fib_index, ip->protocol);
506
507   /* NAT packet aimed at external address if has active sessions */
508   if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
509     {
510       /* or is static mappings */
511       ip4_address_t dummy_addr;
512       u16 dummy_port;
513       u32 dummy_fib_index;
514       if (!snat_static_mapping_match
515           (sm, ip->dst_address, udp->dst_port, sm->outside_fib_index, proto,
516            &dummy_addr, &dummy_port, &dummy_fib_index, 1, 0, 0, 0, 0, 0))
517         return 0;
518     }
519   else
520     return 0;
521
522   if (sm->forwarding_enabled)
523     return 1;
524
525   return snat_not_translate_fast (sm, node, sw_if_index, ip, proto,
526                                   rx_fib_index);
527 }
528
529 static_always_inline int
530 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
531                                       u32 thread_index, f64 now,
532                                       vlib_main_t * vm, vlib_buffer_t * b)
533 {
534   clib_bihash_kv_16_8_t kv, value;
535   snat_session_t *s = 0;
536   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
537
538   if (!sm->forwarding_enabled)
539     return 0;
540
541   if (ip->protocol == IP_PROTOCOL_ICMP)
542     {
543       if (get_icmp_i2o_ed_key (b, ip, 0, ~0, ~0, 0, 0, 0, &kv))
544         return 0;
545     }
546   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
547     {
548       init_ed_k (&kv, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
549                  ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port, 0,
550                  ip->protocol);
551     }
552   else
553     {
554       init_ed_k (&kv, ip->src_address, 0, ip->dst_address, 0, 0,
555                  ip->protocol);
556     }
557
558   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
559     {
560       ASSERT (thread_index == ed_value_get_thread_index (&value));
561       s =
562         pool_elt_at_index (tsm->sessions,
563                            ed_value_get_session_index (&value));
564       if (is_fwd_bypass_session (s))
565         {
566           if (ip->protocol == IP_PROTOCOL_TCP)
567             {
568               if (nat44_set_tcp_session_state_i2o
569                   (sm, now, s, b, thread_index))
570                 return 1;
571             }
572           /* Accounting */
573           nat44_session_update_counters (s, now,
574                                          vlib_buffer_length_in_chain (vm, b),
575                                          thread_index);
576           /* Per-user LRU list maintenance */
577           nat44_session_update_lru (sm, s, thread_index);
578           return 1;
579         }
580       else
581         return 0;
582     }
583
584   return 0;
585 }
586
587 static_always_inline int
588 nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip,
589                                        u16 src_port, u16 dst_port,
590                                        u32 thread_index, u32 rx_sw_if_index,
591                                        u32 tx_sw_if_index)
592 {
593   clib_bihash_kv_16_8_t kv, value;
594   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
595   snat_interface_t *i;
596   snat_session_t *s;
597   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
598   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
599
600   /* src NAT check */
601   init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
602              tx_fib_index, ip->protocol);
603   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
604     {
605       ASSERT (thread_index == ed_value_get_thread_index (&value));
606       s =
607         pool_elt_at_index (tsm->sessions,
608                            ed_value_get_session_index (&value));
609       if (nat44_is_ses_closed (s))
610         {
611           nat_free_session_data (sm, s, thread_index, 0);
612           nat_ed_session_delete (sm, s, thread_index, 1);
613         }
614       else
615         s->flags |= SNAT_SESSION_FLAG_OUTPUT_FEATURE;
616       return 1;
617     }
618
619   /* dst NAT check */
620   init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port,
621              rx_fib_index, ip->protocol);
622   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
623     {
624       ASSERT (thread_index == ed_value_get_thread_index (&value));
625       s =
626         pool_elt_at_index (tsm->sessions,
627                            ed_value_get_session_index (&value));
628       if (is_fwd_bypass_session (s))
629         return 0;
630
631       /* hairpinning */
632       /* *INDENT-OFF* */
633       pool_foreach (i, sm->output_feature_interfaces,
634       ({
635         if ((nat_interface_is_inside (i)) && (rx_sw_if_index == i->sw_if_index))
636            return 0;
637       }));
638       /* *INDENT-ON* */
639       return 1;
640     }
641
642   return 0;
643 }
644
645 #ifndef CLIB_MARCH_VARIANT
646 u32
647 icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node,
648                       u32 thread_index, vlib_buffer_t * b,
649                       ip4_header_t * ip, ip4_address_t * addr,
650                       u16 * port, u32 * fib_index, nat_protocol_t * proto,
651                       void *d, void *e, u8 * dont_translate)
652 {
653   u32 sw_if_index;
654   u32 rx_fib_index;
655   clib_bihash_kv_16_8_t kv, value;
656   u32 next = ~0;
657   int err;
658   snat_session_t *s = NULL;
659   u16 l_port = 0, r_port = 0;   // initialize to workaround gcc warning
660   vlib_main_t *vm = vlib_get_main ();
661   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
662   *dont_translate = 0;
663
664   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
665   rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
666
667   err =
668     get_icmp_i2o_ed_key (b, ip, rx_fib_index, ~0, ~0, proto, &l_port,
669                          &r_port, &kv);
670   if (err != 0)
671     {
672       b->error = node->errors[err];
673       next = NAT_NEXT_DROP;
674       goto out;
675     }
676
677   if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
678     {
679       if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
680         {
681           if (PREDICT_FALSE
682               (nat44_ed_not_translate_output_feature
683                (sm, ip, l_port, r_port, thread_index,
684                 sw_if_index, vnet_buffer (b)->sw_if_index[VLIB_TX])))
685             {
686               *dont_translate = 1;
687               goto out;
688             }
689         }
690       else
691         {
692           if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, sw_if_index,
693                                                      ip, NAT_PROTOCOL_ICMP,
694                                                      rx_fib_index,
695                                                      thread_index)))
696             {
697               *dont_translate = 1;
698               goto out;
699             }
700         }
701
702       if (PREDICT_FALSE
703           (icmp_type_is_error_message
704            (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
705         {
706           b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
707           next = NAT_NEXT_DROP;
708           goto out;
709         }
710
711       next =
712         slow_path_ed (sm, b, ip->src_address, ip->dst_address, l_port, r_port,
713                       ip->protocol, rx_fib_index, &s, node, next,
714                       thread_index, vlib_time_now (vm));
715
716       if (PREDICT_FALSE (next == NAT_NEXT_DROP))
717         goto out;
718
719       if (!s)
720         {
721           *dont_translate = 1;
722           goto out;
723         }
724     }
725   else
726     {
727       if (PREDICT_FALSE
728           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
729            ICMP4_echo_request
730            && vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
731            ICMP4_echo_reply
732            && !icmp_type_is_error_message (vnet_buffer (b)->ip.
733                                            reass.icmp_type_or_tcp_flags)))
734         {
735           b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
736           next = NAT_NEXT_DROP;
737           goto out;
738         }
739
740       ASSERT (thread_index == ed_value_get_thread_index (&value));
741       s =
742         pool_elt_at_index (tsm->sessions,
743                            ed_value_get_session_index (&value));
744     }
745 out:
746   if (s)
747     {
748       *addr = s->out2in.addr;
749       *port = s->out2in.port;
750       *fib_index = s->out2in.fib_index;
751     }
752   if (d)
753     {
754       *(snat_session_t **) d = s;
755     }
756   return next;
757 }
758 #endif
759
760 static snat_session_t *
761 nat44_ed_in2out_unknown_proto (snat_main_t * sm,
762                                vlib_buffer_t * b,
763                                ip4_header_t * ip,
764                                u32 rx_fib_index,
765                                u32 thread_index,
766                                f64 now,
767                                vlib_main_t * vm, vlib_node_runtime_t * node)
768 {
769   clib_bihash_kv_8_8_t kv, value;
770   clib_bihash_kv_16_8_t s_kv, s_value;
771   snat_static_mapping_t *m;
772   u32 old_addr, new_addr = 0;
773   ip_csum_t sum;
774   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
775   snat_session_t *s;
776   u32 outside_fib_index = sm->outside_fib_index;
777   int i;
778   u8 is_sm = 0;
779
780   switch (vec_len (sm->outside_fibs))
781     {
782     case 0:
783       outside_fib_index = sm->outside_fib_index;
784       break;
785     case 1:
786       outside_fib_index = sm->outside_fibs[0].fib_index;
787       break;
788     default:
789       outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address);
790       break;
791     }
792   old_addr = ip->src_address.as_u32;
793
794   init_ed_k (&s_kv, ip->src_address, 0, ip->dst_address, 0, rx_fib_index,
795              ip->protocol);
796
797   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &s_kv, &s_value))
798     {
799       ASSERT (thread_index == ed_value_get_thread_index (&s_value));
800       s =
801         pool_elt_at_index (tsm->sessions,
802                            ed_value_get_session_index (&s_value));
803       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
804     }
805   else
806     {
807       if (PREDICT_FALSE
808           (nat44_ed_maximum_sessions_exceeded
809            (sm, rx_fib_index, thread_index)))
810         {
811           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
812           nat_ipfix_logging_max_sessions (thread_index, sm->max_translations);
813           nat_elog_notice ("maximum sessions exceeded");
814           return 0;
815         }
816
817       init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0);
818
819       /* Try to find static mapping first */
820       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
821         {
822           m = pool_elt_at_index (sm->static_mappings, value.value);
823           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
824           is_sm = 1;
825           goto create_ses;
826         }
827       else
828         {
829           /* *INDENT-OFF* */
830           pool_foreach (s, tsm->sessions, {
831             if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
832               {
833                 new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
834
835                 init_ed_k(&s_kv, s->out2in.addr, 0, ip->dst_address, 0, outside_fib_index, ip->protocol);
836                 if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
837                   goto create_ses;
838
839                 break;
840               }
841           });
842           /* *INDENT-ON* */
843
844           for (i = 0; i < vec_len (sm->addresses); i++)
845             {
846               init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0,
847                          outside_fib_index, ip->protocol);
848               if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
849                 {
850                   new_addr = ip->src_address.as_u32 =
851                     sm->addresses[i].addr.as_u32;
852                   goto create_ses;
853                 }
854             }
855           return 0;
856         }
857
858     create_ses:
859       s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
860       if (!s)
861         {
862           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_USER_SESS_EXCEEDED];
863           nat_elog_warn ("create NAT session failed");
864           return 0;
865         }
866
867       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
868       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
869       s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
870       s->out2in.addr.as_u32 = new_addr;
871       s->out2in.fib_index = outside_fib_index;
872       s->in2out.addr.as_u32 = old_addr;
873       s->in2out.fib_index = rx_fib_index;
874       s->in2out.port = s->out2in.port = ip->protocol;
875       if (is_sm)
876         s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
877
878       /* Add to lookup tables */
879       init_ed_kv (&s_kv, s->in2out.addr, 0, ip->dst_address, 0, rx_fib_index,
880                   ip->protocol, thread_index, s - tsm->sessions);
881       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1))
882         nat_elog_notice ("in2out key add failed");
883
884       init_ed_kv (&s_kv, s->out2in.addr, 0, ip->dst_address, 0,
885                   outside_fib_index, ip->protocol, thread_index,
886                   s - tsm->sessions);
887       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
888         nat_elog_notice ("out2in key add failed");
889     }
890
891   /* Update IP checksum */
892   sum = ip->checksum;
893   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
894   ip->checksum = ip_csum_fold (sum);
895
896   /* Accounting */
897   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
898                                  thread_index);
899   /* Per-user LRU list maintenance */
900   nat44_session_update_lru (sm, s, thread_index);
901
902   /* Hairpinning */
903   if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
904     nat44_ed_hairpinning_unknown_proto (sm, b, ip);
905
906   if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
907     vnet_buffer (b)->sw_if_index[VLIB_TX] = outside_fib_index;
908
909   return s;
910 }
911
/*
 * Fast path of NAT44 endpoint-dependent in2out translation.
 *
 * Processes the frame one buffer at a time. A buffer that matches a live
 * session in the per-thread in2out_ed table has its source address/port
 * (and, for twice-NAT sessions, its destination address/port) and the
 * affected checksums rewritten here. Every other buffer is steered away:
 *   - TTL == 1                          -> NAT_NEXT_ICMP_ERROR
 *   - NAT_PROTOCOL_OTHER / _ICMP        -> slow path (def_slow)
 *   - no session for the 6-tuple key    -> slow path
 *   - tcp_closed_timestamp reached      -> slow path
 *   - tcp_closed_timestamp not reached  -> NAT_NEXT_DROP (TCP_CLOSED error)
 *   - session timed out                 -> session deleted, then slow path
 *
 * vm, node, frame: the vlib node-function arguments.
 * is_output_feature: when non-zero the slow-path next is
 *   NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH, vnet_feature_next() is called on
 *   nat.arc_next, the IP header is located save_rewrite_length bytes past
 *   the buffer's current data, nat_not_translate_output_feature_fwd() may
 *   bypass translation, and sw_if_index[VLIB_TX] is left untouched.
 *
 * Returns frame->n_vectors.
 */
912 static inline uword
913 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm,
914                                           vlib_node_runtime_t * node,
915                                           vlib_frame_t * frame,
916                                           int is_output_feature)
917 {
918   u32 n_left_from, *from, *to_next, pkts_processed = 0, stats_node_index;
919   nat_next_t next_index;
920   snat_main_t *sm = &snat_main;
921   f64 now = vlib_time_now (vm);
922   u32 thread_index = vm->thread_index;
923   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
      /* icmp_packets and other_packets are never incremented in this function;
         they are still reported (as 0) at the end. */
924   u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets =
925     0, def_slow;
926
      /* Next index used whenever a buffer has to be handed to the slow path. */
927   def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH :
928     NAT_NEXT_IN2OUT_ED_SLOW_PATH;
929
      /* Counters are always charged to sm->ed_in2out_node_index, for both
         values of is_output_feature. */
930   stats_node_index = sm->ed_in2out_node_index;
931
932   from = vlib_frame_vector_args (frame);
933   n_left_from = frame->n_vectors;
934   next_index = node->cached_next_index;
935
936   while (n_left_from > 0)
937     {
938       u32 n_left_to_next;
939
940       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
941
942       while (n_left_from > 0 && n_left_to_next > 0)
943         {
944           u32 bi0;
945           vlib_buffer_t *b0;
946           u32 next0, sw_if_index0, rx_fib_index0, iph_offset0 = 0, proto0,
947             new_addr0, old_addr0;
948           u16 old_port0, new_port0;
949           ip4_header_t *ip0;
950           udp_header_t *udp0;
951           tcp_header_t *tcp0;
952           snat_session_t *s0 = 0;
953           clib_bihash_kv_16_8_t kv0, value0;
954           ip_csum_t sum0;
955
956           /* speculatively enqueue b0 to the current next frame */
957           bi0 = from[0];
958           to_next[0] = bi0;
959           from += 1;
960           to_next += 1;
961           n_left_from -= 1;
962           n_left_to_next -= 1;
963
964           b0 = vlib_get_buffer (vm, bi0);
965
966           if (is_output_feature)
967             {
968               vnet_feature_next (&vnet_buffer2 (b0)->nat.arc_next, b0);
969               iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
970             }
971
              /* Default next node; overridden below on every non-translate path. */
972           next0 = vnet_buffer2 (b0)->nat.arc_next;
973
974           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
975                                   iph_offset0);
976
977           sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
978           rx_fib_index0 =
979             fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
980                                                  sw_if_index0);
981
              /* TTL of 1: request an ICMP time-exceeded error instead of translating. */
982           if (PREDICT_FALSE (ip0->ttl == 1))
983             {
984               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
985               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
986                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
987                                            0);
988               next0 = NAT_NEXT_ICMP_ERROR;
989               goto trace0;
990             }
991
              /* udp0 and tcp0 alias the same L4 header location. */
992           udp0 = ip4_next_header (ip0);
993           tcp0 = (tcp_header_t *) udp0;
994           proto0 = ip_proto_to_nat_proto (ip0->protocol);
995
996           if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
997             {
998               next0 = def_slow;
999               goto trace0;
1000             }
1001
1002           if (is_output_feature)
1003             {
                   /* A non-zero return leaves next0 at nat.arc_next, i.e. the
                      buffer continues untranslated. */
1004               if (PREDICT_FALSE (nat_not_translate_output_feature_fwd
1005                                  (sm, ip0, thread_index, now, vm, b0)))
1006                 goto trace0;
1007             }
1008
1009           if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1010             {
1011               next0 = def_slow;
1012               goto trace0;
1013             }
1014
               /* Key: src addr/port, dst addr/port, rx fib index, IP protocol.
                  Ports are taken from the buffer's ip.reass metadata. */
1015           init_ed_k (&kv0, ip0->src_address,
1016                      vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
1017                      vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
1018                      ip0->protocol);
1019
1020           // lookup for session
1021           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
1022             {
1023               // session does not exist go slow path
1024               next0 = def_slow;
1025               goto trace0;
1026             }
1027           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1028           s0 =
1029             pool_elt_at_index (tsm->sessions,
1030                                ed_value_get_session_index (&value0));
1031
1032           if (s0->tcp_closed_timestamp)
1033             {
1034               if (now >= s0->tcp_closed_timestamp)
1035                 {
1036                   // session is closed, go slow path
1037                   next0 = def_slow;
1038                 }
1039               else
1040                 {
1041                   // session in transitory timeout, drop
1042                   b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
1043                   next0 = NAT_NEXT_DROP;
1044                 }
1045               goto trace0;
1046             }
1047
1048           // drop if session expired
               /* NOTE(review): the f64 sum is stored in a u64, so any fractional
                  part is discarded before the comparison with now - confirm
                  this is intended. */
1049           u64 sess_timeout_time;
1050           sess_timeout_time = s0->last_heard +
1051             (f64) nat44_session_get_timeout (sm, s0);
1052           if (now >= sess_timeout_time)
1053             {
1054               nat_free_session_data (sm, s0, thread_index, 0);
1055               nat_ed_session_delete (sm, s0, thread_index, 1);
1056               // session is closed, go slow path
                   /* NOTE(review): s0 is not cleared here, so the trace code at
                      trace0 still records the index of the session just deleted. */
1057               next0 = def_slow;
1058               goto trace0;
1059             }
1060
1061           b0->flags |= VNET_BUFFER_F_IS_NATED;
1062
1063           if (!is_output_feature)
1064             vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1065
               /* Rewrite the source address and patch the IPv4 header checksum;
                  twice-NAT sessions also account for the destination address
                  change (the address itself is written further below). */
1066           old_addr0 = ip0->src_address.as_u32;
1067           new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
1068           sum0 = ip0->checksum;
1069           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1070                                  src_address);
1071           if (PREDICT_FALSE (is_twice_nat_session (s0)))
1072             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1073                                    s0->ext_host_addr.as_u32, ip4_header_t,
1074                                    dst_address);
1075           ip0->checksum = ip_csum_fold (sum0);
1076
1077           old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port;
1078
1079           if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
1080             {
                   /* L4 header is only touched when this is not a non-first fragment. */
1081               if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1082                 {
1083                   new_port0 = udp0->src_port = s0->out2in.port;
1084                   sum0 = tcp0->checksum;
1085                   sum0 =
1086                     ip_csum_update (sum0, old_addr0, new_addr0,
1087                                     ip4_header_t, dst_address);
1088                   sum0 =
1089                     ip_csum_update (sum0, old_port0, new_port0,
1090                                     ip4_header_t, length);
1091                   if (PREDICT_FALSE (is_twice_nat_session (s0)))
1092                     {
1093                       sum0 =
1094                         ip_csum_update (sum0, ip0->dst_address.as_u32,
1095                                         s0->ext_host_addr.as_u32,
1096                                         ip4_header_t, dst_address);
1097                       sum0 =
1098                         ip_csum_update (sum0,
1099                                         vnet_buffer (b0)->ip.
1100                                         reass.l4_dst_port, s0->ext_host_port,
1101                                         ip4_header_t, length);
1102                       tcp0->dst_port = s0->ext_host_port;
1103                       ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1104                     }
1105                   mss_clamping (sm->mss_clamping, tcp0, &sum0);
1106                   tcp0->checksum = ip_csum_fold (sum0);
1107                 }
1108               tcp_packets++;
                   /* A non-zero return skips the counter and LRU updates below. */
1109               if (nat44_set_tcp_session_state_i2o
1110                   (sm, now, s0, b0, thread_index))
1111                 goto trace0;
1112             }
               /* Non-TCP with a non-zero L4 checksum: rewrite ports and patch
                  the checksum. */
1113           else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
1114                    && udp0->checksum)
1115             {
1116               new_port0 = udp0->src_port = s0->out2in.port;
1117               sum0 = udp0->checksum;
1118               sum0 =
1119                 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1120                                 dst_address);
1121               sum0 =
1122                 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
1123                                 length);
1124               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1125                 {
1126                   sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1127                                          s0->ext_host_addr.as_u32,
1128                                          ip4_header_t, dst_address);
1129                   sum0 =
1130                     ip_csum_update (sum0,
1131                                     vnet_buffer (b0)->ip.reass.l4_dst_port,
1132                                     s0->ext_host_port, ip4_header_t, length);
1133                   udp0->dst_port = s0->ext_host_port;
1134                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1135                 }
1136               udp0->checksum = ip_csum_fold (sum0);
1137               udp_packets++;
1138             }
               /* Remaining cases: non-first fragment, or L4 checksum field is 0.
                  Ports are rewritten without a checksum update; non-first
                  fragments are not counted in udp_packets. */
1139           else
1140             {
1141               if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1142                 {
1143                   new_port0 = udp0->src_port = s0->out2in.port;
1144                   if (PREDICT_FALSE (is_twice_nat_session (s0)))
1145                     {
1146                       udp0->dst_port = s0->ext_host_port;
1147                       ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1148                     }
1149                   udp_packets++;
1150                 }
1151             }
1152
1153           /* Accounting */
1154           nat44_session_update_counters (s0, now,
1155                                          vlib_buffer_length_in_chain
1156                                          (vm, b0), thread_index);
1157           /* Per-user LRU list maintenance */
1158           nat44_session_update_lru (sm, s0, thread_index);
1159
1160         trace0:
1161           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1162                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1163             {
1164               nat_in2out_ed_trace_t *t =
1165                 vlib_add_trace (vm, node, b0, sizeof (*t));
1166               t->sw_if_index = sw_if_index0;
1167               t->next_index = next0;
1168               t->is_slow_path = 0;
1169
1170               if (s0)
1171                 t->session_index = s0 - tsm->sessions;
1172               else
1173                 t->session_index = ~0;
1174             }
1175
               /* Count only buffers that continue to nat.arc_next. */
1176           pkts_processed += next0 == vnet_buffer2 (b0)->nat.arc_next;
1177           /* verify speculative enqueue, maybe switch current next frame */
1178           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1179                                            to_next, n_left_to_next,
1180                                            bi0, next0);
1181         }
1182
1183       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1184     }
1185
1186   vlib_node_increment_counter (vm, stats_node_index,
1187                                NAT_IN2OUT_ED_ERROR_IN2OUT_PACKETS,
1188                                pkts_processed);
1189   vlib_node_increment_counter (vm, stats_node_index,
1190                                NAT_IN2OUT_ED_ERROR_TCP_PACKETS, tcp_packets);
1191   vlib_node_increment_counter (vm, stats_node_index,
1192                                NAT_IN2OUT_ED_ERROR_UDP_PACKETS, udp_packets);
1193   vlib_node_increment_counter (vm, stats_node_index,
1194                                NAT_IN2OUT_ED_ERROR_ICMP_PACKETS,
1195                                icmp_packets);
1196   vlib_node_increment_counter (vm, stats_node_index,
1197                                NAT_IN2OUT_ED_ERROR_OTHER_PACKETS,
1198                                other_packets);
1199   return frame->n_vectors;
1200 }
1201
/*
 * Slow path of NAT44 endpoint-dependent in2out translation.
 *
 * Processes the frame one buffer at a time:
 *   - TTL == 1            -> NAT_NEXT_ICMP_ERROR
 *   - NAT_PROTOCOL_OTHER  -> nat44_ed_in2out_unknown_proto(); the buffer is
 *                            dropped when that returns no session
 *   - NAT_PROTOCOL_ICMP   -> icmp_in2out_ed_slow_path() picks the next node
 *   - TCP/UDP             -> look up the session in the per-thread in2out_ed
 *                            table; a session whose tcp_closed_timestamp has
 *                            been reached is deleted. When no session remains,
 *                            the not-translate checks run and slow_path_ed()
 *                            is called. With a session in hand, addresses,
 *                            ports and checksums are rewritten as in the
 *                            fast path.
 *
 * vm, node, frame: the vlib node-function arguments.
 * is_output_feature: when non-zero the IP header is located
 *   save_rewrite_length bytes past the buffer's current data, the
 *   output-feature not-translate check and the DHCP bypass are used, and
 *   sw_if_index[VLIB_TX] is left untouched.
 *
 * Returns frame->n_vectors.
 */
1202 static inline uword
1203 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm,
1204                                           vlib_node_runtime_t * node,
1205                                           vlib_frame_t * frame,
1206                                           int is_output_feature)
1207 {
1208   u32 n_left_from, *from, *to_next, pkts_processed = 0, stats_node_index;
1209   nat_next_t next_index;
1210   snat_main_t *sm = &snat_main;
1211   f64 now = vlib_time_now (vm);
1212   u32 thread_index = vm->thread_index;
1213   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1214   u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets = 0;
1215
       /* Counters are always charged to sm->ed_in2out_slowpath_node_index, for
          both values of is_output_feature. */
1216   stats_node_index = sm->ed_in2out_slowpath_node_index;
1217
1218   from = vlib_frame_vector_args (frame);
1219   n_left_from = frame->n_vectors;
1220   next_index = node->cached_next_index;
1221
1222   while (n_left_from > 0)
1223     {
1224       u32 n_left_to_next;
1225
1226       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1227
1228       while (n_left_from > 0 && n_left_to_next > 0)
1229         {
1230           u32 bi0;
1231           vlib_buffer_t *b0;
1232           u32 next0, sw_if_index0, rx_fib_index0, iph_offset0 = 0, proto0,
1233             new_addr0, old_addr0;
1234           u16 old_port0, new_port0;
1235           ip4_header_t *ip0;
1236           udp_header_t *udp0;
1237           tcp_header_t *tcp0;
1238           icmp46_header_t *icmp0;
1239           snat_session_t *s0 = 0;
1240           clib_bihash_kv_16_8_t kv0, value0;
1241           ip_csum_t sum0;
1242
1243           /* speculatively enqueue b0 to the current next frame */
1244           bi0 = from[0];
1245           to_next[0] = bi0;
1246           from += 1;
1247           to_next += 1;
1248           n_left_from -= 1;
1249           n_left_to_next -= 1;
1250
1251           b0 = vlib_get_buffer (vm, bi0);
1252
               /* Unlike the fast path, vnet_feature_next() is not called here;
                  nat.arc_next is used as stored in the buffer. */
1253           if (is_output_feature)
1254             iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1255
1256           next0 = vnet_buffer2 (b0)->nat.arc_next;
1257
1258           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1259                                   iph_offset0);
1260
1261           sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1262           rx_fib_index0 =
1263             fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1264                                                  sw_if_index0);
1265
               /* TTL of 1: request an ICMP time-exceeded error instead of translating. */
1266           if (PREDICT_FALSE (ip0->ttl == 1))
1267             {
1268               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1269               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1270                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1271                                            0);
1272               next0 = NAT_NEXT_ICMP_ERROR;
1273               goto trace0;
1274             }
1275
               /* udp0, tcp0 and icmp0 alias the same L4 header location. */
1276           udp0 = ip4_next_header (ip0);
1277           tcp0 = (tcp_header_t *) udp0;
1278           icmp0 = (icmp46_header_t *) udp0;
1279           proto0 = ip_proto_to_nat_proto (ip0->protocol);
1280
1281           if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
1282             {
1283               s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0,
1284                                                   rx_fib_index0,
1285                                                   thread_index, now,
1286                                                   vm, node);
1287               if (!s0)
1288                 next0 = NAT_NEXT_DROP;
1289
1290               other_packets++;
1291               goto trace0;
1292             }
1293
1294           if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1295             {
1296               next0 = icmp_in2out_ed_slow_path
1297                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
1298                  node, next0, now, thread_index, &s0);
1299               icmp_packets++;
1300               goto trace0;
1301             }
1302
               /* Key: src addr/port, dst addr/port, rx fib index, IP protocol.
                  Ports are taken from the buffer's ip.reass metadata. */
1303           init_ed_k (&kv0, ip0->src_address,
1304                      vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
1305                      vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
1306                      ip0->protocol);
1307           if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
1308             {
1309               ASSERT (thread_index == ed_value_get_thread_index (&value0));
1310               s0 =
1311                 pool_elt_at_index (tsm->sessions,
1312                                    ed_value_get_session_index (&value0));
1313
                   /* The close timestamp has been reached: delete the session
                      and continue as if the lookup had missed. */
1314               if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
1315                 {
1316                   nat_free_session_data (sm, s0, thread_index, 0);
1317                   nat_ed_session_delete (sm, s0, thread_index, 1);
1318                   s0 = NULL;
1319                 }
1320             }
1321
1322           if (!s0)
1323             {
1324               if (is_output_feature)
1325                 {
                       /* A non-zero return leaves next0 at nat.arc_next, i.e.
                          the buffer continues untranslated. */
1326                   if (PREDICT_FALSE
1327                       (nat44_ed_not_translate_output_feature
1328                        (sm, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1329                         vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1330                         sw_if_index0,
1331                         vnet_buffer (b0)->sw_if_index[VLIB_TX])))
1332                     goto trace0;
1333
1334                   /*
1335                    * Send DHCP packets to the ipv4 stack, or we won't
1336                    * be able to use dhcp client on the outside interface
1337                    */
1338                   if (PREDICT_FALSE
1339                       (proto0 == NAT_PROTOCOL_UDP
1340                        && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1341                            clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server))
1342                        && ip0->dst_address.as_u32 == 0xffffffff))
1343                     goto trace0;
1344                 }
1345               else
1346                 {
1347                   if (PREDICT_FALSE
1348                       (nat44_ed_not_translate
1349                        (sm, node, sw_if_index0, ip0, proto0, rx_fib_index0,
1350                         thread_index)))
1351                     goto trace0;
1352                 }
1353
                   /* slow_path_ed() receives &s0 and returns the next index;
                      presumably it creates the session - verify against its
                      definition. */
1354               next0 =
1355                 slow_path_ed (sm, b0, ip0->src_address, ip0->dst_address,
1356                               vnet_buffer (b0)->ip.reass.l4_src_port,
1357                               vnet_buffer (b0)->ip.reass.l4_dst_port,
1358                               ip0->protocol, rx_fib_index0, &s0, node, next0,
1359                               thread_index, now);
1360
1361               if (PREDICT_FALSE (next0 == NAT_NEXT_DROP))
1362                 goto trace0;
1363
                   /* Without a session there is nothing to rewrite with. */
1364               if (PREDICT_FALSE (!s0))
1365                 goto trace0;
1366
1367             }
1368
1369           b0->flags |= VNET_BUFFER_F_IS_NATED;
1370
1371           if (!is_output_feature)
1372             vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1373
               /* Rewrite the source address and patch the IPv4 header checksum;
                  twice-NAT sessions also account for the destination address
                  change (the address itself is written further below). */
1374           old_addr0 = ip0->src_address.as_u32;
1375           new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
1376           sum0 = ip0->checksum;
1377           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1378                                  src_address);
1379           if (PREDICT_FALSE (is_twice_nat_session (s0)))
1380             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1381                                    s0->ext_host_addr.as_u32, ip4_header_t,
1382                                    dst_address);
1383           ip0->checksum = ip_csum_fold (sum0);
1384
1385           old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port;
1386
1387           if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
1388             {
                   /* L4 header is only touched when this is not a non-first fragment. */
1389               if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1390                 {
1391                   new_port0 = udp0->src_port = s0->out2in.port;
1392                   sum0 = tcp0->checksum;
1393                   sum0 =
1394                     ip_csum_update (sum0, old_addr0, new_addr0,
1395                                     ip4_header_t, dst_address);
1396                   sum0 =
1397                     ip_csum_update (sum0, old_port0, new_port0,
1398                                     ip4_header_t, length);
1399                   if (PREDICT_FALSE (is_twice_nat_session (s0)))
1400                     {
1401                       sum0 =
1402                         ip_csum_update (sum0, ip0->dst_address.as_u32,
1403                                         s0->ext_host_addr.as_u32,
1404                                         ip4_header_t, dst_address);
1405                       sum0 =
1406                         ip_csum_update (sum0,
1407                                         vnet_buffer (b0)->ip.
1408                                         reass.l4_dst_port, s0->ext_host_port,
1409                                         ip4_header_t, length);
1410                       tcp0->dst_port = s0->ext_host_port;
1411                       ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1412                     }
1413                   mss_clamping (sm->mss_clamping, tcp0, &sum0);
1414                   tcp0->checksum = ip_csum_fold (sum0);
1415                 }
1416               tcp_packets++;
                   /* A non-zero return skips the counter and LRU updates below. */
1417               if (nat44_set_tcp_session_state_i2o
1418                   (sm, now, s0, b0, thread_index))
1419                 goto trace0;
1420             }
               /* Non-TCP with a non-zero L4 checksum: rewrite ports and patch
                  the checksum. */
1421           else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
1422                    && udp0->checksum)
1423             {
1424               new_port0 = udp0->src_port = s0->out2in.port;
1425               sum0 = udp0->checksum;
1426               sum0 =
1427                 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1428                                 dst_address);
1429               sum0 =
1430                 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
1431                                 length);
1432               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1433                 {
1434                   sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1435                                          s0->ext_host_addr.as_u32,
1436                                          ip4_header_t, dst_address);
1437                   sum0 =
1438                     ip_csum_update (sum0,
1439                                     vnet_buffer (b0)->ip.reass.l4_dst_port,
1440                                     s0->ext_host_port, ip4_header_t, length);
1441                   udp0->dst_port = s0->ext_host_port;
1442                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1443                 }
1444               udp0->checksum = ip_csum_fold (sum0);
1445               udp_packets++;
1446             }
               /* Remaining cases: non-first fragment, or L4 checksum field is 0.
                  Ports are rewritten without a checksum update; non-first
                  fragments are not counted in udp_packets. */
1447           else
1448             {
1449               if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1450                 {
1451                   new_port0 = udp0->src_port = s0->out2in.port;
1452                   if (PREDICT_FALSE (is_twice_nat_session (s0)))
1453                     {
1454                       udp0->dst_port = s0->ext_host_port;
1455                       ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1456                     }
1457                   udp_packets++;
1458                 }
1459             }
1460
1461           /* Accounting */
1462           nat44_session_update_counters (s0, now,
1463                                          vlib_buffer_length_in_chain
1464                                          (vm, b0), thread_index);
1465           /* Per-user LRU list maintenance */
1466           nat44_session_update_lru (sm, s0, thread_index);
1467
1468         trace0:
1469           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1470                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1471             {
1472               nat_in2out_ed_trace_t *t =
1473                 vlib_add_trace (vm, node, b0, sizeof (*t));
1474               t->sw_if_index = sw_if_index0;
1475               t->next_index = next0;
1476               t->is_slow_path = 1;
1477
1478               if (s0)
1479                 t->session_index = s0 - tsm->sessions;
1480               else
1481                 t->session_index = ~0;
1482             }
1483
               /* Count only buffers that continue to nat.arc_next. */
1484           pkts_processed += next0 == vnet_buffer2 (b0)->nat.arc_next;
1485
1486           /* verify speculative enqueue, maybe switch current next frame */
1487           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1488                                            to_next, n_left_to_next,
1489                                            bi0, next0);
1490         }
1491
1492       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1493     }
1494
1495   vlib_node_increment_counter (vm, stats_node_index,
1496                                NAT_IN2OUT_ED_ERROR_IN2OUT_PACKETS,
1497                                pkts_processed);
1498   vlib_node_increment_counter (vm, stats_node_index,
1499                                NAT_IN2OUT_ED_ERROR_TCP_PACKETS, tcp_packets);
1500   vlib_node_increment_counter (vm, stats_node_index,
1501                                NAT_IN2OUT_ED_ERROR_UDP_PACKETS, udp_packets);
1502   vlib_node_increment_counter (vm, stats_node_index,
1503                                NAT_IN2OUT_ED_ERROR_ICMP_PACKETS,
1504                                icmp_packets);
1505   vlib_node_increment_counter (vm, stats_node_index,
1506                                NAT_IN2OUT_ED_ERROR_OTHER_PACKETS,
1507                                other_packets);
1508   return frame->n_vectors;
1509 }
1510
1511 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1512                                      vlib_node_runtime_t * node,
1513                                      vlib_frame_t * frame)
1514 {
1515   return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0);
1516 }
1517
1518 /* *INDENT-OFF* */
1519 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1520   .name = "nat44-ed-in2out",
1521   .vector_size = sizeof (u32),
1522   .sibling_of = "nat-default",
1523   .format_trace = format_nat_in2out_ed_trace,
1524   .type = VLIB_NODE_TYPE_INTERNAL,
1525   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1526   .error_strings = nat_in2out_ed_error_strings,
1527   .runtime_data_bytes = sizeof (snat_runtime_t),
1528 };
1529 /* *INDENT-ON* */
1530
1531 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1532                                             vlib_node_runtime_t * node,
1533                                             vlib_frame_t * frame)
1534 {
1535   return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1);
1536 }
1537
1538 /* *INDENT-OFF* */
1539 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1540   .name = "nat44-ed-in2out-output",
1541   .vector_size = sizeof (u32),
1542   .sibling_of = "nat-default",
1543   .format_trace = format_nat_in2out_ed_trace,
1544   .type = VLIB_NODE_TYPE_INTERNAL,
1545   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1546   .error_strings = nat_in2out_ed_error_strings,
1547   .runtime_data_bytes = sizeof (snat_runtime_t),
1548 };
1549 /* *INDENT-ON* */
1550
1551 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1552                                               vlib_node_runtime_t *
1553                                               node, vlib_frame_t * frame)
1554 {
1555   return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0);
1556 }
1557
1558 /* *INDENT-OFF* */
1559 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1560   .name = "nat44-ed-in2out-slowpath",
1561   .vector_size = sizeof (u32),
1562   .sibling_of = "nat-default",
1563   .format_trace = format_nat_in2out_ed_trace,
1564   .type = VLIB_NODE_TYPE_INTERNAL,
1565   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1566   .error_strings = nat_in2out_ed_error_strings,
1567   .runtime_data_bytes = sizeof (snat_runtime_t),
1568 };
1569 /* *INDENT-ON* */
1570
1571 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1572                                                      vlib_node_runtime_t
1573                                                      * node,
1574                                                      vlib_frame_t * frame)
1575 {
1576   return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1);
1577 }
1578
1579 /* *INDENT-OFF* */
1580 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1581   .name = "nat44-ed-in2out-output-slowpath",
1582   .vector_size = sizeof (u32),
1583   .sibling_of = "nat-default",
1584   .format_trace = format_nat_in2out_ed_trace,
1585   .type = VLIB_NODE_TYPE_INTERNAL,
1586   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1587   .error_strings = nat_in2out_ed_error_strings,
1588   .runtime_data_bytes = sizeof (snat_runtime_t),
1589 };
1590 /* *INDENT-ON* */
1591
1592 static u8 *
1593 format_nat_pre_trace (u8 * s, va_list * args)
1594 {
1595   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1596   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1597   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1598   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1599                  t->arc_next_index);
1600 }
1601
1602 VLIB_NODE_FN (nat_pre_in2out_node)
1603   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1604 {
1605   return nat_pre_node_fn_inline (vm, node, frame,
1606                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1607 }
1608
1609 /* *INDENT-OFF* */
1610 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1611   .name = "nat-pre-in2out",
1612   .vector_size = sizeof (u32),
1613   .sibling_of = "nat-default",
1614   .format_trace = format_nat_pre_trace,
1615   .type = VLIB_NODE_TYPE_INTERNAL,
1616   .n_errors = 0,
1617 };
1618 /* *INDENT-ON* */
1619
1620 /*
1621  * fd.io coding-style-patch-verification: ON
1622  *
1623  * Local Variables:
1624  * eval: (c-set-style "gnu")
1625  * End:
1626  */