nat: Fix next feature for ED with multiple workers
[vpp.git] / src / plugins / nat / in2out_ed.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/pg/pg.h>
23 #include <vnet/ip/ip.h>
24 #include <vnet/ethernet/ethernet.h>
25 #include <vnet/fib/ip4_fib.h>
26 #include <vnet/udp/udp.h>
27 #include <vppinfra/error.h>
28 #include <nat/nat.h>
29 #include <nat/nat_ipfix_logging.h>
30 #include <nat/nat_inlines.h>
31 #include <nat/nat44/inlines.h>
32 #include <nat/nat_syslog.h>
33 #include <nat/nat_ha.h>
34 #include <nat/nat44/ed_inlines.h>
35 #include <nat/lib/nat_inlines.h>
36
/* Number of attempts to get a port for the ED overloading algorithm. If
 * rolling the dice this many times doesn't produce a free port, it is
 * treated as if no free ports were available, to conserve resources. */
40 #define ED_PORT_ALLOC_ATTEMPTS (10)
41
42 static char *nat_in2out_ed_error_strings[] = {
43 #define _(sym,string) string,
44   foreach_nat_in2out_ed_error
45 #undef _
46 };
47
48 typedef struct
49 {
50   u32 sw_if_index;
51   u32 next_index;
52   u32 session_index;
53   u32 is_slow_path;
54 } nat_in2out_ed_trace_t;
55
56 static u8 *
57 format_nat_in2out_ed_trace (u8 * s, va_list * args)
58 {
59   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
60   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
61   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
62   char *tag;
63
64   tag =
65     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
66     "NAT44_IN2OUT_ED_FAST_PATH";
67
68   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
69               t->sw_if_index, t->next_index, t->session_index);
70
71   return s;
72 }
73
74 #ifndef CLIB_MARCH_VARIANT
75 int
76 nat44_i2o_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg)
77 {
78   snat_main_t *sm = &snat_main;
79   nat44_is_idle_session_ctx_t *ctx = arg;
80   snat_session_t *s;
81   u64 sess_timeout_time;
82   u8 proto;
83   u16 r_port, l_port;
84   ip4_address_t *l_addr, *r_addr;
85   u32 fib_index;
86   clib_bihash_kv_16_8_t ed_kv;
87   int i;
88   snat_address_t *a;
89   snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
90                                                        ctx->thread_index);
91
92   ASSERT (ctx->thread_index == ed_value_get_thread_index (kv));
93   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (kv));
94   sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s);
95   if (ctx->now >= sess_timeout_time)
96     {
97       if (is_fwd_bypass_session (s))
98         goto delete;
99
100       l_addr = &s->out2in.addr;
101       r_addr = &s->ext_host_addr;
102       fib_index = s->out2in.fib_index;
103       if (snat_is_unk_proto_session (s))
104         {
105           proto = s->in2out.port;
106           r_port = 0;
107           l_port = 0;
108         }
109       else
110         {
111           proto = nat_proto_to_ip_proto (s->nat_proto);
112           l_port = s->out2in.port;
113           r_port = s->ext_host_port;
114         }
115       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
116       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0))
117         nat_elog_warn ("out2in_ed key del failed");
118
119       if (snat_is_unk_proto_session (s))
120         goto delete;
121
122       snat_ipfix_logging_nat44_ses_delete (ctx->thread_index,
123                                            s->in2out.addr.as_u32,
124                                            s->out2in.addr.as_u32,
125                                            s->nat_proto,
126                                            s->in2out.port,
127                                            s->out2in.port,
128                                            s->in2out.fib_index);
129
130       nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
131                              &s->in2out.addr, s->in2out.port,
132                              &s->ext_host_nat_addr, s->ext_host_nat_port,
133                              &s->out2in.addr, s->out2in.port,
134                              &s->ext_host_addr, s->ext_host_port,
135                              s->nat_proto, is_twice_nat_session (s));
136
137       nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
138                    s->ext_host_port, s->nat_proto, s->out2in.fib_index,
139                    ctx->thread_index);
140
141       if (is_twice_nat_session (s))
142         {
143           for (i = 0; i < vec_len (sm->twice_nat_addresses); i++)
144             {
145               // TODO FIXME this is obviously broken - which address should be
146               // freed here?!
147               a = sm->twice_nat_addresses + i;
148               if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32)
149                 {
150                   snat_free_outside_address_and_port (sm->twice_nat_addresses,
151                                                       ctx->thread_index,
152                                                       &s->ext_host_nat_addr,
153                                                       s->ext_host_nat_port,
154                                                       s->nat_proto);
155                   break;
156                 }
157             }
158         }
159
160       if (snat_is_session_static (s))
161         goto delete;
162
163       snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
164                                           &s->out2in.addr, s->out2in.port,
165                                           s->nat_proto);
166     delete:
167       nat_ed_session_delete (sm, s, ctx->thread_index, 1);
168       return 1;
169     }
170
171   return 0;
172 }
173 #endif
174
175 static inline u32
176 icmp_in2out_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0,
177                           ip4_header_t * ip0, icmp46_header_t * icmp0,
178                           u32 sw_if_index0, u32 rx_fib_index0,
179                           vlib_node_runtime_t * node, u32 next0, f64 now,
180                           u32 thread_index, snat_session_t ** p_s0)
181 {
182   vlib_main_t *vm = vlib_get_main ();
183
184   next0 = icmp_in2out (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
185                        next0, thread_index, p_s0, 0);
186   snat_session_t *s0 = *p_s0;
187   if (PREDICT_TRUE (next0 != NAT_NEXT_DROP && s0))
188     {
189       /* Accounting */
190       nat44_session_update_counters (s0, now,
191                                      vlib_buffer_length_in_chain
192                                      (vm, b0), thread_index);
193       /* Per-user LRU list maintenance */
194       nat44_session_update_lru (sm, s0, thread_index);
195     }
196   return next0;
197 }
198
199 static int
200 nat_ed_alloc_addr_and_port (snat_main_t * sm, u32 rx_fib_index,
201                             u32 nat_proto, u32 thread_index,
202                             ip4_address_t r_addr, u16 r_port, u8 proto,
203                             u16 port_per_thread, u32 snat_thread_index,
204                             snat_session_t * s,
205                             ip4_address_t * outside_addr,
206                             u16 * outside_port,
207                             clib_bihash_kv_16_8_t * out2in_ed_kv)
208 {
209   int i;
210   snat_address_t *a, *ga = 0;
211   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
212
213   const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
214
215   for (i = 0; i < vec_len (sm->addresses); i++)
216     {
217       a = sm->addresses + i;
218       switch (nat_proto)
219         {
220 #define _(N, j, n, unused)                                                   \
221   case NAT_PROTOCOL_##N:                                                     \
222     if (a->fib_index == rx_fib_index)                                        \
223       {                                                                      \
224         /* first try port suggested by caller */                             \
225         u16 port = clib_net_to_host_u16 (*outside_port);                     \
226         u16 port_offset = port - port_thread_offset;                         \
227         if (port <= port_thread_offset ||                                    \
228             port > port_thread_offset + port_per_thread)                     \
229           {                                                                  \
230             /* need to pick a different port, suggested port doesn't fit in  \
231              * this thread's port range */                                   \
232             port_offset = snat_random_port (0, port_per_thread - 1);         \
233             port = port_thread_offset + port_offset;                         \
234           }                                                                  \
235         u16 attempts = ED_PORT_ALLOC_ATTEMPTS;                               \
236         do                                                                   \
237           {                                                                  \
238             init_ed_kv (out2in_ed_kv, a->addr, clib_host_to_net_u16 (port),  \
239                         r_addr, r_port, s->out2in.fib_index, proto,          \
240                         thread_index, s - tsm->sessions);                    \
241             int rv = clib_bihash_add_del_16_8 (&sm->out2in_ed, out2in_ed_kv, \
242                                                2 /* is_add */);              \
243             if (0 == rv)                                                     \
244               {                                                              \
245                 ++a->busy_##n##_port_refcounts[port];                        \
246                 a->busy_##n##_ports_per_thread[thread_index]++;              \
247                 a->busy_##n##_ports++;                                       \
248                 *outside_addr = a->addr;                                     \
249                 *outside_port = clib_host_to_net_u16 (port);                 \
250                 return 0;                                                    \
251               }                                                              \
252             port_offset = snat_random_port (0, port_per_thread - 1);         \
253             port = port_thread_offset + port_offset;                         \
254             --attempts;                                                      \
255           }                                                                  \
256         while (attempts > 0);                                                \
257       }                                                                      \
258     else if (a->fib_index == ~0)                                             \
259       {                                                                      \
260         ga = a;                                                              \
261       }                                                                      \
262     break;
263
264           foreach_nat_protocol;
265         default:
266           nat_elog_info ("unknown protocol");
267           return 1;
268         }
269     }
270
271   if (ga)
272     {
273       /* fake fib_index to reuse macro */
274       rx_fib_index = ~0;
275       a = ga;
276       switch (nat_proto)
277         {
278           foreach_nat_protocol;
279         default:
280           nat_elog_info ("unknown protocol");
281           return 1;
282         }
283     }
284
285 #undef _
286
287   /* Totally out of translations to use... */
288   snat_ipfix_logging_addresses_exhausted (thread_index, 0);
289   return 1;
290 }
291
292 static_always_inline u32
293 nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr)
294 {
295   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
296   nat_outside_fib_t *outside_fib;
297   fib_prefix_t pfx = {
298     .fp_proto = FIB_PROTOCOL_IP4,
299     .fp_len = 32,
300     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
301     ,
302   };
303   // TODO: multiple vrfs none can resolve addr
304   /* *INDENT-OFF* */
305   vec_foreach (outside_fib, sm->outside_fibs)
306     {
307       fei = fib_table_lookup (outside_fib->fib_index, &pfx);
308       if (FIB_NODE_INDEX_INVALID != fei)
309         {
310           if (fib_entry_get_resolving_interface (fei) != ~0)
311             {
312               return outside_fib->fib_index;
313             }
314         }
315     }
316   /* *INDENT-ON* */
317   return ~0;
318 }
319
320 static u32
321 slow_path_ed (snat_main_t * sm,
322               vlib_buffer_t * b,
323               ip4_address_t l_addr,
324               ip4_address_t r_addr,
325               u16 l_port,
326               u16 r_port,
327               u8 proto,
328               u32 rx_fib_index,
329               snat_session_t ** sessionp,
330               vlib_node_runtime_t * node, u32 next, u32 thread_index, f64 now)
331 {
332   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
333   clib_bihash_kv_16_8_t out2in_ed_kv;
334   nat44_is_idle_session_ctx_t ctx;
335   ip4_address_t outside_addr;
336   u16 outside_port;
337   u8 identity_nat;
338
339   u32 nat_proto = ip_proto_to_nat_proto (proto);
340   snat_session_t *s = NULL;
341   lb_nat_type_t lb = 0;
342
343   if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP))
344     {
345       if (PREDICT_FALSE
346           (!tcp_flags_is_init
347            (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
348         {
349           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
350           return NAT_NEXT_DROP;
351         }
352     }
353
354   if (PREDICT_FALSE
355       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
356     {
357       if (!nat_lru_free_one (sm, thread_index, now))
358         {
359           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
360           nat_ipfix_logging_max_sessions (thread_index,
361                                           sm->max_translations_per_thread);
362           nat_elog_notice ("maximum sessions exceeded");
363           return NAT_NEXT_DROP;
364         }
365     }
366
367   ip4_address_t sm_addr;
368   u16 sm_port;
369   u32 sm_fib_index;
370   /* First try to match static mapping by local address and port */
371   if (snat_static_mapping_match
372       (sm, l_addr, l_port, rx_fib_index, nat_proto, &sm_addr, &sm_port,
373        &sm_fib_index, 0, 0, 0, &lb, 0, &identity_nat, 0))
374     {
375       s = nat_ed_session_alloc (sm, thread_index, now, proto);
376       ASSERT (s);
377       s->in2out.addr = l_addr;
378       s->in2out.port = l_port;
379       s->nat_proto = nat_proto;
380       s->in2out.fib_index = rx_fib_index;
381       s->out2in.fib_index = sm->outside_fib_index;
382
383       switch (vec_len (sm->outside_fibs))
384         {
385         case 0:
386           s->out2in.fib_index = sm->outside_fib_index;
387           break;
388         case 1:
389           s->out2in.fib_index = sm->outside_fibs[0].fib_index;
390           break;
391         default:
392           s->out2in.fib_index = nat_outside_fib_index_lookup (sm, r_addr);
393           break;
394         }
395
396       /* Try to create dynamic translation */
397       outside_port = l_port;    // suggest using local port to allocation function
398       if (nat_ed_alloc_addr_and_port (sm, rx_fib_index, nat_proto,
399                                       thread_index, r_addr, r_port, proto,
400                                       sm->port_per_thread,
401                                       tsm->snat_thread_index, s,
402                                       &outside_addr,
403                                       &outside_port, &out2in_ed_kv))
404         {
405           nat_elog_notice ("addresses exhausted");
406           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
407           nat_ed_session_delete (sm, s, thread_index, 1);
408           return NAT_NEXT_DROP;
409         }
410       s->out2in.addr = outside_addr;
411       s->out2in.port = outside_port;
412     }
413   else
414     {
415       if (PREDICT_FALSE (identity_nat))
416         {
417           *sessionp = NULL;
418           return next;
419         }
420       s = nat_ed_session_alloc (sm, thread_index, now, proto);
421       ASSERT (s);
422       s->out2in.addr = sm_addr;
423       s->out2in.port = sm_port;
424       s->in2out.addr = l_addr;
425       s->in2out.port = l_port;
426       s->nat_proto = nat_proto;
427       s->in2out.fib_index = rx_fib_index;
428       s->out2in.fib_index = sm->outside_fib_index;
429       switch (vec_len (sm->outside_fibs))
430         {
431         case 0:
432           s->out2in.fib_index = sm->outside_fib_index;
433           break;
434         case 1:
435           s->out2in.fib_index = sm->outside_fibs[0].fib_index;
436           break;
437         default:
438           s->out2in.fib_index = nat_outside_fib_index_lookup (sm, r_addr);
439           break;
440         }
441
442       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
443
444       init_ed_kv (&out2in_ed_kv, sm_addr, sm_port, r_addr, r_port,
445                   s->out2in.fib_index, proto, thread_index,
446                   s - tsm->sessions);
447       if (clib_bihash_add_or_overwrite_stale_16_8
448           (&sm->out2in_ed, &out2in_ed_kv, nat44_o2i_ed_is_idle_session_cb,
449            &ctx))
450         nat_elog_notice ("out2in-ed key add failed");
451     }
452
453   if (lb)
454     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
455   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
456   s->ext_host_addr = r_addr;
457   s->ext_host_port = r_port;
458
459   clib_bihash_kv_16_8_t in2out_ed_kv;
460   init_ed_kv (&in2out_ed_kv, l_addr, l_port, r_addr, r_port, rx_fib_index,
461               proto, thread_index, s - tsm->sessions);
462   ctx.now = now;
463   ctx.thread_index = thread_index;
464   if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, &in2out_ed_kv,
465                                                nat44_i2o_ed_is_idle_session_cb,
466                                                &ctx))
467     nat_elog_notice ("in2out-ed key add failed");
468
469   *sessionp = s;
470
471   /* log NAT event */
472   snat_ipfix_logging_nat44_ses_create (thread_index,
473                                        s->in2out.addr.as_u32,
474                                        s->out2in.addr.as_u32,
475                                        s->nat_proto,
476                                        s->in2out.port,
477                                        s->out2in.port, s->in2out.fib_index);
478
479   nat_syslog_nat44_sadd (s->user_index, s->in2out.fib_index,
480                          &s->in2out.addr, s->in2out.port,
481                          &s->ext_host_nat_addr, s->ext_host_nat_port,
482                          &s->out2in.addr, s->out2in.port,
483                          &s->ext_host_addr, s->ext_host_port, s->nat_proto,
484                          0);
485
486   nat_ha_sadd (&s->in2out.addr, s->in2out.port, &s->out2in.addr,
487                s->out2in.port, &s->ext_host_addr, s->ext_host_port,
488                &s->ext_host_nat_addr, s->ext_host_nat_port,
489                s->nat_proto, s->in2out.fib_index, s->flags, thread_index, 0);
490
491   per_vrf_sessions_register_session (s, thread_index);
492
493   return next;
494 }
495
496 static_always_inline int
497 nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t * node,
498                         u32 sw_if_index, ip4_header_t * ip, u32 proto,
499                         u32 rx_fib_index, u32 thread_index)
500 {
501   udp_header_t *udp = ip4_next_header (ip);
502   clib_bihash_kv_16_8_t kv, value;
503
504   init_ed_k (&kv, ip->dst_address, udp->dst_port, ip->src_address,
505              udp->src_port, sm->outside_fib_index, ip->protocol);
506
507   /* NAT packet aimed at external address if has active sessions */
508   if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
509     {
510       /* or is static mappings */
511       ip4_address_t placeholder_addr;
512       u16 placeholder_port;
513       u32 placeholder_fib_index;
514       if (!snat_static_mapping_match
515           (sm, ip->dst_address, udp->dst_port, sm->outside_fib_index, proto,
516            &placeholder_addr, &placeholder_port, &placeholder_fib_index, 1, 0,
517            0, 0, 0, 0, 0))
518         return 0;
519     }
520   else
521     return 0;
522
523   if (sm->forwarding_enabled)
524     return 1;
525
526   return snat_not_translate_fast (sm, node, sw_if_index, ip, proto,
527                                   rx_fib_index);
528 }
529
530 static_always_inline int
531 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
532                                       u32 thread_index, f64 now,
533                                       vlib_main_t * vm, vlib_buffer_t * b)
534 {
535   clib_bihash_kv_16_8_t kv, value;
536   snat_session_t *s = 0;
537   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
538
539   if (!sm->forwarding_enabled)
540     return 0;
541
542   if (ip->protocol == IP_PROTOCOL_ICMP)
543     {
544       if (get_icmp_i2o_ed_key (b, ip, 0, ~0, ~0, 0, 0, 0, &kv))
545         return 0;
546     }
547   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
548     {
549       init_ed_k (&kv, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
550                  ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port, 0,
551                  ip->protocol);
552     }
553   else
554     {
555       init_ed_k (&kv, ip->src_address, 0, ip->dst_address, 0, 0,
556                  ip->protocol);
557     }
558
559   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
560     {
561       ASSERT (thread_index == ed_value_get_thread_index (&value));
562       s =
563         pool_elt_at_index (tsm->sessions,
564                            ed_value_get_session_index (&value));
565       if (is_fwd_bypass_session (s))
566         {
567           if (ip->protocol == IP_PROTOCOL_TCP)
568             {
569               if (nat44_set_tcp_session_state_i2o
570                   (sm, now, s, b, thread_index))
571                 return 1;
572             }
573           /* Accounting */
574           nat44_session_update_counters (s, now,
575                                          vlib_buffer_length_in_chain (vm, b),
576                                          thread_index);
577           /* Per-user LRU list maintenance */
578           nat44_session_update_lru (sm, s, thread_index);
579           return 1;
580         }
581       else
582         return 0;
583     }
584
585   return 0;
586 }
587
588 static_always_inline int
589 nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip,
590                                        u16 src_port, u16 dst_port,
591                                        u32 thread_index, u32 rx_sw_if_index,
592                                        u32 tx_sw_if_index)
593 {
594   clib_bihash_kv_16_8_t kv, value;
595   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
596   snat_interface_t *i;
597   snat_session_t *s;
598   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
599   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
600
601   /* src NAT check */
602   init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
603              tx_fib_index, ip->protocol);
604   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
605     {
606       ASSERT (thread_index == ed_value_get_thread_index (&value));
607       s =
608         pool_elt_at_index (tsm->sessions,
609                            ed_value_get_session_index (&value));
610       if (nat44_is_ses_closed (s))
611         {
612           nat_free_session_data (sm, s, thread_index, 0);
613           nat_ed_session_delete (sm, s, thread_index, 1);
614         }
615       else
616         s->flags |= SNAT_SESSION_FLAG_OUTPUT_FEATURE;
617       return 1;
618     }
619
620   /* dst NAT check */
621   init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port,
622              rx_fib_index, ip->protocol);
623   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
624     {
625       ASSERT (thread_index == ed_value_get_thread_index (&value));
626       s =
627         pool_elt_at_index (tsm->sessions,
628                            ed_value_get_session_index (&value));
629       if (is_fwd_bypass_session (s))
630         return 0;
631
632       /* hairpinning */
633       /* *INDENT-OFF* */
634       pool_foreach (i, sm->output_feature_interfaces,
635       ({
636         if ((nat_interface_is_inside (i)) && (rx_sw_if_index == i->sw_if_index))
637            return 0;
638       }));
639       /* *INDENT-ON* */
640       return 1;
641     }
642
643   return 0;
644 }
645
646 #ifndef CLIB_MARCH_VARIANT
647 u32
648 icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node,
649                       u32 thread_index, vlib_buffer_t * b,
650                       ip4_header_t * ip, ip4_address_t * addr,
651                       u16 * port, u32 * fib_index, nat_protocol_t * proto,
652                       void *d, void *e, u8 * dont_translate)
653 {
654   u32 sw_if_index;
655   u32 rx_fib_index;
656   clib_bihash_kv_16_8_t kv, value;
657   u32 next = ~0;
658   int err;
659   snat_session_t *s = NULL;
660   u16 l_port = 0, r_port = 0;   // initialize to workaround gcc warning
661   vlib_main_t *vm = vlib_get_main ();
662   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
663   *dont_translate = 0;
664
665   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
666   rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
667
668   err =
669     get_icmp_i2o_ed_key (b, ip, rx_fib_index, ~0, ~0, proto, &l_port,
670                          &r_port, &kv);
671   if (err != 0)
672     {
673       b->error = node->errors[err];
674       next = NAT_NEXT_DROP;
675       goto out;
676     }
677
678   if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
679     {
680       if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
681         {
682           if (PREDICT_FALSE
683               (nat44_ed_not_translate_output_feature
684                (sm, ip, l_port, r_port, thread_index,
685                 sw_if_index, vnet_buffer (b)->sw_if_index[VLIB_TX])))
686             {
687               *dont_translate = 1;
688               goto out;
689             }
690         }
691       else
692         {
693           if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, sw_if_index,
694                                                      ip, NAT_PROTOCOL_ICMP,
695                                                      rx_fib_index,
696                                                      thread_index)))
697             {
698               *dont_translate = 1;
699               goto out;
700             }
701         }
702
703       if (PREDICT_FALSE
704           (icmp_type_is_error_message
705            (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
706         {
707           b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
708           next = NAT_NEXT_DROP;
709           goto out;
710         }
711
712       next =
713         slow_path_ed (sm, b, ip->src_address, ip->dst_address, l_port, r_port,
714                       ip->protocol, rx_fib_index, &s, node, next,
715                       thread_index, vlib_time_now (vm));
716
717       if (PREDICT_FALSE (next == NAT_NEXT_DROP))
718         goto out;
719
720       if (!s)
721         {
722           *dont_translate = 1;
723           goto out;
724         }
725     }
726   else
727     {
728       if (PREDICT_FALSE
729           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
730            ICMP4_echo_request
731            && vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
732            ICMP4_echo_reply
733            && !icmp_type_is_error_message (vnet_buffer (b)->ip.
734                                            reass.icmp_type_or_tcp_flags)))
735         {
736           b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
737           next = NAT_NEXT_DROP;
738           goto out;
739         }
740
741       ASSERT (thread_index == ed_value_get_thread_index (&value));
742       s =
743         pool_elt_at_index (tsm->sessions,
744                            ed_value_get_session_index (&value));
745     }
746 out:
747   if (s)
748     {
749       *addr = s->out2in.addr;
750       *port = s->out2in.port;
751       *fib_index = s->out2in.fib_index;
752     }
753   if (d)
754     {
755       *(snat_session_t **) d = s;
756     }
757   return next;
758 }
759 #endif
760
761 static snat_session_t *
762 nat44_ed_in2out_unknown_proto (snat_main_t * sm,
763                                vlib_buffer_t * b,
764                                ip4_header_t * ip,
765                                u32 rx_fib_index,
766                                u32 thread_index,
767                                f64 now,
768                                vlib_main_t * vm, vlib_node_runtime_t * node)
769 {
770   clib_bihash_kv_8_8_t kv, value;
771   clib_bihash_kv_16_8_t s_kv, s_value;
772   snat_static_mapping_t *m;
773   u32 old_addr, new_addr = 0;
774   ip_csum_t sum;
775   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
776   snat_session_t *s;
777   u32 outside_fib_index = sm->outside_fib_index;
778   int i;
779   u8 is_sm = 0;
780
781   switch (vec_len (sm->outside_fibs))
782     {
783     case 0:
784       outside_fib_index = sm->outside_fib_index;
785       break;
786     case 1:
787       outside_fib_index = sm->outside_fibs[0].fib_index;
788       break;
789     default:
790       outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address);
791       break;
792     }
793   old_addr = ip->src_address.as_u32;
794
795   init_ed_k (&s_kv, ip->src_address, 0, ip->dst_address, 0, rx_fib_index,
796              ip->protocol);
797
798   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &s_kv, &s_value))
799     {
800       ASSERT (thread_index == ed_value_get_thread_index (&s_value));
801       s =
802         pool_elt_at_index (tsm->sessions,
803                            ed_value_get_session_index (&s_value));
804       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
805     }
806   else
807     {
808       if (PREDICT_FALSE
809           (nat44_ed_maximum_sessions_exceeded
810            (sm, rx_fib_index, thread_index)))
811         {
812           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
813           nat_ipfix_logging_max_sessions (thread_index,
814                                           sm->max_translations_per_thread);
815           nat_elog_notice ("maximum sessions exceeded");
816           return 0;
817         }
818
819       init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0);
820
821       /* Try to find static mapping first */
822       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
823         {
824           m = pool_elt_at_index (sm->static_mappings, value.value);
825           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
826           is_sm = 1;
827           goto create_ses;
828         }
829       else
830         {
831           /* *INDENT-OFF* */
832           pool_foreach (s, tsm->sessions, {
833             if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
834               {
835                 new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
836
837                 init_ed_k(&s_kv, s->out2in.addr, 0, ip->dst_address, 0, outside_fib_index, ip->protocol);
838                 if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
839                   goto create_ses;
840
841                 break;
842               }
843           });
844           /* *INDENT-ON* */
845
846           for (i = 0; i < vec_len (sm->addresses); i++)
847             {
848               init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0,
849                          outside_fib_index, ip->protocol);
850               if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
851                 {
852                   new_addr = ip->src_address.as_u32 =
853                     sm->addresses[i].addr.as_u32;
854                   goto create_ses;
855                 }
856             }
857           return 0;
858         }
859
860     create_ses:
861       s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
862       if (!s)
863         {
864           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
865           nat_elog_warn ("create NAT session failed");
866           return 0;
867         }
868
869       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
870       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
871       s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
872       s->out2in.addr.as_u32 = new_addr;
873       s->out2in.fib_index = outside_fib_index;
874       s->in2out.addr.as_u32 = old_addr;
875       s->in2out.fib_index = rx_fib_index;
876       s->in2out.port = s->out2in.port = ip->protocol;
877       if (is_sm)
878         s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
879
880       /* Add to lookup tables */
881       init_ed_kv (&s_kv, s->in2out.addr, 0, ip->dst_address, 0, rx_fib_index,
882                   ip->protocol, thread_index, s - tsm->sessions);
883       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1))
884         nat_elog_notice ("in2out key add failed");
885
886       init_ed_kv (&s_kv, s->out2in.addr, 0, ip->dst_address, 0,
887                   outside_fib_index, ip->protocol, thread_index,
888                   s - tsm->sessions);
889       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
890         nat_elog_notice ("out2in key add failed");
891
892       per_vrf_sessions_register_session (s, thread_index);
893     }
894
895   /* Update IP checksum */
896   sum = ip->checksum;
897   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
898   ip->checksum = ip_csum_fold (sum);
899
900   /* Accounting */
901   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
902                                  thread_index);
903   /* Per-user LRU list maintenance */
904   nat44_session_update_lru (sm, s, thread_index);
905
906   /* Hairpinning */
907   if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
908     nat44_ed_hairpinning_unknown_proto (sm, b, ip);
909
910   if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
911     vnet_buffer (b)->sw_if_index[VLIB_TX] = outside_fib_index;
912
913   return s;
914 }
915
916 static inline uword
917 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm,
918                                           vlib_node_runtime_t * node,
919                                           vlib_frame_t * frame,
920                                           int is_output_feature)
921 {
922   u32 n_left_from, *from;
923   snat_main_t *sm = &snat_main;
924   f64 now = vlib_time_now (vm);
925   u32 thread_index = vm->thread_index;
926   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
927   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
928     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
929
930   from = vlib_frame_vector_args (frame);
931   n_left_from = frame->n_vectors;
932
933   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
934   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
935   vlib_get_buffers (vm, from, b, n_left_from);
936
937   while (n_left_from > 0)
938     {
939       vlib_buffer_t *b0;
940       u32 sw_if_index0, rx_fib_index0, proto0, new_addr0, old_addr0,
941         iph_offset0 = 0;
942       u16 old_port0, new_port0;
943       ip4_header_t *ip0;
944       udp_header_t *udp0;
945       tcp_header_t *tcp0;
946       snat_session_t *s0 = 0;
947       clib_bihash_kv_16_8_t kv0, value0;
948       ip_csum_t sum0;
949
950       b0 = *b;
951       b++;
952
953       /* Prefetch next iteration. */
954       if (PREDICT_TRUE (n_left_from >= 2))
955         {
956           vlib_buffer_t *p2;
957
958           p2 = *b;
959
960           vlib_prefetch_buffer_header (p2, LOAD);
961
962           CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
963         }
964
965       if (is_output_feature)
966         {
967           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
968         }
969
970       next[0] = vnet_buffer2 (b0)->nat.arc_next;
971
972       ip0 =
973         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
974
975       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
976       rx_fib_index0 =
977         fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
978
979       if (PREDICT_FALSE (ip0->ttl == 1))
980         {
981           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
982           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
983                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
984                                        0);
985           next[0] = NAT_NEXT_ICMP_ERROR;
986           goto trace0;
987         }
988
989       udp0 = ip4_next_header (ip0);
990       tcp0 = (tcp_header_t *) udp0;
991       proto0 = ip_proto_to_nat_proto (ip0->protocol);
992
993       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
994         {
995           next[0] = def_slow;
996           goto trace0;
997         }
998
999       if (is_output_feature)
1000         {
1001           if (PREDICT_FALSE
1002               (nat_not_translate_output_feature_fwd
1003                (sm, ip0, thread_index, now, vm, b0)))
1004             goto trace0;
1005         }
1006
1007       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1008         {
1009           next[0] = def_slow;
1010           goto trace0;
1011         }
1012
1013       init_ed_k (&kv0, ip0->src_address,
1014                  vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
1015                  vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
1016                  ip0->protocol);
1017
1018       // lookup for session
1019       if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
1020         {
1021           // session does not exist go slow path
1022           next[0] = def_slow;
1023           goto trace0;
1024         }
1025       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1026       s0 =
1027         pool_elt_at_index (tsm->sessions,
1028                            ed_value_get_session_index (&value0));
1029
1030       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1031         {
1032           // session is closed, go slow path
1033           nat_free_session_data (sm, s0, thread_index, 0);
1034           nat_ed_session_delete (sm, s0, thread_index, 1);
1035           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
1036           goto trace0;
1037         }
1038
1039       if (s0->tcp_closed_timestamp)
1040         {
1041           if (now >= s0->tcp_closed_timestamp)
1042             {
1043               // session is closed, go slow path, freed in slow path
1044               next[0] = def_slow;
1045             }
1046           else
1047             {
1048               // session in transitory timeout, drop
1049               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
1050               next[0] = NAT_NEXT_DROP;
1051             }
1052           goto trace0;
1053         }
1054
1055       // drop if session expired
1056       u64 sess_timeout_time;
1057       sess_timeout_time =
1058         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1059       if (now >= sess_timeout_time)
1060         {
1061           nat_free_session_data (sm, s0, thread_index, 0);
1062           nat_ed_session_delete (sm, s0, thread_index, 1);
1063           // session is closed, go slow path
1064           next[0] = def_slow;
1065           goto trace0;
1066         }
1067
1068       b0->flags |= VNET_BUFFER_F_IS_NATED;
1069
1070       if (!is_output_feature)
1071         vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1072
1073       old_addr0 = ip0->src_address.as_u32;
1074       new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
1075       sum0 = ip0->checksum;
1076       sum0 =
1077         ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1078                         src_address);
1079       if (PREDICT_FALSE (is_twice_nat_session (s0)))
1080         sum0 =
1081           ip_csum_update (sum0, ip0->dst_address.as_u32,
1082                           s0->ext_host_addr.as_u32, ip4_header_t,
1083                           dst_address);
1084       ip0->checksum = ip_csum_fold (sum0);
1085
1086       old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port;
1087
1088       if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
1089         {
1090           if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1091             {
1092               new_port0 = udp0->src_port = s0->out2in.port;
1093               sum0 = tcp0->checksum;
1094               sum0 =
1095                 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1096                                 dst_address);
1097               sum0 =
1098                 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
1099                                 length);
1100               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1101                 {
1102                   sum0 =
1103                     ip_csum_update (sum0, ip0->dst_address.as_u32,
1104                                     s0->ext_host_addr.as_u32, ip4_header_t,
1105                                     dst_address);
1106                   sum0 =
1107                     ip_csum_update (sum0,
1108                                     vnet_buffer (b0)->ip.reass.l4_dst_port,
1109                                     s0->ext_host_port, ip4_header_t, length);
1110                   tcp0->dst_port = s0->ext_host_port;
1111                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1112                 }
1113               mss_clamping (sm->mss_clamping, tcp0, &sum0);
1114               tcp0->checksum = ip_csum_fold (sum0);
1115             }
1116           vlib_increment_simple_counter (&sm->counters.fastpath.in2out_ed.tcp,
1117                                          thread_index, sw_if_index0, 1);
1118           if (nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index))
1119             goto trace0;
1120         }
1121       else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
1122                && udp0->checksum)
1123         {
1124           new_port0 = udp0->src_port = s0->out2in.port;
1125           sum0 = udp0->checksum;
1126           sum0 =
1127             ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1128                             dst_address);
1129           sum0 =
1130             ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, length);
1131           if (PREDICT_FALSE (is_twice_nat_session (s0)))
1132             {
1133               sum0 =
1134                 ip_csum_update (sum0, ip0->dst_address.as_u32,
1135                                 s0->ext_host_addr.as_u32, ip4_header_t,
1136                                 dst_address);
1137               sum0 =
1138                 ip_csum_update (sum0, vnet_buffer (b0)->ip.reass.l4_dst_port,
1139                                 s0->ext_host_port, ip4_header_t, length);
1140               udp0->dst_port = s0->ext_host_port;
1141               ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1142             }
1143           udp0->checksum = ip_csum_fold (sum0);
1144           vlib_increment_simple_counter (&sm->counters.fastpath.in2out_ed.udp,
1145                                          thread_index, sw_if_index0, 1);
1146         }
1147       else
1148         {
1149           if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1150             {
1151               new_port0 = udp0->src_port = s0->out2in.port;
1152               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1153                 {
1154                   udp0->dst_port = s0->ext_host_port;
1155                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1156                 }
1157               vlib_increment_simple_counter (&sm->counters.fastpath.
1158                                              in2out_ed.udp, thread_index,
1159                                              sw_if_index0, 1);
1160             }
1161         }
1162
1163       /* Accounting */
1164       nat44_session_update_counters (s0, now,
1165                                      vlib_buffer_length_in_chain (vm, b0),
1166                                      thread_index);
1167       /* Per-user LRU list maintenance */
1168       nat44_session_update_lru (sm, s0, thread_index);
1169
1170     trace0:
1171       if (PREDICT_FALSE
1172           ((node->flags & VLIB_NODE_FLAG_TRACE)
1173            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1174         {
1175           nat_in2out_ed_trace_t *t =
1176             vlib_add_trace (vm, node, b0, sizeof (*t));
1177           t->sw_if_index = sw_if_index0;
1178           t->next_index = next[0];
1179           t->is_slow_path = 0;
1180
1181           if (s0)
1182             t->session_index = s0 - tsm->sessions;
1183           else
1184             t->session_index = ~0;
1185         }
1186
1187       if (next[0] == NAT_NEXT_DROP)
1188         {
1189           vlib_increment_simple_counter (&sm->counters.fastpath.
1190                                          in2out_ed.drops, thread_index,
1191                                          sw_if_index0, 1);
1192         }
1193
1194       n_left_from--;
1195       next++;
1196     }
1197
1198   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1199                                frame->n_vectors);
1200   return frame->n_vectors;
1201 }
1202
1203 static inline uword
1204 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm,
1205                                           vlib_node_runtime_t * node,
1206                                           vlib_frame_t * frame,
1207                                           int is_output_feature)
1208 {
1209   u32 n_left_from, *from;
1210   snat_main_t *sm = &snat_main;
1211   f64 now = vlib_time_now (vm);
1212   u32 thread_index = vm->thread_index;
1213   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1214
1215   from = vlib_frame_vector_args (frame);
1216   n_left_from = frame->n_vectors;
1217
1218   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1219   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1220   vlib_get_buffers (vm, from, b, n_left_from);
1221
1222   while (n_left_from > 0)
1223     {
1224       vlib_buffer_t *b0;
1225       u32 sw_if_index0, rx_fib_index0, proto0, new_addr0, old_addr0,
1226         iph_offset0 = 0;
1227       u16 old_port0, new_port0;
1228       ip4_header_t *ip0;
1229       udp_header_t *udp0;
1230       tcp_header_t *tcp0;
1231       icmp46_header_t *icmp0;
1232       snat_session_t *s0 = 0;
1233       clib_bihash_kv_16_8_t kv0, value0;
1234       ip_csum_t sum0;
1235
1236       b0 = *b;
1237
1238       if (is_output_feature)
1239         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1240
1241       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1242
1243       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1244                               iph_offset0);
1245
1246       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1247       rx_fib_index0 =
1248         fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
1249
1250       if (PREDICT_FALSE (ip0->ttl == 1))
1251         {
1252           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1253           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1254                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1255                                        0);
1256           next[0] = NAT_NEXT_ICMP_ERROR;
1257           goto trace0;
1258         }
1259
1260       udp0 = ip4_next_header (ip0);
1261       tcp0 = (tcp_header_t *) udp0;
1262       icmp0 = (icmp46_header_t *) udp0;
1263       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1264
1265       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
1266         {
1267           s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0,
1268                                               rx_fib_index0,
1269                                               thread_index, now, vm, node);
1270           if (!s0)
1271             next[0] = NAT_NEXT_DROP;
1272
1273           vlib_increment_simple_counter (&sm->counters.slowpath.
1274                                          in2out_ed.other, thread_index,
1275                                          sw_if_index0, 1);
1276           goto trace0;
1277         }
1278
1279       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1280         {
1281           next[0] =
1282             icmp_in2out_ed_slow_path (sm, b0, ip0, icmp0, sw_if_index0,
1283                                       rx_fib_index0, node, next[0], now,
1284                                       thread_index, &s0);
1285           vlib_increment_simple_counter (&sm->counters.slowpath.
1286                                          in2out_ed.icmp, thread_index,
1287                                          sw_if_index0, 1);
1288           goto trace0;
1289         }
1290
1291       init_ed_k (&kv0, ip0->src_address,
1292                  vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
1293                  vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
1294                  ip0->protocol);
1295       if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
1296         {
1297           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1298           s0 =
1299             pool_elt_at_index (tsm->sessions,
1300                                ed_value_get_session_index (&value0));
1301
1302           if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
1303             {
1304               nat_free_session_data (sm, s0, thread_index, 0);
1305               nat_ed_session_delete (sm, s0, thread_index, 1);
1306               s0 = NULL;
1307             }
1308         }
1309
1310       if (!s0)
1311         {
1312           if (is_output_feature)
1313             {
1314               if (PREDICT_FALSE
1315                   (nat44_ed_not_translate_output_feature
1316                    (sm, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1317                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1318                     sw_if_index0, vnet_buffer (b0)->sw_if_index[VLIB_TX])))
1319                 goto trace0;
1320
1321               /*
1322                * Send DHCP packets to the ipv4 stack, or we won't
1323                * be able to use dhcp client on the outside interface
1324                */
1325               if (PREDICT_FALSE
1326                   (proto0 == NAT_PROTOCOL_UDP
1327                    && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1328                        clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server))
1329                    && ip0->dst_address.as_u32 == 0xffffffff))
1330                 goto trace0;
1331             }
1332           else
1333             {
1334               if (PREDICT_FALSE
1335                   (nat44_ed_not_translate
1336                    (sm, node, sw_if_index0, ip0, proto0, rx_fib_index0,
1337                     thread_index)))
1338                 goto trace0;
1339             }
1340
1341           next[0] =
1342             slow_path_ed (sm, b0, ip0->src_address, ip0->dst_address,
1343                           vnet_buffer (b0)->ip.reass.l4_src_port,
1344                           vnet_buffer (b0)->ip.reass.l4_dst_port,
1345                           ip0->protocol, rx_fib_index0, &s0, node, next[0],
1346                           thread_index, now);
1347
1348           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1349             goto trace0;
1350
1351           if (PREDICT_FALSE (!s0))
1352             goto trace0;
1353
1354         }
1355
1356       b0->flags |= VNET_BUFFER_F_IS_NATED;
1357
1358       if (!is_output_feature)
1359         vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1360
1361       old_addr0 = ip0->src_address.as_u32;
1362       new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
1363       sum0 = ip0->checksum;
1364       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1365                              src_address);
1366       if (PREDICT_FALSE (is_twice_nat_session (s0)))
1367         sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1368                                s0->ext_host_addr.as_u32, ip4_header_t,
1369                                dst_address);
1370       ip0->checksum = ip_csum_fold (sum0);
1371
1372       old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port;
1373
1374       if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
1375         {
1376           if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1377             {
1378               new_port0 = udp0->src_port = s0->out2in.port;
1379               sum0 = tcp0->checksum;
1380               sum0 =
1381                 ip_csum_update (sum0, old_addr0, new_addr0,
1382                                 ip4_header_t, dst_address);
1383               sum0 =
1384                 ip_csum_update (sum0, old_port0, new_port0,
1385                                 ip4_header_t, length);
1386               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1387                 {
1388                   sum0 =
1389                     ip_csum_update (sum0, ip0->dst_address.as_u32,
1390                                     s0->ext_host_addr.as_u32,
1391                                     ip4_header_t, dst_address);
1392                   sum0 =
1393                     ip_csum_update (sum0,
1394                                     vnet_buffer (b0)->ip.reass.l4_dst_port,
1395                                     s0->ext_host_port, ip4_header_t, length);
1396                   tcp0->dst_port = s0->ext_host_port;
1397                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1398                 }
1399               mss_clamping (sm->mss_clamping, tcp0, &sum0);
1400               tcp0->checksum = ip_csum_fold (sum0);
1401             }
1402           vlib_increment_simple_counter (&sm->counters.slowpath.in2out_ed.tcp,
1403                                          thread_index, sw_if_index0, 1);
1404           if (nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index))
1405             goto trace0;
1406         }
1407       else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
1408                && udp0->checksum)
1409         {
1410           new_port0 = udp0->src_port = s0->out2in.port;
1411           sum0 = udp0->checksum;
1412           sum0 =
1413             ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1414                             dst_address);
1415           sum0 =
1416             ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, length);
1417           if (PREDICT_FALSE (is_twice_nat_session (s0)))
1418             {
1419               sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1420                                      s0->ext_host_addr.as_u32,
1421                                      ip4_header_t, dst_address);
1422               sum0 =
1423                 ip_csum_update (sum0,
1424                                 vnet_buffer (b0)->ip.reass.l4_dst_port,
1425                                 s0->ext_host_port, ip4_header_t, length);
1426               udp0->dst_port = s0->ext_host_port;
1427               ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1428             }
1429           udp0->checksum = ip_csum_fold (sum0);
1430           vlib_increment_simple_counter (&sm->counters.slowpath.in2out_ed.udp,
1431                                          thread_index, sw_if_index0, 1);
1432         }
1433       else
1434         {
1435           if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1436             {
1437               new_port0 = udp0->src_port = s0->out2in.port;
1438               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1439                 {
1440                   udp0->dst_port = s0->ext_host_port;
1441                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1442                 }
1443               vlib_increment_simple_counter (&sm->counters.slowpath.
1444                                              in2out_ed.udp, thread_index,
1445                                              sw_if_index0, 1);
1446             }
1447         }
1448
1449       /* Accounting */
1450       nat44_session_update_counters (s0, now,
1451                                      vlib_buffer_length_in_chain
1452                                      (vm, b0), thread_index);
1453       /* Per-user LRU list maintenance */
1454       nat44_session_update_lru (sm, s0, thread_index);
1455
1456     trace0:
1457       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1458                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1459         {
1460           nat_in2out_ed_trace_t *t =
1461             vlib_add_trace (vm, node, b0, sizeof (*t));
1462           t->sw_if_index = sw_if_index0;
1463           t->next_index = next[0];
1464           t->is_slow_path = 1;
1465
1466           if (s0)
1467             t->session_index = s0 - tsm->sessions;
1468           else
1469             t->session_index = ~0;
1470         }
1471
1472       if (next[0] == NAT_NEXT_DROP)
1473         {
1474           vlib_increment_simple_counter (&sm->counters.slowpath.
1475                                          in2out_ed.drops, thread_index,
1476                                          sw_if_index0, 1);
1477         }
1478
1479       n_left_from--;
1480       next++;
1481       b++;
1482     }
1483
1484   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1485                                frame->n_vectors);
1486
1487   return frame->n_vectors;
1488 }
1489
1490 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1491                                      vlib_node_runtime_t * node,
1492                                      vlib_frame_t * frame)
1493 {
1494   return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0);
1495 }
1496
1497 /* *INDENT-OFF* */
1498 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1499   .name = "nat44-ed-in2out",
1500   .vector_size = sizeof (u32),
1501   .sibling_of = "nat-default",
1502   .format_trace = format_nat_in2out_ed_trace,
1503   .type = VLIB_NODE_TYPE_INTERNAL,
1504   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1505   .error_strings = nat_in2out_ed_error_strings,
1506   .runtime_data_bytes = sizeof (snat_runtime_t),
1507 };
1508 /* *INDENT-ON* */
1509
1510 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1511                                             vlib_node_runtime_t * node,
1512                                             vlib_frame_t * frame)
1513 {
1514   return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1);
1515 }
1516
1517 /* *INDENT-OFF* */
1518 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1519   .name = "nat44-ed-in2out-output",
1520   .vector_size = sizeof (u32),
1521   .sibling_of = "nat-default",
1522   .format_trace = format_nat_in2out_ed_trace,
1523   .type = VLIB_NODE_TYPE_INTERNAL,
1524   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1525   .error_strings = nat_in2out_ed_error_strings,
1526   .runtime_data_bytes = sizeof (snat_runtime_t),
1527 };
1528 /* *INDENT-ON* */
1529
1530 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1531                                               vlib_node_runtime_t *
1532                                               node, vlib_frame_t * frame)
1533 {
1534   return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0);
1535 }
1536
1537 /* *INDENT-OFF* */
1538 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1539   .name = "nat44-ed-in2out-slowpath",
1540   .vector_size = sizeof (u32),
1541   .sibling_of = "nat-default",
1542   .format_trace = format_nat_in2out_ed_trace,
1543   .type = VLIB_NODE_TYPE_INTERNAL,
1544   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1545   .error_strings = nat_in2out_ed_error_strings,
1546   .runtime_data_bytes = sizeof (snat_runtime_t),
1547 };
1548 /* *INDENT-ON* */
1549
1550 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1551                                                      vlib_node_runtime_t
1552                                                      * node,
1553                                                      vlib_frame_t * frame)
1554 {
1555   return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1);
1556 }
1557
1558 /* *INDENT-OFF* */
1559 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1560   .name = "nat44-ed-in2out-output-slowpath",
1561   .vector_size = sizeof (u32),
1562   .sibling_of = "nat-default",
1563   .format_trace = format_nat_in2out_ed_trace,
1564   .type = VLIB_NODE_TYPE_INTERNAL,
1565   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1566   .error_strings = nat_in2out_ed_error_strings,
1567   .runtime_data_bytes = sizeof (snat_runtime_t),
1568 };
1569 /* *INDENT-ON* */
1570
1571 static u8 *
1572 format_nat_pre_trace (u8 * s, va_list * args)
1573 {
1574   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1575   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1576   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1577   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1578                  t->arc_next_index);
1579 }
1580
1581 VLIB_NODE_FN (nat_pre_in2out_node)
1582   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1583 {
1584   return nat_pre_node_fn_inline (vm, node, frame,
1585                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1586 }
1587
1588 VLIB_NODE_FN (nat_pre_in2out_output_node)
1589   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1590 {
1591   return nat_pre_node_fn_inline (vm, node, frame,
1592                                  NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
1593 }
1594
1595 /* *INDENT-OFF* */
1596 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1597   .name = "nat-pre-in2out",
1598   .vector_size = sizeof (u32),
1599   .sibling_of = "nat-default",
1600   .format_trace = format_nat_pre_trace,
1601   .type = VLIB_NODE_TYPE_INTERNAL,
1602   .n_errors = 0,
1603 };
1604
1605 VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
1606   .name = "nat-pre-in2out-output",
1607   .vector_size = sizeof (u32),
1608   .sibling_of = "nat-default",
1609   .format_trace = format_nat_pre_trace,
1610   .type = VLIB_NODE_TYPE_INTERNAL,
1611   .n_errors = 0,
1612 };
1613 /* *INDENT-ON* */
1614
1615 /*
1616  * fd.io coding-style-patch-verification: ON
1617  *
1618  * Local Variables:
1619  * eval: (c-set-style "gnu")
1620  * End:
1621  */