nat: remove unused parameters and use correct parameter types
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_in2out.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27
28 #include <nat/lib/nat_syslog.h>
29 #include <nat/lib/nat_inlines.h>
30 #include <nat/lib/ipfix_logging.h>
31
32 #include <nat/nat44-ed/nat44_ed.h>
33 #include <nat/nat44-ed/nat44_ed_inlines.h>
34
/* Number of attempts to get a port for the ED overloading algorithm. If
 * rolling the dice this many times doesn't produce a free port, it is treated
 * as if there were no free ports available, to conserve resources. */
38 #define ED_PORT_ALLOC_ATTEMPTS (10)
39
40 static char *nat_in2out_ed_error_strings[] = {
41 #define _(sym,string) string,
42   foreach_nat_in2out_ed_error
43 #undef _
44 };
45
46 typedef struct
47 {
48   u32 sw_if_index;
49   u32 next_index;
50   u32 session_index;
51   nat_translation_error_e translation_error;
52   nat_6t_flow_t i2of;
53   nat_6t_flow_t o2if;
54   clib_bihash_kv_16_8_t search_key;
55   u8 is_slow_path;
56   u8 translation_via_i2of;
57   u8 lookup_skipped;
58 } nat_in2out_ed_trace_t;
59
60 static u8 *
61 format_nat_in2out_ed_trace (u8 * s, va_list * args)
62 {
63   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
64   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
65   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
66   char *tag;
67
68   tag =
69     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
70     "NAT44_IN2OUT_ED_FAST_PATH";
71
72   s = format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
73               t->next_index);
74   if (~0 != t->session_index)
75     {
76       s = format (s, ", session %d, translation result '%U' via %s",
77                   t->session_index, format_nat_ed_translation_error,
78                   t->translation_error,
79                   t->translation_via_i2of ? "i2of" : "o2if");
80       s = format (s, "\n  i2of %U", format_nat_6t_flow, &t->i2of);
81       s = format (s, "\n  o2if %U", format_nat_6t_flow, &t->o2if);
82     }
83   if (!t->is_slow_path)
84     {
85       if (t->lookup_skipped)
86         {
87           s = format (s, "\n lookup skipped - cached session index used");
88         }
89       else
90         {
91           s = format (s, "\n  search key %U", format_ed_session_kvp,
92                       &t->search_key);
93         }
94     }
95
96   return s;
97 }
98
99 /**
100  * @brief Check if packet should be translated
101  *
102  * Packets aimed at outside interface and external address with active session
103  * should be translated.
104  *
105  * @param sm            NAT main
106  * @param rt            NAT runtime data
107  * @param sw_if_index0  index of the inside interface
108  * @param ip0           IPv4 header
109  * @param rx_fib_index0 RX FIB index
110  *
111  * @returns 0 if packet should be translated otherwise 1
112  */
113 static inline int
114 snat_not_translate_fast (snat_main_t *sm, vlib_node_runtime_t *node,
115                          u32 sw_if_index0, ip4_header_t *ip0,
116                          u32 rx_fib_index0)
117 {
118   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
119   nat_outside_fib_t *outside_fib;
120   fib_prefix_t pfx = {
121     .fp_proto = FIB_PROTOCOL_IP4,
122     .fp_len = 32,
123     .fp_addr = {
124                 .ip4.as_u32 = ip0->dst_address.as_u32,
125                 }
126     ,
127   };
128
129   /* Don't NAT packet aimed at the intfc address */
130   if (PREDICT_FALSE (
131         is_interface_addr (sm, node, sw_if_index0, ip0->dst_address.as_u32)))
132     return 1;
133
134   fei = fib_table_lookup (rx_fib_index0, &pfx);
135   if (FIB_NODE_INDEX_INVALID != fei)
136     {
137       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
138       if (sw_if_index == ~0)
139         {
140           vec_foreach (outside_fib, sm->outside_fibs)
141             {
142               fei = fib_table_lookup (outside_fib->fib_index, &pfx);
143               if (FIB_NODE_INDEX_INVALID != fei)
144                 {
145                   sw_if_index = fib_entry_get_resolving_interface (fei);
146                   if (sw_if_index != ~0)
147                     break;
148                 }
149             }
150         }
151       if (sw_if_index == ~0)
152         return 1;
153
154       snat_interface_t *i;
155       pool_foreach (i, sm->interfaces)
156         {
157           /* NAT packet aimed at outside interface */
158           if ((nat44_ed_is_interface_outside (i)) &&
159               (sw_if_index == i->sw_if_index))
160             return 0;
161         }
162     }
163
164   return 1;
165 }
166
167 static int
168 nat_ed_alloc_addr_and_port_with_snat_address (
169   snat_main_t *sm, u32 nat_proto, u32 thread_index, snat_address_t *a,
170   u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
171   ip4_address_t *outside_addr, u16 *outside_port)
172 {
173   const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
174
175   s->o2i.match.daddr = a->addr;
176   /* first try port suggested by caller */
177   u16 port = clib_net_to_host_u16 (*outside_port);
178   u16 port_offset = port - port_thread_offset;
179   if (port <= port_thread_offset ||
180       port > port_thread_offset + port_per_thread)
181     {
182       /* need to pick a different port, suggested port doesn't fit in
183        * this thread's port range */
184       port_offset = snat_random_port (0, port_per_thread - 1);
185       port = port_thread_offset + port_offset;
186     }
187   u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
188   do
189     {
190       if (NAT_PROTOCOL_ICMP == nat_proto)
191         {
192           s->o2i.match.sport = clib_host_to_net_u16 (port);
193         }
194       s->o2i.match.dport = clib_host_to_net_u16 (port);
195       if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
196         {
197 #define _(N, i, n, s)                                                         \
198   case NAT_PROTOCOL_##N:                                                      \
199     ++a->busy_##n##_port_refcounts[port];                                     \
200     a->busy_##n##_ports_per_thread[thread_index]++;                           \
201     a->busy_##n##_ports++;                                                    \
202     break;
203           switch (nat_proto)
204             {
205               foreach_nat_protocol;
206             default:
207               nat_elog_info (sm, "unknown protocol");
208               return 1;
209             }
210 #undef _
211           *outside_addr = a->addr;
212           *outside_port = clib_host_to_net_u16 (port);
213           return 0;
214         }
215       port_offset = snat_random_port (0, port_per_thread - 1);
216       port = port_thread_offset + port_offset;
217       --attempts;
218     }
219   while (attempts > 0);
220   return 1;
221 }
222
223 static int
224 nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto,
225                             u32 thread_index, ip4_address_t s_addr,
226                             u16 port_per_thread, u32 snat_thread_index,
227                             snat_session_t *s, ip4_address_t *outside_addr,
228                             u16 *outside_port)
229 {
230   int i;
231   snat_address_t *a, *ga = 0;
232
233   if (vec_len (sm->addresses) > 0)
234     {
235       u32 s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses);
236
237       for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
238         {
239           a = sm->addresses + i;
240           if (a->fib_index == rx_fib_index)
241             {
242               return nat_ed_alloc_addr_and_port_with_snat_address (
243                 sm, nat_proto, thread_index, a, port_per_thread,
244                 snat_thread_index, s, outside_addr, outside_port);
245             }
246           else if (a->fib_index == ~0)
247             {
248               ga = a;
249             }
250         }
251
252       for (i = 0; i < s_addr_offset; ++i)
253         {
254           a = sm->addresses + i;
255           if (a->fib_index == rx_fib_index)
256             {
257               return nat_ed_alloc_addr_and_port_with_snat_address (
258                 sm, nat_proto, thread_index, a, port_per_thread,
259                 snat_thread_index, s, outside_addr, outside_port);
260             }
261           else if (a->fib_index == ~0)
262             {
263               ga = a;
264             }
265         }
266
267       if (ga)
268         {
269           return nat_ed_alloc_addr_and_port_with_snat_address (
270             sm, nat_proto, thread_index, a, port_per_thread, snat_thread_index,
271             s, outside_addr, outside_port);
272         }
273     }
274   /* Totally out of translations to use... */
275   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
276   return 1;
277 }
278
279 static_always_inline u32
280 nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr)
281 {
282   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
283   nat_outside_fib_t *outside_fib;
284   fib_prefix_t pfx = {
285     .fp_proto = FIB_PROTOCOL_IP4,
286     .fp_len = 32,
287     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
288     ,
289   };
290   // TODO: multiple vrfs none can resolve addr
291   vec_foreach (outside_fib, sm->outside_fibs)
292     {
293       fei = fib_table_lookup (outside_fib->fib_index, &pfx);
294       if (FIB_NODE_INDEX_INVALID != fei)
295         {
296           if (fib_entry_get_resolving_interface (fei) != ~0)
297             {
298               return outside_fib->fib_index;
299             }
300         }
301     }
302   return ~0;
303 }
304
305 static_always_inline int
306 nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
307                              u16 match_port, nat_protocol_t match_protocol,
308                              u32 match_fib_index, ip4_address_t *daddr,
309                              u16 *dport)
310 {
311   clib_bihash_kv_8_8_t kv, value;
312   init_nat_k (&kv, match_addr, match_port, match_fib_index, match_protocol);
313   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
314     {
315       /* Try address only mapping */
316       init_nat_k (&kv, match_addr, 0, 0, 0);
317       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv,
318                                   &value))
319         return 0;
320     }
321
322   snat_static_mapping_t *m =
323     pool_elt_at_index (sm->static_mappings, value.value);
324   *daddr = m->local_addr;
325   if (dport)
326     {
327       /* Address only mapping doesn't change port */
328       *dport = is_sm_addr_only (m->flags) ? match_port : m->local_port;
329     }
330   return 1;
331 }
332
333 static u32
334 slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
335               ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
336               u16 r_port, u8 proto, u32 rx_fib_index,
337               snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
338               u32 thread_index, f64 now)
339 {
340   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
341   ip4_address_t outside_addr;
342   u16 outside_port;
343   u32 outside_fib_index;
344   u8 is_identity_nat = 0;
345
346   u32 nat_proto = ip_proto_to_nat_proto (proto);
347   snat_session_t *s = NULL;
348   lb_nat_type_t lb = 0;
349   ip4_address_t daddr = r_addr;
350   u16 dport = r_port;
351
352   if (PREDICT_FALSE
353       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
354     {
355       if (!nat_lru_free_one (sm, thread_index, now))
356         {
357           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
358           nat_ipfix_logging_max_sessions (thread_index,
359                                           sm->max_translations_per_thread);
360           nat_elog_notice (sm, "maximum sessions exceeded");
361           return NAT_NEXT_DROP;
362         }
363     }
364
365   outside_fib_index = sm->outside_fib_index;
366
367   switch (vec_len (sm->outside_fibs))
368     {
369     case 0:
370       outside_fib_index = sm->outside_fib_index;
371       break;
372     case 1:
373       outside_fib_index = sm->outside_fibs[0].fib_index;
374       break;
375     default:
376       outside_fib_index = nat_outside_fib_index_lookup (sm, r_addr);
377       break;
378     }
379
380   ip4_address_t sm_addr;
381   u16 sm_port;
382   u32 sm_fib_index;
383   /* First try to match static mapping by local address and port */
384   int is_sm;
385   if (snat_static_mapping_match (vm, sm, l_addr, l_port, rx_fib_index,
386                                  nat_proto, &sm_addr, &sm_port, &sm_fib_index,
387                                  0, 0, 0, &lb, 0, &is_identity_nat, 0))
388     {
389       is_sm = 0;
390     }
391   else
392     {
393       if (PREDICT_FALSE (is_identity_nat))
394         {
395           *sessionp = NULL;
396           return next;
397         }
398       is_sm = 1;
399     }
400
401   if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP))
402     {
403       if (PREDICT_FALSE (!tcp_flags_is_init (
404             vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
405         {
406           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
407           return NAT_NEXT_DROP;
408         }
409     }
410
411   s = nat_ed_session_alloc (sm, thread_index, now, proto);
412   ASSERT (s);
413
414   if (!is_sm)
415     {
416       s->in2out.addr = l_addr;
417       s->in2out.port = l_port;
418       s->nat_proto = nat_proto;
419       s->in2out.fib_index = rx_fib_index;
420       s->out2in.fib_index = outside_fib_index;
421
422       // suggest using local port to allocation function
423       outside_port = l_port;
424
425       // hairpinning?
426       int is_hairpinning = nat44_ed_external_sm_lookup (
427         sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
428       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
429
430       // destination addr/port updated with real values in
431       // nat_ed_alloc_addr_and_port
432       nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
433                             s->out2in.fib_index, proto);
434       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
435       if (NAT_PROTOCOL_ICMP == nat_proto)
436         {
437           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
438         }
439       else
440         {
441           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
442         }
443       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
444
445       if (nat_ed_alloc_addr_and_port (
446             sm, rx_fib_index, nat_proto, thread_index, l_addr,
447             sm->port_per_thread, tsm->snat_thread_index, s, &outside_addr,
448             &outside_port))
449         {
450           nat_elog_notice (sm, "addresses exhausted");
451           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
452           nat_ed_session_delete (sm, s, thread_index, 1);
453           return NAT_NEXT_DROP;
454         }
455       s->out2in.addr = outside_addr;
456       s->out2in.port = outside_port;
457     }
458   else
459     {
460       // static mapping
461       s->out2in.addr = outside_addr = sm_addr;
462       s->out2in.port = outside_port = sm_port;
463       s->in2out.addr = l_addr;
464       s->in2out.port = l_port;
465       s->nat_proto = nat_proto;
466       s->in2out.fib_index = rx_fib_index;
467       s->out2in.fib_index = outside_fib_index;
468       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
469
470       // hairpinning?
471       int is_hairpinning = nat44_ed_external_sm_lookup (
472         sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
473       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
474
475       if (NAT_PROTOCOL_ICMP == nat_proto)
476         {
477           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
478                                 sm_port, s->out2in.fib_index, proto);
479           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
480         }
481       else
482         {
483           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr,
484                                 sm_port, s->out2in.fib_index, proto);
485           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
486         }
487       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
488       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
489       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
490         {
491           nat_elog_notice (sm, "out2in key add failed");
492           goto error;
493         }
494     }
495
496   if (lb)
497     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
498   s->ext_host_addr = r_addr;
499   s->ext_host_port = r_port;
500
501   nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port,
502                         rx_fib_index, proto);
503   nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
504   nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);
505
506   if (NAT_PROTOCOL_ICMP == nat_proto)
507     {
508       nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
509     }
510   else
511     {
512       nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
513       nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
514     }
515   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
516
517   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
518     {
519       nat_elog_notice (sm, "in2out key add failed");
520       goto error;
521     }
522
523   /* log NAT event */
524   nat_ipfix_logging_nat44_ses_create (thread_index,
525                                       s->in2out.addr.as_u32,
526                                       s->out2in.addr.as_u32,
527                                       s->nat_proto,
528                                       s->in2out.port,
529                                       s->out2in.port, s->in2out.fib_index);
530
531   nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
532                          s->in2out.port, &s->ext_host_nat_addr,
533                          s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
534                          &s->ext_host_addr, s->ext_host_port, s->nat_proto, 0);
535
536   per_vrf_sessions_register_session (s, thread_index);
537
538   *sessionp = s;
539   return next;
540 error:
541   if (s)
542     {
543       if (!is_sm)
544         {
545           snat_free_outside_address_and_port (sm->addresses, thread_index,
546                                               &outside_addr, outside_port,
547                                               nat_proto);
548         }
549       nat_ed_session_delete (sm, s, thread_index, 1);
550     }
551   *sessionp = s = NULL;
552   return NAT_NEXT_DROP;
553 }
554
555 static_always_inline int
556 nat44_ed_not_translate (vlib_main_t *vm, snat_main_t *sm,
557                         vlib_node_runtime_t *node, u32 sw_if_index,
558                         vlib_buffer_t *b, ip4_header_t *ip, u32 proto,
559                         u32 rx_fib_index)
560 {
561   clib_bihash_kv_16_8_t kv, value;
562
563   init_ed_k (&kv, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
564              ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
565              sm->outside_fib_index, ip->protocol);
566
567   /* NAT packet aimed at external address if has active sessions */
568   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
569     {
570       /* or is static mappings */
571       ip4_address_t placeholder_addr;
572       u16 placeholder_port;
573       u32 placeholder_fib_index;
574       if (!snat_static_mapping_match (
575             vm, sm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
576             sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
577             &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
578         return 0;
579     }
580   else
581     return 0;
582
583   if (sm->forwarding_enabled)
584     return 1;
585
586   return snat_not_translate_fast (sm, node, sw_if_index, ip, rx_fib_index);
587 }
588
589 static_always_inline int
590 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
591                                       u32 thread_index, f64 now,
592                                       vlib_main_t * vm, vlib_buffer_t * b)
593 {
594   clib_bihash_kv_16_8_t kv, value;
595   snat_session_t *s = 0;
596   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
597
598   if (!sm->forwarding_enabled)
599     return 0;
600
601   if (ip->protocol == IP_PROTOCOL_ICMP)
602     {
603       ip4_address_t lookup_saddr, lookup_daddr;
604       u16 lookup_sport, lookup_dport;
605       u8 lookup_protocol;
606       if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
607                                               &lookup_sport, &lookup_daddr,
608                                               &lookup_dport, &lookup_protocol))
609         return 0;
610       init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport,
611                  0, lookup_protocol);
612     }
613   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
614     {
615       init_ed_k (&kv, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
616                  ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port, 0,
617                  ip->protocol);
618     }
619   else
620     {
621       init_ed_k (&kv, ip->src_address, 0, ip->dst_address, 0, 0,
622                  ip->protocol);
623     }
624
625   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
626     {
627       ASSERT (thread_index == ed_value_get_thread_index (&value));
628       s =
629         pool_elt_at_index (tsm->sessions,
630                            ed_value_get_session_index (&value));
631
632       if (na44_ed_is_fwd_bypass_session (s))
633         {
634           if (ip->protocol == IP_PROTOCOL_TCP)
635             {
636               nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index);
637             }
638           /* Accounting */
639           nat44_session_update_counters (s, now,
640                                          vlib_buffer_length_in_chain (vm, b),
641                                          thread_index);
642           /* Per-user LRU list maintenance */
643           nat44_session_update_lru (sm, s, thread_index);
644           return 1;
645         }
646       else
647         return 0;
648     }
649
650   return 0;
651 }
652
653 static_always_inline int
654 nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
655                                        ip4_header_t *ip, u16 src_port,
656                                        u16 dst_port, u32 thread_index,
657                                        u32 rx_sw_if_index, u32 tx_sw_if_index,
658                                        f64 now, int is_multi_worker)
659 {
660   clib_bihash_kv_16_8_t kv, value;
661   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
662   snat_interface_t *i;
663   snat_session_t *s;
664   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
665   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
666
667   /* src NAT check */
668   init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
669              tx_fib_index, ip->protocol);
670   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
671     {
672       ASSERT (thread_index == ed_value_get_thread_index (&value));
673       s =
674         pool_elt_at_index (tsm->sessions,
675                            ed_value_get_session_index (&value));
676       if (nat44_is_ses_closed (s)
677           && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp))
678         {
679           nat_free_session_data (sm, s, thread_index, 0);
680           nat_ed_session_delete (sm, s, thread_index, 1);
681         }
682       return 1;
683     }
684
685   /* dst NAT check */
686   if (is_multi_worker &&
687       PREDICT_TRUE (!pool_is_free_index (
688         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index)))
689     {
690       nat_6t_t lookup;
691       lookup.fib_index = rx_fib_index;
692       lookup.proto = ip->protocol;
693       lookup.daddr.as_u32 = ip->src_address.as_u32;
694       lookup.dport = src_port;
695       lookup.saddr.as_u32 = ip->dst_address.as_u32;
696       lookup.sport = dst_port;
697       s = pool_elt_at_index (
698         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index);
699       if (PREDICT_TRUE (nat_6t_t_eq (&s->i2o.match, &lookup)))
700         {
701           goto skip_dst_nat_lookup;
702         }
703       s = NULL;
704     }
705
706   init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port,
707              rx_fib_index, ip->protocol);
708   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
709     {
710       ASSERT (thread_index == ed_value_get_thread_index (&value));
711       s =
712         pool_elt_at_index (tsm->sessions,
713                            ed_value_get_session_index (&value));
714
715     skip_dst_nat_lookup:
716       if (na44_ed_is_fwd_bypass_session (s))
717         return 0;
718
719       /* hairpinning */
720       pool_foreach (i, sm->output_feature_interfaces)
721        {
722          if ((nat44_ed_is_interface_inside (i)) &&
723              (rx_sw_if_index == i->sw_if_index))
724            return 0;
725       }
726       return 1;
727     }
728
729   return 0;
730 }
731
732 static inline u32
733 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
734                           icmp46_header_t *icmp, u32 sw_if_index,
735                           u32 rx_fib_index, vlib_node_runtime_t *node,
736                           u32 next, f64 now, u32 thread_index,
737                           snat_session_t **s_p, int is_multi_worker)
738 {
739   vlib_main_t *vm = vlib_get_main ();
740   u16 checksum;
741   int err;
742   snat_session_t *s = NULL;
743   u8 lookup_protocol = ip->protocol;
744   u16 lookup_sport, lookup_dport;
745   ip4_address_t lookup_saddr, lookup_daddr;
746
747   err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
748                                             &lookup_sport, &lookup_daddr,
749                                             &lookup_dport, &lookup_protocol);
750   if (err != 0)
751     {
752       b->error = node->errors[err];
753       return NAT_NEXT_DROP;
754     }
755
756   if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
757     {
758       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
759             sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
760             vnet_buffer (b)->sw_if_index[VLIB_TX], now, is_multi_worker)))
761         {
762           return next;
763         }
764     }
765   else
766     {
767       if (PREDICT_FALSE (nat44_ed_not_translate (vm, sm, node, sw_if_index, b,
768                                                  ip, NAT_PROTOCOL_ICMP,
769                                                  rx_fib_index)))
770         {
771           return next;
772         }
773     }
774
775   if (PREDICT_FALSE (icmp_type_is_error_message (
776         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
777     {
778       b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
779       return NAT_NEXT_DROP;
780     }
781
782   next = slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address,
783                        lookup_sport, lookup_dport, ip->protocol, rx_fib_index,
784                        &s, node, next, thread_index, vlib_time_now (vm));
785
786   if (NAT_NEXT_DROP == next)
787     goto out;
788
789   if (PREDICT_TRUE (!ip4_is_fragment (ip)))
790     {
791       ip_csum_t sum = ip_incremental_checksum_buffer (
792         vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b),
793         ntohs (ip->length) - ip4_header_bytes (ip), 0);
794       checksum = ~ip_csum_fold (sum);
795       if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff))
796         {
797           next = NAT_NEXT_DROP;
798           goto out;
799         }
800     }
801
802 out:
803   if (PREDICT_TRUE (next != NAT_NEXT_DROP && s))
804     {
805       /* Accounting */
806       nat44_session_update_counters (
807         s, now, vlib_buffer_length_in_chain (vm, b), thread_index);
808       /* Per-user LRU list maintenance */
809       nat44_session_update_lru (sm, s, thread_index);
810     }
811   *s_p = s;
812   return next;
813 }
814
815 static snat_session_t *
816 nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
817                                         ip4_header_t *ip, u32 rx_fib_index,
818                                         u32 thread_index, f64 now,
819                                         vlib_main_t *vm,
820                                         vlib_node_runtime_t *node)
821 {
822   clib_bihash_kv_8_8_t kv, value;
823   clib_bihash_kv_16_8_t s_kv, s_value;
824   snat_static_mapping_t *m = NULL;
825   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
826   snat_session_t *s = NULL;
827   u32 outside_fib_index = sm->outside_fib_index;
828   int i;
829   ip4_address_t new_src_addr = { 0 };
830   ip4_address_t new_dst_addr = ip->dst_address;
831
832   if (PREDICT_FALSE (
833         nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
834     {
835       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
836       nat_ipfix_logging_max_sessions (thread_index,
837                                       sm->max_translations_per_thread);
838       nat_elog_notice (sm, "maximum sessions exceeded");
839       return 0;
840     }
841
842   switch (vec_len (sm->outside_fibs))
843     {
844     case 0:
845       outside_fib_index = sm->outside_fib_index;
846       break;
847     case 1:
848       outside_fib_index = sm->outside_fibs[0].fib_index;
849       break;
850     default:
851       outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address);
852       break;
853     }
854
855   init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0);
856
857   /* Try to find static mapping first */
858   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
859     {
860       m = pool_elt_at_index (sm->static_mappings, value.value);
861       new_src_addr = m->external_addr;
862     }
863   else
864     {
865       pool_foreach (s, tsm->sessions)
866         {
867           if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
868             {
869               init_ed_k (&s_kv, s->out2in.addr, 0, ip->dst_address, 0,
870                          outside_fib_index, ip->protocol);
871               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
872                 {
873                   new_src_addr = s->out2in.addr;
874                 }
875               break;
876             }
877         }
878
879       if (!new_src_addr.as_u32)
880         {
881           for (i = 0; i < vec_len (sm->addresses); i++)
882             {
883               init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0,
884                          outside_fib_index, ip->protocol);
885               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
886                 {
887                   new_src_addr = sm->addresses[i].addr;
888                 }
889             }
890         }
891     }
892
893   if (!new_src_addr.as_u32)
894     {
895       // could not allocate address for translation ...
896       return 0;
897     }
898
899   s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
900   if (!s)
901     {
902       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
903       nat_elog_warn (sm, "create NAT session failed");
904       return 0;
905     }
906
907   nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
908                         ip->dst_address, 0, rx_fib_index, ip->protocol);
909   nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
910   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
911
912   // hairpinning?
913   int is_hairpinning =
914     nat44_ed_external_sm_lookup (sm, ip->dst_address, 0, NAT_PROTOCOL_OTHER,
915                                  outside_fib_index, &new_dst_addr, NULL);
916   s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
917
918   nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
919   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
920
921   nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
922                         outside_fib_index, ip->protocol);
923   nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
924   nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
925   nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
926
927   s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
928   s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
929   s->out2in.addr.as_u32 = new_src_addr.as_u32;
930   s->out2in.fib_index = outside_fib_index;
931   s->in2out.addr.as_u32 = ip->src_address.as_u32;
932   s->in2out.fib_index = rx_fib_index;
933   s->in2out.port = s->out2in.port = ip->protocol;
934   if (m)
935     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
936
937   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
938     {
939       nat_elog_notice (sm, "in2out flow hash add failed");
940       nat_ed_session_delete (sm, s, thread_index, 1);
941       return NULL;
942     }
943
944   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1))
945     {
946       nat_elog_notice (sm, "out2in flow hash add failed");
947       nat_ed_session_delete (sm, s, thread_index, 1);
948       return NULL;
949     }
950
951   per_vrf_sessions_register_session (s, thread_index);
952
953   /* Accounting */
954   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
955                                  thread_index);
956   /* Per-user LRU list maintenance */
957   nat44_session_update_lru (sm, s, thread_index);
958
959   return s;
960 }
961
962 static inline uword
963 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
964                                           vlib_node_runtime_t *node,
965                                           vlib_frame_t *frame,
966                                           int is_output_feature,
967                                           int is_multi_worker)
968 {
969   u32 n_left_from, *from;
970   snat_main_t *sm = &snat_main;
971   f64 now = vlib_time_now (vm);
972   u32 thread_index = vm->thread_index;
973   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
974   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
975     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
976
977   from = vlib_frame_vector_args (frame);
978   n_left_from = frame->n_vectors;
979
980   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
981   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
982   vlib_get_buffers (vm, from, b, n_left_from);
983
984   while (n_left_from > 0)
985     {
986       vlib_buffer_t *b0;
987       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
988       u32 tx_sw_if_index0;
989       u32 cntr_sw_if_index0;
990       nat_protocol_t proto0;
991       ip4_header_t *ip0;
992       snat_session_t *s0 = 0;
993       clib_bihash_kv_16_8_t kv0, value0;
994       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
995       nat_6t_flow_t *f = 0;
996       nat_6t_t lookup;
997       int lookup_skipped = 0;
998
999       b0 = *b;
1000       b++;
1001
1002       /* Prefetch next iteration. */
1003       if (PREDICT_TRUE (n_left_from >= 2))
1004         {
1005           vlib_buffer_t *p2;
1006
1007           p2 = *b;
1008
1009           vlib_prefetch_buffer_header (p2, LOAD);
1010
1011           clib_prefetch_load (p2->data);
1012         }
1013
1014       if (is_output_feature)
1015         {
1016           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1017         }
1018
1019       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1020
1021       ip0 =
1022         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
1023
1024       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1025       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1026       cntr_sw_if_index0 =
1027         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1028       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1029                                                            rx_sw_if_index0);
1030       lookup.fib_index = rx_fib_index0;
1031
1032       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1033         {
1034           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1035           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1036                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1037                                        0);
1038           next[0] = NAT_NEXT_ICMP_ERROR;
1039           goto trace0;
1040         }
1041
1042       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1043
1044       if (is_output_feature)
1045         {
1046           if (PREDICT_FALSE
1047               (nat_not_translate_output_feature_fwd
1048                (sm, ip0, thread_index, now, vm, b0)))
1049             goto trace0;
1050         }
1051
1052       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1053         {
1054           if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1055                 ICMP4_echo_request &&
1056               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1057                 ICMP4_echo_reply &&
1058               !icmp_type_is_error_message (
1059                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
1060             {
1061               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
1062               next[0] = NAT_NEXT_DROP;
1063               goto trace0;
1064             }
1065           int err = nat_get_icmp_session_lookup_values (
1066             b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
1067             &lookup.dport, &lookup.proto);
1068           if (err != 0)
1069             {
1070               b0->error = node->errors[err];
1071               next[0] = NAT_NEXT_DROP;
1072               goto trace0;
1073             }
1074         }
1075       else
1076         {
1077           lookup.proto = ip0->protocol;
1078           lookup.saddr.as_u32 = ip0->src_address.as_u32;
1079           lookup.daddr.as_u32 = ip0->dst_address.as_u32;
1080           lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
1081           lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
1082         }
1083
1084       /* there might be a stashed index in vnet_buffer2 from handoff or
1085        * classify node, see if it can be used */
1086       if (is_multi_worker &&
1087           !pool_is_free_index (tsm->sessions,
1088                                vnet_buffer2 (b0)->nat.cached_session_index))
1089         {
1090           s0 = pool_elt_at_index (tsm->sessions,
1091                                   vnet_buffer2 (b0)->nat.cached_session_index);
1092           if (PREDICT_TRUE (
1093                 nat_6t_t_eq (&s0->i2o.match, &lookup)
1094                 // for some hairpinning cases there are two "i2i" flows instead
1095                 // of i2o and o2i as both hosts are on inside
1096                 || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1097                     nat_6t_t_eq (&s0->o2i.match, &lookup))))
1098             {
1099               /* yes, this is the droid we're looking for */
1100               lookup_skipped = 1;
1101               goto skip_lookup;
1102             }
1103           s0 = NULL;
1104         }
1105
1106       init_ed_k (&kv0, lookup.saddr, lookup.sport, lookup.daddr, lookup.dport,
1107                  lookup.fib_index, lookup.proto);
1108
1109       // lookup flow
1110       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1111         {
1112           // flow does not exist go slow path
1113           next[0] = def_slow;
1114           goto trace0;
1115         }
1116
1117       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1118       s0 =
1119         pool_elt_at_index (tsm->sessions,
1120                            ed_value_get_session_index (&value0));
1121
1122     skip_lookup:
1123
1124       ASSERT (thread_index == s0->thread_index);
1125
1126       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1127         {
1128           // session is closed, go slow path
1129           nat_free_session_data (sm, s0, thread_index, 0);
1130           nat_ed_session_delete (sm, s0, thread_index, 1);
1131           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
1132           goto trace0;
1133         }
1134
1135       if (s0->tcp_closed_timestamp)
1136         {
1137           if (now >= s0->tcp_closed_timestamp)
1138             {
1139               // session is closed, go slow path, freed in slow path
1140               next[0] = def_slow;
1141             }
1142           else
1143             {
1144               // session in transitory timeout, drop
1145               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
1146               next[0] = NAT_NEXT_DROP;
1147             }
1148           goto trace0;
1149         }
1150
1151       // drop if session expired
1152       u64 sess_timeout_time;
1153       sess_timeout_time =
1154         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1155       if (now >= sess_timeout_time)
1156         {
1157           nat_free_session_data (sm, s0, thread_index, 0);
1158           nat_ed_session_delete (sm, s0, thread_index, 1);
1159           // session is closed, go slow path
1160           next[0] = def_slow;
1161           goto trace0;
1162         }
1163
1164       b0->flags |= VNET_BUFFER_F_IS_NATED;
1165
1166       if (nat_6t_t_eq (&s0->i2o.match, &lookup))
1167         {
1168           f = &s0->i2o;
1169         }
1170       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1171                nat_6t_t_eq (&s0->o2i.match, &lookup))
1172         {
1173           f = &s0->o2i;
1174         }
1175       else
1176         {
1177           translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
1178           nat_free_session_data (sm, s0, thread_index, 0);
1179           nat_ed_session_delete (sm, s0, thread_index, 1);
1180           next[0] = NAT_NEXT_DROP;
1181           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1182           goto trace0;
1183         }
1184
1185       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1186           (translation_error = nat_6t_flow_buf_translate_i2o (
1187              vm, sm, b0, ip0, f, proto0, is_output_feature)))
1188         {
1189           nat_free_session_data (sm, s0, thread_index, 0);
1190           nat_ed_session_delete (sm, s0, thread_index, 1);
1191           next[0] = NAT_NEXT_DROP;
1192           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1193           goto trace0;
1194         }
1195
1196       switch (proto0)
1197         {
1198         case NAT_PROTOCOL_TCP:
1199           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
1200                                          thread_index, cntr_sw_if_index0, 1);
1201           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1202           break;
1203         case NAT_PROTOCOL_UDP:
1204           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
1205                                          thread_index, cntr_sw_if_index0, 1);
1206           break;
1207         case NAT_PROTOCOL_ICMP:
1208           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
1209                                          thread_index, cntr_sw_if_index0, 1);
1210           break;
1211         case NAT_PROTOCOL_OTHER:
1212           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
1213                                          thread_index, cntr_sw_if_index0, 1);
1214           break;
1215         }
1216
1217       /* Accounting */
1218       nat44_session_update_counters (s0, now,
1219                                      vlib_buffer_length_in_chain (vm, b0),
1220                                      thread_index);
1221       /* Per-user LRU list maintenance */
1222       nat44_session_update_lru (sm, s0, thread_index);
1223
1224     trace0:
1225       if (PREDICT_FALSE
1226           ((node->flags & VLIB_NODE_FLAG_TRACE)
1227            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1228         {
1229           nat_in2out_ed_trace_t *t =
1230             vlib_add_trace (vm, node, b0, sizeof (*t));
1231           t->sw_if_index = rx_sw_if_index0;
1232           t->next_index = next[0];
1233           t->is_slow_path = 0;
1234           t->translation_error = translation_error;
1235           t->lookup_skipped = lookup_skipped;
1236           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1237
1238           if (s0)
1239             {
1240               t->session_index = s0 - tsm->sessions;
1241               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1242               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1243               t->translation_via_i2of = (&s0->i2o == f);
1244             }
1245           else
1246             {
1247               t->session_index = ~0;
1248             }
1249         }
1250
1251       if (next[0] == NAT_NEXT_DROP)
1252         {
1253           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
1254                                          thread_index, cntr_sw_if_index0, 1);
1255         }
1256
1257       n_left_from--;
1258       next++;
1259     }
1260
1261   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1262                                frame->n_vectors);
1263   return frame->n_vectors;
1264 }
1265
1266 static inline uword
1267 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
1268                                           vlib_node_runtime_t *node,
1269                                           vlib_frame_t *frame,
1270                                           int is_output_feature,
1271                                           int is_multi_worker)
1272 {
1273   u32 n_left_from, *from;
1274   snat_main_t *sm = &snat_main;
1275   f64 now = vlib_time_now (vm);
1276   u32 thread_index = vm->thread_index;
1277   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1278
1279   from = vlib_frame_vector_args (frame);
1280   n_left_from = frame->n_vectors;
1281
1282   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1283   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1284   vlib_get_buffers (vm, from, b, n_left_from);
1285
1286   while (n_left_from > 0)
1287     {
1288       vlib_buffer_t *b0;
1289       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1290       u32 tx_sw_if_index0;
1291       u32 cntr_sw_if_index0;
1292       nat_protocol_t proto0;
1293       ip4_header_t *ip0;
1294       udp_header_t *udp0;
1295       icmp46_header_t *icmp0;
1296       snat_session_t *s0 = 0;
1297       clib_bihash_kv_16_8_t kv0, value0;
1298       int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1299
1300       b0 = *b;
1301
1302       if (is_output_feature)
1303         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1304
1305       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1306
1307       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1308                               iph_offset0);
1309
1310       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1311       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1312       cntr_sw_if_index0 =
1313         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1314       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1315                                                            rx_sw_if_index0);
1316
1317       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1318         {
1319           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1320           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1321                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1322                                        0);
1323           next[0] = NAT_NEXT_ICMP_ERROR;
1324           goto trace0;
1325         }
1326
1327       udp0 = ip4_next_header (ip0);
1328       icmp0 = (icmp46_header_t *) udp0;
1329       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1330
1331       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
1332         {
1333           s0 = nat44_ed_in2out_slowpath_unknown_proto (
1334             sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
1335           if (!s0)
1336             next[0] = NAT_NEXT_DROP;
1337
1338           if (NAT_NEXT_DROP != next[0] && s0 &&
1339               NAT_ED_TRNSL_ERR_SUCCESS !=
1340                 (translation_error = nat_6t_flow_buf_translate_i2o (
1341                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1342             {
1343               nat_free_session_data (sm, s0, thread_index, 0);
1344               nat_ed_session_delete (sm, s0, thread_index, 1);
1345               next[0] = NAT_NEXT_DROP;
1346               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1347               goto trace0;
1348             }
1349
1350           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
1351                                          thread_index, cntr_sw_if_index0, 1);
1352           goto trace0;
1353         }
1354
1355       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1356         {
1357           next[0] = icmp_in2out_ed_slow_path (
1358             sm, b0, ip0, icmp0, rx_sw_if_index0, rx_fib_index0, node, next[0],
1359             now, thread_index, &s0, is_multi_worker);
1360           if (NAT_NEXT_DROP != next[0] && s0 &&
1361               NAT_ED_TRNSL_ERR_SUCCESS !=
1362                 (translation_error = nat_6t_flow_buf_translate_i2o (
1363                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1364             {
1365               nat_free_session_data (sm, s0, thread_index, 0);
1366               nat_ed_session_delete (sm, s0, thread_index, 1);
1367               next[0] = NAT_NEXT_DROP;
1368               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1369               goto trace0;
1370             }
1371
1372           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.icmp,
1373                                          thread_index, cntr_sw_if_index0, 1);
1374           goto trace0;
1375         }
1376
1377       init_ed_k (&kv0, ip0->src_address,
1378                  vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
1379                  vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
1380                  ip0->protocol);
1381       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1382         {
1383           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1384           s0 =
1385             pool_elt_at_index (tsm->sessions,
1386                                ed_value_get_session_index (&value0));
1387
1388           if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
1389             {
1390               nat_free_session_data (sm, s0, thread_index, 0);
1391               nat_ed_session_delete (sm, s0, thread_index, 1);
1392               s0 = NULL;
1393             }
1394         }
1395
1396       if (!s0)
1397         {
1398           if (is_output_feature)
1399             {
1400               if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
1401                     sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1402                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1403                     rx_sw_if_index0, tx_sw_if_index0, now, is_multi_worker)))
1404                 goto trace0;
1405
1406               /*
1407                * Send DHCP packets to the ipv4 stack, or we won't
1408                * be able to use dhcp client on the outside interface
1409                */
1410               if (PREDICT_FALSE
1411                   (proto0 == NAT_PROTOCOL_UDP
1412                    && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1413                        clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server))
1414                    && ip0->dst_address.as_u32 == 0xffffffff))
1415                 goto trace0;
1416             }
1417           else
1418             {
1419               if (PREDICT_FALSE (
1420                     nat44_ed_not_translate (vm, sm, node, rx_sw_if_index0, b0,
1421                                             ip0, proto0, rx_fib_index0)))
1422                 goto trace0;
1423             }
1424
1425           next[0] = slow_path_ed (
1426             vm, sm, b0, ip0->src_address, ip0->dst_address,
1427             vnet_buffer (b0)->ip.reass.l4_src_port,
1428             vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->protocol,
1429             rx_fib_index0, &s0, node, next[0], thread_index, now);
1430
1431           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1432             goto trace0;
1433
1434           if (PREDICT_FALSE (!s0))
1435             goto trace0;
1436
1437         }
1438
1439       b0->flags |= VNET_BUFFER_F_IS_NATED;
1440
1441       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1442           (translation_error = nat_6t_flow_buf_translate_i2o (
1443              vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1444         {
1445           nat_free_session_data (sm, s0, thread_index, 0);
1446           nat_ed_session_delete (sm, s0, thread_index, 1);
1447           next[0] = NAT_NEXT_DROP;
1448           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1449           goto trace0;
1450         }
1451
1452       if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
1453         {
1454           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
1455                                          thread_index, cntr_sw_if_index0, 1);
1456           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1457         }
1458       else
1459         {
1460           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
1461                                          thread_index, cntr_sw_if_index0, 1);
1462         }
1463
1464       /* Accounting */
1465       nat44_session_update_counters (s0, now,
1466                                      vlib_buffer_length_in_chain
1467                                      (vm, b0), thread_index);
1468       /* Per-user LRU list maintenance */
1469       nat44_session_update_lru (sm, s0, thread_index);
1470
1471     trace0:
1472       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1473                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1474         {
1475           nat_in2out_ed_trace_t *t =
1476             vlib_add_trace (vm, node, b0, sizeof (*t));
1477           t->sw_if_index = rx_sw_if_index0;
1478           t->next_index = next[0];
1479           t->is_slow_path = 1;
1480           t->translation_error = translation_error;
1481           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1482
1483           if (s0)
1484             {
1485               t->session_index = s0 - tsm->sessions;
1486               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1487               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1488               t->translation_via_i2of = 1;
1489             }
1490
1491           else
1492             {
1493               t->session_index = ~0;
1494             }
1495         }
1496
1497       if (next[0] == NAT_NEXT_DROP)
1498         {
1499           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
1500                                          thread_index, cntr_sw_if_index0, 1);
1501         }
1502
1503       n_left_from--;
1504       next++;
1505       b++;
1506     }
1507
1508   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1509                                frame->n_vectors);
1510
1511   return frame->n_vectors;
1512 }
1513
1514 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1515                                      vlib_node_runtime_t * node,
1516                                      vlib_frame_t * frame)
1517 {
1518   if (snat_main.num_workers > 1)
1519     {
1520       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1);
1521     }
1522   else
1523     {
1524       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0);
1525     }
1526 }
1527
1528 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1529   .name = "nat44-ed-in2out",
1530   .vector_size = sizeof (u32),
1531   .sibling_of = "nat-default",
1532   .format_trace = format_nat_in2out_ed_trace,
1533   .type = VLIB_NODE_TYPE_INTERNAL,
1534   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1535   .error_strings = nat_in2out_ed_error_strings,
1536   .runtime_data_bytes = sizeof (snat_runtime_t),
1537 };
1538
1539 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1540                                             vlib_node_runtime_t * node,
1541                                             vlib_frame_t * frame)
1542 {
1543   if (snat_main.num_workers > 1)
1544     {
1545       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1);
1546     }
1547   else
1548     {
1549       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0);
1550     }
1551 }
1552
1553 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1554   .name = "nat44-ed-in2out-output",
1555   .vector_size = sizeof (u32),
1556   .sibling_of = "nat-default",
1557   .format_trace = format_nat_in2out_ed_trace,
1558   .type = VLIB_NODE_TYPE_INTERNAL,
1559   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1560   .error_strings = nat_in2out_ed_error_strings,
1561   .runtime_data_bytes = sizeof (snat_runtime_t),
1562 };
1563
1564 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1565                                               vlib_node_runtime_t *
1566                                               node, vlib_frame_t * frame)
1567 {
1568   if (snat_main.num_workers > 1)
1569     {
1570       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 1);
1571     }
1572   else
1573     {
1574       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 0);
1575     }
1576 }
1577
1578 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1579   .name = "nat44-ed-in2out-slowpath",
1580   .vector_size = sizeof (u32),
1581   .sibling_of = "nat-default",
1582   .format_trace = format_nat_in2out_ed_trace,
1583   .type = VLIB_NODE_TYPE_INTERNAL,
1584   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1585   .error_strings = nat_in2out_ed_error_strings,
1586   .runtime_data_bytes = sizeof (snat_runtime_t),
1587 };
1588
1589 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1590                                                      vlib_node_runtime_t
1591                                                      * node,
1592                                                      vlib_frame_t * frame)
1593 {
1594   if (snat_main.num_workers > 1)
1595     {
1596       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 1);
1597     }
1598   else
1599     {
1600       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 0);
1601     }
1602 }
1603
1604 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1605   .name = "nat44-ed-in2out-output-slowpath",
1606   .vector_size = sizeof (u32),
1607   .sibling_of = "nat-default",
1608   .format_trace = format_nat_in2out_ed_trace,
1609   .type = VLIB_NODE_TYPE_INTERNAL,
1610   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1611   .error_strings = nat_in2out_ed_error_strings,
1612   .runtime_data_bytes = sizeof (snat_runtime_t),
1613 };
1614
1615 static u8 *
1616 format_nat_pre_trace (u8 * s, va_list * args)
1617 {
1618   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1619   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1620   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1621   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1622                  t->arc_next_index);
1623 }
1624
1625 VLIB_NODE_FN (nat_pre_in2out_node)
1626   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1627 {
1628   return nat_pre_node_fn_inline (vm, node, frame,
1629                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1630 }
1631
1632 VLIB_NODE_FN (nat_pre_in2out_output_node)
1633   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1634 {
1635   return nat_pre_node_fn_inline (vm, node, frame,
1636                                  NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
1637 }
1638
1639 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1640   .name = "nat-pre-in2out",
1641   .vector_size = sizeof (u32),
1642   .sibling_of = "nat-default",
1643   .format_trace = format_nat_pre_trace,
1644   .type = VLIB_NODE_TYPE_INTERNAL,
1645   .n_errors = 0,
1646 };
1647
1648 VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
1649   .name = "nat-pre-in2out-output",
1650   .vector_size = sizeof (u32),
1651   .sibling_of = "nat-default",
1652   .format_trace = format_nat_pre_trace,
1653   .type = VLIB_NODE_TYPE_INTERNAL,
1654   .n_errors = 0,
1655 };
1656
1657 /*
1658  * fd.io coding-style-patch-verification: ON
1659  *
1660  * Local Variables:
1661  * eval: (c-set-style "gnu")
1662  * End:
1663  */