nat: VRF routing & FIB improvements
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_in2out.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27
28 #include <nat/lib/nat_inlines.h>
29 #include <nat/lib/ipfix_logging.h>
30
31 #include <nat/nat44-ed/nat44_ed.h>
32 #include <nat/nat44-ed/nat44_ed_inlines.h>
33
34 static char *nat_in2out_ed_error_strings[] = {
35 #define _(sym,string) string,
36   foreach_nat_in2out_ed_error
37 #undef _
38 };
39
/* Per-packet trace record rendered by format_nat_in2out_ed_trace(). */
typedef struct
{
  u32 sw_if_index;   /* printed as "sw_if_index" */
  u32 next_index;    /* printed as "next index" */
  u32 session_index; /* ~0 means no session details are printed */
  nat_translation_error_e translation_error; /* printed as translation result */
  nat_6t_flow_t i2of; /* printed as "i2of"; its match.proto gates TCP output */
  nat_6t_flow_t o2if; /* printed as "o2if" */
  clib_bihash_kv_16_8_t search_key; /* printed on the fast path when the
                                       lookup was not skipped */
  u8 is_slow_path;         /* selects the SLOW_PATH / FAST_PATH tag */
  u8 translation_via_i2of; /* non-zero prints "i2of", zero prints "o2if" */
  u8 lookup_skipped;       /* fast path only: cached session index was used */
  u8 tcp_state;            /* printed only when i2of.match.proto is TCP */
} nat_in2out_ed_trace_t;
54
55 static u8 *
56 format_nat_in2out_ed_trace (u8 * s, va_list * args)
57 {
58   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
59   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
60   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
61   char *tag;
62
63   tag =
64     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
65     "NAT44_IN2OUT_ED_FAST_PATH";
66
67   s = format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
68               t->next_index);
69   if (~0 != t->session_index)
70     {
71       s = format (s, ", session %d, translation result '%U' via %s",
72                   t->session_index, format_nat_ed_translation_error,
73                   t->translation_error,
74                   t->translation_via_i2of ? "i2of" : "o2if");
75       s = format (s, "\n  i2of %U", format_nat_6t_flow, &t->i2of);
76       s = format (s, "\n  o2if %U", format_nat_6t_flow, &t->o2if);
77     }
78   if (!t->is_slow_path)
79     {
80       if (t->lookup_skipped)
81         {
82           s = format (s, "\n  lookup skipped - cached session index used");
83         }
84       else
85         {
86           s = format (s, "\n  search key %U", format_ed_session_kvp,
87                       &t->search_key);
88         }
89     }
90   if (IP_PROTOCOL_TCP == t->i2of.match.proto)
91     {
92       s = format (s, "\n  TCP state: %U", format_nat44_ed_tcp_state,
93                   t->tcp_state);
94     }
95
96   return s;
97 }
98
99 static int
100 nat_ed_alloc_addr_and_port_with_snat_address (
101   snat_main_t *sm, u8 proto, u32 thread_index, snat_address_t *a,
102   u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
103   ip4_address_t *outside_addr, u16 *outside_port)
104 {
105   const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
106
107   s->o2i.match.daddr = a->addr;
108   /* first try port suggested by caller */
109   u16 port = clib_net_to_host_u16 (*outside_port);
110   u16 port_offset = port - port_thread_offset;
111   if (port <= port_thread_offset ||
112       port > port_thread_offset + port_per_thread)
113     {
114       /* need to pick a different port, suggested port doesn't fit in
115        * this thread's port range */
116       port_offset = snat_random_port (0, port_per_thread - 1);
117       port = port_thread_offset + port_offset;
118     }
119   u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
120   do
121     {
122       if (IP_PROTOCOL_ICMP == proto)
123         {
124           s->o2i.match.sport = clib_host_to_net_u16 (port);
125         }
126       s->o2i.match.dport = clib_host_to_net_u16 (port);
127       if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
128         {
129           *outside_addr = a->addr;
130           *outside_port = clib_host_to_net_u16 (port);
131           return 0;
132         }
133       port_offset = snat_random_port (0, port_per_thread - 1);
134       port = port_thread_offset + port_offset;
135       --attempts;
136     }
137   while (attempts > 0);
138   return 1;
139 }
140
141 static int
142 nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index,
143                             u32 tx_sw_if_index, u32 nat_proto,
144                             u32 thread_index, ip4_address_t s_addr,
145                             ip4_address_t d_addr, u32 snat_thread_index,
146                             snat_session_t *s, ip4_address_t *outside_addr,
147                             u16 *outside_port)
148 {
149   if (vec_len (sm->addresses) > 0)
150     {
151       u32 s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses);
152       snat_address_t *a, *ja = 0, *ra = 0, *ba = 0;
153       int i;
154
155       // output feature
156       if (tx_sw_if_index != ~0)
157         {
158           for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
159             {
160               a = sm->addresses + i;
161               if (a->fib_index == rx_fib_index)
162                 {
163                   if (a->sw_if_index == tx_sw_if_index)
164                     {
165                       if ((a->addr_len != ~0) &&
166                           (a->net.as_u32 ==
167                            (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
168
169                         {
170                           return nat_ed_alloc_addr_and_port_with_snat_address (
171                             sm, nat_proto, thread_index, a,
172                             sm->port_per_thread, snat_thread_index, s,
173                             outside_addr, outside_port);
174                         }
175                       ra = a;
176                     }
177                   ja = a;
178                 }
179               else if (a->fib_index == ~0)
180                 {
181                   ba = a;
182                 }
183             }
184           for (i = 0; i < s_addr_offset; ++i)
185             {
186               a = sm->addresses + i;
187               if (a->fib_index == rx_fib_index)
188                 {
189                   if (a->sw_if_index == tx_sw_if_index)
190                     {
191                       if ((a->addr_len != ~0) &&
192                           (a->net.as_u32 ==
193                            (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
194
195                         {
196                           return nat_ed_alloc_addr_and_port_with_snat_address (
197                             sm, nat_proto, thread_index, a,
198                             sm->port_per_thread, snat_thread_index, s,
199                             outside_addr, outside_port);
200                         }
201                       ra = a;
202                     }
203                   ja = a;
204                 }
205               else if (a->fib_index == ~0)
206                 {
207                   ba = a;
208                 }
209             }
210           if (ra)
211             {
212               return nat_ed_alloc_addr_and_port_with_snat_address (
213                 sm, nat_proto, thread_index, ra, sm->port_per_thread,
214                 snat_thread_index, s, outside_addr, outside_port);
215             }
216         }
217       else
218         {
219           // first try nat pool addresses to sw interface addreses mappings
220           for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
221             {
222               a = sm->addresses + i;
223               if (a->fib_index == rx_fib_index)
224                 {
225                   if ((a->addr_len != ~0) &&
226                       (a->net.as_u32 ==
227                        (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
228                     {
229                       return nat_ed_alloc_addr_and_port_with_snat_address (
230                         sm, nat_proto, thread_index, a, sm->port_per_thread,
231                         snat_thread_index, s, outside_addr, outside_port);
232                     }
233                   ja = a;
234                 }
235               else if (a->fib_index == ~0)
236                 {
237                   ba = a;
238                 }
239             }
240           for (i = 0; i < s_addr_offset; ++i)
241             {
242               a = sm->addresses + i;
243               if (a->fib_index == rx_fib_index)
244                 {
245                   if ((a->addr_len != ~0) &&
246                       (a->net.as_u32 ==
247                        (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
248                     {
249                       return nat_ed_alloc_addr_and_port_with_snat_address (
250                         sm, nat_proto, thread_index, a, sm->port_per_thread,
251                         snat_thread_index, s, outside_addr, outside_port);
252                     }
253                   ja = a;
254                 }
255               else if (a->fib_index == ~0)
256                 {
257                   ba = a;
258                 }
259             }
260         }
261
262       if (ja || ba)
263         {
264           a = ja ? ja : ba;
265           return nat_ed_alloc_addr_and_port_with_snat_address (
266             sm, nat_proto, thread_index, a, sm->port_per_thread,
267             snat_thread_index, s, outside_addr, outside_port);
268         }
269     }
270   /* Totally out of translations to use... */
271   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
272   return 1;
273 }
274
275 static_always_inline int
276 nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
277                              u16 match_port, ip_protocol_t match_protocol,
278                              ip4_address_t *daddr, u16 *dport)
279 {
280   snat_static_mapping_t *m =
281     nat44_ed_sm_o2i_lookup (sm, match_addr, match_port, 0, match_protocol);
282   if (!m)
283     {
284       /* Try address only mapping */
285       m = nat44_ed_sm_o2i_lookup (sm, match_addr, 0, 0, 0);
286       if (!m)
287         return 0;
288     }
289   *daddr = m->local_addr;
290   if (dport)
291     {
292       /* Address only mapping doesn't change port */
293       *dport = is_sm_addr_only (m->flags) ? match_port : m->local_port;
294     }
295   return 1;
296 }
297
298 static_always_inline vrf_table_t *
299 get_vrf_table_by_fib (u32 fib_index)
300 {
301   snat_main_t *sm = &snat_main;
302   vrf_table_t *t;
303
304   pool_foreach (t, sm->vrf_tables)
305     {
306       if (fib_index == t->table_fib_index)
307         {
308           return t;
309         }
310     }
311
312   return 0;
313 }
314
315 static_always_inline u32
316 get_tx_fib_index (u32 rx_fib_index, ip4_address_t addr)
317 {
318   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
319   fib_prefix_t pfx = {
320     .fp_proto = FIB_PROTOCOL_IP4,
321     .fp_len = 32,
322     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
323     ,
324   };
325
326   snat_main_t *sm = &snat_main;
327   vrf_table_t *t = get_vrf_table_by_fib (rx_fib_index);
328   // default to rx fib
329   u32 tx_fib_index = rx_fib_index;
330
331   if (0 != t)
332     {
333       // managed routes to other fibs
334       vrf_route_t *r;
335       pool_foreach (r, t->routes)
336         {
337           fei = fib_table_lookup (r->fib_index, &pfx);
338           if ((FIB_NODE_INDEX_INVALID != fei) &&
339               (~0 != fib_entry_get_resolving_interface (fei)))
340             {
341               tx_fib_index = r->fib_index;
342               break;
343             }
344         }
345     }
346   else
347     {
348       // default to configured fib
349       tx_fib_index = sm->outside_fib_index;
350
351       // default routes to other fibs
352       nat_fib_t *f;
353       vec_foreach (f, sm->outside_fibs)
354         {
355           fei = fib_table_lookup (f->fib_index, &pfx);
356           if ((FIB_NODE_INDEX_INVALID != fei) &&
357               (~0 != fib_entry_get_resolving_interface (fei)))
358             {
359               tx_fib_index = f->fib_index;
360               break;
361             }
362         }
363     }
364
365   return tx_fib_index;
366 }
367
368 static_always_inline int
369 is_destination_resolvable (u32 rx_fib_index, ip4_address_t addr)
370 {
371   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
372   fib_prefix_t pfx = {
373     .fp_proto = FIB_PROTOCOL_IP4,
374     .fp_len = 32,
375     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
376     ,
377   };
378
379   snat_main_t *sm = &snat_main;
380   vrf_table_t *t = get_vrf_table_by_fib (rx_fib_index);
381   u32 ii;
382
383   if (0 != t)
384     {
385       // managed routes to other fibs
386       vrf_route_t *r;
387       pool_foreach (r, t->routes)
388         {
389           fei = fib_table_lookup (r->fib_index, &pfx);
390           if ((FIB_NODE_INDEX_INVALID != fei) &&
391               (~0 != (ii = fib_entry_get_resolving_interface (fei))))
392             {
393               return 1;
394             }
395         }
396     }
397   else
398     {
399       // default routes to other fibs
400       nat_fib_t *f;
401       vec_foreach (f, sm->outside_fibs)
402         {
403           fei = fib_table_lookup (f->fib_index, &pfx);
404           if ((FIB_NODE_INDEX_INVALID != fei) &&
405               (~0 != (ii = fib_entry_get_resolving_interface (fei))))
406             {
407               snat_interface_t *i;
408               pool_foreach (i, sm->interfaces)
409                 {
410                   if ((nat44_ed_is_interface_outside (i)) &&
411                       (ii == i->sw_if_index))
412                     {
413                       return 1;
414                     }
415                 }
416             }
417         }
418     }
419
420   return 0;
421 }
422
423 static u32
424 slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
425               ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
426               u16 r_port, u8 proto, u32 rx_fib_index, u32 tx_sw_if_index,
427               snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
428               u32 thread_index, f64 now)
429 {
430   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
431   ip4_address_t outside_addr;
432   u16 outside_port;
433   u32 tx_fib_index;
434   u8 is_identity_nat = 0;
435
436   snat_session_t *s = NULL;
437   lb_nat_type_t lb = 0;
438   ip4_address_t daddr = r_addr;
439   u16 dport = r_port;
440
441   if (PREDICT_FALSE
442       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
443     {
444       if (!nat_lru_free_one (sm, thread_index, now))
445         {
446           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
447           nat_ipfix_logging_max_sessions (thread_index,
448                                           sm->max_translations_per_thread);
449           nat_elog_notice (sm, "maximum sessions exceeded");
450           return NAT_NEXT_DROP;
451         }
452     }
453
454   ip4_address_t sm_addr;
455   u16 sm_port;
456   u32 sm_fib_index;
457   int is_sm = 0;
458   // First try to match static mapping by local address and port
459   if (!snat_static_mapping_match (vm, l_addr, l_port, rx_fib_index, proto,
460                                   &sm_addr, &sm_port, &sm_fib_index, 0, 0, 0,
461                                   &lb, 0, &is_identity_nat, 0))
462     {
463       if (PREDICT_FALSE (is_identity_nat))
464         {
465           *sessionp = NULL;
466           return next;
467         }
468       is_sm = 1;
469     }
470
471   if (PREDICT_TRUE (proto == IP_PROTOCOL_TCP))
472     {
473       if (PREDICT_FALSE (!tcp_flags_is_init (
474             vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
475         {
476           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
477           return NAT_NEXT_DROP;
478         }
479     }
480
481   s = nat_ed_session_alloc (sm, thread_index, now, proto);
482   ASSERT (s);
483
484   tx_fib_index = get_tx_fib_index (rx_fib_index, r_addr);
485
486   if (!is_sm)
487     {
488       s->in2out.addr = l_addr;
489       s->in2out.port = l_port;
490       s->proto = proto;
491       s->in2out.fib_index = rx_fib_index;
492       s->out2in.fib_index = tx_fib_index;
493
494       // suggest using local port to allocation function
495       outside_port = l_port;
496
497       if (PREDICT_FALSE (nat44_ed_external_sm_lookup (sm, r_addr, r_port,
498                                                       proto, &daddr, &dport)))
499         {
500           s->flags |= SNAT_SESSION_FLAG_HAIRPINNING;
501         }
502
503       // destination addr/port updated with real values in
504       // nat_ed_alloc_addr_and_port
505       nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
506                             s->out2in.fib_index, proto);
507       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
508       if (IP_PROTOCOL_ICMP == proto)
509         {
510           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
511         }
512       else
513         {
514           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
515         }
516       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
517
518       if (nat_ed_alloc_addr_and_port (
519             sm, rx_fib_index, tx_sw_if_index, proto, thread_index, l_addr,
520             r_addr, tsm->snat_thread_index, s, &outside_addr, &outside_port))
521         {
522           nat_elog_notice (sm, "addresses exhausted");
523           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
524           nat_ed_session_delete (sm, s, thread_index, 1);
525           return NAT_NEXT_DROP;
526         }
527       s->out2in.addr = outside_addr;
528       s->out2in.port = outside_port;
529     }
530   else
531     {
532       // static mapping
533       s->out2in.addr = outside_addr = sm_addr;
534       s->out2in.port = outside_port = sm_port;
535       s->in2out.addr = l_addr;
536       s->in2out.port = l_port;
537       s->proto = proto;
538       s->in2out.fib_index = rx_fib_index;
539       s->out2in.fib_index = tx_fib_index;
540       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
541
542       // hairpinning?
543       int is_hairpinning = nat44_ed_external_sm_lookup (sm, r_addr, r_port,
544                                                         proto, &daddr, &dport);
545       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
546
547       if (IP_PROTOCOL_ICMP == proto)
548         {
549           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
550                                 sm_port, s->out2in.fib_index, proto);
551           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
552         }
553       else
554         {
555           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr,
556                                 sm_port, s->out2in.fib_index, proto);
557           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
558         }
559       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
560       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
561       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
562         {
563           nat_elog_notice (sm, "out2in key add failed");
564           goto error;
565         }
566     }
567
568   if (lb)
569     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
570   s->ext_host_addr = r_addr;
571   s->ext_host_port = r_port;
572
573   nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port,
574                         rx_fib_index, proto);
575   nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
576   nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);
577
578   if (IP_PROTOCOL_ICMP == proto)
579     {
580       nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
581     }
582   else
583     {
584       nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
585       nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
586     }
587   nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
588
589   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
590     {
591       nat_elog_notice (sm, "in2out key add failed");
592       goto error;
593     }
594
595   /* log NAT event */
596   nat_ipfix_logging_nat44_ses_create (
597     thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
598     s->in2out.port, s->out2in.port, s->in2out.fib_index);
599
600   nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
601                          s->in2out.port, &s->ext_host_nat_addr,
602                          s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
603                          &s->ext_host_addr, s->ext_host_port, s->proto, 0);
604
605   per_vrf_sessions_register_session (s, thread_index);
606
607   *sessionp = s;
608   return next;
609 error:
610   if (s)
611     {
612       nat_ed_session_delete (sm, s, thread_index, 1);
613     }
614   *sessionp = s = NULL;
615   return NAT_NEXT_DROP;
616 }
617
618 static_always_inline int
619 nat44_ed_not_translate (vlib_main_t *vm, vlib_node_runtime_t *node,
620                         u32 sw_if_index, vlib_buffer_t *b, ip4_header_t *ip,
621                         u32 proto, u32 rx_fib_index)
622 {
623   snat_main_t *sm = &snat_main;
624
625   clib_bihash_kv_16_8_t kv, value;
626   ip4_address_t placeholder_addr;
627   u32 placeholder_fib_index;
628   u16 placeholder_port;
629
630   init_ed_k (&kv, ip->dst_address.as_u32,
631              vnet_buffer (b)->ip.reass.l4_dst_port, ip->src_address.as_u32,
632              vnet_buffer (b)->ip.reass.l4_src_port, sm->outside_fib_index,
633              ip->protocol);
634
635   // do nat if active session or is static mapping
636   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value) ||
637       !snat_static_mapping_match (
638         vm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
639         sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
640         &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
641     {
642       return 0;
643     }
644
645   // do not nat if forwarding enabled
646   if (sm->forwarding_enabled)
647     {
648       return 1;
649     }
650
651   // do not nat packet aimed at the interface address
652   if (PREDICT_FALSE (
653         is_interface_addr (sm, node, sw_if_index, ip->dst_address.as_u32)))
654     {
655       return 1;
656     }
657
658   // do nat packets with resolvable destination
659   // destination can be resolved either by:
660   // a) vrf routing table entry
661   // b) (non output feature) outside interface fib
662   if (is_destination_resolvable (rx_fib_index, ip->dst_address))
663     {
664       return 0;
665     }
666
667   return 1;
668 }
669
670 static_always_inline int
671 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
672                                       u32 thread_index, f64 now,
673                                       vlib_main_t * vm, vlib_buffer_t * b)
674 {
675   clib_bihash_kv_16_8_t kv, value;
676   snat_session_t *s = 0;
677   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
678
679   if (!sm->forwarding_enabled)
680     return 0;
681
682   if (ip->protocol == IP_PROTOCOL_ICMP)
683     {
684       ip4_address_t lookup_saddr, lookup_daddr;
685       u16 lookup_sport, lookup_dport;
686       u8 lookup_protocol;
687       if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
688                                               &lookup_sport, &lookup_daddr,
689                                               &lookup_dport, &lookup_protocol))
690         return 0;
691       init_ed_k (&kv, lookup_saddr.as_u32, lookup_sport, lookup_daddr.as_u32,
692                  lookup_dport, 0, lookup_protocol);
693     }
694   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
695     {
696       init_ed_k (&kv, ip->src_address.as_u32,
697                  vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
698                  vnet_buffer (b)->ip.reass.l4_dst_port, 0, ip->protocol);
699     }
700   else
701     {
702       init_ed_k (&kv, ip->src_address.as_u32, 0, ip->dst_address.as_u32, 0, 0,
703                  ip->protocol);
704     }
705
706   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
707     {
708       ASSERT (thread_index == ed_value_get_thread_index (&value));
709       s =
710         pool_elt_at_index (tsm->sessions,
711                            ed_value_get_session_index (&value));
712
713       if (na44_ed_is_fwd_bypass_session (s))
714         {
715           if (ip->protocol == IP_PROTOCOL_TCP)
716             {
717               nat44_set_tcp_session_state_i2o (
718                 sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags,
719                 thread_index);
720             }
721           /* Accounting */
722           nat44_session_update_counters (s, now,
723                                          vlib_buffer_length_in_chain (vm, b),
724                                          thread_index);
725           /* Per-user LRU list maintenance */
726           nat44_session_update_lru (sm, s, thread_index);
727           return 1;
728         }
729       else
730         return 0;
731     }
732
733   return 0;
734 }
735
736 static_always_inline int
737 nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
738                                        ip4_header_t *ip, u16 src_port,
739                                        u16 dst_port, u32 thread_index,
740                                        u32 rx_sw_if_index, u32 tx_sw_if_index,
741                                        int is_multi_worker)
742 {
743   clib_bihash_kv_16_8_t kv, value;
744   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
745   snat_interface_t *i;
746   snat_session_t *s;
747   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
748   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
749
750   /* src NAT check */
751   init_ed_k (&kv, ip->src_address.as_u32, src_port, ip->dst_address.as_u32,
752              dst_port, tx_fib_index, ip->protocol);
753   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
754     {
755       ASSERT (thread_index == ed_value_get_thread_index (&value));
756       s =
757         pool_elt_at_index (tsm->sessions,
758                            ed_value_get_session_index (&value));
759       return 1;
760     }
761
762   /* dst NAT check */
763   if (is_multi_worker &&
764       PREDICT_TRUE (!pool_is_free_index (
765         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index)))
766     {
767       nat_6t_t lookup;
768       lookup.fib_index = rx_fib_index;
769       lookup.proto = ip->protocol;
770       lookup.daddr.as_u32 = ip->src_address.as_u32;
771       lookup.dport = src_port;
772       lookup.saddr.as_u32 = ip->dst_address.as_u32;
773       lookup.sport = dst_port;
774       s = pool_elt_at_index (
775         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index);
776       if (PREDICT_TRUE (nat_6t_t_eq (&s->i2o.match, &lookup)))
777         {
778           goto skip_dst_nat_lookup;
779         }
780       s = NULL;
781     }
782
783   init_ed_k (&kv, ip->dst_address.as_u32, dst_port, ip->src_address.as_u32,
784              src_port, rx_fib_index, ip->protocol);
785   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
786     {
787       ASSERT (thread_index == ed_value_get_thread_index (&value));
788       s =
789         pool_elt_at_index (tsm->sessions,
790                            ed_value_get_session_index (&value));
791
792     skip_dst_nat_lookup:
793       if (na44_ed_is_fwd_bypass_session (s))
794         return 0;
795
796       /* hairpinning */
797       pool_foreach (i, sm->output_feature_interfaces)
798         {
799           if ((nat44_ed_is_interface_inside (i)) &&
800               (rx_sw_if_index == i->sw_if_index))
801             return 0;
802         }
803       return 1;
804     }
805
806   return 0;
807 }
808
809 static inline u32
810 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
811                           icmp46_header_t *icmp, u32 sw_if_index,
812                           u32 tx_sw_if_index, u32 rx_fib_index,
813                           vlib_node_runtime_t *node, u32 next, f64 now,
814                           u32 thread_index, snat_session_t **s_p,
815                           int is_multi_worker)
816 {
817   vlib_main_t *vm = vlib_get_main ();
818   u16 checksum;
819   int err;
820   snat_session_t *s = NULL;
821   u8 lookup_protocol = ip->protocol;
822   u16 lookup_sport, lookup_dport;
823   ip4_address_t lookup_saddr, lookup_daddr;
824
825   err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
826                                             &lookup_sport, &lookup_daddr,
827                                             &lookup_dport, &lookup_protocol);
828   if (err != 0)
829     {
830       b->error = node->errors[err];
831       return NAT_NEXT_DROP;
832     }
833
834   if (tx_sw_if_index != ~0)
835     {
836       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
837             sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
838             tx_sw_if_index, is_multi_worker)))
839         {
840           return next;
841         }
842     }
843   else
844     {
845       if (PREDICT_FALSE (nat44_ed_not_translate (
846             vm, node, sw_if_index, b, ip, IP_PROTOCOL_ICMP, rx_fib_index)))
847         {
848           return next;
849         }
850     }
851
852   if (PREDICT_FALSE (icmp_type_is_error_message (
853         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
854     {
855       b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
856       return NAT_NEXT_DROP;
857     }
858
859   next =
860     slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address, lookup_sport,
861                   lookup_dport, ip->protocol, rx_fib_index, tx_sw_if_index, &s,
862                   node, next, thread_index, vlib_time_now (vm));
863
864   if (NAT_NEXT_DROP == next)
865     goto out;
866
867   if (PREDICT_TRUE (!ip4_is_fragment (ip)))
868     {
869       ip_csum_t sum = ip_incremental_checksum_buffer (
870         vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b),
871         ntohs (ip->length) - ip4_header_bytes (ip), 0);
872       checksum = ~ip_csum_fold (sum);
873       if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff))
874         {
875           next = NAT_NEXT_DROP;
876           goto out;
877         }
878     }
879
880 out:
881   if (PREDICT_TRUE (next != NAT_NEXT_DROP && s))
882     {
883       /* Accounting */
884       nat44_session_update_counters (
885         s, now, vlib_buffer_length_in_chain (vm, b), thread_index);
886       /* Per-user LRU list maintenance */
887       nat44_session_update_lru (sm, s, thread_index);
888     }
889   *s_p = s;
890   return next;
891 }
892
893 static snat_session_t *
894 nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
895                                         ip4_header_t *ip, u32 rx_fib_index,
896                                         u32 thread_index, f64 now,
897                                         vlib_main_t *vm,
898                                         vlib_node_runtime_t *node)
899 {
900   clib_bihash_kv_16_8_t s_kv, s_value;
901   snat_static_mapping_t *m = NULL;
902   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
903   snat_session_t *s = NULL;
904   u32 tx_fib_index;
905   int i;
906   ip4_address_t new_src_addr = { 0 };
907   ip4_address_t new_dst_addr = ip->dst_address;
908
909   if (PREDICT_FALSE (
910         nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
911     {
912       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
913       nat_ipfix_logging_max_sessions (thread_index,
914                                       sm->max_translations_per_thread);
915       nat_elog_notice (sm, "maximum sessions exceeded");
916       return 0;
917     }
918
919   tx_fib_index = get_tx_fib_index (rx_fib_index, ip->dst_address);
920
921   // Try to find static mapping first
922   m = nat44_ed_sm_i2o_lookup (sm, ip->src_address, 0, rx_fib_index,
923                               ip->protocol);
924   if (m)
925     {
926       new_src_addr = m->external_addr;
927     }
928   else
929     {
930       pool_foreach (s, tsm->sessions)
931         {
932           if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
933             {
934               init_ed_k (&s_kv, s->out2in.addr.as_u32, 0,
935                          ip->dst_address.as_u32, 0, tx_fib_index,
936                          ip->protocol);
937               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
938                 {
939                   new_src_addr = s->out2in.addr;
940                 }
941               break;
942             }
943         }
944
945       if (!new_src_addr.as_u32)
946         {
947           for (i = 0; i < vec_len (sm->addresses); i++)
948             {
949               init_ed_k (&s_kv, sm->addresses[i].addr.as_u32, 0,
950                          ip->dst_address.as_u32, 0, tx_fib_index,
951                          ip->protocol);
952               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
953                 {
954                   new_src_addr = sm->addresses[i].addr;
955                 }
956             }
957         }
958     }
959
960   if (!new_src_addr.as_u32)
961     {
962       // could not allocate address for translation ...
963       return 0;
964     }
965
966   s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
967   if (!s)
968     {
969       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
970       nat_elog_warn (sm, "create NAT session failed");
971       return 0;
972     }
973
974   nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
975                         ip->dst_address, 0, rx_fib_index, ip->protocol);
976   nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
977   nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
978
979   // hairpinning?
980   int is_hairpinning = nat44_ed_external_sm_lookup (
981     sm, ip->dst_address, 0, ip->protocol, &new_dst_addr, NULL);
982   s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
983
984   nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
985   nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
986
987   nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
988                         tx_fib_index, ip->protocol);
989   nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
990   nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
991   nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
992
993   s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
994   s->out2in.addr.as_u32 = new_src_addr.as_u32;
995   s->out2in.fib_index = tx_fib_index;
996   s->in2out.addr.as_u32 = ip->src_address.as_u32;
997   s->in2out.fib_index = rx_fib_index;
998   s->in2out.port = s->out2in.port = ip->protocol;
999   if (m)
1000     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1001
1002   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
1003     {
1004       nat_elog_notice (sm, "in2out flow hash add failed");
1005       nat_ed_session_delete (sm, s, thread_index, 1);
1006       return NULL;
1007     }
1008
1009   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1))
1010     {
1011       nat_elog_notice (sm, "out2in flow hash add failed");
1012       nat_ed_session_delete (sm, s, thread_index, 1);
1013       return NULL;
1014     }
1015
1016   per_vrf_sessions_register_session (s, thread_index);
1017
1018   /* Accounting */
1019   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
1020                                  thread_index);
1021   /* Per-user LRU list maintenance */
1022   nat44_session_update_lru (sm, s, thread_index);
1023
1024   return s;
1025 }
1026
1027 static inline uword
1028 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
1029                                           vlib_node_runtime_t *node,
1030                                           vlib_frame_t *frame,
1031                                           int is_output_feature,
1032                                           int is_multi_worker)
1033 {
1034   u32 n_left_from, *from;
1035   snat_main_t *sm = &snat_main;
1036   f64 now = vlib_time_now (vm);
1037   u32 thread_index = vm->thread_index;
1038   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1039   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
1040     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
1041
1042   from = vlib_frame_vector_args (frame);
1043   n_left_from = frame->n_vectors;
1044
1045   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1046   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1047   vlib_get_buffers (vm, from, b, n_left_from);
1048
1049   while (n_left_from > 0)
1050     {
1051       vlib_buffer_t *b0;
1052       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1053       u32 tx_sw_if_index0;
1054       u32 cntr_sw_if_index0;
1055       ip_protocol_t proto0;
1056       ip4_header_t *ip0;
1057       snat_session_t *s0 = 0;
1058       clib_bihash_kv_16_8_t kv0, value0;
1059       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1060       nat_6t_flow_t *f = 0;
1061       nat_6t_t lookup;
1062       int lookup_skipped = 0;
1063
1064       b0 = *b;
1065       b++;
1066
1067       /* Prefetch next iteration. */
1068       if (PREDICT_TRUE (n_left_from >= 2))
1069         {
1070           vlib_buffer_t *p2;
1071
1072           p2 = *b;
1073
1074           vlib_prefetch_buffer_header (p2, LOAD);
1075
1076           clib_prefetch_load (p2->data);
1077         }
1078
1079       if (is_output_feature)
1080         {
1081           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1082         }
1083
1084       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1085
1086       ip0 =
1087         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
1088
1089       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1090       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1091       cntr_sw_if_index0 =
1092         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1093       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1094                                                            rx_sw_if_index0);
1095       lookup.fib_index = rx_fib_index0;
1096
1097       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1098         {
1099           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1100           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1101                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1102                                        0);
1103           next[0] = NAT_NEXT_ICMP_ERROR;
1104           goto trace0;
1105         }
1106
1107       proto0 = ip0->protocol;
1108
1109       if (is_output_feature)
1110         {
1111           if (PREDICT_FALSE
1112               (nat_not_translate_output_feature_fwd
1113                (sm, ip0, thread_index, now, vm, b0)))
1114             goto trace0;
1115         }
1116
1117       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
1118         {
1119           if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1120                 ICMP4_echo_request &&
1121               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1122                 ICMP4_echo_reply &&
1123               !icmp_type_is_error_message (
1124                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
1125             {
1126               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
1127               next[0] = NAT_NEXT_DROP;
1128               goto trace0;
1129             }
1130           int err = nat_get_icmp_session_lookup_values (
1131             b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
1132             &lookup.dport, &lookup.proto);
1133           if (err != 0)
1134             {
1135               b0->error = node->errors[err];
1136               next[0] = NAT_NEXT_DROP;
1137               goto trace0;
1138             }
1139         }
1140       else
1141         {
1142           lookup.proto = ip0->protocol;
1143           lookup.saddr.as_u32 = ip0->src_address.as_u32;
1144           lookup.daddr.as_u32 = ip0->dst_address.as_u32;
1145           lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
1146           lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
1147         }
1148
1149       /* there might be a stashed index in vnet_buffer2 from handoff or
1150        * classify node, see if it can be used */
1151       if (is_multi_worker &&
1152           !pool_is_free_index (tsm->sessions,
1153                                vnet_buffer2 (b0)->nat.cached_session_index))
1154         {
1155           s0 = pool_elt_at_index (tsm->sessions,
1156                                   vnet_buffer2 (b0)->nat.cached_session_index);
1157           if (PREDICT_TRUE (
1158                 nat_6t_t_eq (&s0->i2o.match, &lookup)
1159                 // for some hairpinning cases there are two "i2i" flows instead
1160                 // of i2o and o2i as both hosts are on inside
1161                 || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1162                     nat_6t_t_eq (&s0->o2i.match, &lookup))))
1163             {
1164               /* yes, this is the droid we're looking for */
1165               lookup_skipped = 1;
1166               goto skip_lookup;
1167             }
1168           s0 = NULL;
1169         }
1170
1171       init_ed_k (&kv0, lookup.saddr.as_u32, lookup.sport, lookup.daddr.as_u32,
1172                  lookup.dport, lookup.fib_index, lookup.proto);
1173
1174       // lookup flow
1175       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1176         {
1177           // flow does not exist go slow path
1178           next[0] = def_slow;
1179           goto trace0;
1180         }
1181
1182       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1183       s0 =
1184         pool_elt_at_index (tsm->sessions,
1185                            ed_value_get_session_index (&value0));
1186
1187     skip_lookup:
1188
1189       ASSERT (thread_index == s0->thread_index);
1190
1191       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1192         {
1193           // session is closed, go slow path
1194           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1195           nat_ed_session_delete (sm, s0, thread_index, 1);
1196           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
1197           goto trace0;
1198         }
1199
1200       // drop if session expired
1201       u64 sess_timeout_time;
1202       sess_timeout_time =
1203         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1204       if (now >= sess_timeout_time)
1205         {
1206           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1207           nat_ed_session_delete (sm, s0, thread_index, 1);
1208           // session is closed, go slow path
1209           next[0] = def_slow;
1210           goto trace0;
1211         }
1212
1213       b0->flags |= VNET_BUFFER_F_IS_NATED;
1214
1215       if (nat_6t_t_eq (&s0->i2o.match, &lookup))
1216         {
1217           f = &s0->i2o;
1218         }
1219       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1220                nat_6t_t_eq (&s0->o2i.match, &lookup))
1221         {
1222           f = &s0->o2i;
1223         }
1224       else
1225         {
1226           translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
1227           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1228           nat_ed_session_delete (sm, s0, thread_index, 1);
1229           next[0] = NAT_NEXT_DROP;
1230           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1231           goto trace0;
1232         }
1233
1234       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1235           (translation_error = nat_6t_flow_buf_translate_i2o (
1236              vm, sm, b0, ip0, f, proto0, is_output_feature)))
1237         {
1238           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1239           nat_ed_session_delete (sm, s0, thread_index, 1);
1240           next[0] = NAT_NEXT_DROP;
1241           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1242           goto trace0;
1243         }
1244
1245       switch (proto0)
1246         {
1247         case IP_PROTOCOL_TCP:
1248           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
1249                                          thread_index, cntr_sw_if_index0, 1);
1250           nat44_set_tcp_session_state_i2o (
1251             sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
1252             thread_index);
1253           break;
1254         case IP_PROTOCOL_UDP:
1255           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
1256                                          thread_index, cntr_sw_if_index0, 1);
1257           break;
1258         case IP_PROTOCOL_ICMP:
1259           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
1260                                          thread_index, cntr_sw_if_index0, 1);
1261           break;
1262         default:
1263           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
1264                                          thread_index, cntr_sw_if_index0, 1);
1265           break;
1266         }
1267
1268       /* Accounting */
1269       nat44_session_update_counters (s0, now,
1270                                      vlib_buffer_length_in_chain (vm, b0),
1271                                      thread_index);
1272       /* Per-user LRU list maintenance */
1273       nat44_session_update_lru (sm, s0, thread_index);
1274
1275     trace0:
1276       if (PREDICT_FALSE
1277           ((node->flags & VLIB_NODE_FLAG_TRACE)
1278            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1279         {
1280           nat_in2out_ed_trace_t *t =
1281             vlib_add_trace (vm, node, b0, sizeof (*t));
1282           t->sw_if_index = rx_sw_if_index0;
1283           t->next_index = next[0];
1284           t->is_slow_path = 0;
1285           t->translation_error = translation_error;
1286           t->lookup_skipped = lookup_skipped;
1287           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1288
1289           if (s0)
1290             {
1291               t->session_index = s0 - tsm->sessions;
1292               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1293               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1294               t->translation_via_i2of = (&s0->i2o == f);
1295               t->tcp_state = s0->tcp_state;
1296             }
1297           else
1298             {
1299               t->session_index = ~0;
1300             }
1301         }
1302
1303       if (next[0] == NAT_NEXT_DROP)
1304         {
1305           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
1306                                          thread_index, cntr_sw_if_index0, 1);
1307         }
1308
1309       n_left_from--;
1310       next++;
1311     }
1312
1313   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1314                                frame->n_vectors);
1315   return frame->n_vectors;
1316 }
1317
1318 static inline uword
1319 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
1320                                           vlib_node_runtime_t *node,
1321                                           vlib_frame_t *frame,
1322                                           int is_output_feature,
1323                                           int is_multi_worker)
1324 {
1325   u32 n_left_from, *from;
1326   snat_main_t *sm = &snat_main;
1327   f64 now = vlib_time_now (vm);
1328   u32 thread_index = vm->thread_index;
1329   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1330
1331   from = vlib_frame_vector_args (frame);
1332   n_left_from = frame->n_vectors;
1333
1334   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1335   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1336   vlib_get_buffers (vm, from, b, n_left_from);
1337
1338   while (n_left_from > 0)
1339     {
1340       vlib_buffer_t *b0;
1341       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1342       u32 tx_sw_if_index0;
1343       u32 cntr_sw_if_index0;
1344       ip_protocol_t proto0;
1345       ip4_header_t *ip0;
1346       udp_header_t *udp0;
1347       icmp46_header_t *icmp0;
1348       snat_session_t *s0 = 0;
1349       clib_bihash_kv_16_8_t kv0, value0;
1350       int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1351
1352       b0 = *b;
1353
1354       if (is_output_feature)
1355         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1356
1357       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1358
1359       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1360                               iph_offset0);
1361
1362       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1363       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1364       cntr_sw_if_index0 =
1365         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1366       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1367                                                            rx_sw_if_index0);
1368
1369       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1370         {
1371           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1372           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1373                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1374                                        0);
1375           next[0] = NAT_NEXT_ICMP_ERROR;
1376           goto trace0;
1377         }
1378
1379       udp0 = ip4_next_header (ip0);
1380       icmp0 = (icmp46_header_t *) udp0;
1381       proto0 = ip0->protocol;
1382
1383       if (PREDICT_FALSE (nat44_ed_is_unk_proto (proto0)))
1384         {
1385           s0 = nat44_ed_in2out_slowpath_unknown_proto (
1386             sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
1387           if (!s0)
1388             next[0] = NAT_NEXT_DROP;
1389
1390           if (NAT_NEXT_DROP != next[0] && s0 &&
1391               NAT_ED_TRNSL_ERR_SUCCESS !=
1392                 (translation_error = nat_6t_flow_buf_translate_i2o (
1393                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1394             {
1395               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1396               nat_ed_session_delete (sm, s0, thread_index, 1);
1397               next[0] = NAT_NEXT_DROP;
1398               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1399               goto trace0;
1400             }
1401
1402           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
1403                                          thread_index, cntr_sw_if_index0, 1);
1404           goto trace0;
1405         }
1406
1407       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
1408         {
1409           next[0] = icmp_in2out_ed_slow_path (
1410             sm, b0, ip0, icmp0, rx_sw_if_index0, tx_sw_if_index0,
1411             rx_fib_index0, node, next[0], now, thread_index, &s0,
1412             is_multi_worker);
1413           if (NAT_NEXT_DROP != next[0] && s0 &&
1414               NAT_ED_TRNSL_ERR_SUCCESS !=
1415                 (translation_error = nat_6t_flow_buf_translate_i2o (
1416                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1417             {
1418               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1419               nat_ed_session_delete (sm, s0, thread_index, 1);
1420               next[0] = NAT_NEXT_DROP;
1421               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1422               goto trace0;
1423             }
1424
1425           if (NAT_NEXT_DROP != next[0])
1426             {
1427               vlib_increment_simple_counter (
1428                 &sm->counters.slowpath.in2out.icmp, thread_index,
1429                 cntr_sw_if_index0, 1);
1430             }
1431           goto trace0;
1432         }
1433
1434       init_ed_k (
1435         &kv0, ip0->src_address.as_u32, vnet_buffer (b0)->ip.reass.l4_src_port,
1436         ip0->dst_address.as_u32, vnet_buffer (b0)->ip.reass.l4_dst_port,
1437         rx_fib_index0, ip0->protocol);
1438       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1439         {
1440           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1441           s0 =
1442             pool_elt_at_index (tsm->sessions,
1443                                ed_value_get_session_index (&value0));
1444         }
1445
1446       if (!s0)
1447         {
1448           if (is_output_feature)
1449             {
1450               if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
1451                     sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1452                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1453                     rx_sw_if_index0, tx_sw_if_index0, is_multi_worker)))
1454                 goto trace0;
1455
1456               /*
1457                * Send DHCP packets to the ipv4 stack, or we won't
1458                * be able to use dhcp client on the outside interface
1459                */
1460               if (PREDICT_FALSE (
1461                     proto0 == IP_PROTOCOL_UDP &&
1462                     (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1463                      clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)) &&
1464                     ip0->dst_address.as_u32 == 0xffffffff))
1465                 goto trace0;
1466             }
1467           else
1468             {
1469               if (PREDICT_FALSE (
1470                     nat44_ed_not_translate (vm, node, rx_sw_if_index0, b0, ip0,
1471                                             proto0, rx_fib_index0)))
1472                 goto trace0;
1473             }
1474
1475           next[0] =
1476             slow_path_ed (vm, sm, b0, ip0->src_address, ip0->dst_address,
1477                           vnet_buffer (b0)->ip.reass.l4_src_port,
1478                           vnet_buffer (b0)->ip.reass.l4_dst_port,
1479                           ip0->protocol, rx_fib_index0, tx_sw_if_index0, &s0,
1480                           node, next[0], thread_index, now);
1481
1482           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1483             goto trace0;
1484
1485           if (PREDICT_FALSE (!s0))
1486             goto trace0;
1487
1488         }
1489
1490       b0->flags |= VNET_BUFFER_F_IS_NATED;
1491
1492       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1493           (translation_error = nat_6t_flow_buf_translate_i2o (
1494              vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1495         {
1496           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1497           nat_ed_session_delete (sm, s0, thread_index, 1);
1498           next[0] = NAT_NEXT_DROP;
1499           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1500           goto trace0;
1501         }
1502
1503       if (PREDICT_TRUE (proto0 == IP_PROTOCOL_TCP))
1504         {
1505           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
1506                                          thread_index, cntr_sw_if_index0, 1);
1507           nat44_set_tcp_session_state_i2o (
1508             sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
1509             thread_index);
1510         }
1511       else
1512         {
1513           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
1514                                          thread_index, cntr_sw_if_index0, 1);
1515         }
1516
1517       /* Accounting */
1518       nat44_session_update_counters (s0, now,
1519                                      vlib_buffer_length_in_chain
1520                                      (vm, b0), thread_index);
1521       /* Per-user LRU list maintenance */
1522       nat44_session_update_lru (sm, s0, thread_index);
1523
1524     trace0:
1525       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1526                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1527         {
1528           nat_in2out_ed_trace_t *t =
1529             vlib_add_trace (vm, node, b0, sizeof (*t));
1530           t->sw_if_index = rx_sw_if_index0;
1531           t->next_index = next[0];
1532           t->is_slow_path = 1;
1533           t->translation_error = translation_error;
1534           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1535
1536           if (s0)
1537             {
1538               t->session_index = s0 - tsm->sessions;
1539               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1540               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1541               t->translation_via_i2of = 1;
1542               t->tcp_state = s0->tcp_state;
1543             }
1544
1545           else
1546             {
1547               t->session_index = ~0;
1548             }
1549         }
1550
1551       if (next[0] == NAT_NEXT_DROP)
1552         {
1553           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
1554                                          thread_index, cntr_sw_if_index0, 1);
1555         }
1556
1557       n_left_from--;
1558       next++;
1559       b++;
1560     }
1561
1562   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1563                                frame->n_vectors);
1564
1565   return frame->n_vectors;
1566 }
1567
1568 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1569                                      vlib_node_runtime_t * node,
1570                                      vlib_frame_t * frame)
1571 {
1572   if (snat_main.num_workers > 1)
1573     {
1574       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1);
1575     }
1576   else
1577     {
1578       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0);
1579     }
1580 }
1581
1582 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1583   .name = "nat44-ed-in2out",
1584   .vector_size = sizeof (u32),
1585   .sibling_of = "nat-default",
1586   .format_trace = format_nat_in2out_ed_trace,
1587   .type = VLIB_NODE_TYPE_INTERNAL,
1588   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1589   .error_strings = nat_in2out_ed_error_strings,
1590   .runtime_data_bytes = sizeof (snat_runtime_t),
1591 };
1592
1593 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1594                                             vlib_node_runtime_t * node,
1595                                             vlib_frame_t * frame)
1596 {
1597   if (snat_main.num_workers > 1)
1598     {
1599       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1);
1600     }
1601   else
1602     {
1603       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0);
1604     }
1605 }
1606
1607 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1608   .name = "nat44-ed-in2out-output",
1609   .vector_size = sizeof (u32),
1610   .sibling_of = "nat-default",
1611   .format_trace = format_nat_in2out_ed_trace,
1612   .type = VLIB_NODE_TYPE_INTERNAL,
1613   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1614   .error_strings = nat_in2out_ed_error_strings,
1615   .runtime_data_bytes = sizeof (snat_runtime_t),
1616 };
1617
1618 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1619                                               vlib_node_runtime_t *
1620                                               node, vlib_frame_t * frame)
1621 {
1622   if (snat_main.num_workers > 1)
1623     {
1624       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 1);
1625     }
1626   else
1627     {
1628       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 0);
1629     }
1630 }
1631
1632 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1633   .name = "nat44-ed-in2out-slowpath",
1634   .vector_size = sizeof (u32),
1635   .sibling_of = "nat-default",
1636   .format_trace = format_nat_in2out_ed_trace,
1637   .type = VLIB_NODE_TYPE_INTERNAL,
1638   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1639   .error_strings = nat_in2out_ed_error_strings,
1640   .runtime_data_bytes = sizeof (snat_runtime_t),
1641 };
1642
1643 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1644                                                      vlib_node_runtime_t
1645                                                      * node,
1646                                                      vlib_frame_t * frame)
1647 {
1648   if (snat_main.num_workers > 1)
1649     {
1650       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 1);
1651     }
1652   else
1653     {
1654       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 0);
1655     }
1656 }
1657
1658 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1659   .name = "nat44-ed-in2out-output-slowpath",
1660   .vector_size = sizeof (u32),
1661   .sibling_of = "nat-default",
1662   .format_trace = format_nat_in2out_ed_trace,
1663   .type = VLIB_NODE_TYPE_INTERNAL,
1664   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1665   .error_strings = nat_in2out_ed_error_strings,
1666   .runtime_data_bytes = sizeof (snat_runtime_t),
1667 };
1668
1669 static u8 *
1670 format_nat_pre_trace (u8 * s, va_list * args)
1671 {
1672   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1673   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1674   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1675   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1676                  t->arc_next_index);
1677 }
1678
1679 VLIB_NODE_FN (nat_pre_in2out_node)
1680   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1681 {
1682   return nat_pre_node_fn_inline (vm, node, frame,
1683                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1684 }
1685
1686 VLIB_NODE_FN (nat_pre_in2out_output_node)
1687   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1688 {
1689   return nat_pre_node_fn_inline (vm, node, frame,
1690                                  NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
1691 }
1692
1693 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1694   .name = "nat-pre-in2out",
1695   .vector_size = sizeof (u32),
1696   .sibling_of = "nat-default",
1697   .format_trace = format_nat_pre_trace,
1698   .type = VLIB_NODE_TYPE_INTERNAL,
1699   .n_errors = 0,
1700 };
1701
1702 VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
1703   .name = "nat-pre-in2out-output",
1704   .vector_size = sizeof (u32),
1705   .sibling_of = "nat-default",
1706   .format_trace = format_nat_pre_trace,
1707   .type = VLIB_NODE_TYPE_INTERNAL,
1708   .n_errors = 0,
1709 };
1710
1711 /*
1712  * fd.io coding-style-patch-verification: ON
1713  *
1714  * Local Variables:
1715  * eval: (c-set-style "gnu")
1716  * End:
1717  */