nat: fix nat44-ed port range with multiple workers
vpp.git: src/plugins/nat/nat44-ed/nat44_ed_in2out.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27
28 #include <nat/lib/nat_inlines.h>
29 #include <nat/lib/ipfix_logging.h>
30
31 #include <nat/nat44-ed/nat44_ed.h>
32 #include <nat/nat44-ed/nat44_ed_inlines.h>
33
34 static char *nat_in2out_ed_error_strings[] = {
35 #define _(sym,string) string,
36   foreach_nat_in2out_ed_error
37 #undef _
38 };
39
40 typedef struct
41 {
42   u32 sw_if_index;
43   u32 next_index;
44   u32 session_index;
45   nat_translation_error_e translation_error;
46   nat_6t_flow_t i2of;
47   nat_6t_flow_t o2if;
48   clib_bihash_kv_16_8_t search_key;
49   u8 is_slow_path;
50   u8 translation_via_i2of;
51   u8 lookup_skipped;
52   u8 tcp_state;
53 } nat_in2out_ed_trace_t;
54
55 static u8 *
56 format_nat_in2out_ed_trace (u8 * s, va_list * args)
57 {
58   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
59   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
60   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
61   char *tag;
62
63   tag =
64     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
65     "NAT44_IN2OUT_ED_FAST_PATH";
66
67   s = format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
68               t->next_index);
69   if (~0 != t->session_index)
70     {
71       s = format (s, ", session %d, translation result '%U' via %s",
72                   t->session_index, format_nat_ed_translation_error,
73                   t->translation_error,
74                   t->translation_via_i2of ? "i2of" : "o2if");
75       s = format (s, "\n  i2of %U", format_nat_6t_flow, &t->i2of);
76       s = format (s, "\n  o2if %U", format_nat_6t_flow, &t->o2if);
77     }
78   if (!t->is_slow_path)
79     {
80       if (t->lookup_skipped)
81         {
82           s = format (s, "\n  lookup skipped - cached session index used");
83         }
84       else
85         {
86           s = format (s, "\n  search key %U", format_ed_session_kvp,
87                       &t->search_key);
88         }
89     }
90   if (IP_PROTOCOL_TCP == t->i2of.match.proto)
91     {
92       s = format (s, "\n  TCP state: %U", format_nat44_ed_tcp_state,
93                   t->tcp_state);
94     }
95
96   return s;
97 }
98
99 static int
100 nat_ed_alloc_addr_and_port_with_snat_address (
101   snat_main_t *sm, u8 proto, u32 thread_index, snat_address_t *a,
102   u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
103   ip4_address_t *outside_addr, u16 *outside_port)
104 {
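  /*
   * Each worker owns a disjoint slice of the outside port space:
   * [port_thread_offset, port_thread_offset + port_per_thread).
   * ED_USER_PORT_OFFSET keeps dynamic allocation out of the low port
   * range.  The caller's suggested port is honoured only if it falls
   * inside this thread's slice; otherwise a random in-slice port is
   * picked and, on hash collision, re-picked up to
   * ED_PORT_ALLOC_ATTEMPTS times.
   */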
105   const u16 port_thread_offset =
106     (port_per_thread * snat_thread_index) + ED_USER_PORT_OFFSET;
107
108   s->o2i.match.daddr = a->addr;
109   /* first try port suggested by caller */
110   u16 port = clib_net_to_host_u16 (*outside_port);
111   u16 port_offset = port - port_thread_offset;
112   if (port < port_thread_offset ||
113       port >= port_thread_offset + port_per_thread)
114     {
115       /* need to pick a different port, suggested port doesn't fit in
116        * this thread's port range */
117       port_offset = snat_random_port (0, port_per_thread - 1);
118       port = port_thread_offset + port_offset;
119     }
120   u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
121   do
122     {
123       if (IP_PROTOCOL_ICMP == proto)
124         {
125           s->o2i.match.sport = clib_host_to_net_u16 (port);
126         }
127       s->o2i.match.dport = clib_host_to_net_u16 (port);
128       if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
129         {
130           *outside_addr = a->addr;
131           *outside_port = clib_host_to_net_u16 (port);
132           return 0;
133         }
134       port_offset = snat_random_port (0, port_per_thread - 1);
135       port = port_thread_offset + port_offset;
136       --attempts;
137     }
138   while (attempts > 0);
139   return 1;
140 }
141
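/*
 * Pick an outside address for a dynamic session.  The scan starts at an
 * offset derived from the inside source address so inside hosts spread
 * across the pool.  Preference order: an address whose configured prefix
 * covers the destination, then (output feature only) an address bound to
 * the TX interface, then any address in the RX FIB, then a FIB-agnostic
 * address (fib_index == ~0).
 */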
142 static int
143 nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index,
144                             u32 tx_sw_if_index, u32 nat_proto,
145                             u32 thread_index, ip4_address_t s_addr,
146                             ip4_address_t d_addr, u32 snat_thread_index,
147                             snat_session_t *s, ip4_address_t *outside_addr,
148                             u16 *outside_port)
149 {
150   if (vec_len (sm->addresses) > 0)
151     {
152       u32 s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses);
153       snat_address_t *a, *ja = 0, *ra = 0, *ba = 0;
154       int i;
155
156       // output feature
157       if (tx_sw_if_index != ~0)
158         {
159           for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
160             {
161               a = sm->addresses + i;
162               if (a->fib_index == rx_fib_index)
163                 {
164                   if (a->sw_if_index == tx_sw_if_index)
165                     {
166                       if ((a->addr_len != ~0) &&
167                           (a->net.as_u32 ==
168                            (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
169
170                         {
171                           return nat_ed_alloc_addr_and_port_with_snat_address (
172                             sm, nat_proto, thread_index, a,
173                             sm->port_per_thread, snat_thread_index, s,
174                             outside_addr, outside_port);
175                         }
176                       ra = a;
177                     }
178                   ja = a;
179                 }
180               else if (a->fib_index == ~0)
181                 {
182                   ba = a;
183                 }
184             }
185           for (i = 0; i < s_addr_offset; ++i)
186             {
187               a = sm->addresses + i;
188               if (a->fib_index == rx_fib_index)
189                 {
190                   if (a->sw_if_index == tx_sw_if_index)
191                     {
192                       if ((a->addr_len != ~0) &&
193                           (a->net.as_u32 ==
194                            (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
195
196                         {
197                           return nat_ed_alloc_addr_and_port_with_snat_address (
198                             sm, nat_proto, thread_index, a,
199                             sm->port_per_thread, snat_thread_index, s,
200                             outside_addr, outside_port);
201                         }
202                       ra = a;
203                     }
204                   ja = a;
205                 }
206               else if (a->fib_index == ~0)
207                 {
208                   ba = a;
209                 }
210             }
211           if (ra)
212             {
213               return nat_ed_alloc_addr_and_port_with_snat_address (
214                 sm, nat_proto, thread_index, ra, sm->port_per_thread,
215                 snat_thread_index, s, outside_addr, outside_port);
216             }
217         }
218       else
219         {
220           // first try NAT pool addresses mapped to sw interface addresses
221           for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
222             {
223               a = sm->addresses + i;
224               if (a->fib_index == rx_fib_index)
225                 {
226                   if ((a->addr_len != ~0) &&
227                       (a->net.as_u32 ==
228                        (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
229                     {
230                       return nat_ed_alloc_addr_and_port_with_snat_address (
231                         sm, nat_proto, thread_index, a, sm->port_per_thread,
232                         snat_thread_index, s, outside_addr, outside_port);
233                     }
234                   ja = a;
235                 }
236               else if (a->fib_index == ~0)
237                 {
238                   ba = a;
239                 }
240             }
241           for (i = 0; i < s_addr_offset; ++i)
242             {
243               a = sm->addresses + i;
244               if (a->fib_index == rx_fib_index)
245                 {
246                   if ((a->addr_len != ~0) &&
247                       (a->net.as_u32 ==
248                        (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
249                     {
250                       return nat_ed_alloc_addr_and_port_with_snat_address (
251                         sm, nat_proto, thread_index, a, sm->port_per_thread,
252                         snat_thread_index, s, outside_addr, outside_port);
253                     }
254                   ja = a;
255                 }
256               else if (a->fib_index == ~0)
257                 {
258                   ba = a;
259                 }
260             }
261         }
262
263       if (ja || ba)
264         {
265           a = ja ? ja : ba;
266           return nat_ed_alloc_addr_and_port_with_snat_address (
267             sm, nat_proto, thread_index, a, sm->port_per_thread,
268             snat_thread_index, s, outside_addr, outside_port);
269         }
270     }
271   /* Totally out of translations to use... */
272   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
273   return 1;
274 }
275
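/*
 * Check whether the destination matches an out2in static mapping.  If it
 * does, the caller rewrites the destination to the mapped local address
 * (and port, unless the mapping is address-only) and flags the session as
 * hairpinning.
 */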
276 static_always_inline int
277 nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
278                              u16 match_port, ip_protocol_t match_protocol,
279                              ip4_address_t *daddr, u16 *dport)
280 {
281   snat_static_mapping_t *m =
282     nat44_ed_sm_o2i_lookup (sm, match_addr, match_port, 0, match_protocol);
283   if (!m)
284     {
285       /* Try address only mapping */
286       m = nat44_ed_sm_o2i_lookup (sm, match_addr, 0, 0, 0);
287       if (!m)
288         return 0;
289     }
290   *daddr = m->local_addr;
291   if (dport)
292     {
293       /* Address only mapping doesn't change port */
294       *dport = is_sm_addr_only (m->flags) ? match_port : m->local_port;
295     }
296   return 1;
297 }
298
299 static_always_inline vrf_table_t *
300 get_vrf_table_by_fib (u32 fib_index)
301 {
302   snat_main_t *sm = &snat_main;
303   vrf_table_t *t;
304
305   pool_foreach (t, sm->vrf_tables)
306     {
307       if (fib_index == t->table_fib_index)
308         {
309           return t;
310         }
311     }
312
313   return 0;
314 }
315
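/*
 * Resolve the TX FIB for a destination: if the RX VRF has a NAT vrf table,
 * use the first of its routes whose FIB resolves the address; otherwise
 * fall back to the configured outside FIB and scan the other outside FIBs
 * for one that resolves it.
 */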
316 static_always_inline u32
317 get_tx_fib_index (u32 rx_fib_index, ip4_address_t addr)
318 {
319   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
320   fib_prefix_t pfx = {
321     .fp_proto = FIB_PROTOCOL_IP4,
322     .fp_len = 32,
323     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
324     ,
325   };
326
327   snat_main_t *sm = &snat_main;
328   vrf_table_t *t = get_vrf_table_by_fib (rx_fib_index);
329   // default to rx fib
330   u32 tx_fib_index = rx_fib_index;
331
332   if (0 != t)
333     {
334       // managed routes to other fibs
335       vrf_route_t *r;
336       pool_foreach (r, t->routes)
337         {
338           fei = fib_table_lookup (r->fib_index, &pfx);
339           if ((FIB_NODE_INDEX_INVALID != fei) &&
340               (~0 != fib_entry_get_resolving_interface (fei)))
341             {
342               tx_fib_index = r->fib_index;
343               break;
344             }
345         }
346     }
347   else
348     {
349       // default to configured fib
350       tx_fib_index = sm->outside_fib_index;
351
352       // default routes to other fibs
353       nat_fib_t *f;
354       vec_foreach (f, sm->outside_fibs)
355         {
356           fei = fib_table_lookup (f->fib_index, &pfx);
357           if ((FIB_NODE_INDEX_INVALID != fei) &&
358               (~0 != fib_entry_get_resolving_interface (fei)))
359             {
360               tx_fib_index = f->fib_index;
361               break;
362             }
363         }
364     }
365
366   return tx_fib_index;
367 }
368
369 static_always_inline int
370 is_destination_resolvable (u32 rx_fib_index, ip4_address_t addr)
371 {
372   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
373   fib_prefix_t pfx = {
374     .fp_proto = FIB_PROTOCOL_IP4,
375     .fp_len = 32,
376     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
377     ,
378   };
379
380   snat_main_t *sm = &snat_main;
381   vrf_table_t *t = get_vrf_table_by_fib (rx_fib_index);
382   u32 ii;
383
384   if (0 != t)
385     {
386       // managed routes to other fibs
387       vrf_route_t *r;
388       pool_foreach (r, t->routes)
389         {
390           fei = fib_table_lookup (r->fib_index, &pfx);
391           if ((FIB_NODE_INDEX_INVALID != fei) &&
392               (~0 != (ii = fib_entry_get_resolving_interface (fei))))
393             {
394               return 1;
395             }
396         }
397     }
398   else
399     {
400       // default routes to other fibs
401       nat_fib_t *f;
402       vec_foreach (f, sm->outside_fibs)
403         {
404           fei = fib_table_lookup (f->fib_index, &pfx);
405           if ((FIB_NODE_INDEX_INVALID != fei) &&
406               (~0 != (ii = fib_entry_get_resolving_interface (fei))))
407             {
408               snat_interface_t *i;
409               pool_foreach (i, sm->interfaces)
410                 {
411                   if ((nat44_ed_is_interface_outside (i)) &&
412                       (ii == i->sw_if_index))
413                     {
414                       return 1;
415                     }
416                 }
417             }
418         }
419     }
420
421   return 0;
422 }
423
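/*
 * Slow path: create a new in2out session.  Frees an LRU session if the
 * per-thread limit is hit, honours identity NAT and static mappings,
 * requires a SYN for new TCP sessions, allocates an outside address/port
 * for dynamic sessions, initialises both i2o and o2i flows and inserts
 * them into the flow hash, then emits IPFIX/syslog session-create events.
 */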
424 static u32
425 slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
426               ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
427               u16 r_port, u8 proto, u32 rx_fib_index, u32 tx_sw_if_index,
428               snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
429               u32 thread_index, f64 now)
430 {
431   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
432   ip4_address_t outside_addr;
433   u16 outside_port;
434   u32 tx_fib_index;
435   u8 is_identity_nat = 0;
436
437   snat_session_t *s = NULL;
438   lb_nat_type_t lb = 0;
439   ip4_address_t daddr = r_addr;
440   u16 dport = r_port;
441
442   if (PREDICT_FALSE
443       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
444     {
445       if (!nat_lru_free_one (sm, thread_index, now))
446         {
447           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
448           nat_ipfix_logging_max_sessions (thread_index,
449                                           sm->max_translations_per_thread);
450           nat_elog_notice (sm, "maximum sessions exceeded");
451           return NAT_NEXT_DROP;
452         }
453     }
454
455   ip4_address_t sm_addr;
456   u16 sm_port;
457   u32 sm_fib_index;
458   int is_sm = 0;
459   // First try to match static mapping by local address and port
460   if (!snat_static_mapping_match (vm, l_addr, l_port, rx_fib_index, proto,
461                                   &sm_addr, &sm_port, &sm_fib_index, 0, 0, 0,
462                                   &lb, 0, &is_identity_nat, 0))
463     {
464       if (PREDICT_FALSE (is_identity_nat))
465         {
466           *sessionp = NULL;
467           return next;
468         }
469       is_sm = 1;
470     }
471
472   if (PREDICT_TRUE (proto == IP_PROTOCOL_TCP))
473     {
474       if (PREDICT_FALSE (!tcp_flags_is_init (
475             vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
476         {
477           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
478           return NAT_NEXT_DROP;
479         }
480     }
481
482   s = nat_ed_session_alloc (sm, thread_index, now, proto);
483   ASSERT (s);
484
485   tx_fib_index = get_tx_fib_index (rx_fib_index, r_addr);
486
487   if (!is_sm)
488     {
489       s->in2out.addr = l_addr;
490       s->in2out.port = l_port;
491       s->proto = proto;
492       s->in2out.fib_index = rx_fib_index;
493       s->out2in.fib_index = tx_fib_index;
494
495       // suggest the local port to the allocation function so it can try to preserve it
496       outside_port = l_port;
497
498       if (PREDICT_FALSE (nat44_ed_external_sm_lookup (sm, r_addr, r_port,
499                                                       proto, &daddr, &dport)))
500         {
501           s->flags |= SNAT_SESSION_FLAG_HAIRPINNING;
502         }
503
504       // the o2i destination addr/port set here are placeholders; the real
505       // values are filled in by nat_ed_alloc_addr_and_port
506       nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
507                             s->out2in.fib_index, proto);
508       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
509       if (IP_PROTOCOL_ICMP == proto)
510         {
511           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
512         }
513       else
514         {
515           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
516         }
517       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
518
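      /*
       * The allocator below also inserts the o2i flow into the flow hash;
       * a successful insert is what reserves the (outside addr, port)
       * pair, so a collision simply means the port is taken and another
       * one is tried.
       */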
519       if (nat_ed_alloc_addr_and_port (
520             sm, rx_fib_index, tx_sw_if_index, proto, thread_index, l_addr,
521             r_addr, tsm->snat_thread_index, s, &outside_addr, &outside_port))
522         {
523           nat_elog_notice (sm, "addresses exhausted");
524           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
525           nat_ed_session_delete (sm, s, thread_index, 1);
526           return NAT_NEXT_DROP;
527         }
528       s->out2in.addr = outside_addr;
529       s->out2in.port = outside_port;
530     }
531   else
532     {
533       // static mapping
534       s->out2in.addr = outside_addr = sm_addr;
535       s->out2in.port = outside_port = sm_port;
536       s->in2out.addr = l_addr;
537       s->in2out.port = l_port;
538       s->proto = proto;
539       s->in2out.fib_index = rx_fib_index;
540       s->out2in.fib_index = tx_fib_index;
541       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
542
543       // hairpinning?
544       int is_hairpinning = nat44_ed_external_sm_lookup (sm, r_addr, r_port,
545                                                         proto, &daddr, &dport);
546       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
547
548       if (IP_PROTOCOL_ICMP == proto)
549         {
550           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
551                                 sm_port, s->out2in.fib_index, proto);
552           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
553         }
554       else
555         {
556           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr,
557                                 sm_port, s->out2in.fib_index, proto);
558           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
559         }
560       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
561       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
562       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
563         {
564           nat_elog_notice (sm, "out2in key add failed");
565           goto error;
566         }
567     }
568
569   if (lb)
570     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
571   s->ext_host_addr = r_addr;
572   s->ext_host_port = r_port;
573
574   nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port,
575                         rx_fib_index, proto);
576   nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
577   nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);
578
579   if (IP_PROTOCOL_ICMP == proto)
580     {
581       nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
582     }
583   else
584     {
585       nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
586       nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
587     }
588   nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
589
590   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
591     {
592       nat_elog_notice (sm, "in2out key add failed");
593       goto error;
594     }
595
596   /* log NAT event */
597   nat_ipfix_logging_nat44_ses_create (
598     thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
599     s->in2out.port, s->out2in.port, s->in2out.fib_index);
600
601   nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
602                          s->in2out.port, &s->ext_host_nat_addr,
603                          s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
604                          &s->ext_host_addr, s->ext_host_port, s->proto, 0);
605
606   per_vrf_sessions_register_session (s, thread_index);
607
608   *sessionp = s;
609   return next;
610 error:
611   if (s)
612     {
613       nat_ed_session_delete (sm, s, thread_index, 1);
614     }
615   *sessionp = s = NULL;
616   return NAT_NEXT_DROP;
617 }
618
619 static_always_inline int
620 nat44_ed_not_translate (vlib_main_t *vm, vlib_node_runtime_t *node,
621                         u32 sw_if_index, vlib_buffer_t *b, ip4_header_t *ip,
622                         u32 proto, u32 rx_fib_index)
623 {
624   snat_main_t *sm = &snat_main;
625
626   clib_bihash_kv_16_8_t kv, value;
627   ip4_address_t placeholder_addr;
628   u32 placeholder_fib_index;
629   u16 placeholder_port;
630
631   init_ed_k (&kv, ip->dst_address.as_u32,
632              vnet_buffer (b)->ip.reass.l4_dst_port, ip->src_address.as_u32,
633              vnet_buffer (b)->ip.reass.l4_src_port, sm->outside_fib_index,
634              ip->protocol);
635
636   // do NAT if there is an active session or a matching static mapping
637   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value) ||
638       !snat_static_mapping_match (
639         vm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
640         sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
641         &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
642     {
643       return 0;
644     }
645
646   // do not NAT if forwarding is enabled
647   if (sm->forwarding_enabled)
648     {
649       return 1;
650     }
651
652   // do not NAT packets aimed at the interface address
653   if (PREDICT_FALSE (
654         is_interface_addr (sm, node, sw_if_index, ip->dst_address.as_u32)))
655     {
656       return 1;
657     }
658
659   // do NAT for packets whose destination is resolvable
660   // destination can be resolved either by:
661   // a) vrf routing table entry
662   // b) (non output feature) outside interface fib
663   if (is_destination_resolvable (rx_fib_index, ip->dst_address))
664     {
665       return 0;
666     }
667
668   return 1;
669 }
670
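/*
 * With forwarding enabled, a packet that hits an existing fwd-bypass
 * session is only accounted (and, for TCP, state-tracked) and then passed
 * through untranslated.
 */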
671 static_always_inline int
672 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
673                                       u32 thread_index, f64 now,
674                                       vlib_main_t * vm, vlib_buffer_t * b)
675 {
676   clib_bihash_kv_16_8_t kv, value;
677   snat_session_t *s = 0;
678   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
679
680   if (!sm->forwarding_enabled)
681     return 0;
682
683   if (ip->protocol == IP_PROTOCOL_ICMP)
684     {
685       ip4_address_t lookup_saddr, lookup_daddr;
686       u16 lookup_sport, lookup_dport;
687       u8 lookup_protocol;
688       if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
689                                               &lookup_sport, &lookup_daddr,
690                                               &lookup_dport, &lookup_protocol))
691         return 0;
692       init_ed_k (&kv, lookup_saddr.as_u32, lookup_sport, lookup_daddr.as_u32,
693                  lookup_dport, 0, lookup_protocol);
694     }
695   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
696     {
697       init_ed_k (&kv, ip->src_address.as_u32,
698                  vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
699                  vnet_buffer (b)->ip.reass.l4_dst_port, 0, ip->protocol);
700     }
701   else
702     {
703       init_ed_k (&kv, ip->src_address.as_u32, 0, ip->dst_address.as_u32, 0, 0,
704                  ip->protocol);
705     }
706
707   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
708     {
709       ASSERT (thread_index == ed_value_get_thread_index (&value));
710       s =
711         pool_elt_at_index (tsm->sessions,
712                            ed_value_get_session_index (&value));
713
714       if (na44_ed_is_fwd_bypass_session (s))
715         {
716           if (ip->protocol == IP_PROTOCOL_TCP)
717             {
718               nat44_set_tcp_session_state_i2o (
719                 sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags,
720                 thread_index);
721             }
722           /* Accounting */
723           nat44_session_update_counters (s, now,
724                                          vlib_buffer_length_in_chain (vm, b),
725                                          thread_index);
726           /* Per-user LRU list maintenance */
727           nat44_session_update_lru (sm, s, thread_index);
728           return 1;
729         }
730       else
731         return 0;
732     }
733
734   return 0;
735 }
736
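/*
 * Output-feature variant of the not-translate check: the packet is left
 * untranslated if it already matches a src-NAT session keyed on the TX
 * FIB, or a dst-NAT session keyed on the RX FIB (except for fwd-bypass
 * sessions and hairpinned traffic arriving on an inside interface).
 * With multiple workers, the dst-NAT lookup can be short-circuited via
 * the session index stashed in vnet_buffer2 upstream (e.g. by the
 * handoff node).
 */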
737 static_always_inline int
738 nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
739                                        ip4_header_t *ip, u16 src_port,
740                                        u16 dst_port, u32 thread_index,
741                                        u32 rx_sw_if_index, u32 tx_sw_if_index,
742                                        int is_multi_worker)
743 {
744   clib_bihash_kv_16_8_t kv, value;
745   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
746   snat_interface_t *i;
747   snat_session_t *s;
748   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
749   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
750
751   /* src NAT check */
752   init_ed_k (&kv, ip->src_address.as_u32, src_port, ip->dst_address.as_u32,
753              dst_port, tx_fib_index, ip->protocol);
754   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
755     {
756       ASSERT (thread_index == ed_value_get_thread_index (&value));
757       s =
758         pool_elt_at_index (tsm->sessions,
759                            ed_value_get_session_index (&value));
760       return 1;
761     }
762
763   /* dst NAT check */
764   if (is_multi_worker &&
765       PREDICT_TRUE (!pool_is_free_index (
766         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index)))
767     {
768       nat_6t_t lookup;
769       lookup.fib_index = rx_fib_index;
770       lookup.proto = ip->protocol;
771       lookup.daddr.as_u32 = ip->src_address.as_u32;
772       lookup.dport = src_port;
773       lookup.saddr.as_u32 = ip->dst_address.as_u32;
774       lookup.sport = dst_port;
775       s = pool_elt_at_index (
776         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index);
777       if (PREDICT_TRUE (nat_6t_t_eq (&s->i2o.match, &lookup)))
778         {
779           goto skip_dst_nat_lookup;
780         }
781       s = NULL;
782     }
783
784   init_ed_k (&kv, ip->dst_address.as_u32, dst_port, ip->src_address.as_u32,
785              src_port, rx_fib_index, ip->protocol);
786   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
787     {
788       ASSERT (thread_index == ed_value_get_thread_index (&value));
789       s =
790         pool_elt_at_index (tsm->sessions,
791                            ed_value_get_session_index (&value));
792
793     skip_dst_nat_lookup:
794       if (na44_ed_is_fwd_bypass_session (s))
795         return 0;
796
797       /* hairpinning */
798       pool_foreach (i, sm->output_feature_interfaces)
799         {
800           if ((nat44_ed_is_interface_inside (i)) &&
801               (rx_sw_if_index == i->sw_if_index))
802             return 0;
803         }
804       return 1;
805     }
806
807   return 0;
808 }
809
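/*
 * ICMP slow path: derive the lookup 6-tuple from the ICMP header (or the
 * inner packet for error messages), run the applicable not-translate
 * check, refuse to create sessions for ICMP error messages, build the
 * session via slow_path_ed and, for non-fragments, verify the ICMP
 * checksum before accepting the packet.
 */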
810 static inline u32
811 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
812                           icmp46_header_t *icmp, u32 sw_if_index,
813                           u32 tx_sw_if_index, u32 rx_fib_index,
814                           vlib_node_runtime_t *node, u32 next, f64 now,
815                           u32 thread_index, snat_session_t **s_p,
816                           int is_multi_worker)
817 {
818   vlib_main_t *vm = vlib_get_main ();
819   u16 checksum;
820   int err;
821   snat_session_t *s = NULL;
822   u8 lookup_protocol = ip->protocol;
823   u16 lookup_sport, lookup_dport;
824   ip4_address_t lookup_saddr, lookup_daddr;
825
826   err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
827                                             &lookup_sport, &lookup_daddr,
828                                             &lookup_dport, &lookup_protocol);
829   if (err != 0)
830     {
831       b->error = node->errors[err];
832       return NAT_NEXT_DROP;
833     }
834
835   if (tx_sw_if_index != ~0)
836     {
837       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
838             sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
839             tx_sw_if_index, is_multi_worker)))
840         {
841           return next;
842         }
843     }
844   else
845     {
846       if (PREDICT_FALSE (nat44_ed_not_translate (
847             vm, node, sw_if_index, b, ip, IP_PROTOCOL_ICMP, rx_fib_index)))
848         {
849           return next;
850         }
851     }
852
853   if (PREDICT_FALSE (icmp_type_is_error_message (
854         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
855     {
856       b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
857       return NAT_NEXT_DROP;
858     }
859
860   next =
861     slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address, lookup_sport,
862                   lookup_dport, ip->protocol, rx_fib_index, tx_sw_if_index, &s,
863                   node, next, thread_index, vlib_time_now (vm));
864
865   if (NAT_NEXT_DROP == next)
866     goto out;
867
868   if (PREDICT_TRUE (!ip4_is_fragment (ip)))
869     {
870       ip_csum_t sum = ip_incremental_checksum_buffer (
871         vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b),
872         ntohs (ip->length) - ip4_header_bytes (ip), 0);
873       checksum = ~ip_csum_fold (sum);
874       if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff))
875         {
876           next = NAT_NEXT_DROP;
877           goto out;
878         }
879     }
880
881 out:
882   if (PREDICT_TRUE (next != NAT_NEXT_DROP && s))
883     {
884       /* Accounting */
885       nat44_session_update_counters (
886         s, now, vlib_buffer_length_in_chain (vm, b), thread_index);
887       /* Per-user LRU list maintenance */
888       nat44_session_update_lru (sm, s, thread_index);
889     }
890   *s_p = s;
891   return next;
892 }
893
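/*
 * Slow path for protocols other than TCP/UDP/ICMP: sessions are keyed on
 * addresses only (the port fields are zero).  The outside address comes
 * from a static mapping if one exists, otherwise from an existing session
 * to the same external host, otherwise from the first pool address whose
 * pairing with this destination is not yet in the flow hash.
 */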
894 static snat_session_t *
895 nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
896                                         ip4_header_t *ip, u32 rx_fib_index,
897                                         u32 thread_index, f64 now,
898                                         vlib_main_t *vm,
899                                         vlib_node_runtime_t *node)
900 {
901   clib_bihash_kv_16_8_t s_kv, s_value;
902   snat_static_mapping_t *m = NULL;
903   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
904   snat_session_t *s = NULL;
905   u32 tx_fib_index;
906   int i;
907   ip4_address_t new_src_addr = { 0 };
908   ip4_address_t new_dst_addr = ip->dst_address;
909
910   if (PREDICT_FALSE (
911         nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
912     {
913       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
914       nat_ipfix_logging_max_sessions (thread_index,
915                                       sm->max_translations_per_thread);
916       nat_elog_notice (sm, "maximum sessions exceeded");
917       return 0;
918     }
919
920   tx_fib_index = get_tx_fib_index (rx_fib_index, ip->dst_address);
921
922   // Try to find static mapping first
923   m = nat44_ed_sm_i2o_lookup (sm, ip->src_address, 0, rx_fib_index,
924                               ip->protocol);
925   if (m)
926     {
927       new_src_addr = m->external_addr;
928     }
929   else
930     {
931       pool_foreach (s, tsm->sessions)
932         {
933           if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
934             {
935               init_ed_k (&s_kv, s->out2in.addr.as_u32, 0,
936                          ip->dst_address.as_u32, 0, tx_fib_index,
937                          ip->protocol);
938               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
939                 {
940                   new_src_addr = s->out2in.addr;
941                 }
942               break;
943             }
944         }
945
946       if (!new_src_addr.as_u32)
947         {
948           for (i = 0; i < vec_len (sm->addresses); i++)
949             {
950               init_ed_k (&s_kv, sm->addresses[i].addr.as_u32, 0,
951                          ip->dst_address.as_u32, 0, tx_fib_index,
952                          ip->protocol);
953               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
954                 {
955                   new_src_addr = sm->addresses[i].addr;
956                 }
957             }
958         }
959     }
960
961   if (!new_src_addr.as_u32)
962     {
963       // could not allocate address for translation ...
964       return 0;
965     }
966
967   s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
968   if (!s)
969     {
970       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
971       nat_elog_warn (sm, "create NAT session failed");
972       return 0;
973     }
974
975   nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
976                         ip->dst_address, 0, rx_fib_index, ip->protocol);
977   nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
978   nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
979
980   // hairpinning?
981   int is_hairpinning = nat44_ed_external_sm_lookup (
982     sm, ip->dst_address, 0, ip->protocol, &new_dst_addr, NULL);
983   s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
984
985   nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
986   nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
987
988   nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
989                         tx_fib_index, ip->protocol);
990   nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
991   nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
992   nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
993
994   s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
995   s->out2in.addr.as_u32 = new_src_addr.as_u32;
996   s->out2in.fib_index = tx_fib_index;
997   s->in2out.addr.as_u32 = ip->src_address.as_u32;
998   s->in2out.fib_index = rx_fib_index;
999   s->in2out.port = s->out2in.port = ip->protocol;
1000   if (m)
1001     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1002
1003   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
1004     {
1005       nat_elog_notice (sm, "in2out flow hash add failed");
1006       nat_ed_session_delete (sm, s, thread_index, 1);
1007       return NULL;
1008     }
1009
1010   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1))
1011     {
1012       nat_elog_notice (sm, "out2in flow hash add failed");
1013       nat_ed_session_delete (sm, s, thread_index, 1);
1014       return NULL;
1015     }
1016
1017   per_vrf_sessions_register_session (s, thread_index);
1018
1019   /* Accounting */
1020   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
1021                                  thread_index);
1022   /* Per-user LRU list maintenance */
1023   nat44_session_update_lru (sm, s, thread_index);
1024
1025   return s;
1026 }
1027
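/*
 * Fast path: look the 6-tuple up in the flow hash (or reuse the session
 * index stashed by the handoff node when running multi-worker), translate
 * the buffer in place via the matching nat_6t flow, and hand anything
 * without a usable session off to the slow-path node.
 */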
1028 static inline uword
1029 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
1030                                           vlib_node_runtime_t *node,
1031                                           vlib_frame_t *frame,
1032                                           int is_output_feature,
1033                                           int is_multi_worker)
1034 {
1035   u32 n_left_from, *from;
1036   snat_main_t *sm = &snat_main;
1037   f64 now = vlib_time_now (vm);
1038   u32 thread_index = vm->thread_index;
1039   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1040   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
1041     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
1042
1043   from = vlib_frame_vector_args (frame);
1044   n_left_from = frame->n_vectors;
1045
1046   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1047   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1048   vlib_get_buffers (vm, from, b, n_left_from);
1049
1050   while (n_left_from > 0)
1051     {
1052       vlib_buffer_t *b0;
1053       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1054       u32 tx_sw_if_index0;
1055       u32 cntr_sw_if_index0;
1056       ip_protocol_t proto0;
1057       ip4_header_t *ip0;
1058       snat_session_t *s0 = 0;
1059       clib_bihash_kv_16_8_t kv0 = { 0 }, value0;
1060       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1061       nat_6t_flow_t *f = 0;
1062       nat_6t_t lookup;
1063       int lookup_skipped = 0;
1064
1065       b0 = *b;
1066       b++;
1067
1068       /* Prefetch next iteration. */
1069       if (PREDICT_TRUE (n_left_from >= 2))
1070         {
1071           vlib_buffer_t *p2;
1072
1073           p2 = *b;
1074
1075           vlib_prefetch_buffer_header (p2, LOAD);
1076
1077           clib_prefetch_load (p2->data);
1078         }
1079
1080       if (is_output_feature)
1081         {
1082           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1083         }
1084
1085       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1086
1087       ip0 =
1088         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
1089
1090       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1091       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1092       cntr_sw_if_index0 =
1093         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1094       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1095                                                            rx_sw_if_index0);
1096       lookup.fib_index = rx_fib_index0;
1097
1098       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1099         {
1100           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1101           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1102                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1103                                        0);
1104           next[0] = NAT_NEXT_ICMP_ERROR;
1105           goto trace0;
1106         }
1107
1108       proto0 = ip0->protocol;
1109
1110       if (is_output_feature)
1111         {
1112           if (PREDICT_FALSE
1113               (nat_not_translate_output_feature_fwd
1114                (sm, ip0, thread_index, now, vm, b0)))
1115             goto trace0;
1116         }
1117
1118       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
1119         {
1120           if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1121                 ICMP4_echo_request &&
1122               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1123                 ICMP4_echo_reply &&
1124               !icmp_type_is_error_message (
1125                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
1126             {
1127               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
1128               next[0] = NAT_NEXT_DROP;
1129               goto trace0;
1130             }
1131           int err = nat_get_icmp_session_lookup_values (
1132             b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
1133             &lookup.dport, &lookup.proto);
1134           if (err != 0)
1135             {
1136               b0->error = node->errors[err];
1137               next[0] = NAT_NEXT_DROP;
1138               goto trace0;
1139             }
1140         }
1141       else
1142         {
1143           lookup.proto = ip0->protocol;
1144           lookup.saddr.as_u32 = ip0->src_address.as_u32;
1145           lookup.daddr.as_u32 = ip0->dst_address.as_u32;
1146           lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
1147           lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
1148         }
1149
1150       /* there might be a stashed index in vnet_buffer2 from handoff or
1151        * classify node, see if it can be used */
1152       if (is_multi_worker &&
1153           !pool_is_free_index (tsm->sessions,
1154                                vnet_buffer2 (b0)->nat.cached_session_index))
1155         {
1156           s0 = pool_elt_at_index (tsm->sessions,
1157                                   vnet_buffer2 (b0)->nat.cached_session_index);
1158           if (PREDICT_TRUE (
1159                 nat_6t_t_eq (&s0->i2o.match, &lookup)
1160                 // for some hairpinning cases there are two "i2i" flows instead
1161                 // of i2o and o2i as both hosts are on inside
1162                 || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1163                     nat_6t_t_eq (&s0->o2i.match, &lookup))))
1164             {
1165               /* yes, this is the droid we're looking for */
1166               lookup_skipped = 1;
1167               goto skip_lookup;
1168             }
1169           s0 = NULL;
1170         }
1171
1172       init_ed_k (&kv0, lookup.saddr.as_u32, lookup.sport, lookup.daddr.as_u32,
1173                  lookup.dport, lookup.fib_index, lookup.proto);
1174
1175       // lookup flow
1176       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1177         {
1178           // flow does not exist, go slow path
1179           next[0] = def_slow;
1180           goto trace0;
1181         }
1182
1183       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1184       s0 =
1185         pool_elt_at_index (tsm->sessions,
1186                            ed_value_get_session_index (&value0));
1187
1188     skip_lookup:
1189
1190       ASSERT (thread_index == s0->thread_index);
1191
1192       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1193         {
1194           // session is closed, go slow path
1195           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1196           nat_ed_session_delete (sm, s0, thread_index, 1);
1197           next[0] = def_slow;
1198           goto trace0;
1199         }
1200
1201       // the session expired - free it and take the slow path
1202       u64 sess_timeout_time;
1203       sess_timeout_time =
1204         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1205       if (now >= sess_timeout_time)
1206         {
1207           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1208           nat_ed_session_delete (sm, s0, thread_index, 1);
1209           // session is closed, go slow path
1210           next[0] = def_slow;
1211           goto trace0;
1212         }
1213
1214       b0->flags |= VNET_BUFFER_F_IS_NATED;
1215
1216       if (nat_6t_t_eq (&s0->i2o.match, &lookup))
1217         {
1218           f = &s0->i2o;
1219         }
1220       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1221                nat_6t_t_eq (&s0->o2i.match, &lookup))
1222         {
1223           f = &s0->o2i;
1224         }
1225       else
1226         {
1227           translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
1228           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1229           nat_ed_session_delete (sm, s0, thread_index, 1);
1230           next[0] = NAT_NEXT_DROP;
1231           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1232           goto trace0;
1233         }
1234
1235       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1236           (translation_error = nat_6t_flow_buf_translate_i2o (
1237              vm, sm, b0, ip0, f, proto0, is_output_feature)))
1238         {
1239           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1240           nat_ed_session_delete (sm, s0, thread_index, 1);
1241           next[0] = NAT_NEXT_DROP;
1242           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1243           goto trace0;
1244         }
1245
1246       switch (proto0)
1247         {
1248         case IP_PROTOCOL_TCP:
1249           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
1250                                          thread_index, cntr_sw_if_index0, 1);
1251           nat44_set_tcp_session_state_i2o (
1252             sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
1253             thread_index);
1254           break;
1255         case IP_PROTOCOL_UDP:
1256           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
1257                                          thread_index, cntr_sw_if_index0, 1);
1258           break;
1259         case IP_PROTOCOL_ICMP:
1260           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
1261                                          thread_index, cntr_sw_if_index0, 1);
1262           break;
1263         default:
1264           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
1265                                          thread_index, cntr_sw_if_index0, 1);
1266           break;
1267         }
1268
1269       /* Accounting */
1270       nat44_session_update_counters (s0, now,
1271                                      vlib_buffer_length_in_chain (vm, b0),
1272                                      thread_index);
1273       /* Per-user LRU list maintenance */
1274       nat44_session_update_lru (sm, s0, thread_index);
1275
1276     trace0:
1277       if (PREDICT_FALSE
1278           ((node->flags & VLIB_NODE_FLAG_TRACE)
1279            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1280         {
1281           nat_in2out_ed_trace_t *t =
1282             vlib_add_trace (vm, node, b0, sizeof (*t));
1283           t->sw_if_index = rx_sw_if_index0;
1284           t->next_index = next[0];
1285           t->is_slow_path = 0;
1286           t->translation_error = translation_error;
1287           t->lookup_skipped = lookup_skipped;
1288           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1289
1290           if (s0)
1291             {
1292               t->session_index = s0 - tsm->sessions;
1293               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1294               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1295               t->translation_via_i2of = (&s0->i2o == f);
1296               t->tcp_state = s0->tcp_state;
1297             }
1298           else
1299             {
1300               t->session_index = ~0;
1301             }
1302         }
1303
1304       if (next[0] == NAT_NEXT_DROP)
1305         {
1306           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
1307                                          thread_index, cntr_sw_if_index0, 1);
1308         }
1309
1310       n_left_from--;
1311       next++;
1312     }
1313
1314   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1315                                frame->n_vectors);
1316   return frame->n_vectors;
1317 }
1318
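/*
 * Slow path: unknown protocols and ICMP are handled by their dedicated
 * helpers; for TCP/UDP the session is looked up and, if absent, created
 * via slow_path_ed (after the not-translate checks), then the buffer is
 * translated in place.
 */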
1319 static inline uword
1320 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
1321                                           vlib_node_runtime_t *node,
1322                                           vlib_frame_t *frame,
1323                                           int is_output_feature,
1324                                           int is_multi_worker)
1325 {
1326   u32 n_left_from, *from;
1327   snat_main_t *sm = &snat_main;
1328   f64 now = vlib_time_now (vm);
1329   u32 thread_index = vm->thread_index;
1330   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1331
1332   from = vlib_frame_vector_args (frame);
1333   n_left_from = frame->n_vectors;
1334
1335   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1336   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1337   vlib_get_buffers (vm, from, b, n_left_from);
1338
1339   while (n_left_from > 0)
1340     {
1341       vlib_buffer_t *b0;
1342       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1343       u32 tx_sw_if_index0;
1344       u32 cntr_sw_if_index0;
1345       ip_protocol_t proto0;
1346       ip4_header_t *ip0;
1347       udp_header_t *udp0;
1348       icmp46_header_t *icmp0;
1349       snat_session_t *s0 = 0;
1350       clib_bihash_kv_16_8_t kv0 = { 0 }, value0;
1351       int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1352
1353       b0 = *b;
1354
1355       if (is_output_feature)
1356         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1357
1358       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1359
1360       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1361                               iph_offset0);
1362
1363       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1364       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1365       cntr_sw_if_index0 =
1366         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1367       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1368                                                            rx_sw_if_index0);
1369
1370       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1371         {
1372           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1373           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1374                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1375                                        0);
1376           next[0] = NAT_NEXT_ICMP_ERROR;
1377           goto trace0;
1378         }
1379
1380       udp0 = ip4_next_header (ip0);
1381       icmp0 = (icmp46_header_t *) udp0;
1382       proto0 = ip0->protocol;
1383
1384       if (PREDICT_FALSE (nat44_ed_is_unk_proto (proto0)))
1385         {
1386           s0 = nat44_ed_in2out_slowpath_unknown_proto (
1387             sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
1388           if (!s0)
1389             next[0] = NAT_NEXT_DROP;
1390
1391           if (NAT_NEXT_DROP != next[0] && s0 &&
1392               NAT_ED_TRNSL_ERR_SUCCESS !=
1393                 (translation_error = nat_6t_flow_buf_translate_i2o (
1394                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1395             {
1396               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1397               nat_ed_session_delete (sm, s0, thread_index, 1);
1398               next[0] = NAT_NEXT_DROP;
1399               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1400               goto trace0;
1401             }
1402
1403           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
1404                                          thread_index, cntr_sw_if_index0, 1);
1405           goto trace0;
1406         }
1407
1408       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
1409         {
1410           next[0] = icmp_in2out_ed_slow_path (
1411             sm, b0, ip0, icmp0, rx_sw_if_index0, tx_sw_if_index0,
1412             rx_fib_index0, node, next[0], now, thread_index, &s0,
1413             is_multi_worker);
1414           if (NAT_NEXT_DROP != next[0] && s0 &&
1415               NAT_ED_TRNSL_ERR_SUCCESS !=
1416                 (translation_error = nat_6t_flow_buf_translate_i2o (
1417                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1418             {
1419               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1420               nat_ed_session_delete (sm, s0, thread_index, 1);
1421               next[0] = NAT_NEXT_DROP;
1422               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1423               goto trace0;
1424             }
1425
1426           if (NAT_NEXT_DROP != next[0])
1427             {
1428               vlib_increment_simple_counter (
1429                 &sm->counters.slowpath.in2out.icmp, thread_index,
1430                 cntr_sw_if_index0, 1);
1431             }
1432           goto trace0;
1433         }
1434
1435       init_ed_k (
1436         &kv0, ip0->src_address.as_u32, vnet_buffer (b0)->ip.reass.l4_src_port,
1437         ip0->dst_address.as_u32, vnet_buffer (b0)->ip.reass.l4_dst_port,
1438         rx_fib_index0, ip0->protocol);
1439       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1440         {
1441           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1442           s0 =
1443             pool_elt_at_index (tsm->sessions,
1444                                ed_value_get_session_index (&value0));
1445         }
1446
1447       if (!s0)
1448         {
1449           if (is_output_feature)
1450             {
1451               if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
1452                     sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1453                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1454                     rx_sw_if_index0, tx_sw_if_index0, is_multi_worker)))
1455                 goto trace0;
1456
1457               /*
1458                * Send DHCP packets to the ipv4 stack, or we won't
1459                * be able to use dhcp client on the outside interface
1460                */
1461               if (PREDICT_FALSE (
1462                     proto0 == IP_PROTOCOL_UDP &&
1463                     (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1464                      clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)) &&
1465                     ip0->dst_address.as_u32 == 0xffffffff))
1466                 goto trace0;
1467             }
1468           else
1469             {
1470               if (PREDICT_FALSE (
1471                     nat44_ed_not_translate (vm, node, rx_sw_if_index0, b0, ip0,
1472                                             proto0, rx_fib_index0)))
1473                 goto trace0;
1474             }
1475
1476           next[0] =
1477             slow_path_ed (vm, sm, b0, ip0->src_address, ip0->dst_address,
1478                           vnet_buffer (b0)->ip.reass.l4_src_port,
1479                           vnet_buffer (b0)->ip.reass.l4_dst_port,
1480                           ip0->protocol, rx_fib_index0, tx_sw_if_index0, &s0,
1481                           node, next[0], thread_index, now);
1482
1483           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1484             goto trace0;
1485
1486           if (PREDICT_FALSE (!s0))
1487             goto trace0;
1488
1489         }
1490
1491       b0->flags |= VNET_BUFFER_F_IS_NATED;
1492
1493       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1494           (translation_error = nat_6t_flow_buf_translate_i2o (
1495              vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1496         {
1497           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1498           nat_ed_session_delete (sm, s0, thread_index, 1);
1499           next[0] = NAT_NEXT_DROP;
1500           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1501           goto trace0;
1502         }
1503
      if (PREDICT_TRUE (proto0 == IP_PROTOCOL_TCP))
        {
          vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
                                         thread_index, cntr_sw_if_index0, 1);
          nat44_set_tcp_session_state_i2o (
            sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
            thread_index);
        }
      else
        {
          vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
                                         thread_index, cntr_sw_if_index0, 1);
        }

      /* Accounting */
      nat44_session_update_counters (s0, now,
                                     vlib_buffer_length_in_chain
                                     (vm, b0), thread_index);
      /* Per-user LRU list maintenance */
      nat44_session_update_lru (sm, s0, thread_index);

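      /*
       * Common exit point: record a packet trace entry (search key,
       * session, flows, translation result) when tracing is enabled.
       */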
    trace0:
      if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
                         && (b0->flags & VLIB_BUFFER_IS_TRACED)))
        {
          nat_in2out_ed_trace_t *t =
            vlib_add_trace (vm, node, b0, sizeof (*t));
          t->sw_if_index = rx_sw_if_index0;
          t->next_index = next[0];
          t->is_slow_path = 1;
          t->translation_error = translation_error;
          clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));

          if (s0)
            {
              t->session_index = s0 - tsm->sessions;
              clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
              clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
              t->translation_via_i2of = 1;
              t->tcp_state = s0->tcp_state;
            }
          else
            {
              t->session_index = ~0;
            }
        }

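      /* Account for any packet that ended up being dropped above. */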
      if (next[0] == NAT_NEXT_DROP)
        {
          vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
                                         thread_index, cntr_sw_if_index0, 1);
        }

      n_left_from--;
      next++;
      b++;
    }

  vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
                               frame->n_vectors);

  return frame->n_vectors;
}

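/*
 * Node dispatch wrappers.  The two trailing flags passed to the fast-path
 * and slow-path inlines select output-feature handling and the
 * multi-worker variant; the multi-worker flag is derived here from
 * snat_main.num_workers.
 */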
VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
                                     vlib_node_runtime_t * node,
                                     vlib_frame_t * frame)
{
  if (snat_main.num_workers > 1)
    {
      return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1);
    }
  else
    {
      return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0);
    }
}

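/*
 * All in2out nodes are registered as siblings of "nat-default" so they
 * share its next-node arcs and can be reached via the NAT_NEXT_* indices.
 */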
VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
  .name = "nat44-ed-in2out",
  .vector_size = sizeof (u32),
  .sibling_of = "nat-default",
  .format_trace = format_nat_in2out_ed_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
  .error_strings = nat_in2out_ed_error_strings,
  .runtime_data_bytes = sizeof (snat_runtime_t),
};

VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
                                            vlib_node_runtime_t * node,
                                            vlib_frame_t * frame)
{
  if (snat_main.num_workers > 1)
    {
      return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1);
    }
  else
    {
      return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0);
    }
}

VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
  .name = "nat44-ed-in2out-output",
  .vector_size = sizeof (u32),
  .sibling_of = "nat-default",
  .format_trace = format_nat_in2out_ed_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
  .error_strings = nat_in2out_ed_error_strings,
  .runtime_data_bytes = sizeof (snat_runtime_t),
};

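/*
 * Slow-path node wrappers: they run the session-creating loop defined
 * above on packets handed over by the fast path.
 */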
VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
                                              vlib_node_runtime_t *
                                              node, vlib_frame_t * frame)
{
  if (snat_main.num_workers > 1)
    {
      return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 1);
    }
  else
    {
      return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 0);
    }
}

VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
  .name = "nat44-ed-in2out-slowpath",
  .vector_size = sizeof (u32),
  .sibling_of = "nat-default",
  .format_trace = format_nat_in2out_ed_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
  .error_strings = nat_in2out_ed_error_strings,
  .runtime_data_bytes = sizeof (snat_runtime_t),
};

VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
                                                     vlib_node_runtime_t
                                                     * node,
                                                     vlib_frame_t * frame)
{
  if (snat_main.num_workers > 1)
    {
      return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 1);
    }
  else
    {
      return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 0);
    }
}

VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
  .name = "nat44-ed-in2out-output-slowpath",
  .vector_size = sizeof (u32),
  .sibling_of = "nat-default",
  .format_trace = format_nat_in2out_ed_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
  .error_strings = nat_in2out_ed_error_strings,
  .runtime_data_bytes = sizeof (snat_runtime_t),
};

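/*
 * "nat-pre" nodes sit at the start of the NAT feature arc.  They do no
 * translation themselves; nat_pre_node_fn_inline simply steers packets to
 * the requested in2out fast-path next index, recording the arc next index
 * as shown in the trace format below.
 */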
static u8 *
format_nat_pre_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
  return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
                 t->arc_next_index);
}

VLIB_NODE_FN (nat_pre_in2out_node)
  (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  return nat_pre_node_fn_inline (vm, node, frame,
                                 NAT_NEXT_IN2OUT_ED_FAST_PATH);
}

VLIB_NODE_FN (nat_pre_in2out_output_node)
  (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  return nat_pre_node_fn_inline (vm, node, frame,
                                 NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
}

VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
  .name = "nat-pre-in2out",
  .vector_size = sizeof (u32),
  .sibling_of = "nat-default",
  .format_trace = format_nat_pre_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = 0,
};

VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
  .name = "nat-pre-in2out-output",
  .vector_size = sizeof (u32),
  .sibling_of = "nat-default",
  .format_trace = format_nat_pre_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = 0,
};

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */