nat: add saddr info to nat44-ed o2i flow's rewrite
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_in2out.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27
28 #include <nat/lib/nat_inlines.h>
29 #include <nat/lib/ipfix_logging.h>
30
31 #include <nat/nat44-ed/nat44_ed.h>
32 #include <nat/nat44-ed/nat44_ed_inlines.h>
33
34 static char *nat_in2out_ed_error_strings[] = {
35 #define _(sym,string) string,
36   foreach_nat_in2out_ed_error
37 #undef _
38 };
39
40 typedef struct
41 {
42   u32 sw_if_index;
43   u32 next_index;
44   u32 session_index;
45   nat_translation_error_e translation_error;
46   nat_6t_flow_t i2of;
47   nat_6t_flow_t o2if;
48   clib_bihash_kv_16_8_t search_key;
49   u8 is_slow_path;
50   u8 translation_via_i2of;
51   u8 lookup_skipped;
52   u8 tcp_state;
53 } nat_in2out_ed_trace_t;
54
55 static u8 *
56 format_nat_in2out_ed_trace (u8 * s, va_list * args)
57 {
58   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
59   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
60   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
61   char *tag;
62
63   tag =
64     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
65     "NAT44_IN2OUT_ED_FAST_PATH";
66
67   s = format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
68               t->next_index);
69   if (~0 != t->session_index)
70     {
71       s = format (s, ", session %d, translation result '%U' via %s",
72                   t->session_index, format_nat_ed_translation_error,
73                   t->translation_error,
74                   t->translation_via_i2of ? "i2of" : "o2if");
75       s = format (s, "\n  i2of %U", format_nat_6t_flow, &t->i2of);
76       s = format (s, "\n  o2if %U", format_nat_6t_flow, &t->o2if);
77     }
78   if (!t->is_slow_path)
79     {
80       if (t->lookup_skipped)
81         {
82           s = format (s, "\n  lookup skipped - cached session index used");
83         }
84       else
85         {
86           s = format (s, "\n  search key %U", format_ed_session_kvp,
87                       &t->search_key);
88         }
89     }
90   if (IP_PROTOCOL_TCP == t->i2of.match.proto)
91     {
92       s = format (s, "\n  TCP state: %U", format_nat44_ed_tcp_state,
93                   t->tcp_state);
94     }
95
96   return s;
97 }
98
99 static int
100 nat_ed_alloc_addr_and_port_with_snat_address (
101   snat_main_t *sm, u8 proto, u32 thread_index, snat_address_t *a,
102   u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
103   ip4_address_t *outside_addr, u16 *outside_port)
104 {
105   const u16 port_thread_offset =
106     (port_per_thread * snat_thread_index) + ED_USER_PORT_OFFSET;
107
108   /* Backup original match in case of failure */
109   const nat_6t_t match = s->o2i.match;
110
111   s->o2i.match.daddr = a->addr;
112   /* first try port suggested by caller */
113   u16 port = clib_net_to_host_u16 (*outside_port);
114   u16 port_offset = port - port_thread_offset;
115   if (port < port_thread_offset ||
116       port >= port_thread_offset + port_per_thread)
117     {
118       /* need to pick a different port, suggested port doesn't fit in
119        * this thread's port range */
120       port_offset = snat_random_port (0, port_per_thread - 1);
121       port = port_thread_offset + port_offset;
122     }
123   u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
124   do
125     {
126       if (IP_PROTOCOL_ICMP == proto)
127         {
128           s->o2i.match.sport = clib_host_to_net_u16 (port);
129         }
130       s->o2i.match.dport = clib_host_to_net_u16 (port);
131       if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
132         {
133           *outside_addr = a->addr;
134           *outside_port = clib_host_to_net_u16 (port);
135           return 0;
136         }
137       port_offset = snat_random_port (0, port_per_thread - 1);
138       port = port_thread_offset + port_offset;
139       --attempts;
140     }
141   while (attempts > 0);
142
143   /* Revert match */
144   s->o2i.match = match;
145   return 1;
146 }
147
148 static int
149 nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index,
150                             u32 tx_sw_if_index, u32 nat_proto,
151                             u32 thread_index, ip4_address_t s_addr,
152                             ip4_address_t d_addr, u32 snat_thread_index,
153                             snat_session_t *s, ip4_address_t *outside_addr,
154                             u16 *outside_port)
155 {
156   if (vec_len (sm->addresses) > 0)
157     {
158       u32 s_addr_offset = (s_addr.as_u32 + (s_addr.as_u32 >> 8) +
159                            (s_addr.as_u32 >> 16) + (s_addr.as_u32 >> 24)) %
160                           vec_len (sm->addresses);
161       snat_address_t *a, *ja = 0, *ra = 0, *ba = 0;
162       int i;
163
164       // output feature
165       if (tx_sw_if_index != ~0)
166         {
167           for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
168             {
169               a = sm->addresses + i;
170               if (a->fib_index == rx_fib_index)
171                 {
172                   if (a->sw_if_index == tx_sw_if_index)
173                     {
174                       if ((a->addr_len != ~0) &&
175                           (a->net.as_u32 ==
176                            (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
177
178                         {
179                           return nat_ed_alloc_addr_and_port_with_snat_address (
180                             sm, nat_proto, thread_index, a,
181                             sm->port_per_thread, snat_thread_index, s,
182                             outside_addr, outside_port);
183                         }
184                       ra = a;
185                     }
186                   ja = a;
187                 }
188               else if (a->fib_index == ~0)
189                 {
190                   ba = a;
191                 }
192             }
193           for (i = 0; i < s_addr_offset; ++i)
194             {
195               a = sm->addresses + i;
196               if (a->fib_index == rx_fib_index)
197                 {
198                   if (a->sw_if_index == tx_sw_if_index)
199                     {
200                       if ((a->addr_len != ~0) &&
201                           (a->net.as_u32 ==
202                            (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
203
204                         {
205                           return nat_ed_alloc_addr_and_port_with_snat_address (
206                             sm, nat_proto, thread_index, a,
207                             sm->port_per_thread, snat_thread_index, s,
208                             outside_addr, outside_port);
209                         }
210                       ra = a;
211                     }
212                   ja = a;
213                 }
214               else if (a->fib_index == ~0)
215                 {
216                   ba = a;
217                 }
218             }
219           if (ra)
220             {
221               return nat_ed_alloc_addr_and_port_with_snat_address (
222                 sm, nat_proto, thread_index, ra, sm->port_per_thread,
223                 snat_thread_index, s, outside_addr, outside_port);
224             }
225         }
226       else
227         {
228           // first try nat pool addresses to sw interface addreses mappings
229           for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
230             {
231               a = sm->addresses + i;
232               if (a->fib_index == rx_fib_index)
233                 {
234                   if ((a->addr_len != ~0) &&
235                       (a->net.as_u32 ==
236                        (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
237                     {
238                       return nat_ed_alloc_addr_and_port_with_snat_address (
239                         sm, nat_proto, thread_index, a, sm->port_per_thread,
240                         snat_thread_index, s, outside_addr, outside_port);
241                     }
242                   ja = a;
243                 }
244               else if (a->fib_index == ~0)
245                 {
246                   ba = a;
247                 }
248             }
249           for (i = 0; i < s_addr_offset; ++i)
250             {
251               a = sm->addresses + i;
252               if (a->fib_index == rx_fib_index)
253                 {
254                   if ((a->addr_len != ~0) &&
255                       (a->net.as_u32 ==
256                        (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
257                     {
258                       return nat_ed_alloc_addr_and_port_with_snat_address (
259                         sm, nat_proto, thread_index, a, sm->port_per_thread,
260                         snat_thread_index, s, outside_addr, outside_port);
261                     }
262                   ja = a;
263                 }
264               else if (a->fib_index == ~0)
265                 {
266                   ba = a;
267                 }
268             }
269         }
270
271       if (ja || ba)
272         {
273           a = ja ? ja : ba;
274           return nat_ed_alloc_addr_and_port_with_snat_address (
275             sm, nat_proto, thread_index, a, sm->port_per_thread,
276             snat_thread_index, s, outside_addr, outside_port);
277         }
278     }
279   /* Totally out of translations to use... */
280   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
281   return 1;
282 }
283
284 static_always_inline int
285 nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
286                              u16 match_port, ip_protocol_t match_protocol,
287                              ip4_address_t *daddr, u16 *dport)
288 {
289   snat_static_mapping_t *m =
290     nat44_ed_sm_o2i_lookup (sm, match_addr, match_port, 0, match_protocol);
291   if (!m)
292     {
293       /* Try address only mapping */
294       m = nat44_ed_sm_o2i_lookup (sm, match_addr, 0, 0, 0);
295       if (!m)
296         return 0;
297     }
298   *daddr = m->local_addr;
299   if (dport)
300     {
301       /* Address only mapping doesn't change port */
302       *dport = is_sm_addr_only (m->flags) ? match_port : m->local_port;
303     }
304   return 1;
305 }
306
307 static_always_inline vrf_table_t *
308 get_vrf_table_by_fib (u32 fib_index)
309 {
310   snat_main_t *sm = &snat_main;
311   vrf_table_t *t;
312
313   pool_foreach (t, sm->vrf_tables)
314     {
315       if (fib_index == t->table_fib_index)
316         {
317           return t;
318         }
319     }
320
321   return 0;
322 }
323
324 static_always_inline u32
325 get_tx_fib_index (u32 rx_fib_index, ip4_address_t addr)
326 {
327   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
328   fib_prefix_t pfx = {
329     .fp_proto = FIB_PROTOCOL_IP4,
330     .fp_len = 32,
331     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
332     ,
333   };
334
335   snat_main_t *sm = &snat_main;
336   vrf_table_t *t = get_vrf_table_by_fib (rx_fib_index);
337   // default to rx fib
338   u32 tx_fib_index = rx_fib_index;
339
340   if (0 != t)
341     {
342       // managed routes to other fibs
343       vrf_route_t *r;
344       pool_foreach (r, t->routes)
345         {
346           fei = fib_table_lookup (r->fib_index, &pfx);
347           if ((FIB_NODE_INDEX_INVALID != fei) &&
348               (~0 != fib_entry_get_resolving_interface (fei)))
349             {
350               tx_fib_index = r->fib_index;
351               break;
352             }
353         }
354     }
355   else
356     {
357       // default to configured fib
358       tx_fib_index = sm->outside_fib_index;
359
360       // default routes to other fibs
361       nat_fib_t *f;
362       vec_foreach (f, sm->outside_fibs)
363         {
364           fei = fib_table_lookup (f->fib_index, &pfx);
365           if ((FIB_NODE_INDEX_INVALID != fei) &&
366               (~0 != fib_entry_get_resolving_interface (fei)))
367             {
368               tx_fib_index = f->fib_index;
369               break;
370             }
371         }
372     }
373
374   return tx_fib_index;
375 }
376
377 static_always_inline int
378 is_destination_resolvable (u32 rx_fib_index, ip4_address_t addr)
379 {
380   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
381   fib_prefix_t pfx = {
382     .fp_proto = FIB_PROTOCOL_IP4,
383     .fp_len = 32,
384     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
385     ,
386   };
387
388   snat_main_t *sm = &snat_main;
389   vrf_table_t *t = get_vrf_table_by_fib (rx_fib_index);
390   u32 ii;
391
392   if (0 != t)
393     {
394       // managed routes to other fibs
395       vrf_route_t *r;
396       pool_foreach (r, t->routes)
397         {
398           fei = fib_table_lookup (r->fib_index, &pfx);
399           if ((FIB_NODE_INDEX_INVALID != fei) &&
400               (~0 != (ii = fib_entry_get_resolving_interface (fei))))
401             {
402               return 1;
403             }
404         }
405     }
406   else
407     {
408       // default routes to other fibs
409       nat_fib_t *f;
410       vec_foreach (f, sm->outside_fibs)
411         {
412           fei = fib_table_lookup (f->fib_index, &pfx);
413           if ((FIB_NODE_INDEX_INVALID != fei) &&
414               (~0 != (ii = fib_entry_get_resolving_interface (fei))))
415             {
416               snat_interface_t *i;
417               pool_foreach (i, sm->interfaces)
418                 {
419                   if ((nat44_ed_is_interface_outside (i)) &&
420                       (ii == i->sw_if_index))
421                     {
422                       return 1;
423                     }
424                 }
425             }
426         }
427     }
428
429   return 0;
430 }
431
432 static u32
433 slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
434               ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
435               u16 r_port, u8 proto, u32 rx_fib_index, u32 tx_sw_if_index,
436               snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
437               u32 thread_index, f64 now)
438 {
439   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
440   ip4_address_t outside_addr;
441   u16 outside_port;
442   u32 tx_fib_index;
443   u8 is_identity_nat = 0;
444
445   snat_session_t *s = NULL;
446   lb_nat_type_t lb = 0;
447   ip4_address_t daddr = r_addr;
448   u16 dport = r_port;
449
450   if (PREDICT_FALSE
451       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
452     {
453       if (!nat_lru_free_one (sm, thread_index, now))
454         {
455           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
456           nat_ipfix_logging_max_sessions (thread_index,
457                                           sm->max_translations_per_thread);
458           nat_elog_notice (sm, "maximum sessions exceeded");
459           return NAT_NEXT_DROP;
460         }
461     }
462
463   ip4_address_t sm_addr;
464   u16 sm_port;
465   u32 sm_fib_index;
466   int is_sm = 0;
467   // First try to match static mapping by local address and port
468   if (!snat_static_mapping_match (vm, l_addr, l_port, rx_fib_index, proto,
469                                   &sm_addr, &sm_port, &sm_fib_index, 0, 0, 0,
470                                   &lb, 0, &is_identity_nat, 0))
471     {
472       if (PREDICT_FALSE (is_identity_nat))
473         {
474           *sessionp = NULL;
475           return next;
476         }
477       is_sm = 1;
478     }
479
480   if (PREDICT_TRUE (proto == IP_PROTOCOL_TCP))
481     {
482       if (PREDICT_FALSE (!tcp_flags_is_init (
483             vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
484         {
485           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
486           return NAT_NEXT_DROP;
487         }
488     }
489
490   s = nat_ed_session_alloc (sm, thread_index, now, proto);
491   ASSERT (s);
492
493   tx_fib_index = get_tx_fib_index (rx_fib_index, r_addr);
494
495   if (!is_sm)
496     {
497       s->in2out.addr = l_addr;
498       s->in2out.port = l_port;
499       s->proto = proto;
500       s->in2out.fib_index = rx_fib_index;
501       s->out2in.fib_index = tx_fib_index;
502
503       // suggest using local port to allocation function
504       outside_port = l_port;
505
506       if (PREDICT_FALSE (nat44_ed_external_sm_lookup (sm, r_addr, r_port,
507                                                       proto, &daddr, &dport)))
508         {
509           s->flags |= SNAT_SESSION_FLAG_HAIRPINNING;
510         }
511
512       // destination addr/port updated with real values in
513       // nat_ed_alloc_addr_and_port
514       nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
515                             s->out2in.fib_index, proto);
516       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
517       if (IP_PROTOCOL_ICMP == proto)
518         {
519           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
520         }
521       else
522         {
523           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
524         }
525       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
526       nat_6t_flow_saddr_rewrite_set (&s->o2i, r_addr.as_u32);
527
528       if (nat_ed_alloc_addr_and_port (
529             sm, rx_fib_index, tx_sw_if_index, proto, thread_index, l_addr,
530             r_addr, tsm->snat_thread_index, s, &outside_addr, &outside_port))
531         {
532           nat_elog_notice (sm, "addresses exhausted");
533           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
534           nat_ed_session_delete (sm, s, thread_index, 1);
535           return NAT_NEXT_DROP;
536         }
537       s->out2in.addr = outside_addr;
538       s->out2in.port = outside_port;
539     }
540   else
541     {
542       // static mapping
543       s->out2in.addr = outside_addr = sm_addr;
544       s->out2in.port = outside_port = sm_port;
545       s->in2out.addr = l_addr;
546       s->in2out.port = l_port;
547       s->proto = proto;
548       s->in2out.fib_index = rx_fib_index;
549       s->out2in.fib_index = tx_fib_index;
550       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
551
552       // hairpinning?
553       int is_hairpinning = nat44_ed_external_sm_lookup (sm, r_addr, r_port,
554                                                         proto, &daddr, &dport);
555       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
556
557       if (IP_PROTOCOL_ICMP == proto)
558         {
559           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
560                                 sm_port, s->out2in.fib_index, proto);
561           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
562         }
563       else
564         {
565           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr,
566                                 sm_port, s->out2in.fib_index, proto);
567           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
568         }
569       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
570       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
571       nat_6t_flow_saddr_rewrite_set (&s->o2i, r_addr.as_u32);
572       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
573         {
574           nat_elog_notice (sm, "out2in key add failed");
575           goto error;
576         }
577     }
578
579   if (lb)
580     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
581   s->ext_host_addr = r_addr;
582   s->ext_host_port = r_port;
583
584   nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port,
585                         rx_fib_index, proto);
586   nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
587   nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);
588
589   if (IP_PROTOCOL_ICMP == proto)
590     {
591       nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
592     }
593   else
594     {
595       nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
596       nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
597     }
598   nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
599
600   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
601     {
602       nat_elog_notice (sm, "in2out key add failed");
603       goto error;
604     }
605
606   /* log NAT event */
607   nat_ipfix_logging_nat44_ses_create (
608     thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
609     s->in2out.port, s->out2in.port, s->in2out.fib_index);
610
611   nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
612                          s->in2out.port, &s->ext_host_nat_addr,
613                          s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
614                          &s->ext_host_addr, s->ext_host_port, s->proto, 0);
615
616   per_vrf_sessions_register_session (s, thread_index);
617
618   *sessionp = s;
619   return next;
620 error:
621   if (s)
622     {
623       nat_ed_session_delete (sm, s, thread_index, 1);
624     }
625   *sessionp = s = NULL;
626   return NAT_NEXT_DROP;
627 }
628
629 static_always_inline int
630 nat44_ed_not_translate (vlib_main_t *vm, vlib_node_runtime_t *node,
631                         u32 sw_if_index, vlib_buffer_t *b, ip4_header_t *ip,
632                         u32 proto, u32 rx_fib_index)
633 {
634   snat_main_t *sm = &snat_main;
635
636   clib_bihash_kv_16_8_t kv, value;
637   ip4_address_t placeholder_addr;
638   u32 placeholder_fib_index;
639   u16 placeholder_port;
640
641   init_ed_k (&kv, ip->dst_address.as_u32,
642              vnet_buffer (b)->ip.reass.l4_dst_port, ip->src_address.as_u32,
643              vnet_buffer (b)->ip.reass.l4_src_port, sm->outside_fib_index,
644              ip->protocol);
645
646   // do nat if active session or is static mapping
647   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value) ||
648       !snat_static_mapping_match (
649         vm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
650         sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
651         &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
652     {
653       return 0;
654     }
655
656   // do not nat if forwarding enabled
657   if (sm->forwarding_enabled)
658     {
659       return 1;
660     }
661
662   // do not nat packet aimed at the interface address
663   if (PREDICT_FALSE (
664         is_interface_addr (sm, node, sw_if_index, ip->dst_address.as_u32)))
665     {
666       return 1;
667     }
668
669   // do nat packets with resolvable destination
670   // destination can be resolved either by:
671   // a) vrf routing table entry
672   // b) (non output feature) outside interface fib
673   if (is_destination_resolvable (rx_fib_index, ip->dst_address))
674     {
675       return 0;
676     }
677
678   return 1;
679 }
680
681 static_always_inline int
682 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
683                                       u32 thread_index, f64 now,
684                                       vlib_main_t * vm, vlib_buffer_t * b)
685 {
686   clib_bihash_kv_16_8_t kv, value;
687   snat_session_t *s = 0;
688   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
689
690   if (!sm->forwarding_enabled)
691     return 0;
692
693   if (ip->protocol == IP_PROTOCOL_ICMP)
694     {
695       ip4_address_t lookup_saddr, lookup_daddr;
696       u16 lookup_sport, lookup_dport;
697       u8 lookup_protocol;
698       if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
699                                               &lookup_sport, &lookup_daddr,
700                                               &lookup_dport, &lookup_protocol))
701         return 0;
702       init_ed_k (&kv, lookup_saddr.as_u32, lookup_sport, lookup_daddr.as_u32,
703                  lookup_dport, 0, lookup_protocol);
704     }
705   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
706     {
707       init_ed_k (&kv, ip->src_address.as_u32,
708                  vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
709                  vnet_buffer (b)->ip.reass.l4_dst_port, 0, ip->protocol);
710     }
711   else
712     {
713       init_ed_k (&kv, ip->src_address.as_u32, 0, ip->dst_address.as_u32, 0, 0,
714                  ip->protocol);
715     }
716
717   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
718     {
719       ASSERT (thread_index == ed_value_get_thread_index (&value));
720       s =
721         pool_elt_at_index (tsm->sessions,
722                            ed_value_get_session_index (&value));
723
724       if (na44_ed_is_fwd_bypass_session (s))
725         {
726           if (ip->protocol == IP_PROTOCOL_TCP)
727             {
728               nat44_set_tcp_session_state_i2o (
729                 sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags,
730                 thread_index);
731             }
732           /* Accounting */
733           nat44_session_update_counters (s, now,
734                                          vlib_buffer_length_in_chain (vm, b),
735                                          thread_index);
736           /* Per-user LRU list maintenance */
737           nat44_session_update_lru (sm, s, thread_index);
738           return 1;
739         }
740       else
741         return 0;
742     }
743
744   return 0;
745 }
746
747 static_always_inline int
748 nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
749                                        ip4_header_t *ip, u16 src_port,
750                                        u16 dst_port, u32 thread_index,
751                                        u32 rx_sw_if_index, u32 tx_sw_if_index,
752                                        int is_multi_worker)
753 {
754   clib_bihash_kv_16_8_t kv, value;
755   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
756   snat_interface_t *i;
757   snat_session_t *s;
758   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
759   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
760
761   /* src NAT check */
762   init_ed_k (&kv, ip->src_address.as_u32, src_port, ip->dst_address.as_u32,
763              dst_port, tx_fib_index, ip->protocol);
764   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
765     {
766       ASSERT (thread_index == ed_value_get_thread_index (&value));
767       s =
768         pool_elt_at_index (tsm->sessions,
769                            ed_value_get_session_index (&value));
770       return 1;
771     }
772
773   /* dst NAT check */
774   if (is_multi_worker &&
775       PREDICT_TRUE (!pool_is_free_index (
776         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index)))
777     {
778       nat_6t_t lookup;
779       lookup.fib_index = rx_fib_index;
780       lookup.proto = ip->protocol;
781       lookup.daddr.as_u32 = ip->src_address.as_u32;
782       lookup.dport = src_port;
783       lookup.saddr.as_u32 = ip->dst_address.as_u32;
784       lookup.sport = dst_port;
785       s = pool_elt_at_index (
786         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index);
787       if (PREDICT_TRUE (nat_6t_t_eq (&s->i2o.match, &lookup)))
788         {
789           goto skip_dst_nat_lookup;
790         }
791       s = NULL;
792     }
793
794   init_ed_k (&kv, ip->dst_address.as_u32, dst_port, ip->src_address.as_u32,
795              src_port, rx_fib_index, ip->protocol);
796   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
797     {
798       ASSERT (thread_index == ed_value_get_thread_index (&value));
799       s =
800         pool_elt_at_index (tsm->sessions,
801                            ed_value_get_session_index (&value));
802
803     skip_dst_nat_lookup:
804       if (na44_ed_is_fwd_bypass_session (s))
805         return 0;
806
807       /* hairpinning */
808       pool_foreach (i, sm->output_feature_interfaces)
809         {
810           if ((nat44_ed_is_interface_inside (i)) &&
811               (rx_sw_if_index == i->sw_if_index))
812             return 0;
813         }
814       return 1;
815     }
816
817   return 0;
818 }
819
820 static inline u32
821 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
822                           icmp46_header_t *icmp, u32 sw_if_index,
823                           u32 tx_sw_if_index, u32 rx_fib_index,
824                           vlib_node_runtime_t *node, u32 next, f64 now,
825                           u32 thread_index, snat_session_t **s_p,
826                           int is_multi_worker)
827 {
828   vlib_main_t *vm = vlib_get_main ();
829   u16 checksum;
830   int err;
831   snat_session_t *s = NULL;
832   u8 lookup_protocol = ip->protocol;
833   u16 lookup_sport, lookup_dport;
834   ip4_address_t lookup_saddr, lookup_daddr;
835
836   err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
837                                             &lookup_sport, &lookup_daddr,
838                                             &lookup_dport, &lookup_protocol);
839   if (err != 0)
840     {
841       b->error = node->errors[err];
842       return NAT_NEXT_DROP;
843     }
844
845   if (tx_sw_if_index != ~0)
846     {
847       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
848             sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
849             tx_sw_if_index, is_multi_worker)))
850         {
851           return next;
852         }
853     }
854   else
855     {
856       if (PREDICT_FALSE (nat44_ed_not_translate (
857             vm, node, sw_if_index, b, ip, IP_PROTOCOL_ICMP, rx_fib_index)))
858         {
859           return next;
860         }
861     }
862
863   if (PREDICT_FALSE (icmp_type_is_error_message (
864         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
865     {
866       b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
867       return NAT_NEXT_DROP;
868     }
869
870   next =
871     slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address, lookup_sport,
872                   lookup_dport, ip->protocol, rx_fib_index, tx_sw_if_index, &s,
873                   node, next, thread_index, vlib_time_now (vm));
874
875   if (NAT_NEXT_DROP == next)
876     goto out;
877
878   if (PREDICT_TRUE (!ip4_is_fragment (ip)))
879     {
880       ip_csum_t sum = ip_incremental_checksum_buffer (
881         vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b),
882         ntohs (ip->length) - ip4_header_bytes (ip), 0);
883       checksum = ~ip_csum_fold (sum);
884       if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff))
885         {
886           next = NAT_NEXT_DROP;
887           goto out;
888         }
889     }
890
891 out:
892   if (PREDICT_TRUE (next != NAT_NEXT_DROP && s))
893     {
894       /* Accounting */
895       nat44_session_update_counters (
896         s, now, vlib_buffer_length_in_chain (vm, b), thread_index);
897       /* Per-user LRU list maintenance */
898       nat44_session_update_lru (sm, s, thread_index);
899     }
900   *s_p = s;
901   return next;
902 }
903
904 static snat_session_t *
905 nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
906                                         ip4_header_t *ip, u32 rx_fib_index,
907                                         u32 thread_index, f64 now,
908                                         vlib_main_t *vm,
909                                         vlib_node_runtime_t *node)
910 {
911   clib_bihash_kv_16_8_t s_kv, s_value;
912   snat_static_mapping_t *m = NULL;
913   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
914   snat_session_t *s = NULL;
915   u32 tx_fib_index;
916   int i;
917   ip4_address_t new_src_addr = { 0 };
918   ip4_address_t new_dst_addr = ip->dst_address;
919
920   if (PREDICT_FALSE (
921         nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
922     {
923       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
924       nat_ipfix_logging_max_sessions (thread_index,
925                                       sm->max_translations_per_thread);
926       nat_elog_notice (sm, "maximum sessions exceeded");
927       return 0;
928     }
929
930   tx_fib_index = get_tx_fib_index (rx_fib_index, ip->dst_address);
931
932   // Try to find static mapping first
933   m = nat44_ed_sm_i2o_lookup (sm, ip->src_address, 0, rx_fib_index,
934                               ip->protocol);
935   if (m)
936     {
937       new_src_addr = m->external_addr;
938     }
939   else
940     {
941       pool_foreach (s, tsm->sessions)
942         {
943           if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
944             {
945               init_ed_k (&s_kv, s->out2in.addr.as_u32, 0,
946                          ip->dst_address.as_u32, 0, tx_fib_index,
947                          ip->protocol);
948               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
949                 {
950                   new_src_addr = s->out2in.addr;
951                 }
952               break;
953             }
954         }
955
956       if (!new_src_addr.as_u32)
957         {
958           for (i = 0; i < vec_len (sm->addresses); i++)
959             {
960               init_ed_k (&s_kv, sm->addresses[i].addr.as_u32, 0,
961                          ip->dst_address.as_u32, 0, tx_fib_index,
962                          ip->protocol);
963               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
964                 {
965                   new_src_addr = sm->addresses[i].addr;
966                 }
967             }
968         }
969     }
970
971   if (!new_src_addr.as_u32)
972     {
973       // could not allocate address for translation ...
974       return 0;
975     }
976
977   s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
978   if (!s)
979     {
980       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
981       nat_elog_warn (sm, "create NAT session failed");
982       return 0;
983     }
984
985   nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
986                         ip->dst_address, 0, rx_fib_index, ip->protocol);
987   nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
988   nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
989
990   // hairpinning?
991   int is_hairpinning = nat44_ed_external_sm_lookup (
992     sm, ip->dst_address, 0, ip->protocol, &new_dst_addr, NULL);
993   s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
994
995   nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
996   nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
997
998   nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
999                         tx_fib_index, ip->protocol);
1000   nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
1001   nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
1002   nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
1003
1004   s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1005   s->out2in.addr.as_u32 = new_src_addr.as_u32;
1006   s->out2in.fib_index = tx_fib_index;
1007   s->in2out.addr.as_u32 = ip->src_address.as_u32;
1008   s->in2out.fib_index = rx_fib_index;
1009   s->in2out.port = s->out2in.port = ip->protocol;
1010   if (m)
1011     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1012
1013   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
1014     {
1015       nat_elog_notice (sm, "in2out flow hash add failed");
1016       nat_ed_session_delete (sm, s, thread_index, 1);
1017       return NULL;
1018     }
1019
1020   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1))
1021     {
1022       nat_elog_notice (sm, "out2in flow hash add failed");
1023       nat_ed_session_delete (sm, s, thread_index, 1);
1024       return NULL;
1025     }
1026
1027   per_vrf_sessions_register_session (s, thread_index);
1028
1029   /* Accounting */
1030   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
1031                                  thread_index);
1032   /* Per-user LRU list maintenance */
1033   nat44_session_update_lru (sm, s, thread_index);
1034
1035   return s;
1036 }
1037
1038 static inline uword
1039 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
1040                                           vlib_node_runtime_t *node,
1041                                           vlib_frame_t *frame,
1042                                           int is_output_feature,
1043                                           int is_multi_worker)
1044 {
1045   u32 n_left_from, *from;
1046   snat_main_t *sm = &snat_main;
1047   f64 now = vlib_time_now (vm);
1048   u32 thread_index = vm->thread_index;
1049   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1050   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
1051     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
1052
1053   from = vlib_frame_vector_args (frame);
1054   n_left_from = frame->n_vectors;
1055
1056   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1057   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1058   vlib_get_buffers (vm, from, b, n_left_from);
1059
1060   while (n_left_from > 0)
1061     {
1062       vlib_buffer_t *b0;
1063       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1064       u32 tx_sw_if_index0;
1065       u32 cntr_sw_if_index0;
1066       ip_protocol_t proto0;
1067       ip4_header_t *ip0;
1068       snat_session_t *s0 = 0;
1069       clib_bihash_kv_16_8_t kv0 = { 0 }, value0;
1070       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1071       nat_6t_flow_t *f = 0;
1072       nat_6t_t lookup;
1073       int lookup_skipped = 0;
1074
1075       b0 = *b;
1076       b++;
1077
1078       /* Prefetch next iteration. */
1079       if (PREDICT_TRUE (n_left_from >= 2))
1080         {
1081           vlib_buffer_t *p2;
1082
1083           p2 = *b;
1084
1085           vlib_prefetch_buffer_header (p2, LOAD);
1086
1087           clib_prefetch_load (p2->data);
1088         }
1089
1090       if (is_output_feature)
1091         {
1092           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1093         }
1094
1095       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1096
1097       ip0 =
1098         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
1099
1100       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1101       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1102       cntr_sw_if_index0 =
1103         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1104       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1105                                                            rx_sw_if_index0);
1106       lookup.fib_index = rx_fib_index0;
1107
1108       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1109         {
1110           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1111           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1112                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1113                                        0);
1114           next[0] = NAT_NEXT_ICMP_ERROR;
1115           goto trace0;
1116         }
1117
1118       proto0 = ip0->protocol;
1119
1120       if (is_output_feature)
1121         {
1122           if (PREDICT_FALSE
1123               (nat_not_translate_output_feature_fwd
1124                (sm, ip0, thread_index, now, vm, b0)))
1125             goto trace0;
1126         }
1127
1128       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
1129         {
1130           if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1131                 ICMP4_echo_request &&
1132               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1133                 ICMP4_echo_reply &&
1134               !icmp_type_is_error_message (
1135                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
1136             {
1137               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
1138               next[0] = NAT_NEXT_DROP;
1139               goto trace0;
1140             }
1141           int err = nat_get_icmp_session_lookup_values (
1142             b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
1143             &lookup.dport, &lookup.proto);
1144           if (err != 0)
1145             {
1146               b0->error = node->errors[err];
1147               next[0] = NAT_NEXT_DROP;
1148               goto trace0;
1149             }
1150         }
1151       else
1152         {
1153           lookup.proto = ip0->protocol;
1154           lookup.saddr.as_u32 = ip0->src_address.as_u32;
1155           lookup.daddr.as_u32 = ip0->dst_address.as_u32;
1156           lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
1157           lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
1158         }
1159
1160       /* there might be a stashed index in vnet_buffer2 from handoff or
1161        * classify node, see if it can be used */
1162       if (is_multi_worker &&
1163           !pool_is_free_index (tsm->sessions,
1164                                vnet_buffer2 (b0)->nat.cached_session_index))
1165         {
1166           s0 = pool_elt_at_index (tsm->sessions,
1167                                   vnet_buffer2 (b0)->nat.cached_session_index);
1168           if (PREDICT_TRUE (
1169                 nat_6t_t_eq (&s0->i2o.match, &lookup)
1170                 // for some hairpinning cases there are two "i2i" flows instead
1171                 // of i2o and o2i as both hosts are on inside
1172                 || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1173                     nat_6t_t_eq (&s0->o2i.match, &lookup))))
1174             {
1175               /* yes, this is the droid we're looking for */
1176               lookup_skipped = 1;
1177               goto skip_lookup;
1178             }
1179           s0 = NULL;
1180         }
1181
1182       init_ed_k (&kv0, lookup.saddr.as_u32, lookup.sport, lookup.daddr.as_u32,
1183                  lookup.dport, lookup.fib_index, lookup.proto);
1184
1185       // lookup flow
1186       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1187         {
1188           // flow does not exist go slow path
1189           next[0] = def_slow;
1190           goto trace0;
1191         }
1192
1193       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1194       s0 =
1195         pool_elt_at_index (tsm->sessions,
1196                            ed_value_get_session_index (&value0));
1197
1198     skip_lookup:
1199
1200       ASSERT (thread_index == s0->thread_index);
1201
1202       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1203         {
1204           // session is closed, go slow path
1205           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1206           nat_ed_session_delete (sm, s0, thread_index, 1);
1207           s0 = 0;
1208           next[0] = def_slow;
1209           goto trace0;
1210         }
1211
1212       // drop if session expired
1213       u64 sess_timeout_time;
1214       sess_timeout_time =
1215         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1216       if (now >= sess_timeout_time)
1217         {
1218           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1219           nat_ed_session_delete (sm, s0, thread_index, 1);
1220           s0 = 0;
1221           // session is closed, go slow path
1222           next[0] = def_slow;
1223           goto trace0;
1224         }
1225
1226       b0->flags |= VNET_BUFFER_F_IS_NATED;
1227
1228       if (nat_6t_t_eq (&s0->i2o.match, &lookup))
1229         {
1230           f = &s0->i2o;
1231         }
1232       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1233                nat_6t_t_eq (&s0->o2i.match, &lookup))
1234         {
1235           f = &s0->o2i;
1236         }
1237       else
1238         {
1239           translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
1240           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1241           nat_ed_session_delete (sm, s0, thread_index, 1);
1242           s0 = 0;
1243           next[0] = NAT_NEXT_DROP;
1244           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1245           goto trace0;
1246         }
1247
1248       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1249           (translation_error = nat_6t_flow_buf_translate_i2o (
1250              vm, sm, b0, ip0, f, proto0, is_output_feature)))
1251         {
1252           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1253           nat_ed_session_delete (sm, s0, thread_index, 1);
1254           s0 = 0;
1255           next[0] = NAT_NEXT_DROP;
1256           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1257           goto trace0;
1258         }
1259
1260       switch (proto0)
1261         {
1262         case IP_PROTOCOL_TCP:
1263           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
1264                                          thread_index, cntr_sw_if_index0, 1);
1265           nat44_set_tcp_session_state_i2o (
1266             sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
1267             thread_index);
1268           break;
1269         case IP_PROTOCOL_UDP:
1270           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
1271                                          thread_index, cntr_sw_if_index0, 1);
1272           break;
1273         case IP_PROTOCOL_ICMP:
1274           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
1275                                          thread_index, cntr_sw_if_index0, 1);
1276           break;
1277         default:
1278           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
1279                                          thread_index, cntr_sw_if_index0, 1);
1280           break;
1281         }
1282
1283       /* Accounting */
1284       nat44_session_update_counters (s0, now,
1285                                      vlib_buffer_length_in_chain (vm, b0),
1286                                      thread_index);
1287       /* Per-user LRU list maintenance */
1288       nat44_session_update_lru (sm, s0, thread_index);
1289
1290     trace0:
1291       if (PREDICT_FALSE
1292           ((node->flags & VLIB_NODE_FLAG_TRACE)
1293            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1294         {
1295           nat_in2out_ed_trace_t *t =
1296             vlib_add_trace (vm, node, b0, sizeof (*t));
1297           t->sw_if_index = rx_sw_if_index0;
1298           t->next_index = next[0];
1299           t->is_slow_path = 0;
1300           t->translation_error = translation_error;
1301           t->lookup_skipped = lookup_skipped;
1302           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1303
1304           if (s0)
1305             {
1306               t->session_index = s0 - tsm->sessions;
1307               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1308               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1309               t->translation_via_i2of = (&s0->i2o == f);
1310               t->tcp_state = s0->tcp_state;
1311             }
1312           else
1313             {
1314               t->session_index = ~0;
1315             }
1316         }
1317
1318       if (next[0] == NAT_NEXT_DROP)
1319         {
1320           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
1321                                          thread_index, cntr_sw_if_index0, 1);
1322         }
1323
1324       n_left_from--;
1325       next++;
1326     }
1327
1328   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1329                                frame->n_vectors);
1330   return frame->n_vectors;
1331 }
1332
1333 static inline uword
1334 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
1335                                           vlib_node_runtime_t *node,
1336                                           vlib_frame_t *frame,
1337                                           int is_output_feature,
1338                                           int is_multi_worker)
1339 {
1340   u32 n_left_from, *from;
1341   snat_main_t *sm = &snat_main;
1342   f64 now = vlib_time_now (vm);
1343   u32 thread_index = vm->thread_index;
1344   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1345
1346   from = vlib_frame_vector_args (frame);
1347   n_left_from = frame->n_vectors;
1348
1349   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1350   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1351   vlib_get_buffers (vm, from, b, n_left_from);
1352
1353   while (n_left_from > 0)
1354     {
1355       vlib_buffer_t *b0;
1356       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1357       u32 tx_sw_if_index0;
1358       u32 cntr_sw_if_index0;
1359       ip_protocol_t proto0;
1360       ip4_header_t *ip0;
1361       udp_header_t *udp0;
1362       icmp46_header_t *icmp0;
1363       snat_session_t *s0 = 0;
1364       clib_bihash_kv_16_8_t kv0 = { 0 }, value0;
1365       int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1366
1367       b0 = *b;
1368
1369       if (is_output_feature)
1370         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1371
1372       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1373
1374       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1375                               iph_offset0);
1376
1377       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1378       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1379       cntr_sw_if_index0 =
1380         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1381       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1382                                                            rx_sw_if_index0);
1383
1384       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1385         {
1386           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1387           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1388                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1389                                        0);
1390           next[0] = NAT_NEXT_ICMP_ERROR;
1391           goto trace0;
1392         }
1393
1394       udp0 = ip4_next_header (ip0);
1395       icmp0 = (icmp46_header_t *) udp0;
1396       proto0 = ip0->protocol;
1397
1398       if (PREDICT_FALSE (nat44_ed_is_unk_proto (proto0)))
1399         {
1400           s0 = nat44_ed_in2out_slowpath_unknown_proto (
1401             sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
1402           if (!s0)
1403             next[0] = NAT_NEXT_DROP;
1404
1405           if (NAT_NEXT_DROP != next[0] && s0 &&
1406               NAT_ED_TRNSL_ERR_SUCCESS !=
1407                 (translation_error = nat_6t_flow_buf_translate_i2o (
1408                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1409             {
1410               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1411               nat_ed_session_delete (sm, s0, thread_index, 1);
1412               s0 = 0;
1413               next[0] = NAT_NEXT_DROP;
1414               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1415               goto trace0;
1416             }
1417
1418           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
1419                                          thread_index, cntr_sw_if_index0, 1);
1420           goto trace0;
1421         }
1422
1423       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
1424         {
1425           next[0] = icmp_in2out_ed_slow_path (
1426             sm, b0, ip0, icmp0, rx_sw_if_index0, tx_sw_if_index0,
1427             rx_fib_index0, node, next[0], now, thread_index, &s0,
1428             is_multi_worker);
1429           if (NAT_NEXT_DROP != next[0] && s0 &&
1430               NAT_ED_TRNSL_ERR_SUCCESS !=
1431                 (translation_error = nat_6t_flow_buf_translate_i2o (
1432                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1433             {
1434               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1435               nat_ed_session_delete (sm, s0, thread_index, 1);
1436               s0 = 0;
1437               next[0] = NAT_NEXT_DROP;
1438               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1439               goto trace0;
1440             }
1441
1442           if (NAT_NEXT_DROP != next[0])
1443             {
1444               vlib_increment_simple_counter (
1445                 &sm->counters.slowpath.in2out.icmp, thread_index,
1446                 cntr_sw_if_index0, 1);
1447             }
1448           goto trace0;
1449         }
1450
1451       init_ed_k (
1452         &kv0, ip0->src_address.as_u32, vnet_buffer (b0)->ip.reass.l4_src_port,
1453         ip0->dst_address.as_u32, vnet_buffer (b0)->ip.reass.l4_dst_port,
1454         rx_fib_index0, ip0->protocol);
1455       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1456         {
1457           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1458           s0 =
1459             pool_elt_at_index (tsm->sessions,
1460                                ed_value_get_session_index (&value0));
1461         }
1462
1463       if (!s0)
1464         {
1465           if (is_output_feature)
1466             {
1467               if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
1468                     sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1469                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1470                     rx_sw_if_index0, tx_sw_if_index0, is_multi_worker)))
1471                 goto trace0;
1472
1473               /*
1474                * Send DHCP packets to the ipv4 stack, or we won't
1475                * be able to use dhcp client on the outside interface
1476                */
1477               if (PREDICT_FALSE (
1478                     proto0 == IP_PROTOCOL_UDP &&
1479                     (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1480                      clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)) &&
1481                     ip0->dst_address.as_u32 == 0xffffffff))
1482                 goto trace0;
1483             }
1484           else
1485             {
1486               if (PREDICT_FALSE (
1487                     nat44_ed_not_translate (vm, node, rx_sw_if_index0, b0, ip0,
1488                                             proto0, rx_fib_index0)))
1489                 goto trace0;
1490             }
1491
1492           next[0] =
1493             slow_path_ed (vm, sm, b0, ip0->src_address, ip0->dst_address,
1494                           vnet_buffer (b0)->ip.reass.l4_src_port,
1495                           vnet_buffer (b0)->ip.reass.l4_dst_port,
1496                           ip0->protocol, rx_fib_index0, tx_sw_if_index0, &s0,
1497                           node, next[0], thread_index, now);
1498
1499           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1500             goto trace0;
1501
1502           if (PREDICT_FALSE (!s0))
1503             goto trace0;
1504
1505         }
1506
1507       b0->flags |= VNET_BUFFER_F_IS_NATED;
1508
1509       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1510           (translation_error = nat_6t_flow_buf_translate_i2o (
1511              vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1512         {
1513           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1514           nat_ed_session_delete (sm, s0, thread_index, 1);
1515           s0 = 0;
1516           next[0] = NAT_NEXT_DROP;
1517           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1518           goto trace0;
1519         }
1520
1521       if (PREDICT_TRUE (proto0 == IP_PROTOCOL_TCP))
1522         {
1523           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
1524                                          thread_index, cntr_sw_if_index0, 1);
1525           nat44_set_tcp_session_state_i2o (
1526             sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
1527             thread_index);
1528         }
1529       else
1530         {
1531           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
1532                                          thread_index, cntr_sw_if_index0, 1);
1533         }
1534
1535       /* Accounting */
1536       nat44_session_update_counters (s0, now,
1537                                      vlib_buffer_length_in_chain
1538                                      (vm, b0), thread_index);
1539       /* Per-user LRU list maintenance */
1540       nat44_session_update_lru (sm, s0, thread_index);
1541
1542     trace0:
1543       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1544                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1545         {
1546           nat_in2out_ed_trace_t *t =
1547             vlib_add_trace (vm, node, b0, sizeof (*t));
1548           t->sw_if_index = rx_sw_if_index0;
1549           t->next_index = next[0];
1550           t->is_slow_path = 1;
1551           t->translation_error = translation_error;
1552           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1553
1554           if (s0)
1555             {
1556               t->session_index = s0 - tsm->sessions;
1557               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1558               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1559               t->translation_via_i2of = 1;
1560               t->tcp_state = s0->tcp_state;
1561             }
1562
1563           else
1564             {
1565               t->session_index = ~0;
1566             }
1567         }
1568
1569       if (next[0] == NAT_NEXT_DROP)
1570         {
1571           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
1572                                          thread_index, cntr_sw_if_index0, 1);
1573         }
1574
1575       n_left_from--;
1576       next++;
1577       b++;
1578     }
1579
1580   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1581                                frame->n_vectors);
1582
1583   return frame->n_vectors;
1584 }
1585
1586 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1587                                      vlib_node_runtime_t * node,
1588                                      vlib_frame_t * frame)
1589 {
1590   if (snat_main.num_workers > 1)
1591     {
1592       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1);
1593     }
1594   else
1595     {
1596       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0);
1597     }
1598 }
1599
1600 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1601   .name = "nat44-ed-in2out",
1602   .vector_size = sizeof (u32),
1603   .sibling_of = "nat-default",
1604   .format_trace = format_nat_in2out_ed_trace,
1605   .type = VLIB_NODE_TYPE_INTERNAL,
1606   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1607   .error_strings = nat_in2out_ed_error_strings,
1608   .runtime_data_bytes = sizeof (snat_runtime_t),
1609 };
1610
1611 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1612                                             vlib_node_runtime_t * node,
1613                                             vlib_frame_t * frame)
1614 {
1615   if (snat_main.num_workers > 1)
1616     {
1617       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1);
1618     }
1619   else
1620     {
1621       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0);
1622     }
1623 }
1624
1625 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1626   .name = "nat44-ed-in2out-output",
1627   .vector_size = sizeof (u32),
1628   .sibling_of = "nat-default",
1629   .format_trace = format_nat_in2out_ed_trace,
1630   .type = VLIB_NODE_TYPE_INTERNAL,
1631   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1632   .error_strings = nat_in2out_ed_error_strings,
1633   .runtime_data_bytes = sizeof (snat_runtime_t),
1634 };
1635
1636 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1637                                               vlib_node_runtime_t *
1638                                               node, vlib_frame_t * frame)
1639 {
1640   if (snat_main.num_workers > 1)
1641     {
1642       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 1);
1643     }
1644   else
1645     {
1646       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 0);
1647     }
1648 }
1649
1650 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1651   .name = "nat44-ed-in2out-slowpath",
1652   .vector_size = sizeof (u32),
1653   .sibling_of = "nat-default",
1654   .format_trace = format_nat_in2out_ed_trace,
1655   .type = VLIB_NODE_TYPE_INTERNAL,
1656   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1657   .error_strings = nat_in2out_ed_error_strings,
1658   .runtime_data_bytes = sizeof (snat_runtime_t),
1659 };
1660
1661 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1662                                                      vlib_node_runtime_t
1663                                                      * node,
1664                                                      vlib_frame_t * frame)
1665 {
1666   if (snat_main.num_workers > 1)
1667     {
1668       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 1);
1669     }
1670   else
1671     {
1672       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 0);
1673     }
1674 }
1675
1676 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1677   .name = "nat44-ed-in2out-output-slowpath",
1678   .vector_size = sizeof (u32),
1679   .sibling_of = "nat-default",
1680   .format_trace = format_nat_in2out_ed_trace,
1681   .type = VLIB_NODE_TYPE_INTERNAL,
1682   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1683   .error_strings = nat_in2out_ed_error_strings,
1684   .runtime_data_bytes = sizeof (snat_runtime_t),
1685 };
1686
1687 static u8 *
1688 format_nat_pre_trace (u8 * s, va_list * args)
1689 {
1690   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1691   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1692   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1693   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1694                  t->arc_next_index);
1695 }
1696
1697 VLIB_NODE_FN (nat_pre_in2out_node)
1698   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1699 {
1700   return nat_pre_node_fn_inline (vm, node, frame,
1701                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1702 }
1703
1704 VLIB_NODE_FN (nat_pre_in2out_output_node)
1705   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1706 {
1707   return nat_pre_node_fn_inline (vm, node, frame,
1708                                  NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
1709 }
1710
1711 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1712   .name = "nat-pre-in2out",
1713   .vector_size = sizeof (u32),
1714   .sibling_of = "nat-default",
1715   .format_trace = format_nat_pre_trace,
1716   .type = VLIB_NODE_TYPE_INTERNAL,
1717   .n_errors = 0,
1718 };
1719
1720 VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
1721   .name = "nat-pre-in2out-output",
1722   .vector_size = sizeof (u32),
1723   .sibling_of = "nat-default",
1724   .format_trace = format_nat_pre_trace,
1725   .type = VLIB_NODE_TYPE_INTERNAL,
1726   .n_errors = 0,
1727 };
1728
1729 /*
1730  * fd.io coding-style-patch-verification: ON
1731  *
1732  * Local Variables:
1733  * eval: (c-set-style "gnu")
1734  * End:
1735  */