4ba51bcaea6844a160b21b8a0a6b6cec3e9063fa
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_in2out.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27
28 #include <nat/lib/nat_inlines.h>
29 #include <nat/lib/ipfix_logging.h>
30
31 #include <nat/nat44-ed/nat44_ed.h>
32 #include <nat/nat44-ed/nat44_ed_inlines.h>
33
34 static char *nat_in2out_ed_error_strings[] = {
35 #define _(sym,string) string,
36   foreach_nat_in2out_ed_error
37 #undef _
38 };
39
40 typedef struct
41 {
42   u32 sw_if_index;
43   u32 next_index;
44   u32 session_index;
45   nat_translation_error_e translation_error;
46   nat_6t_flow_t i2of;
47   nat_6t_flow_t o2if;
48   clib_bihash_kv_16_8_t search_key;
49   u8 is_slow_path;
50   u8 translation_via_i2of;
51   u8 lookup_skipped;
52   u8 tcp_state;
53 } nat_in2out_ed_trace_t;
54
55 static u8 *
56 format_nat_in2out_ed_trace (u8 * s, va_list * args)
57 {
58   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
59   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
60   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
61   char *tag;
62
63   tag =
64     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
65     "NAT44_IN2OUT_ED_FAST_PATH";
66
67   s = format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
68               t->next_index);
69   if (~0 != t->session_index)
70     {
71       s = format (s, ", session %d, translation result '%U' via %s",
72                   t->session_index, format_nat_ed_translation_error,
73                   t->translation_error,
74                   t->translation_via_i2of ? "i2of" : "o2if");
75       s = format (s, "\n  i2of %U", format_nat_6t_flow, &t->i2of);
76       s = format (s, "\n  o2if %U", format_nat_6t_flow, &t->o2if);
77     }
78   if (!t->is_slow_path)
79     {
80       if (t->lookup_skipped)
81         {
82           s = format (s, "\n  lookup skipped - cached session index used");
83         }
84       else
85         {
86           s = format (s, "\n  search key %U", format_ed_session_kvp,
87                       &t->search_key);
88         }
89     }
90   if (IP_PROTOCOL_TCP == t->i2of.match.proto)
91     {
92       s = format (s, "\n  TCP state: %U", format_nat44_ed_tcp_state,
93                   t->tcp_state);
94     }
95
96   return s;
97 }
98
99 static int
100 nat_ed_alloc_addr_and_port_with_snat_address (
101   snat_main_t *sm, u8 proto, u32 thread_index, snat_address_t *a,
102   u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
103   ip4_address_t *outside_addr, u16 *outside_port)
104 {
105   const u16 port_thread_offset =
106     (port_per_thread * snat_thread_index) + ED_USER_PORT_OFFSET;
107
108   /* Backup original match in case of failure */
109   const nat_6t_t match = s->o2i.match;
110
111   s->o2i.match.daddr = a->addr;
112   /* first try port suggested by caller */
113   u16 port = clib_net_to_host_u16 (*outside_port);
114   u16 port_offset = port - port_thread_offset;
115   if (port < port_thread_offset ||
116       port >= port_thread_offset + port_per_thread)
117     {
118       /* need to pick a different port, suggested port doesn't fit in
119        * this thread's port range */
120       port_offset = snat_random_port (0, port_per_thread - 1);
121       port = port_thread_offset + port_offset;
122     }
123   u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
124   do
125     {
126       if (IP_PROTOCOL_ICMP == proto)
127         {
128           s->o2i.match.sport = clib_host_to_net_u16 (port);
129         }
130       s->o2i.match.dport = clib_host_to_net_u16 (port);
131       if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
132         {
133           *outside_addr = a->addr;
134           *outside_port = clib_host_to_net_u16 (port);
135           return 0;
136         }
137       port_offset = snat_random_port (0, port_per_thread - 1);
138       port = port_thread_offset + port_offset;
139       --attempts;
140     }
141   while (attempts > 0);
142
143   /* Revert match */
144   s->o2i.match = match;
145   return 1;
146 }
147
148 static int
149 nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index,
150                             u32 tx_sw_if_index, u32 nat_proto,
151                             u32 thread_index, ip4_address_t s_addr,
152                             ip4_address_t d_addr, u32 snat_thread_index,
153                             snat_session_t *s, ip4_address_t *outside_addr,
154                             u16 *outside_port)
155 {
156   if (vec_len (sm->addresses) > 0)
157     {
158       u32 s_addr_offset = (s_addr.as_u32 + (s_addr.as_u32 >> 8) +
159                            (s_addr.as_u32 >> 16) + (s_addr.as_u32 >> 24)) %
160                           vec_len (sm->addresses);
161       snat_address_t *a, *ja = 0, *ra = 0, *ba = 0;
162       int i;
163
164       // output feature
165       if (tx_sw_if_index != ~0)
166         {
167           for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
168             {
169               a = sm->addresses + i;
170               if (a->fib_index == rx_fib_index)
171                 {
172                   if (a->sw_if_index == tx_sw_if_index)
173                     {
174                       if ((a->addr_len != ~0) &&
175                           (a->net.as_u32 ==
176                            (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
177
178                         {
179                           return nat_ed_alloc_addr_and_port_with_snat_address (
180                             sm, nat_proto, thread_index, a,
181                             sm->port_per_thread, snat_thread_index, s,
182                             outside_addr, outside_port);
183                         }
184                       ra = a;
185                     }
186                   ja = a;
187                 }
188               else if (a->fib_index == ~0)
189                 {
190                   ba = a;
191                 }
192             }
193           for (i = 0; i < s_addr_offset; ++i)
194             {
195               a = sm->addresses + i;
196               if (a->fib_index == rx_fib_index)
197                 {
198                   if (a->sw_if_index == tx_sw_if_index)
199                     {
200                       if ((a->addr_len != ~0) &&
201                           (a->net.as_u32 ==
202                            (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
203
204                         {
205                           return nat_ed_alloc_addr_and_port_with_snat_address (
206                             sm, nat_proto, thread_index, a,
207                             sm->port_per_thread, snat_thread_index, s,
208                             outside_addr, outside_port);
209                         }
210                       ra = a;
211                     }
212                   ja = a;
213                 }
214               else if (a->fib_index == ~0)
215                 {
216                   ba = a;
217                 }
218             }
219           if (ra)
220             {
221               return nat_ed_alloc_addr_and_port_with_snat_address (
222                 sm, nat_proto, thread_index, ra, sm->port_per_thread,
223                 snat_thread_index, s, outside_addr, outside_port);
224             }
225         }
226       else
227         {
228           // first try nat pool addresses to sw interface addreses mappings
229           for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
230             {
231               a = sm->addresses + i;
232               if (a->fib_index == rx_fib_index)
233                 {
234                   if ((a->addr_len != ~0) &&
235                       (a->net.as_u32 ==
236                        (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
237                     {
238                       return nat_ed_alloc_addr_and_port_with_snat_address (
239                         sm, nat_proto, thread_index, a, sm->port_per_thread,
240                         snat_thread_index, s, outside_addr, outside_port);
241                     }
242                   ja = a;
243                 }
244               else if (a->fib_index == ~0)
245                 {
246                   ba = a;
247                 }
248             }
249           for (i = 0; i < s_addr_offset; ++i)
250             {
251               a = sm->addresses + i;
252               if (a->fib_index == rx_fib_index)
253                 {
254                   if ((a->addr_len != ~0) &&
255                       (a->net.as_u32 ==
256                        (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
257                     {
258                       return nat_ed_alloc_addr_and_port_with_snat_address (
259                         sm, nat_proto, thread_index, a, sm->port_per_thread,
260                         snat_thread_index, s, outside_addr, outside_port);
261                     }
262                   ja = a;
263                 }
264               else if (a->fib_index == ~0)
265                 {
266                   ba = a;
267                 }
268             }
269         }
270
271       if (ja || ba)
272         {
273           a = ja ? ja : ba;
274           return nat_ed_alloc_addr_and_port_with_snat_address (
275             sm, nat_proto, thread_index, a, sm->port_per_thread,
276             snat_thread_index, s, outside_addr, outside_port);
277         }
278     }
279   /* Totally out of translations to use... */
280   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
281   return 1;
282 }
283
284 static_always_inline int
285 nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
286                              u16 match_port, ip_protocol_t match_protocol,
287                              ip4_address_t *daddr, u16 *dport)
288 {
289   snat_static_mapping_t *m =
290     nat44_ed_sm_o2i_lookup (sm, match_addr, match_port, 0, match_protocol);
291   if (!m)
292     {
293       /* Try address only mapping */
294       m = nat44_ed_sm_o2i_lookup (sm, match_addr, 0, 0, 0);
295       if (!m)
296         return 0;
297     }
298   *daddr = m->local_addr;
299   if (dport)
300     {
301       /* Address only mapping doesn't change port */
302       *dport = is_sm_addr_only (m->flags) ? match_port : m->local_port;
303     }
304   return 1;
305 }
306
307 static_always_inline vrf_table_t *
308 get_vrf_table_by_fib (u32 fib_index)
309 {
310   snat_main_t *sm = &snat_main;
311   vrf_table_t *t;
312
313   pool_foreach (t, sm->vrf_tables)
314     {
315       if (fib_index == t->table_fib_index)
316         {
317           return t;
318         }
319     }
320
321   return 0;
322 }
323
324 static_always_inline u32
325 get_tx_fib_index (u32 rx_fib_index, ip4_address_t addr)
326 {
327   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
328   fib_prefix_t pfx = {
329     .fp_proto = FIB_PROTOCOL_IP4,
330     .fp_len = 32,
331     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
332     ,
333   };
334
335   snat_main_t *sm = &snat_main;
336   vrf_table_t *t = get_vrf_table_by_fib (rx_fib_index);
337   // default to rx fib
338   u32 tx_fib_index = rx_fib_index;
339
340   if (0 != t)
341     {
342       // managed routes to other fibs
343       vrf_route_t *r;
344       pool_foreach (r, t->routes)
345         {
346           fei = fib_table_lookup (r->fib_index, &pfx);
347           if ((FIB_NODE_INDEX_INVALID != fei) &&
348               (~0 != fib_entry_get_resolving_interface (fei)))
349             {
350               tx_fib_index = r->fib_index;
351               break;
352             }
353         }
354     }
355   else
356     {
357       // default to configured fib
358       tx_fib_index = sm->outside_fib_index;
359
360       // default routes to other fibs
361       nat_fib_t *f;
362       vec_foreach (f, sm->outside_fibs)
363         {
364           fei = fib_table_lookup (f->fib_index, &pfx);
365           if ((FIB_NODE_INDEX_INVALID != fei) &&
366               (~0 != fib_entry_get_resolving_interface (fei)))
367             {
368               tx_fib_index = f->fib_index;
369               break;
370             }
371         }
372     }
373
374   return tx_fib_index;
375 }
376
377 static_always_inline int
378 is_destination_resolvable (u32 rx_fib_index, ip4_address_t addr)
379 {
380   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
381   fib_prefix_t pfx = {
382     .fp_proto = FIB_PROTOCOL_IP4,
383     .fp_len = 32,
384     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
385     ,
386   };
387
388   snat_main_t *sm = &snat_main;
389   vrf_table_t *t = get_vrf_table_by_fib (rx_fib_index);
390   u32 ii;
391
392   if (0 != t)
393     {
394       // managed routes to other fibs
395       vrf_route_t *r;
396       pool_foreach (r, t->routes)
397         {
398           fei = fib_table_lookup (r->fib_index, &pfx);
399           if ((FIB_NODE_INDEX_INVALID != fei) &&
400               (~0 != (ii = fib_entry_get_resolving_interface (fei))))
401             {
402               return 1;
403             }
404         }
405     }
406   else
407     {
408       // default routes to other fibs
409       nat_fib_t *f;
410       vec_foreach (f, sm->outside_fibs)
411         {
412           fei = fib_table_lookup (f->fib_index, &pfx);
413           if ((FIB_NODE_INDEX_INVALID != fei) &&
414               (~0 != (ii = fib_entry_get_resolving_interface (fei))))
415             {
416               snat_interface_t *i;
417               pool_foreach (i, sm->interfaces)
418                 {
419                   if ((nat44_ed_is_interface_outside (i)) &&
420                       (ii == i->sw_if_index))
421                     {
422                       return 1;
423                     }
424                 }
425             }
426         }
427     }
428
429   return 0;
430 }
431
432 static u32
433 slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
434               ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
435               u16 r_port, u8 proto, u32 rx_fib_index, u32 tx_sw_if_index,
436               snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
437               u32 thread_index, f64 now)
438 {
439   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
440   ip4_address_t outside_addr;
441   u16 outside_port;
442   u32 tx_fib_index;
443   u8 is_identity_nat = 0;
444
445   snat_session_t *s = NULL;
446   lb_nat_type_t lb = 0;
447   ip4_address_t daddr = r_addr;
448   u16 dport = r_port;
449
450   if (PREDICT_FALSE
451       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
452     {
453       if (!nat_lru_free_one (sm, thread_index, now))
454         {
455           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
456           nat_ipfix_logging_max_sessions (thread_index,
457                                           sm->max_translations_per_thread);
458           nat_elog_notice (sm, "maximum sessions exceeded");
459           return NAT_NEXT_DROP;
460         }
461     }
462
463   ip4_address_t sm_addr;
464   u16 sm_port;
465   u32 sm_fib_index;
466   int is_sm = 0;
467   // First try to match static mapping by local address and port
468   if (!snat_static_mapping_match (vm, l_addr, l_port, rx_fib_index, proto,
469                                   &sm_addr, &sm_port, &sm_fib_index, 0, 0, 0,
470                                   &lb, 0, &is_identity_nat, 0))
471     {
472       if (PREDICT_FALSE (is_identity_nat))
473         {
474           *sessionp = NULL;
475           return next;
476         }
477       is_sm = 1;
478     }
479
480   if (PREDICT_TRUE (proto == IP_PROTOCOL_TCP))
481     {
482       if (PREDICT_FALSE (!tcp_flags_is_init (
483             vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
484         {
485           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
486           return NAT_NEXT_DROP;
487         }
488     }
489
490   s = nat_ed_session_alloc (sm, thread_index, now, proto);
491   ASSERT (s);
492
493   tx_fib_index = get_tx_fib_index (rx_fib_index, r_addr);
494
495   if (!is_sm)
496     {
497       s->in2out.addr = l_addr;
498       s->in2out.port = l_port;
499       s->proto = proto;
500       s->in2out.fib_index = rx_fib_index;
501       s->out2in.fib_index = tx_fib_index;
502
503       // suggest using local port to allocation function
504       outside_port = l_port;
505
506       if (PREDICT_FALSE (nat44_ed_external_sm_lookup (sm, r_addr, r_port,
507                                                       proto, &daddr, &dport)))
508         {
509           s->flags |= SNAT_SESSION_FLAG_HAIRPINNING;
510         }
511
512       // destination addr/port updated with real values in
513       // nat_ed_alloc_addr_and_port
514       nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
515                             s->out2in.fib_index, proto);
516       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
517       if (IP_PROTOCOL_ICMP == proto)
518         {
519           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
520         }
521       else
522         {
523           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
524         }
525       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
526
527       if (nat_ed_alloc_addr_and_port (
528             sm, rx_fib_index, tx_sw_if_index, proto, thread_index, l_addr,
529             r_addr, tsm->snat_thread_index, s, &outside_addr, &outside_port))
530         {
531           nat_elog_notice (sm, "addresses exhausted");
532           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
533           nat_ed_session_delete (sm, s, thread_index, 1);
534           return NAT_NEXT_DROP;
535         }
536       s->out2in.addr = outside_addr;
537       s->out2in.port = outside_port;
538     }
539   else
540     {
541       // static mapping
542       s->out2in.addr = outside_addr = sm_addr;
543       s->out2in.port = outside_port = sm_port;
544       s->in2out.addr = l_addr;
545       s->in2out.port = l_port;
546       s->proto = proto;
547       s->in2out.fib_index = rx_fib_index;
548       s->out2in.fib_index = tx_fib_index;
549       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
550
551       // hairpinning?
552       int is_hairpinning = nat44_ed_external_sm_lookup (sm, r_addr, r_port,
553                                                         proto, &daddr, &dport);
554       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
555
556       if (IP_PROTOCOL_ICMP == proto)
557         {
558           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
559                                 sm_port, s->out2in.fib_index, proto);
560           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
561         }
562       else
563         {
564           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr,
565                                 sm_port, s->out2in.fib_index, proto);
566           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
567         }
568       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
569       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
570       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
571         {
572           nat_elog_notice (sm, "out2in key add failed");
573           goto error;
574         }
575     }
576
577   if (lb)
578     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
579   s->ext_host_addr = r_addr;
580   s->ext_host_port = r_port;
581
582   nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port,
583                         rx_fib_index, proto);
584   nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
585   nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);
586
587   if (IP_PROTOCOL_ICMP == proto)
588     {
589       nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
590     }
591   else
592     {
593       nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
594       nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
595     }
596   nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
597
598   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
599     {
600       nat_elog_notice (sm, "in2out key add failed");
601       goto error;
602     }
603
604   /* log NAT event */
605   nat_ipfix_logging_nat44_ses_create (
606     thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
607     s->in2out.port, s->out2in.port, s->in2out.fib_index);
608
609   nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
610                          s->in2out.port, &s->ext_host_nat_addr,
611                          s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
612                          &s->ext_host_addr, s->ext_host_port, s->proto, 0);
613
614   per_vrf_sessions_register_session (s, thread_index);
615
616   *sessionp = s;
617   return next;
618 error:
619   if (s)
620     {
621       nat_ed_session_delete (sm, s, thread_index, 1);
622     }
623   *sessionp = s = NULL;
624   return NAT_NEXT_DROP;
625 }
626
627 static_always_inline int
628 nat44_ed_not_translate (vlib_main_t *vm, vlib_node_runtime_t *node,
629                         u32 sw_if_index, vlib_buffer_t *b, ip4_header_t *ip,
630                         u32 proto, u32 rx_fib_index)
631 {
632   snat_main_t *sm = &snat_main;
633
634   clib_bihash_kv_16_8_t kv, value;
635   ip4_address_t placeholder_addr;
636   u32 placeholder_fib_index;
637   u16 placeholder_port;
638
639   init_ed_k (&kv, ip->dst_address.as_u32,
640              vnet_buffer (b)->ip.reass.l4_dst_port, ip->src_address.as_u32,
641              vnet_buffer (b)->ip.reass.l4_src_port, sm->outside_fib_index,
642              ip->protocol);
643
644   // do nat if active session or is static mapping
645   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value) ||
646       !snat_static_mapping_match (
647         vm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
648         sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
649         &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
650     {
651       return 0;
652     }
653
654   // do not nat if forwarding enabled
655   if (sm->forwarding_enabled)
656     {
657       return 1;
658     }
659
660   // do not nat packet aimed at the interface address
661   if (PREDICT_FALSE (
662         is_interface_addr (sm, node, sw_if_index, ip->dst_address.as_u32)))
663     {
664       return 1;
665     }
666
667   // do nat packets with resolvable destination
668   // destination can be resolved either by:
669   // a) vrf routing table entry
670   // b) (non output feature) outside interface fib
671   if (is_destination_resolvable (rx_fib_index, ip->dst_address))
672     {
673       return 0;
674     }
675
676   return 1;
677 }
678
679 static_always_inline int
680 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
681                                       u32 thread_index, f64 now,
682                                       vlib_main_t * vm, vlib_buffer_t * b)
683 {
684   clib_bihash_kv_16_8_t kv, value;
685   snat_session_t *s = 0;
686   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
687
688   if (!sm->forwarding_enabled)
689     return 0;
690
691   if (ip->protocol == IP_PROTOCOL_ICMP)
692     {
693       ip4_address_t lookup_saddr, lookup_daddr;
694       u16 lookup_sport, lookup_dport;
695       u8 lookup_protocol;
696       if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
697                                               &lookup_sport, &lookup_daddr,
698                                               &lookup_dport, &lookup_protocol))
699         return 0;
700       init_ed_k (&kv, lookup_saddr.as_u32, lookup_sport, lookup_daddr.as_u32,
701                  lookup_dport, 0, lookup_protocol);
702     }
703   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
704     {
705       init_ed_k (&kv, ip->src_address.as_u32,
706                  vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
707                  vnet_buffer (b)->ip.reass.l4_dst_port, 0, ip->protocol);
708     }
709   else
710     {
711       init_ed_k (&kv, ip->src_address.as_u32, 0, ip->dst_address.as_u32, 0, 0,
712                  ip->protocol);
713     }
714
715   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
716     {
717       ASSERT (thread_index == ed_value_get_thread_index (&value));
718       s =
719         pool_elt_at_index (tsm->sessions,
720                            ed_value_get_session_index (&value));
721
722       if (na44_ed_is_fwd_bypass_session (s))
723         {
724           if (ip->protocol == IP_PROTOCOL_TCP)
725             {
726               nat44_set_tcp_session_state_i2o (
727                 sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags,
728                 thread_index);
729             }
730           /* Accounting */
731           nat44_session_update_counters (s, now,
732                                          vlib_buffer_length_in_chain (vm, b),
733                                          thread_index);
734           /* Per-user LRU list maintenance */
735           nat44_session_update_lru (sm, s, thread_index);
736           return 1;
737         }
738       else
739         return 0;
740     }
741
742   return 0;
743 }
744
745 static_always_inline int
746 nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
747                                        ip4_header_t *ip, u16 src_port,
748                                        u16 dst_port, u32 thread_index,
749                                        u32 rx_sw_if_index, u32 tx_sw_if_index,
750                                        int is_multi_worker)
751 {
752   clib_bihash_kv_16_8_t kv, value;
753   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
754   snat_interface_t *i;
755   snat_session_t *s;
756   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
757   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
758
759   /* src NAT check */
760   init_ed_k (&kv, ip->src_address.as_u32, src_port, ip->dst_address.as_u32,
761              dst_port, tx_fib_index, ip->protocol);
762   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
763     {
764       ASSERT (thread_index == ed_value_get_thread_index (&value));
765       s =
766         pool_elt_at_index (tsm->sessions,
767                            ed_value_get_session_index (&value));
768       return 1;
769     }
770
771   /* dst NAT check */
772   if (is_multi_worker &&
773       PREDICT_TRUE (!pool_is_free_index (
774         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index)))
775     {
776       nat_6t_t lookup;
777       lookup.fib_index = rx_fib_index;
778       lookup.proto = ip->protocol;
779       lookup.daddr.as_u32 = ip->src_address.as_u32;
780       lookup.dport = src_port;
781       lookup.saddr.as_u32 = ip->dst_address.as_u32;
782       lookup.sport = dst_port;
783       s = pool_elt_at_index (
784         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index);
785       if (PREDICT_TRUE (nat_6t_t_eq (&s->i2o.match, &lookup)))
786         {
787           goto skip_dst_nat_lookup;
788         }
789       s = NULL;
790     }
791
792   init_ed_k (&kv, ip->dst_address.as_u32, dst_port, ip->src_address.as_u32,
793              src_port, rx_fib_index, ip->protocol);
794   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
795     {
796       ASSERT (thread_index == ed_value_get_thread_index (&value));
797       s =
798         pool_elt_at_index (tsm->sessions,
799                            ed_value_get_session_index (&value));
800
801     skip_dst_nat_lookup:
802       if (na44_ed_is_fwd_bypass_session (s))
803         return 0;
804
805       /* hairpinning */
806       pool_foreach (i, sm->output_feature_interfaces)
807         {
808           if ((nat44_ed_is_interface_inside (i)) &&
809               (rx_sw_if_index == i->sw_if_index))
810             return 0;
811         }
812       return 1;
813     }
814
815   return 0;
816 }
817
818 static inline u32
819 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
820                           icmp46_header_t *icmp, u32 sw_if_index,
821                           u32 tx_sw_if_index, u32 rx_fib_index,
822                           vlib_node_runtime_t *node, u32 next, f64 now,
823                           u32 thread_index, snat_session_t **s_p,
824                           int is_multi_worker)
825 {
826   vlib_main_t *vm = vlib_get_main ();
827   u16 checksum;
828   int err;
829   snat_session_t *s = NULL;
830   u8 lookup_protocol = ip->protocol;
831   u16 lookup_sport, lookup_dport;
832   ip4_address_t lookup_saddr, lookup_daddr;
833
834   err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
835                                             &lookup_sport, &lookup_daddr,
836                                             &lookup_dport, &lookup_protocol);
837   if (err != 0)
838     {
839       b->error = node->errors[err];
840       return NAT_NEXT_DROP;
841     }
842
843   if (tx_sw_if_index != ~0)
844     {
845       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
846             sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
847             tx_sw_if_index, is_multi_worker)))
848         {
849           return next;
850         }
851     }
852   else
853     {
854       if (PREDICT_FALSE (nat44_ed_not_translate (
855             vm, node, sw_if_index, b, ip, IP_PROTOCOL_ICMP, rx_fib_index)))
856         {
857           return next;
858         }
859     }
860
861   if (PREDICT_FALSE (icmp_type_is_error_message (
862         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
863     {
864       b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
865       return NAT_NEXT_DROP;
866     }
867
868   next =
869     slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address, lookup_sport,
870                   lookup_dport, ip->protocol, rx_fib_index, tx_sw_if_index, &s,
871                   node, next, thread_index, vlib_time_now (vm));
872
873   if (NAT_NEXT_DROP == next)
874     goto out;
875
876   if (PREDICT_TRUE (!ip4_is_fragment (ip)))
877     {
878       ip_csum_t sum = ip_incremental_checksum_buffer (
879         vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b),
880         ntohs (ip->length) - ip4_header_bytes (ip), 0);
881       checksum = ~ip_csum_fold (sum);
882       if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff))
883         {
884           next = NAT_NEXT_DROP;
885           goto out;
886         }
887     }
888
889 out:
890   if (PREDICT_TRUE (next != NAT_NEXT_DROP && s))
891     {
892       /* Accounting */
893       nat44_session_update_counters (
894         s, now, vlib_buffer_length_in_chain (vm, b), thread_index);
895       /* Per-user LRU list maintenance */
896       nat44_session_update_lru (sm, s, thread_index);
897     }
898   *s_p = s;
899   return next;
900 }
901
902 static snat_session_t *
903 nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
904                                         ip4_header_t *ip, u32 rx_fib_index,
905                                         u32 thread_index, f64 now,
906                                         vlib_main_t *vm,
907                                         vlib_node_runtime_t *node)
908 {
909   clib_bihash_kv_16_8_t s_kv, s_value;
910   snat_static_mapping_t *m = NULL;
911   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
912   snat_session_t *s = NULL;
913   u32 tx_fib_index;
914   int i;
915   ip4_address_t new_src_addr = { 0 };
916   ip4_address_t new_dst_addr = ip->dst_address;
917
918   if (PREDICT_FALSE (
919         nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
920     {
921       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
922       nat_ipfix_logging_max_sessions (thread_index,
923                                       sm->max_translations_per_thread);
924       nat_elog_notice (sm, "maximum sessions exceeded");
925       return 0;
926     }
927
928   tx_fib_index = get_tx_fib_index (rx_fib_index, ip->dst_address);
929
930   // Try to find static mapping first
931   m = nat44_ed_sm_i2o_lookup (sm, ip->src_address, 0, rx_fib_index,
932                               ip->protocol);
933   if (m)
934     {
935       new_src_addr = m->external_addr;
936     }
937   else
938     {
939       pool_foreach (s, tsm->sessions)
940         {
941           if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
942             {
943               init_ed_k (&s_kv, s->out2in.addr.as_u32, 0,
944                          ip->dst_address.as_u32, 0, tx_fib_index,
945                          ip->protocol);
946               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
947                 {
948                   new_src_addr = s->out2in.addr;
949                 }
950               break;
951             }
952         }
953
954       if (!new_src_addr.as_u32)
955         {
956           for (i = 0; i < vec_len (sm->addresses); i++)
957             {
958               init_ed_k (&s_kv, sm->addresses[i].addr.as_u32, 0,
959                          ip->dst_address.as_u32, 0, tx_fib_index,
960                          ip->protocol);
961               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
962                 {
963                   new_src_addr = sm->addresses[i].addr;
964                 }
965             }
966         }
967     }
968
969   if (!new_src_addr.as_u32)
970     {
971       // could not allocate address for translation ...
972       return 0;
973     }
974
975   s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
976   if (!s)
977     {
978       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
979       nat_elog_warn (sm, "create NAT session failed");
980       return 0;
981     }
982
983   nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
984                         ip->dst_address, 0, rx_fib_index, ip->protocol);
985   nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
986   nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
987
988   // hairpinning?
989   int is_hairpinning = nat44_ed_external_sm_lookup (
990     sm, ip->dst_address, 0, ip->protocol, &new_dst_addr, NULL);
991   s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
992
993   nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
994   nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
995
996   nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
997                         tx_fib_index, ip->protocol);
998   nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
999   nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
1000   nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
1001
1002   s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1003   s->out2in.addr.as_u32 = new_src_addr.as_u32;
1004   s->out2in.fib_index = tx_fib_index;
1005   s->in2out.addr.as_u32 = ip->src_address.as_u32;
1006   s->in2out.fib_index = rx_fib_index;
1007   s->in2out.port = s->out2in.port = ip->protocol;
1008   if (m)
1009     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1010
1011   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
1012     {
1013       nat_elog_notice (sm, "in2out flow hash add failed");
1014       nat_ed_session_delete (sm, s, thread_index, 1);
1015       return NULL;
1016     }
1017
1018   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1))
1019     {
1020       nat_elog_notice (sm, "out2in flow hash add failed");
1021       nat_ed_session_delete (sm, s, thread_index, 1);
1022       return NULL;
1023     }
1024
1025   per_vrf_sessions_register_session (s, thread_index);
1026
1027   /* Accounting */
1028   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
1029                                  thread_index);
1030   /* Per-user LRU list maintenance */
1031   nat44_session_update_lru (sm, s, thread_index);
1032
1033   return s;
1034 }
1035
1036 static inline uword
1037 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
1038                                           vlib_node_runtime_t *node,
1039                                           vlib_frame_t *frame,
1040                                           int is_output_feature,
1041                                           int is_multi_worker)
1042 {
1043   u32 n_left_from, *from;
1044   snat_main_t *sm = &snat_main;
1045   f64 now = vlib_time_now (vm);
1046   u32 thread_index = vm->thread_index;
1047   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1048   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
1049     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
1050
1051   from = vlib_frame_vector_args (frame);
1052   n_left_from = frame->n_vectors;
1053
1054   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1055   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1056   vlib_get_buffers (vm, from, b, n_left_from);
1057
1058   while (n_left_from > 0)
1059     {
1060       vlib_buffer_t *b0;
1061       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1062       u32 tx_sw_if_index0;
1063       u32 cntr_sw_if_index0;
1064       ip_protocol_t proto0;
1065       ip4_header_t *ip0;
1066       snat_session_t *s0 = 0;
1067       clib_bihash_kv_16_8_t kv0 = { 0 }, value0;
1068       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1069       nat_6t_flow_t *f = 0;
1070       nat_6t_t lookup;
1071       int lookup_skipped = 0;
1072
1073       b0 = *b;
1074       b++;
1075
1076       /* Prefetch next iteration. */
1077       if (PREDICT_TRUE (n_left_from >= 2))
1078         {
1079           vlib_buffer_t *p2;
1080
1081           p2 = *b;
1082
1083           vlib_prefetch_buffer_header (p2, LOAD);
1084
1085           clib_prefetch_load (p2->data);
1086         }
1087
1088       if (is_output_feature)
1089         {
1090           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1091         }
1092
1093       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1094
1095       ip0 =
1096         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
1097
1098       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1099       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1100       cntr_sw_if_index0 =
1101         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1102       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1103                                                            rx_sw_if_index0);
1104       lookup.fib_index = rx_fib_index0;
1105
1106       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1107         {
1108           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1109           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1110                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1111                                        0);
1112           next[0] = NAT_NEXT_ICMP_ERROR;
1113           goto trace0;
1114         }
1115
1116       proto0 = ip0->protocol;
1117
1118       if (is_output_feature)
1119         {
1120           if (PREDICT_FALSE
1121               (nat_not_translate_output_feature_fwd
1122                (sm, ip0, thread_index, now, vm, b0)))
1123             goto trace0;
1124         }
1125
1126       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
1127         {
1128           if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1129                 ICMP4_echo_request &&
1130               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1131                 ICMP4_echo_reply &&
1132               !icmp_type_is_error_message (
1133                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
1134             {
1135               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
1136               next[0] = NAT_NEXT_DROP;
1137               goto trace0;
1138             }
1139           int err = nat_get_icmp_session_lookup_values (
1140             b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
1141             &lookup.dport, &lookup.proto);
1142           if (err != 0)
1143             {
1144               b0->error = node->errors[err];
1145               next[0] = NAT_NEXT_DROP;
1146               goto trace0;
1147             }
1148         }
1149       else
1150         {
1151           lookup.proto = ip0->protocol;
1152           lookup.saddr.as_u32 = ip0->src_address.as_u32;
1153           lookup.daddr.as_u32 = ip0->dst_address.as_u32;
1154           lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
1155           lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
1156         }
1157
1158       /* there might be a stashed index in vnet_buffer2 from handoff or
1159        * classify node, see if it can be used */
1160       if (is_multi_worker &&
1161           !pool_is_free_index (tsm->sessions,
1162                                vnet_buffer2 (b0)->nat.cached_session_index))
1163         {
1164           s0 = pool_elt_at_index (tsm->sessions,
1165                                   vnet_buffer2 (b0)->nat.cached_session_index);
1166           if (PREDICT_TRUE (
1167                 nat_6t_t_eq (&s0->i2o.match, &lookup)
1168                 // for some hairpinning cases there are two "i2i" flows instead
1169                 // of i2o and o2i as both hosts are on inside
1170                 || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1171                     nat_6t_t_eq (&s0->o2i.match, &lookup))))
1172             {
1173               /* yes, this is the droid we're looking for */
1174               lookup_skipped = 1;
1175               goto skip_lookup;
1176             }
1177           s0 = NULL;
1178         }
1179
1180       init_ed_k (&kv0, lookup.saddr.as_u32, lookup.sport, lookup.daddr.as_u32,
1181                  lookup.dport, lookup.fib_index, lookup.proto);
1182
1183       // lookup flow
1184       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1185         {
1186           // flow does not exist go slow path
1187           next[0] = def_slow;
1188           goto trace0;
1189         }
1190
1191       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1192       s0 =
1193         pool_elt_at_index (tsm->sessions,
1194                            ed_value_get_session_index (&value0));
1195
1196     skip_lookup:
1197
1198       ASSERT (thread_index == s0->thread_index);
1199
1200       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1201         {
1202           // session is closed, go slow path
1203           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1204           nat_ed_session_delete (sm, s0, thread_index, 1);
1205           s0 = 0;
1206           next[0] = def_slow;
1207           goto trace0;
1208         }
1209
1210       // drop if session expired
1211       u64 sess_timeout_time;
1212       sess_timeout_time =
1213         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1214       if (now >= sess_timeout_time)
1215         {
1216           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1217           nat_ed_session_delete (sm, s0, thread_index, 1);
1218           s0 = 0;
1219           // session is closed, go slow path
1220           next[0] = def_slow;
1221           goto trace0;
1222         }
1223
1224       b0->flags |= VNET_BUFFER_F_IS_NATED;
1225
1226       if (nat_6t_t_eq (&s0->i2o.match, &lookup))
1227         {
1228           f = &s0->i2o;
1229         }
1230       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1231                nat_6t_t_eq (&s0->o2i.match, &lookup))
1232         {
1233           f = &s0->o2i;
1234         }
1235       else
1236         {
1237           translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
1238           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1239           nat_ed_session_delete (sm, s0, thread_index, 1);
1240           s0 = 0;
1241           next[0] = NAT_NEXT_DROP;
1242           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1243           goto trace0;
1244         }
1245
1246       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1247           (translation_error = nat_6t_flow_buf_translate_i2o (
1248              vm, sm, b0, ip0, f, proto0, is_output_feature)))
1249         {
1250           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1251           nat_ed_session_delete (sm, s0, thread_index, 1);
1252           s0 = 0;
1253           next[0] = NAT_NEXT_DROP;
1254           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1255           goto trace0;
1256         }
1257
1258       switch (proto0)
1259         {
1260         case IP_PROTOCOL_TCP:
1261           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
1262                                          thread_index, cntr_sw_if_index0, 1);
1263           nat44_set_tcp_session_state_i2o (
1264             sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
1265             thread_index);
1266           break;
1267         case IP_PROTOCOL_UDP:
1268           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
1269                                          thread_index, cntr_sw_if_index0, 1);
1270           break;
1271         case IP_PROTOCOL_ICMP:
1272           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
1273                                          thread_index, cntr_sw_if_index0, 1);
1274           break;
1275         default:
1276           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
1277                                          thread_index, cntr_sw_if_index0, 1);
1278           break;
1279         }
1280
1281       /* Accounting */
1282       nat44_session_update_counters (s0, now,
1283                                      vlib_buffer_length_in_chain (vm, b0),
1284                                      thread_index);
1285       /* Per-user LRU list maintenance */
1286       nat44_session_update_lru (sm, s0, thread_index);
1287
1288     trace0:
1289       if (PREDICT_FALSE
1290           ((node->flags & VLIB_NODE_FLAG_TRACE)
1291            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1292         {
1293           nat_in2out_ed_trace_t *t =
1294             vlib_add_trace (vm, node, b0, sizeof (*t));
1295           t->sw_if_index = rx_sw_if_index0;
1296           t->next_index = next[0];
1297           t->is_slow_path = 0;
1298           t->translation_error = translation_error;
1299           t->lookup_skipped = lookup_skipped;
1300           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1301
1302           if (s0)
1303             {
1304               t->session_index = s0 - tsm->sessions;
1305               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1306               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1307               t->translation_via_i2of = (&s0->i2o == f);
1308               t->tcp_state = s0->tcp_state;
1309             }
1310           else
1311             {
1312               t->session_index = ~0;
1313             }
1314         }
1315
1316       if (next[0] == NAT_NEXT_DROP)
1317         {
1318           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
1319                                          thread_index, cntr_sw_if_index0, 1);
1320         }
1321
1322       n_left_from--;
1323       next++;
1324     }
1325
1326   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1327                                frame->n_vectors);
1328   return frame->n_vectors;
1329 }
1330
1331 static inline uword
1332 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
1333                                           vlib_node_runtime_t *node,
1334                                           vlib_frame_t *frame,
1335                                           int is_output_feature,
1336                                           int is_multi_worker)
1337 {
1338   u32 n_left_from, *from;
1339   snat_main_t *sm = &snat_main;
1340   f64 now = vlib_time_now (vm);
1341   u32 thread_index = vm->thread_index;
1342   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1343
1344   from = vlib_frame_vector_args (frame);
1345   n_left_from = frame->n_vectors;
1346
1347   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1348   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1349   vlib_get_buffers (vm, from, b, n_left_from);
1350
1351   while (n_left_from > 0)
1352     {
1353       vlib_buffer_t *b0;
1354       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1355       u32 tx_sw_if_index0;
1356       u32 cntr_sw_if_index0;
1357       ip_protocol_t proto0;
1358       ip4_header_t *ip0;
1359       udp_header_t *udp0;
1360       icmp46_header_t *icmp0;
1361       snat_session_t *s0 = 0;
1362       clib_bihash_kv_16_8_t kv0 = { 0 }, value0;
1363       int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1364
1365       b0 = *b;
1366
1367       if (is_output_feature)
1368         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1369
1370       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1371
1372       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1373                               iph_offset0);
1374
1375       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1376       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1377       cntr_sw_if_index0 =
1378         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1379       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1380                                                            rx_sw_if_index0);
1381
1382       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1383         {
1384           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1385           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1386                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1387                                        0);
1388           next[0] = NAT_NEXT_ICMP_ERROR;
1389           goto trace0;
1390         }
1391
1392       udp0 = ip4_next_header (ip0);
1393       icmp0 = (icmp46_header_t *) udp0;
1394       proto0 = ip0->protocol;
1395
1396       if (PREDICT_FALSE (nat44_ed_is_unk_proto (proto0)))
1397         {
1398           s0 = nat44_ed_in2out_slowpath_unknown_proto (
1399             sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
1400           if (!s0)
1401             next[0] = NAT_NEXT_DROP;
1402
1403           if (NAT_NEXT_DROP != next[0] && s0 &&
1404               NAT_ED_TRNSL_ERR_SUCCESS !=
1405                 (translation_error = nat_6t_flow_buf_translate_i2o (
1406                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1407             {
1408               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1409               nat_ed_session_delete (sm, s0, thread_index, 1);
1410               s0 = 0;
1411               next[0] = NAT_NEXT_DROP;
1412               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1413               goto trace0;
1414             }
1415
1416           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
1417                                          thread_index, cntr_sw_if_index0, 1);
1418           goto trace0;
1419         }
1420
1421       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
1422         {
1423           next[0] = icmp_in2out_ed_slow_path (
1424             sm, b0, ip0, icmp0, rx_sw_if_index0, tx_sw_if_index0,
1425             rx_fib_index0, node, next[0], now, thread_index, &s0,
1426             is_multi_worker);
1427           if (NAT_NEXT_DROP != next[0] && s0 &&
1428               NAT_ED_TRNSL_ERR_SUCCESS !=
1429                 (translation_error = nat_6t_flow_buf_translate_i2o (
1430                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1431             {
1432               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1433               nat_ed_session_delete (sm, s0, thread_index, 1);
1434               s0 = 0;
1435               next[0] = NAT_NEXT_DROP;
1436               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1437               goto trace0;
1438             }
1439
1440           if (NAT_NEXT_DROP != next[0])
1441             {
1442               vlib_increment_simple_counter (
1443                 &sm->counters.slowpath.in2out.icmp, thread_index,
1444                 cntr_sw_if_index0, 1);
1445             }
1446           goto trace0;
1447         }
1448
1449       init_ed_k (
1450         &kv0, ip0->src_address.as_u32, vnet_buffer (b0)->ip.reass.l4_src_port,
1451         ip0->dst_address.as_u32, vnet_buffer (b0)->ip.reass.l4_dst_port,
1452         rx_fib_index0, ip0->protocol);
1453       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1454         {
1455           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1456           s0 =
1457             pool_elt_at_index (tsm->sessions,
1458                                ed_value_get_session_index (&value0));
1459         }
1460
1461       if (!s0)
1462         {
1463           if (is_output_feature)
1464             {
1465               if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
1466                     sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1467                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1468                     rx_sw_if_index0, tx_sw_if_index0, is_multi_worker)))
1469                 goto trace0;
1470
1471               /*
1472                * Send DHCP packets to the ipv4 stack, or we won't
1473                * be able to use dhcp client on the outside interface
1474                */
1475               if (PREDICT_FALSE (
1476                     proto0 == IP_PROTOCOL_UDP &&
1477                     (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1478                      clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)) &&
1479                     ip0->dst_address.as_u32 == 0xffffffff))
1480                 goto trace0;
1481             }
1482           else
1483             {
1484               if (PREDICT_FALSE (
1485                     nat44_ed_not_translate (vm, node, rx_sw_if_index0, b0, ip0,
1486                                             proto0, rx_fib_index0)))
1487                 goto trace0;
1488             }
1489
1490           next[0] =
1491             slow_path_ed (vm, sm, b0, ip0->src_address, ip0->dst_address,
1492                           vnet_buffer (b0)->ip.reass.l4_src_port,
1493                           vnet_buffer (b0)->ip.reass.l4_dst_port,
1494                           ip0->protocol, rx_fib_index0, tx_sw_if_index0, &s0,
1495                           node, next[0], thread_index, now);
1496
1497           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1498             goto trace0;
1499
1500           if (PREDICT_FALSE (!s0))
1501             goto trace0;
1502
1503         }
1504
1505       b0->flags |= VNET_BUFFER_F_IS_NATED;
1506
1507       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1508           (translation_error = nat_6t_flow_buf_translate_i2o (
1509              vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1510         {
1511           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1512           nat_ed_session_delete (sm, s0, thread_index, 1);
1513           s0 = 0;
1514           next[0] = NAT_NEXT_DROP;
1515           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1516           goto trace0;
1517         }
1518
1519       if (PREDICT_TRUE (proto0 == IP_PROTOCOL_TCP))
1520         {
1521           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
1522                                          thread_index, cntr_sw_if_index0, 1);
1523           nat44_set_tcp_session_state_i2o (
1524             sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
1525             thread_index);
1526         }
1527       else
1528         {
1529           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
1530                                          thread_index, cntr_sw_if_index0, 1);
1531         }
1532
1533       /* Accounting */
1534       nat44_session_update_counters (s0, now,
1535                                      vlib_buffer_length_in_chain
1536                                      (vm, b0), thread_index);
1537       /* Per-user LRU list maintenance */
1538       nat44_session_update_lru (sm, s0, thread_index);
1539
1540     trace0:
1541       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1542                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1543         {
1544           nat_in2out_ed_trace_t *t =
1545             vlib_add_trace (vm, node, b0, sizeof (*t));
1546           t->sw_if_index = rx_sw_if_index0;
1547           t->next_index = next[0];
1548           t->is_slow_path = 1;
1549           t->translation_error = translation_error;
1550           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1551
1552           if (s0)
1553             {
1554               t->session_index = s0 - tsm->sessions;
1555               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1556               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1557               t->translation_via_i2of = 1;
1558               t->tcp_state = s0->tcp_state;
1559             }
1560
1561           else
1562             {
1563               t->session_index = ~0;
1564             }
1565         }
1566
1567       if (next[0] == NAT_NEXT_DROP)
1568         {
1569           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
1570                                          thread_index, cntr_sw_if_index0, 1);
1571         }
1572
1573       n_left_from--;
1574       next++;
1575       b++;
1576     }
1577
1578   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1579                                frame->n_vectors);
1580
1581   return frame->n_vectors;
1582 }
1583
1584 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1585                                      vlib_node_runtime_t * node,
1586                                      vlib_frame_t * frame)
1587 {
1588   if (snat_main.num_workers > 1)
1589     {
1590       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1);
1591     }
1592   else
1593     {
1594       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0);
1595     }
1596 }
1597
1598 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1599   .name = "nat44-ed-in2out",
1600   .vector_size = sizeof (u32),
1601   .sibling_of = "nat-default",
1602   .format_trace = format_nat_in2out_ed_trace,
1603   .type = VLIB_NODE_TYPE_INTERNAL,
1604   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1605   .error_strings = nat_in2out_ed_error_strings,
1606   .runtime_data_bytes = sizeof (snat_runtime_t),
1607 };
1608
1609 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1610                                             vlib_node_runtime_t * node,
1611                                             vlib_frame_t * frame)
1612 {
1613   if (snat_main.num_workers > 1)
1614     {
1615       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1);
1616     }
1617   else
1618     {
1619       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0);
1620     }
1621 }
1622
1623 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1624   .name = "nat44-ed-in2out-output",
1625   .vector_size = sizeof (u32),
1626   .sibling_of = "nat-default",
1627   .format_trace = format_nat_in2out_ed_trace,
1628   .type = VLIB_NODE_TYPE_INTERNAL,
1629   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1630   .error_strings = nat_in2out_ed_error_strings,
1631   .runtime_data_bytes = sizeof (snat_runtime_t),
1632 };
1633
1634 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1635                                               vlib_node_runtime_t *
1636                                               node, vlib_frame_t * frame)
1637 {
1638   if (snat_main.num_workers > 1)
1639     {
1640       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 1);
1641     }
1642   else
1643     {
1644       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 0);
1645     }
1646 }
1647
1648 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1649   .name = "nat44-ed-in2out-slowpath",
1650   .vector_size = sizeof (u32),
1651   .sibling_of = "nat-default",
1652   .format_trace = format_nat_in2out_ed_trace,
1653   .type = VLIB_NODE_TYPE_INTERNAL,
1654   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1655   .error_strings = nat_in2out_ed_error_strings,
1656   .runtime_data_bytes = sizeof (snat_runtime_t),
1657 };
1658
1659 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1660                                                      vlib_node_runtime_t
1661                                                      * node,
1662                                                      vlib_frame_t * frame)
1663 {
1664   if (snat_main.num_workers > 1)
1665     {
1666       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 1);
1667     }
1668   else
1669     {
1670       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 0);
1671     }
1672 }
1673
1674 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1675   .name = "nat44-ed-in2out-output-slowpath",
1676   .vector_size = sizeof (u32),
1677   .sibling_of = "nat-default",
1678   .format_trace = format_nat_in2out_ed_trace,
1679   .type = VLIB_NODE_TYPE_INTERNAL,
1680   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1681   .error_strings = nat_in2out_ed_error_strings,
1682   .runtime_data_bytes = sizeof (snat_runtime_t),
1683 };
1684
1685 static u8 *
1686 format_nat_pre_trace (u8 * s, va_list * args)
1687 {
1688   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1689   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1690   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1691   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1692                  t->arc_next_index);
1693 }
1694
1695 VLIB_NODE_FN (nat_pre_in2out_node)
1696   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1697 {
1698   return nat_pre_node_fn_inline (vm, node, frame,
1699                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1700 }
1701
1702 VLIB_NODE_FN (nat_pre_in2out_output_node)
1703   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1704 {
1705   return nat_pre_node_fn_inline (vm, node, frame,
1706                                  NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
1707 }
1708
1709 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1710   .name = "nat-pre-in2out",
1711   .vector_size = sizeof (u32),
1712   .sibling_of = "nat-default",
1713   .format_trace = format_nat_pre_trace,
1714   .type = VLIB_NODE_TYPE_INTERNAL,
1715   .n_errors = 0,
1716 };
1717
1718 VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
1719   .name = "nat-pre-in2out-output",
1720   .vector_size = sizeof (u32),
1721   .sibling_of = "nat-default",
1722   .format_trace = format_nat_pre_trace,
1723   .type = VLIB_NODE_TYPE_INTERNAL,
1724   .n_errors = 0,
1725 };
1726
1727 /*
1728  * fd.io coding-style-patch-verification: ON
1729  *
1730  * Local Variables:
1731  * eval: (c-set-style "gnu")
1732  * End:
1733  */