nat: fix accidental o2i deletion/reuse
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_in2out.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27
28 #include <nat/lib/nat_inlines.h>
29 #include <nat/lib/ipfix_logging.h>
30
31 #include <nat/nat44-ed/nat44_ed.h>
32 #include <nat/nat44-ed/nat44_ed_inlines.h>
33
34 static char *nat_in2out_ed_error_strings[] = {
35 #define _(sym,string) string,
36   foreach_nat_in2out_ed_error
37 #undef _
38 };
39
40 typedef struct
41 {
42   u32 sw_if_index;
43   u32 next_index;
44   u32 session_index;
45   nat_translation_error_e translation_error;
46   nat_6t_flow_t i2of;
47   nat_6t_flow_t o2if;
48   clib_bihash_kv_16_8_t search_key;
49   u8 is_slow_path;
50   u8 translation_via_i2of;
51   u8 lookup_skipped;
52   u8 tcp_state;
53 } nat_in2out_ed_trace_t;
54
55 static u8 *
56 format_nat_in2out_ed_trace (u8 * s, va_list * args)
57 {
58   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
59   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
60   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
61   char *tag;
62
63   tag =
64     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
65     "NAT44_IN2OUT_ED_FAST_PATH";
66
67   s = format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
68               t->next_index);
69   if (~0 != t->session_index)
70     {
71       s = format (s, ", session %d, translation result '%U' via %s",
72                   t->session_index, format_nat_ed_translation_error,
73                   t->translation_error,
74                   t->translation_via_i2of ? "i2of" : "o2if");
75       s = format (s, "\n  i2of %U", format_nat_6t_flow, &t->i2of);
76       s = format (s, "\n  o2if %U", format_nat_6t_flow, &t->o2if);
77     }
78   if (!t->is_slow_path)
79     {
80       if (t->lookup_skipped)
81         {
82           s = format (s, "\n  lookup skipped - cached session index used");
83         }
84       else
85         {
86           s = format (s, "\n  search key %U", format_ed_session_kvp,
87                       &t->search_key);
88         }
89     }
90   if (IP_PROTOCOL_TCP == t->i2of.match.proto)
91     {
92       s = format (s, "\n  TCP state: %U", format_nat44_ed_tcp_state,
93                   t->tcp_state);
94     }
95
96   return s;
97 }
98
99 static int
100 nat_ed_alloc_addr_and_port_with_snat_address (
101   snat_main_t *sm, u8 proto, u32 thread_index, snat_address_t *a,
102   u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
103   ip4_address_t *outside_addr, u16 *outside_port)
104 {
105   const u16 port_thread_offset =
106     (port_per_thread * snat_thread_index) + ED_USER_PORT_OFFSET;
107
108   /* Backup original match in case of failure */
109   const nat_6t_t match = s->o2i.match;
110
111   s->o2i.match.daddr = a->addr;
112   /* first try port suggested by caller */
113   u16 port = clib_net_to_host_u16 (*outside_port);
114   u16 port_offset = port - port_thread_offset;
115   if (port < port_thread_offset ||
116       port >= port_thread_offset + port_per_thread)
117     {
118       /* need to pick a different port, suggested port doesn't fit in
119        * this thread's port range */
120       port_offset = snat_random_port (0, port_per_thread - 1);
121       port = port_thread_offset + port_offset;
122     }
123   u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
124   do
125     {
126       if (IP_PROTOCOL_ICMP == proto)
127         {
128           s->o2i.match.sport = clib_host_to_net_u16 (port);
129         }
130       s->o2i.match.dport = clib_host_to_net_u16 (port);
131       if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
132         {
133           *outside_addr = a->addr;
134           *outside_port = clib_host_to_net_u16 (port);
135           return 0;
136         }
137       port_offset = snat_random_port (0, port_per_thread - 1);
138       port = port_thread_offset + port_offset;
139       --attempts;
140     }
141   while (attempts > 0);
142
143   /* Revert match */
144   s->o2i.match = match;
145   return 1;
146 }
147
148 static int
149 nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index,
150                             u32 tx_sw_if_index, u32 nat_proto,
151                             u32 thread_index, ip4_address_t s_addr,
152                             ip4_address_t d_addr, u32 snat_thread_index,
153                             snat_session_t *s, ip4_address_t *outside_addr,
154                             u16 *outside_port)
155 {
156   if (vec_len (sm->addresses) > 0)
157     {
158       u32 s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses);
159       snat_address_t *a, *ja = 0, *ra = 0, *ba = 0;
160       int i;
161
162       // output feature
163       if (tx_sw_if_index != ~0)
164         {
165           for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
166             {
167               a = sm->addresses + i;
168               if (a->fib_index == rx_fib_index)
169                 {
170                   if (a->sw_if_index == tx_sw_if_index)
171                     {
172                       if ((a->addr_len != ~0) &&
173                           (a->net.as_u32 ==
174                            (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
175
176                         {
177                           return nat_ed_alloc_addr_and_port_with_snat_address (
178                             sm, nat_proto, thread_index, a,
179                             sm->port_per_thread, snat_thread_index, s,
180                             outside_addr, outside_port);
181                         }
182                       ra = a;
183                     }
184                   ja = a;
185                 }
186               else if (a->fib_index == ~0)
187                 {
188                   ba = a;
189                 }
190             }
191           for (i = 0; i < s_addr_offset; ++i)
192             {
193               a = sm->addresses + i;
194               if (a->fib_index == rx_fib_index)
195                 {
196                   if (a->sw_if_index == tx_sw_if_index)
197                     {
198                       if ((a->addr_len != ~0) &&
199                           (a->net.as_u32 ==
200                            (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
201
202                         {
203                           return nat_ed_alloc_addr_and_port_with_snat_address (
204                             sm, nat_proto, thread_index, a,
205                             sm->port_per_thread, snat_thread_index, s,
206                             outside_addr, outside_port);
207                         }
208                       ra = a;
209                     }
210                   ja = a;
211                 }
212               else if (a->fib_index == ~0)
213                 {
214                   ba = a;
215                 }
216             }
217           if (ra)
218             {
219               return nat_ed_alloc_addr_and_port_with_snat_address (
220                 sm, nat_proto, thread_index, ra, sm->port_per_thread,
221                 snat_thread_index, s, outside_addr, outside_port);
222             }
223         }
224       else
225         {
226           // first try nat pool addresses to sw interface addreses mappings
227           for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
228             {
229               a = sm->addresses + i;
230               if (a->fib_index == rx_fib_index)
231                 {
232                   if ((a->addr_len != ~0) &&
233                       (a->net.as_u32 ==
234                        (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
235                     {
236                       return nat_ed_alloc_addr_and_port_with_snat_address (
237                         sm, nat_proto, thread_index, a, sm->port_per_thread,
238                         snat_thread_index, s, outside_addr, outside_port);
239                     }
240                   ja = a;
241                 }
242               else if (a->fib_index == ~0)
243                 {
244                   ba = a;
245                 }
246             }
247           for (i = 0; i < s_addr_offset; ++i)
248             {
249               a = sm->addresses + i;
250               if (a->fib_index == rx_fib_index)
251                 {
252                   if ((a->addr_len != ~0) &&
253                       (a->net.as_u32 ==
254                        (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
255                     {
256                       return nat_ed_alloc_addr_and_port_with_snat_address (
257                         sm, nat_proto, thread_index, a, sm->port_per_thread,
258                         snat_thread_index, s, outside_addr, outside_port);
259                     }
260                   ja = a;
261                 }
262               else if (a->fib_index == ~0)
263                 {
264                   ba = a;
265                 }
266             }
267         }
268
269       if (ja || ba)
270         {
271           a = ja ? ja : ba;
272           return nat_ed_alloc_addr_and_port_with_snat_address (
273             sm, nat_proto, thread_index, a, sm->port_per_thread,
274             snat_thread_index, s, outside_addr, outside_port);
275         }
276     }
277   /* Totally out of translations to use... */
278   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
279   return 1;
280 }
281
282 static_always_inline int
283 nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
284                              u16 match_port, ip_protocol_t match_protocol,
285                              ip4_address_t *daddr, u16 *dport)
286 {
287   snat_static_mapping_t *m =
288     nat44_ed_sm_o2i_lookup (sm, match_addr, match_port, 0, match_protocol);
289   if (!m)
290     {
291       /* Try address only mapping */
292       m = nat44_ed_sm_o2i_lookup (sm, match_addr, 0, 0, 0);
293       if (!m)
294         return 0;
295     }
296   *daddr = m->local_addr;
297   if (dport)
298     {
299       /* Address only mapping doesn't change port */
300       *dport = is_sm_addr_only (m->flags) ? match_port : m->local_port;
301     }
302   return 1;
303 }
304
305 static_always_inline vrf_table_t *
306 get_vrf_table_by_fib (u32 fib_index)
307 {
308   snat_main_t *sm = &snat_main;
309   vrf_table_t *t;
310
311   pool_foreach (t, sm->vrf_tables)
312     {
313       if (fib_index == t->table_fib_index)
314         {
315           return t;
316         }
317     }
318
319   return 0;
320 }
321
322 static_always_inline u32
323 get_tx_fib_index (u32 rx_fib_index, ip4_address_t addr)
324 {
325   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
326   fib_prefix_t pfx = {
327     .fp_proto = FIB_PROTOCOL_IP4,
328     .fp_len = 32,
329     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
330     ,
331   };
332
333   snat_main_t *sm = &snat_main;
334   vrf_table_t *t = get_vrf_table_by_fib (rx_fib_index);
335   // default to rx fib
336   u32 tx_fib_index = rx_fib_index;
337
338   if (0 != t)
339     {
340       // managed routes to other fibs
341       vrf_route_t *r;
342       pool_foreach (r, t->routes)
343         {
344           fei = fib_table_lookup (r->fib_index, &pfx);
345           if ((FIB_NODE_INDEX_INVALID != fei) &&
346               (~0 != fib_entry_get_resolving_interface (fei)))
347             {
348               tx_fib_index = r->fib_index;
349               break;
350             }
351         }
352     }
353   else
354     {
355       // default to configured fib
356       tx_fib_index = sm->outside_fib_index;
357
358       // default routes to other fibs
359       nat_fib_t *f;
360       vec_foreach (f, sm->outside_fibs)
361         {
362           fei = fib_table_lookup (f->fib_index, &pfx);
363           if ((FIB_NODE_INDEX_INVALID != fei) &&
364               (~0 != fib_entry_get_resolving_interface (fei)))
365             {
366               tx_fib_index = f->fib_index;
367               break;
368             }
369         }
370     }
371
372   return tx_fib_index;
373 }
374
375 static_always_inline int
376 is_destination_resolvable (u32 rx_fib_index, ip4_address_t addr)
377 {
378   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
379   fib_prefix_t pfx = {
380     .fp_proto = FIB_PROTOCOL_IP4,
381     .fp_len = 32,
382     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
383     ,
384   };
385
386   snat_main_t *sm = &snat_main;
387   vrf_table_t *t = get_vrf_table_by_fib (rx_fib_index);
388   u32 ii;
389
390   if (0 != t)
391     {
392       // managed routes to other fibs
393       vrf_route_t *r;
394       pool_foreach (r, t->routes)
395         {
396           fei = fib_table_lookup (r->fib_index, &pfx);
397           if ((FIB_NODE_INDEX_INVALID != fei) &&
398               (~0 != (ii = fib_entry_get_resolving_interface (fei))))
399             {
400               return 1;
401             }
402         }
403     }
404   else
405     {
406       // default routes to other fibs
407       nat_fib_t *f;
408       vec_foreach (f, sm->outside_fibs)
409         {
410           fei = fib_table_lookup (f->fib_index, &pfx);
411           if ((FIB_NODE_INDEX_INVALID != fei) &&
412               (~0 != (ii = fib_entry_get_resolving_interface (fei))))
413             {
414               snat_interface_t *i;
415               pool_foreach (i, sm->interfaces)
416                 {
417                   if ((nat44_ed_is_interface_outside (i)) &&
418                       (ii == i->sw_if_index))
419                     {
420                       return 1;
421                     }
422                 }
423             }
424         }
425     }
426
427   return 0;
428 }
429
/*
 * Slow path: create a new NAT session for an inside-to-outside packet.
 *
 * @param vm              vlib main
 * @param sm              NAT main structure
 * @param b               buffer being processed (b->error is set on drops)
 * @param l_addr, l_port  local (inside) address / port of the packet
 * @param r_addr, r_port  remote address / port of the packet
 * @param proto           IP protocol of the packet
 * @param rx_fib_index    fib the packet was received in
 * @param tx_sw_if_index  tx interface, forwarded to the address allocator
 *                        (which treats ~0 as "not the output feature")
 * @param sessionp        out: new session on success, NULL for identity
 *                        NAT and on the `error:` path
 * @param node            node runtime, used for error counters
 * @param next            next index to return when the packet is not dropped
 * @param thread_index    current thread
 * @param now             current time, passed to session alloc / LRU free
 *
 * @return `next` on success (and for identity NAT), NAT_NEXT_DROP otherwise.
 *
 * Note: the early drop paths (max sessions, non-initial TCP packet,
 * address/port exhaustion) return without writing *sessionp.
 */
static u32
slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
              ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
              u16 r_port, u8 proto, u32 rx_fib_index, u32 tx_sw_if_index,
              snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
              u32 thread_index, f64 now)
{
  snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
  ip4_address_t outside_addr;
  u16 outside_port;
  u32 tx_fib_index;
  u8 is_identity_nat = 0;

  snat_session_t *s = NULL;
  lb_nat_type_t lb = 0;
  /* rewritten destination; replaced below if r_addr/r_port hit a static
   * mapping (hairpinning) */
  ip4_address_t daddr = r_addr;
  u16 dport = r_port;

  /* session limit reached: try to free one session, drop if that fails */
  if (PREDICT_FALSE
      (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
    {
      if (!nat_lru_free_one (sm, thread_index, now))
        {
          b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
          nat_ipfix_logging_max_sessions (thread_index,
                                          sm->max_translations_per_thread);
          nat_elog_notice (sm, "maximum sessions exceeded");
          return NAT_NEXT_DROP;
        }
    }

  ip4_address_t sm_addr;
  u16 sm_port;
  u32 sm_fib_index;
  int is_sm = 0;
  // First try to match static mapping by local address and port
  /* snat_static_mapping_match() returns 0 when a mapping matched */
  if (!snat_static_mapping_match (vm, l_addr, l_port, rx_fib_index, proto,
                                  &sm_addr, &sm_port, &sm_fib_index, 0, 0, 0,
                                  &lb, 0, &is_identity_nat, 0))
    {
      /* identity NAT: pass the packet on without creating a session */
      if (PREDICT_FALSE (is_identity_nat))
        {
          *sessionp = NULL;
          return next;
        }
      is_sm = 1;
    }

  /* a TCP session may only be created by a packet for which
   * tcp_flags_is_init() holds */
  if (PREDICT_TRUE (proto == IP_PROTOCOL_TCP))
    {
      if (PREDICT_FALSE (!tcp_flags_is_init (
            vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
        {
          b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
          return NAT_NEXT_DROP;
        }
    }

  s = nat_ed_session_alloc (sm, thread_index, now, proto);
  ASSERT (s);

  tx_fib_index = get_tx_fib_index (rx_fib_index, r_addr);

  if (!is_sm)
    {
      /* dynamic translation: outside address/port come from the pool */
      s->in2out.addr = l_addr;
      s->in2out.port = l_port;
      s->proto = proto;
      s->in2out.fib_index = rx_fib_index;
      s->out2in.fib_index = tx_fib_index;

      // suggest using local port to allocation function
      outside_port = l_port;

      if (PREDICT_FALSE (nat44_ed_external_sm_lookup (sm, r_addr, r_port,
                                                      proto, &daddr, &dport)))
        {
          s->flags |= SNAT_SESSION_FLAG_HAIRPINNING;
        }

      // destination addr/port updated with real values in
      // nat_ed_alloc_addr_and_port
      nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
                            s->out2in.fib_index, proto);
      nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
      if (IP_PROTOCOL_ICMP == proto)
        {
          nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
        }
      else
        {
          nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
        }
      nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);

      /* non-zero return: no address/port could be allocated */
      if (nat_ed_alloc_addr_and_port (
            sm, rx_fib_index, tx_sw_if_index, proto, thread_index, l_addr,
            r_addr, tsm->snat_thread_index, s, &outside_addr, &outside_port))
        {
          nat_elog_notice (sm, "addresses exhausted");
          b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
          nat_ed_session_delete (sm, s, thread_index, 1);
          return NAT_NEXT_DROP;
        }
      s->out2in.addr = outside_addr;
      s->out2in.port = outside_port;
    }
  else
    {
      // static mapping
      s->out2in.addr = outside_addr = sm_addr;
      s->out2in.port = outside_port = sm_port;
      s->in2out.addr = l_addr;
      s->in2out.port = l_port;
      s->proto = proto;
      s->in2out.fib_index = rx_fib_index;
      s->out2in.fib_index = tx_fib_index;
      s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;

      // hairpinning?
      int is_hairpinning = nat44_ed_external_sm_lookup (sm, r_addr, r_port,
                                                        proto, &daddr, &dport);
      /* lookup returns 0 or 1, so this sets the flag only on a hit */
      s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;

      if (IP_PROTOCOL_ICMP == proto)
        {
          nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
                                sm_port, s->out2in.fib_index, proto);
          nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
        }
      else
        {
          nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr,
                                sm_port, s->out2in.fib_index, proto);
          nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
        }
      nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
      nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
      /* NOTE(review): mode 2 is presumably "add without overwriting an
       * existing entry" - confirm in nat_ed_ses_o2i_flow_hash_add_del */
      if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
        {
          nat_elog_notice (sm, "out2in key add failed");
          goto error;
        }
    }

  if (lb)
    s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
  s->ext_host_addr = r_addr;
  s->ext_host_port = r_port;

  /* in2out flow: match the original packet, rewrite the source to the
   * outside address/port and the destination to daddr/dport */
  nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port,
                        rx_fib_index, proto);
  nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
  nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);

  if (IP_PROTOCOL_ICMP == proto)
    {
      nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
    }
  else
    {
      nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
      nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
    }
  nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);

  if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
    {
      nat_elog_notice (sm, "in2out key add failed");
      goto error;
    }

  /* log NAT event */
  nat_ipfix_logging_nat44_ses_create (
    thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
    s->in2out.port, s->out2in.port, s->in2out.fib_index);

  nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
                         s->in2out.port, &s->ext_host_nat_addr,
                         s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
                         &s->ext_host_addr, s->ext_host_port, s->proto, 0);

  per_vrf_sessions_register_session (s, thread_index);

  *sessionp = s;
  return next;
error:
  /* flow hash insertion failed: tear the session down and report a drop */
  if (s)
    {
      nat_ed_session_delete (sm, s, thread_index, 1);
    }
  *sessionp = s = NULL;
  return NAT_NEXT_DROP;
}
624
625 static_always_inline int
626 nat44_ed_not_translate (vlib_main_t *vm, vlib_node_runtime_t *node,
627                         u32 sw_if_index, vlib_buffer_t *b, ip4_header_t *ip,
628                         u32 proto, u32 rx_fib_index)
629 {
630   snat_main_t *sm = &snat_main;
631
632   clib_bihash_kv_16_8_t kv, value;
633   ip4_address_t placeholder_addr;
634   u32 placeholder_fib_index;
635   u16 placeholder_port;
636
637   init_ed_k (&kv, ip->dst_address.as_u32,
638              vnet_buffer (b)->ip.reass.l4_dst_port, ip->src_address.as_u32,
639              vnet_buffer (b)->ip.reass.l4_src_port, sm->outside_fib_index,
640              ip->protocol);
641
642   // do nat if active session or is static mapping
643   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value) ||
644       !snat_static_mapping_match (
645         vm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
646         sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
647         &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
648     {
649       return 0;
650     }
651
652   // do not nat if forwarding enabled
653   if (sm->forwarding_enabled)
654     {
655       return 1;
656     }
657
658   // do not nat packet aimed at the interface address
659   if (PREDICT_FALSE (
660         is_interface_addr (sm, node, sw_if_index, ip->dst_address.as_u32)))
661     {
662       return 1;
663     }
664
665   // do nat packets with resolvable destination
666   // destination can be resolved either by:
667   // a) vrf routing table entry
668   // b) (non output feature) outside interface fib
669   if (is_destination_resolvable (rx_fib_index, ip->dst_address))
670     {
671       return 0;
672     }
673
674   return 1;
675 }
676
677 static_always_inline int
678 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
679                                       u32 thread_index, f64 now,
680                                       vlib_main_t * vm, vlib_buffer_t * b)
681 {
682   clib_bihash_kv_16_8_t kv, value;
683   snat_session_t *s = 0;
684   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
685
686   if (!sm->forwarding_enabled)
687     return 0;
688
689   if (ip->protocol == IP_PROTOCOL_ICMP)
690     {
691       ip4_address_t lookup_saddr, lookup_daddr;
692       u16 lookup_sport, lookup_dport;
693       u8 lookup_protocol;
694       if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
695                                               &lookup_sport, &lookup_daddr,
696                                               &lookup_dport, &lookup_protocol))
697         return 0;
698       init_ed_k (&kv, lookup_saddr.as_u32, lookup_sport, lookup_daddr.as_u32,
699                  lookup_dport, 0, lookup_protocol);
700     }
701   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
702     {
703       init_ed_k (&kv, ip->src_address.as_u32,
704                  vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
705                  vnet_buffer (b)->ip.reass.l4_dst_port, 0, ip->protocol);
706     }
707   else
708     {
709       init_ed_k (&kv, ip->src_address.as_u32, 0, ip->dst_address.as_u32, 0, 0,
710                  ip->protocol);
711     }
712
713   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
714     {
715       ASSERT (thread_index == ed_value_get_thread_index (&value));
716       s =
717         pool_elt_at_index (tsm->sessions,
718                            ed_value_get_session_index (&value));
719
720       if (na44_ed_is_fwd_bypass_session (s))
721         {
722           if (ip->protocol == IP_PROTOCOL_TCP)
723             {
724               nat44_set_tcp_session_state_i2o (
725                 sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags,
726                 thread_index);
727             }
728           /* Accounting */
729           nat44_session_update_counters (s, now,
730                                          vlib_buffer_length_in_chain (vm, b),
731                                          thread_index);
732           /* Per-user LRU list maintenance */
733           nat44_session_update_lru (sm, s, thread_index);
734           return 1;
735         }
736       else
737         return 0;
738     }
739
740   return 0;
741 }
742
743 static_always_inline int
744 nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
745                                        ip4_header_t *ip, u16 src_port,
746                                        u16 dst_port, u32 thread_index,
747                                        u32 rx_sw_if_index, u32 tx_sw_if_index,
748                                        int is_multi_worker)
749 {
750   clib_bihash_kv_16_8_t kv, value;
751   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
752   snat_interface_t *i;
753   snat_session_t *s;
754   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
755   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
756
757   /* src NAT check */
758   init_ed_k (&kv, ip->src_address.as_u32, src_port, ip->dst_address.as_u32,
759              dst_port, tx_fib_index, ip->protocol);
760   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
761     {
762       ASSERT (thread_index == ed_value_get_thread_index (&value));
763       s =
764         pool_elt_at_index (tsm->sessions,
765                            ed_value_get_session_index (&value));
766       return 1;
767     }
768
769   /* dst NAT check */
770   if (is_multi_worker &&
771       PREDICT_TRUE (!pool_is_free_index (
772         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index)))
773     {
774       nat_6t_t lookup;
775       lookup.fib_index = rx_fib_index;
776       lookup.proto = ip->protocol;
777       lookup.daddr.as_u32 = ip->src_address.as_u32;
778       lookup.dport = src_port;
779       lookup.saddr.as_u32 = ip->dst_address.as_u32;
780       lookup.sport = dst_port;
781       s = pool_elt_at_index (
782         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index);
783       if (PREDICT_TRUE (nat_6t_t_eq (&s->i2o.match, &lookup)))
784         {
785           goto skip_dst_nat_lookup;
786         }
787       s = NULL;
788     }
789
790   init_ed_k (&kv, ip->dst_address.as_u32, dst_port, ip->src_address.as_u32,
791              src_port, rx_fib_index, ip->protocol);
792   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
793     {
794       ASSERT (thread_index == ed_value_get_thread_index (&value));
795       s =
796         pool_elt_at_index (tsm->sessions,
797                            ed_value_get_session_index (&value));
798
799     skip_dst_nat_lookup:
800       if (na44_ed_is_fwd_bypass_session (s))
801         return 0;
802
803       /* hairpinning */
804       pool_foreach (i, sm->output_feature_interfaces)
805         {
806           if ((nat44_ed_is_interface_inside (i)) &&
807               (rx_sw_if_index == i->sw_if_index))
808             return 0;
809         }
810       return 1;
811     }
812
813   return 0;
814 }
815
816 static inline u32
817 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
818                           icmp46_header_t *icmp, u32 sw_if_index,
819                           u32 tx_sw_if_index, u32 rx_fib_index,
820                           vlib_node_runtime_t *node, u32 next, f64 now,
821                           u32 thread_index, snat_session_t **s_p,
822                           int is_multi_worker)
823 {
824   vlib_main_t *vm = vlib_get_main ();
825   u16 checksum;
826   int err;
827   snat_session_t *s = NULL;
828   u8 lookup_protocol = ip->protocol;
829   u16 lookup_sport, lookup_dport;
830   ip4_address_t lookup_saddr, lookup_daddr;
831
832   err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
833                                             &lookup_sport, &lookup_daddr,
834                                             &lookup_dport, &lookup_protocol);
835   if (err != 0)
836     {
837       b->error = node->errors[err];
838       return NAT_NEXT_DROP;
839     }
840
841   if (tx_sw_if_index != ~0)
842     {
843       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
844             sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
845             tx_sw_if_index, is_multi_worker)))
846         {
847           return next;
848         }
849     }
850   else
851     {
852       if (PREDICT_FALSE (nat44_ed_not_translate (
853             vm, node, sw_if_index, b, ip, IP_PROTOCOL_ICMP, rx_fib_index)))
854         {
855           return next;
856         }
857     }
858
859   if (PREDICT_FALSE (icmp_type_is_error_message (
860         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
861     {
862       b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
863       return NAT_NEXT_DROP;
864     }
865
866   next =
867     slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address, lookup_sport,
868                   lookup_dport, ip->protocol, rx_fib_index, tx_sw_if_index, &s,
869                   node, next, thread_index, vlib_time_now (vm));
870
871   if (NAT_NEXT_DROP == next)
872     goto out;
873
874   if (PREDICT_TRUE (!ip4_is_fragment (ip)))
875     {
876       ip_csum_t sum = ip_incremental_checksum_buffer (
877         vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b),
878         ntohs (ip->length) - ip4_header_bytes (ip), 0);
879       checksum = ~ip_csum_fold (sum);
880       if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff))
881         {
882           next = NAT_NEXT_DROP;
883           goto out;
884         }
885     }
886
887 out:
888   if (PREDICT_TRUE (next != NAT_NEXT_DROP && s))
889     {
890       /* Accounting */
891       nat44_session_update_counters (
892         s, now, vlib_buffer_length_in_chain (vm, b), thread_index);
893       /* Per-user LRU list maintenance */
894       nat44_session_update_lru (sm, s, thread_index);
895     }
896   *s_p = s;
897   return next;
898 }
899
900 static snat_session_t *
901 nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
902                                         ip4_header_t *ip, u32 rx_fib_index,
903                                         u32 thread_index, f64 now,
904                                         vlib_main_t *vm,
905                                         vlib_node_runtime_t *node)
906 {
907   clib_bihash_kv_16_8_t s_kv, s_value;
908   snat_static_mapping_t *m = NULL;
909   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
910   snat_session_t *s = NULL;
911   u32 tx_fib_index;
912   int i;
913   ip4_address_t new_src_addr = { 0 };
914   ip4_address_t new_dst_addr = ip->dst_address;
915
916   if (PREDICT_FALSE (
917         nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
918     {
919       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
920       nat_ipfix_logging_max_sessions (thread_index,
921                                       sm->max_translations_per_thread);
922       nat_elog_notice (sm, "maximum sessions exceeded");
923       return 0;
924     }
925
926   tx_fib_index = get_tx_fib_index (rx_fib_index, ip->dst_address);
927
928   // Try to find static mapping first
929   m = nat44_ed_sm_i2o_lookup (sm, ip->src_address, 0, rx_fib_index,
930                               ip->protocol);
931   if (m)
932     {
933       new_src_addr = m->external_addr;
934     }
935   else
936     {
937       pool_foreach (s, tsm->sessions)
938         {
939           if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
940             {
941               init_ed_k (&s_kv, s->out2in.addr.as_u32, 0,
942                          ip->dst_address.as_u32, 0, tx_fib_index,
943                          ip->protocol);
944               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
945                 {
946                   new_src_addr = s->out2in.addr;
947                 }
948               break;
949             }
950         }
951
952       if (!new_src_addr.as_u32)
953         {
954           for (i = 0; i < vec_len (sm->addresses); i++)
955             {
956               init_ed_k (&s_kv, sm->addresses[i].addr.as_u32, 0,
957                          ip->dst_address.as_u32, 0, tx_fib_index,
958                          ip->protocol);
959               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
960                 {
961                   new_src_addr = sm->addresses[i].addr;
962                 }
963             }
964         }
965     }
966
967   if (!new_src_addr.as_u32)
968     {
969       // could not allocate address for translation ...
970       return 0;
971     }
972
973   s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
974   if (!s)
975     {
976       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
977       nat_elog_warn (sm, "create NAT session failed");
978       return 0;
979     }
980
981   nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
982                         ip->dst_address, 0, rx_fib_index, ip->protocol);
983   nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
984   nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
985
986   // hairpinning?
987   int is_hairpinning = nat44_ed_external_sm_lookup (
988     sm, ip->dst_address, 0, ip->protocol, &new_dst_addr, NULL);
989   s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
990
991   nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
992   nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
993
994   nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
995                         tx_fib_index, ip->protocol);
996   nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
997   nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
998   nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
999
1000   s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1001   s->out2in.addr.as_u32 = new_src_addr.as_u32;
1002   s->out2in.fib_index = tx_fib_index;
1003   s->in2out.addr.as_u32 = ip->src_address.as_u32;
1004   s->in2out.fib_index = rx_fib_index;
1005   s->in2out.port = s->out2in.port = ip->protocol;
1006   if (m)
1007     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1008
1009   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
1010     {
1011       nat_elog_notice (sm, "in2out flow hash add failed");
1012       nat_ed_session_delete (sm, s, thread_index, 1);
1013       return NULL;
1014     }
1015
1016   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1))
1017     {
1018       nat_elog_notice (sm, "out2in flow hash add failed");
1019       nat_ed_session_delete (sm, s, thread_index, 1);
1020       return NULL;
1021     }
1022
1023   per_vrf_sessions_register_session (s, thread_index);
1024
1025   /* Accounting */
1026   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
1027                                  thread_index);
1028   /* Per-user LRU list maintenance */
1029   nat44_session_update_lru (sm, s, thread_index);
1030
1031   return s;
1032 }
1033
1034 static inline uword
1035 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
1036                                           vlib_node_runtime_t *node,
1037                                           vlib_frame_t *frame,
1038                                           int is_output_feature,
1039                                           int is_multi_worker)
1040 {
1041   u32 n_left_from, *from;
1042   snat_main_t *sm = &snat_main;
1043   f64 now = vlib_time_now (vm);
1044   u32 thread_index = vm->thread_index;
1045   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1046   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
1047     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
1048
1049   from = vlib_frame_vector_args (frame);
1050   n_left_from = frame->n_vectors;
1051
1052   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1053   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1054   vlib_get_buffers (vm, from, b, n_left_from);
1055
1056   while (n_left_from > 0)
1057     {
1058       vlib_buffer_t *b0;
1059       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1060       u32 tx_sw_if_index0;
1061       u32 cntr_sw_if_index0;
1062       ip_protocol_t proto0;
1063       ip4_header_t *ip0;
1064       snat_session_t *s0 = 0;
1065       clib_bihash_kv_16_8_t kv0 = { 0 }, value0;
1066       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1067       nat_6t_flow_t *f = 0;
1068       nat_6t_t lookup;
1069       int lookup_skipped = 0;
1070
1071       b0 = *b;
1072       b++;
1073
1074       /* Prefetch next iteration. */
1075       if (PREDICT_TRUE (n_left_from >= 2))
1076         {
1077           vlib_buffer_t *p2;
1078
1079           p2 = *b;
1080
1081           vlib_prefetch_buffer_header (p2, LOAD);
1082
1083           clib_prefetch_load (p2->data);
1084         }
1085
1086       if (is_output_feature)
1087         {
1088           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1089         }
1090
1091       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1092
1093       ip0 =
1094         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
1095
1096       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1097       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1098       cntr_sw_if_index0 =
1099         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1100       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1101                                                            rx_sw_if_index0);
1102       lookup.fib_index = rx_fib_index0;
1103
1104       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1105         {
1106           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1107           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1108                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1109                                        0);
1110           next[0] = NAT_NEXT_ICMP_ERROR;
1111           goto trace0;
1112         }
1113
1114       proto0 = ip0->protocol;
1115
1116       if (is_output_feature)
1117         {
1118           if (PREDICT_FALSE
1119               (nat_not_translate_output_feature_fwd
1120                (sm, ip0, thread_index, now, vm, b0)))
1121             goto trace0;
1122         }
1123
1124       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
1125         {
1126           if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1127                 ICMP4_echo_request &&
1128               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1129                 ICMP4_echo_reply &&
1130               !icmp_type_is_error_message (
1131                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
1132             {
1133               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
1134               next[0] = NAT_NEXT_DROP;
1135               goto trace0;
1136             }
1137           int err = nat_get_icmp_session_lookup_values (
1138             b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
1139             &lookup.dport, &lookup.proto);
1140           if (err != 0)
1141             {
1142               b0->error = node->errors[err];
1143               next[0] = NAT_NEXT_DROP;
1144               goto trace0;
1145             }
1146         }
1147       else
1148         {
1149           lookup.proto = ip0->protocol;
1150           lookup.saddr.as_u32 = ip0->src_address.as_u32;
1151           lookup.daddr.as_u32 = ip0->dst_address.as_u32;
1152           lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
1153           lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
1154         }
1155
1156       /* there might be a stashed index in vnet_buffer2 from handoff or
1157        * classify node, see if it can be used */
1158       if (is_multi_worker &&
1159           !pool_is_free_index (tsm->sessions,
1160                                vnet_buffer2 (b0)->nat.cached_session_index))
1161         {
1162           s0 = pool_elt_at_index (tsm->sessions,
1163                                   vnet_buffer2 (b0)->nat.cached_session_index);
1164           if (PREDICT_TRUE (
1165                 nat_6t_t_eq (&s0->i2o.match, &lookup)
1166                 // for some hairpinning cases there are two "i2i" flows instead
1167                 // of i2o and o2i as both hosts are on inside
1168                 || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1169                     nat_6t_t_eq (&s0->o2i.match, &lookup))))
1170             {
1171               /* yes, this is the droid we're looking for */
1172               lookup_skipped = 1;
1173               goto skip_lookup;
1174             }
1175           s0 = NULL;
1176         }
1177
1178       init_ed_k (&kv0, lookup.saddr.as_u32, lookup.sport, lookup.daddr.as_u32,
1179                  lookup.dport, lookup.fib_index, lookup.proto);
1180
1181       // lookup flow
1182       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1183         {
1184           // flow does not exist go slow path
1185           next[0] = def_slow;
1186           goto trace0;
1187         }
1188
1189       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1190       s0 =
1191         pool_elt_at_index (tsm->sessions,
1192                            ed_value_get_session_index (&value0));
1193
1194     skip_lookup:
1195
1196       ASSERT (thread_index == s0->thread_index);
1197
1198       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1199         {
1200           // session is closed, go slow path
1201           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1202           nat_ed_session_delete (sm, s0, thread_index, 1);
1203           s0 = 0;
1204           next[0] = def_slow;
1205           goto trace0;
1206         }
1207
1208       // drop if session expired
1209       u64 sess_timeout_time;
1210       sess_timeout_time =
1211         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1212       if (now >= sess_timeout_time)
1213         {
1214           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1215           nat_ed_session_delete (sm, s0, thread_index, 1);
1216           s0 = 0;
1217           // session is closed, go slow path
1218           next[0] = def_slow;
1219           goto trace0;
1220         }
1221
1222       b0->flags |= VNET_BUFFER_F_IS_NATED;
1223
1224       if (nat_6t_t_eq (&s0->i2o.match, &lookup))
1225         {
1226           f = &s0->i2o;
1227         }
1228       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1229                nat_6t_t_eq (&s0->o2i.match, &lookup))
1230         {
1231           f = &s0->o2i;
1232         }
1233       else
1234         {
1235           translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
1236           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1237           nat_ed_session_delete (sm, s0, thread_index, 1);
1238           s0 = 0;
1239           next[0] = NAT_NEXT_DROP;
1240           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1241           goto trace0;
1242         }
1243
1244       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1245           (translation_error = nat_6t_flow_buf_translate_i2o (
1246              vm, sm, b0, ip0, f, proto0, is_output_feature)))
1247         {
1248           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1249           nat_ed_session_delete (sm, s0, thread_index, 1);
1250           s0 = 0;
1251           next[0] = NAT_NEXT_DROP;
1252           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1253           goto trace0;
1254         }
1255
1256       switch (proto0)
1257         {
1258         case IP_PROTOCOL_TCP:
1259           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
1260                                          thread_index, cntr_sw_if_index0, 1);
1261           nat44_set_tcp_session_state_i2o (
1262             sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
1263             thread_index);
1264           break;
1265         case IP_PROTOCOL_UDP:
1266           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
1267                                          thread_index, cntr_sw_if_index0, 1);
1268           break;
1269         case IP_PROTOCOL_ICMP:
1270           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
1271                                          thread_index, cntr_sw_if_index0, 1);
1272           break;
1273         default:
1274           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
1275                                          thread_index, cntr_sw_if_index0, 1);
1276           break;
1277         }
1278
1279       /* Accounting */
1280       nat44_session_update_counters (s0, now,
1281                                      vlib_buffer_length_in_chain (vm, b0),
1282                                      thread_index);
1283       /* Per-user LRU list maintenance */
1284       nat44_session_update_lru (sm, s0, thread_index);
1285
1286     trace0:
1287       if (PREDICT_FALSE
1288           ((node->flags & VLIB_NODE_FLAG_TRACE)
1289            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1290         {
1291           nat_in2out_ed_trace_t *t =
1292             vlib_add_trace (vm, node, b0, sizeof (*t));
1293           t->sw_if_index = rx_sw_if_index0;
1294           t->next_index = next[0];
1295           t->is_slow_path = 0;
1296           t->translation_error = translation_error;
1297           t->lookup_skipped = lookup_skipped;
1298           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1299
1300           if (s0)
1301             {
1302               t->session_index = s0 - tsm->sessions;
1303               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1304               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1305               t->translation_via_i2of = (&s0->i2o == f);
1306               t->tcp_state = s0->tcp_state;
1307             }
1308           else
1309             {
1310               t->session_index = ~0;
1311             }
1312         }
1313
1314       if (next[0] == NAT_NEXT_DROP)
1315         {
1316           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
1317                                          thread_index, cntr_sw_if_index0, 1);
1318         }
1319
1320       n_left_from--;
1321       next++;
1322     }
1323
1324   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1325                                frame->n_vectors);
1326   return frame->n_vectors;
1327 }
1328
1329 static inline uword
1330 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
1331                                           vlib_node_runtime_t *node,
1332                                           vlib_frame_t *frame,
1333                                           int is_output_feature,
1334                                           int is_multi_worker)
1335 {
1336   u32 n_left_from, *from;
1337   snat_main_t *sm = &snat_main;
1338   f64 now = vlib_time_now (vm);
1339   u32 thread_index = vm->thread_index;
1340   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1341
1342   from = vlib_frame_vector_args (frame);
1343   n_left_from = frame->n_vectors;
1344
1345   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1346   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1347   vlib_get_buffers (vm, from, b, n_left_from);
1348
1349   while (n_left_from > 0)
1350     {
1351       vlib_buffer_t *b0;
1352       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1353       u32 tx_sw_if_index0;
1354       u32 cntr_sw_if_index0;
1355       ip_protocol_t proto0;
1356       ip4_header_t *ip0;
1357       udp_header_t *udp0;
1358       icmp46_header_t *icmp0;
1359       snat_session_t *s0 = 0;
1360       clib_bihash_kv_16_8_t kv0 = { 0 }, value0;
1361       int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1362
1363       b0 = *b;
1364
1365       if (is_output_feature)
1366         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1367
1368       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1369
1370       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1371                               iph_offset0);
1372
1373       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1374       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1375       cntr_sw_if_index0 =
1376         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1377       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1378                                                            rx_sw_if_index0);
1379
1380       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1381         {
1382           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1383           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1384                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1385                                        0);
1386           next[0] = NAT_NEXT_ICMP_ERROR;
1387           goto trace0;
1388         }
1389
1390       udp0 = ip4_next_header (ip0);
1391       icmp0 = (icmp46_header_t *) udp0;
1392       proto0 = ip0->protocol;
1393
1394       if (PREDICT_FALSE (nat44_ed_is_unk_proto (proto0)))
1395         {
1396           s0 = nat44_ed_in2out_slowpath_unknown_proto (
1397             sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
1398           if (!s0)
1399             next[0] = NAT_NEXT_DROP;
1400
1401           if (NAT_NEXT_DROP != next[0] && s0 &&
1402               NAT_ED_TRNSL_ERR_SUCCESS !=
1403                 (translation_error = nat_6t_flow_buf_translate_i2o (
1404                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1405             {
1406               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1407               nat_ed_session_delete (sm, s0, thread_index, 1);
1408               s0 = 0;
1409               next[0] = NAT_NEXT_DROP;
1410               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1411               goto trace0;
1412             }
1413
1414           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
1415                                          thread_index, cntr_sw_if_index0, 1);
1416           goto trace0;
1417         }
1418
1419       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
1420         {
1421           next[0] = icmp_in2out_ed_slow_path (
1422             sm, b0, ip0, icmp0, rx_sw_if_index0, tx_sw_if_index0,
1423             rx_fib_index0, node, next[0], now, thread_index, &s0,
1424             is_multi_worker);
1425           if (NAT_NEXT_DROP != next[0] && s0 &&
1426               NAT_ED_TRNSL_ERR_SUCCESS !=
1427                 (translation_error = nat_6t_flow_buf_translate_i2o (
1428                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1429             {
1430               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1431               nat_ed_session_delete (sm, s0, thread_index, 1);
1432               s0 = 0;
1433               next[0] = NAT_NEXT_DROP;
1434               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1435               goto trace0;
1436             }
1437
1438           if (NAT_NEXT_DROP != next[0])
1439             {
1440               vlib_increment_simple_counter (
1441                 &sm->counters.slowpath.in2out.icmp, thread_index,
1442                 cntr_sw_if_index0, 1);
1443             }
1444           goto trace0;
1445         }
1446
1447       init_ed_k (
1448         &kv0, ip0->src_address.as_u32, vnet_buffer (b0)->ip.reass.l4_src_port,
1449         ip0->dst_address.as_u32, vnet_buffer (b0)->ip.reass.l4_dst_port,
1450         rx_fib_index0, ip0->protocol);
1451       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1452         {
1453           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1454           s0 =
1455             pool_elt_at_index (tsm->sessions,
1456                                ed_value_get_session_index (&value0));
1457         }
1458
1459       if (!s0)
1460         {
1461           if (is_output_feature)
1462             {
1463               if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
1464                     sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1465                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1466                     rx_sw_if_index0, tx_sw_if_index0, is_multi_worker)))
1467                 goto trace0;
1468
1469               /*
1470                * Send DHCP packets to the ipv4 stack, or we won't
1471                * be able to use dhcp client on the outside interface
1472                */
1473               if (PREDICT_FALSE (
1474                     proto0 == IP_PROTOCOL_UDP &&
1475                     (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1476                      clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)) &&
1477                     ip0->dst_address.as_u32 == 0xffffffff))
1478                 goto trace0;
1479             }
1480           else
1481             {
1482               if (PREDICT_FALSE (
1483                     nat44_ed_not_translate (vm, node, rx_sw_if_index0, b0, ip0,
1484                                             proto0, rx_fib_index0)))
1485                 goto trace0;
1486             }
1487
1488           next[0] =
1489             slow_path_ed (vm, sm, b0, ip0->src_address, ip0->dst_address,
1490                           vnet_buffer (b0)->ip.reass.l4_src_port,
1491                           vnet_buffer (b0)->ip.reass.l4_dst_port,
1492                           ip0->protocol, rx_fib_index0, tx_sw_if_index0, &s0,
1493                           node, next[0], thread_index, now);
1494
1495           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1496             goto trace0;
1497
1498           if (PREDICT_FALSE (!s0))
1499             goto trace0;
1500
1501         }
1502
1503       b0->flags |= VNET_BUFFER_F_IS_NATED;
1504
1505       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1506           (translation_error = nat_6t_flow_buf_translate_i2o (
1507              vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1508         {
1509           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1510           nat_ed_session_delete (sm, s0, thread_index, 1);
1511           s0 = 0;
1512           next[0] = NAT_NEXT_DROP;
1513           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1514           goto trace0;
1515         }
1516
1517       if (PREDICT_TRUE (proto0 == IP_PROTOCOL_TCP))
1518         {
1519           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
1520                                          thread_index, cntr_sw_if_index0, 1);
1521           nat44_set_tcp_session_state_i2o (
1522             sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
1523             thread_index);
1524         }
1525       else
1526         {
1527           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
1528                                          thread_index, cntr_sw_if_index0, 1);
1529         }
1530
1531       /* Accounting */
1532       nat44_session_update_counters (s0, now,
1533                                      vlib_buffer_length_in_chain
1534                                      (vm, b0), thread_index);
1535       /* Per-user LRU list maintenance */
1536       nat44_session_update_lru (sm, s0, thread_index);
1537
1538     trace0:
1539       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1540                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1541         {
1542           nat_in2out_ed_trace_t *t =
1543             vlib_add_trace (vm, node, b0, sizeof (*t));
1544           t->sw_if_index = rx_sw_if_index0;
1545           t->next_index = next[0];
1546           t->is_slow_path = 1;
1547           t->translation_error = translation_error;
1548           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1549
1550           if (s0)
1551             {
1552               t->session_index = s0 - tsm->sessions;
1553               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1554               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1555               t->translation_via_i2of = 1;
1556               t->tcp_state = s0->tcp_state;
1557             }
1558
1559           else
1560             {
1561               t->session_index = ~0;
1562             }
1563         }
1564
1565       if (next[0] == NAT_NEXT_DROP)
1566         {
1567           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
1568                                          thread_index, cntr_sw_if_index0, 1);
1569         }
1570
1571       n_left_from--;
1572       next++;
1573       b++;
1574     }
1575
1576   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1577                                frame->n_vectors);
1578
1579   return frame->n_vectors;
1580 }
1581
1582 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1583                                      vlib_node_runtime_t * node,
1584                                      vlib_frame_t * frame)
1585 {
1586   if (snat_main.num_workers > 1)
1587     {
1588       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1);
1589     }
1590   else
1591     {
1592       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0);
1593     }
1594 }
1595
1596 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1597   .name = "nat44-ed-in2out",
1598   .vector_size = sizeof (u32),
1599   .sibling_of = "nat-default",
1600   .format_trace = format_nat_in2out_ed_trace,
1601   .type = VLIB_NODE_TYPE_INTERNAL,
1602   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1603   .error_strings = nat_in2out_ed_error_strings,
1604   .runtime_data_bytes = sizeof (snat_runtime_t),
1605 };
1606
1607 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1608                                             vlib_node_runtime_t * node,
1609                                             vlib_frame_t * frame)
1610 {
1611   if (snat_main.num_workers > 1)
1612     {
1613       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1);
1614     }
1615   else
1616     {
1617       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0);
1618     }
1619 }
1620
1621 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1622   .name = "nat44-ed-in2out-output",
1623   .vector_size = sizeof (u32),
1624   .sibling_of = "nat-default",
1625   .format_trace = format_nat_in2out_ed_trace,
1626   .type = VLIB_NODE_TYPE_INTERNAL,
1627   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1628   .error_strings = nat_in2out_ed_error_strings,
1629   .runtime_data_bytes = sizeof (snat_runtime_t),
1630 };
1631
1632 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1633                                               vlib_node_runtime_t *
1634                                               node, vlib_frame_t * frame)
1635 {
1636   if (snat_main.num_workers > 1)
1637     {
1638       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 1);
1639     }
1640   else
1641     {
1642       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 0);
1643     }
1644 }
1645
1646 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1647   .name = "nat44-ed-in2out-slowpath",
1648   .vector_size = sizeof (u32),
1649   .sibling_of = "nat-default",
1650   .format_trace = format_nat_in2out_ed_trace,
1651   .type = VLIB_NODE_TYPE_INTERNAL,
1652   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1653   .error_strings = nat_in2out_ed_error_strings,
1654   .runtime_data_bytes = sizeof (snat_runtime_t),
1655 };
1656
1657 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1658                                                      vlib_node_runtime_t
1659                                                      * node,
1660                                                      vlib_frame_t * frame)
1661 {
1662   if (snat_main.num_workers > 1)
1663     {
1664       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 1);
1665     }
1666   else
1667     {
1668       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 0);
1669     }
1670 }
1671
1672 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1673   .name = "nat44-ed-in2out-output-slowpath",
1674   .vector_size = sizeof (u32),
1675   .sibling_of = "nat-default",
1676   .format_trace = format_nat_in2out_ed_trace,
1677   .type = VLIB_NODE_TYPE_INTERNAL,
1678   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1679   .error_strings = nat_in2out_ed_error_strings,
1680   .runtime_data_bytes = sizeof (snat_runtime_t),
1681 };
1682
1683 static u8 *
1684 format_nat_pre_trace (u8 * s, va_list * args)
1685 {
1686   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1687   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1688   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1689   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1690                  t->arc_next_index);
1691 }
1692
1693 VLIB_NODE_FN (nat_pre_in2out_node)
1694   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1695 {
1696   return nat_pre_node_fn_inline (vm, node, frame,
1697                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1698 }
1699
1700 VLIB_NODE_FN (nat_pre_in2out_output_node)
1701   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1702 {
1703   return nat_pre_node_fn_inline (vm, node, frame,
1704                                  NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
1705 }
1706
1707 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1708   .name = "nat-pre-in2out",
1709   .vector_size = sizeof (u32),
1710   .sibling_of = "nat-default",
1711   .format_trace = format_nat_pre_trace,
1712   .type = VLIB_NODE_TYPE_INTERNAL,
1713   .n_errors = 0,
1714 };
1715
1716 VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
1717   .name = "nat-pre-in2out-output",
1718   .vector_size = sizeof (u32),
1719   .sibling_of = "nat-default",
1720   .format_trace = format_nat_pre_trace,
1721   .type = VLIB_NODE_TYPE_INTERNAL,
1722   .n_errors = 0,
1723 };
1724
1725 /*
1726  * fd.io coding-style-patch-verification: ON
1727  *
1728  * Local Variables:
1729  * eval: (c-set-style "gnu")
1730  * End:
1731  */