nat: make tcp/udp/icmp packet counters consistent
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_in2out.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27
28 #include <nat/lib/nat_inlines.h>
29 #include <nat/lib/ipfix_logging.h>
30
31 #include <nat/nat44-ed/nat44_ed.h>
32 #include <nat/nat44-ed/nat44_ed_inlines.h>
33
34 static char *nat_in2out_ed_error_strings[] = {
35 #define _(sym,string) string,
36   foreach_nat_in2out_ed_error
37 #undef _
38 };
39
40 typedef struct
41 {
42   u32 sw_if_index;
43   u32 next_index;
44   u32 session_index;
45   nat_translation_error_e translation_error;
46   nat_6t_flow_t i2of;
47   nat_6t_flow_t o2if;
48   clib_bihash_kv_16_8_t search_key;
49   u8 is_slow_path;
50   u8 translation_via_i2of;
51   u8 lookup_skipped;
52   u8 tcp_state;
53 } nat_in2out_ed_trace_t;
54
55 static u8 *
56 format_nat_in2out_ed_trace (u8 * s, va_list * args)
57 {
58   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
59   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
60   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
61   char *tag;
62
63   tag =
64     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
65     "NAT44_IN2OUT_ED_FAST_PATH";
66
67   s = format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
68               t->next_index);
69   if (~0 != t->session_index)
70     {
71       s = format (s, ", session %d, translation result '%U' via %s",
72                   t->session_index, format_nat_ed_translation_error,
73                   t->translation_error,
74                   t->translation_via_i2of ? "i2of" : "o2if");
75       s = format (s, "\n  i2of %U", format_nat_6t_flow, &t->i2of);
76       s = format (s, "\n  o2if %U", format_nat_6t_flow, &t->o2if);
77     }
78   if (!t->is_slow_path)
79     {
80       if (t->lookup_skipped)
81         {
82           s = format (s, "\n  lookup skipped - cached session index used");
83         }
84       else
85         {
86           s = format (s, "\n  search key %U", format_ed_session_kvp,
87                       &t->search_key);
88         }
89     }
90   if (IP_PROTOCOL_TCP == t->i2of.match.proto)
91     {
92       s = format (s, "\n  TCP state: %U", format_nat44_ed_tcp_state,
93                   t->tcp_state);
94     }
95
96   return s;
97 }
98
99 /**
100  * @brief Check if packet should be translated
101  *
102  * Packets aimed at outside interface and external address with active session
103  * should be translated.
104  *
105  * @param sm            NAT main
106  * @param rt            NAT runtime data
107  * @param sw_if_index0  index of the inside interface
108  * @param ip0           IPv4 header
109  * @param rx_fib_index0 RX FIB index
110  *
111  * @returns 0 if packet should be translated otherwise 1
112  */
113 static inline int
114 snat_not_translate_fast (snat_main_t *sm, vlib_node_runtime_t *node,
115                          u32 sw_if_index0, ip4_header_t *ip0,
116                          u32 rx_fib_index0)
117 {
118   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
119   nat_outside_fib_t *outside_fib;
120   fib_prefix_t pfx = {
121     .fp_proto = FIB_PROTOCOL_IP4,
122     .fp_len = 32,
123     .fp_addr = {
124                 .ip4.as_u32 = ip0->dst_address.as_u32,
125                 }
126     ,
127   };
128
129   /* Don't NAT packet aimed at the intfc address */
130   if (PREDICT_FALSE (
131         is_interface_addr (sm, node, sw_if_index0, ip0->dst_address.as_u32)))
132     return 1;
133
134   fei = fib_table_lookup (rx_fib_index0, &pfx);
135   if (FIB_NODE_INDEX_INVALID != fei)
136     {
137       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
138       if (sw_if_index == ~0)
139         {
140           vec_foreach (outside_fib, sm->outside_fibs)
141             {
142               fei = fib_table_lookup (outside_fib->fib_index, &pfx);
143               if (FIB_NODE_INDEX_INVALID != fei)
144                 {
145                   sw_if_index = fib_entry_get_resolving_interface (fei);
146                   if (sw_if_index != ~0)
147                     break;
148                 }
149             }
150         }
151       if (sw_if_index == ~0)
152         return 1;
153
154       snat_interface_t *i;
155       pool_foreach (i, sm->interfaces)
156         {
157           /* NAT packet aimed at outside interface */
158           if ((nat44_ed_is_interface_outside (i)) &&
159               (sw_if_index == i->sw_if_index))
160             return 0;
161         }
162     }
163
164   return 1;
165 }
166
167 static int
168 nat_ed_alloc_addr_and_port_with_snat_address (
169   snat_main_t *sm, u8 proto, u32 thread_index, snat_address_t *a,
170   u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
171   ip4_address_t *outside_addr, u16 *outside_port)
172 {
173   const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
174
175   s->o2i.match.daddr = a->addr;
176   /* first try port suggested by caller */
177   u16 port = clib_net_to_host_u16 (*outside_port);
178   u16 port_offset = port - port_thread_offset;
179   if (port <= port_thread_offset ||
180       port > port_thread_offset + port_per_thread)
181     {
182       /* need to pick a different port, suggested port doesn't fit in
183        * this thread's port range */
184       port_offset = snat_random_port (0, port_per_thread - 1);
185       port = port_thread_offset + port_offset;
186     }
187   u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
188   do
189     {
190       if (IP_PROTOCOL_ICMP == proto)
191         {
192           s->o2i.match.sport = clib_host_to_net_u16 (port);
193         }
194       s->o2i.match.dport = clib_host_to_net_u16 (port);
195       if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
196         {
197           *outside_addr = a->addr;
198           *outside_port = clib_host_to_net_u16 (port);
199           return 0;
200         }
201       port_offset = snat_random_port (0, port_per_thread - 1);
202       port = port_thread_offset + port_offset;
203       --attempts;
204     }
205   while (attempts > 0);
206   return 1;
207 }
208
209 static int
210 nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index,
211                             u32 tx_sw_if_index, u32 nat_proto,
212                             u32 thread_index, ip4_address_t s_addr,
213                             ip4_address_t d_addr, u32 snat_thread_index,
214                             snat_session_t *s, ip4_address_t *outside_addr,
215                             u16 *outside_port)
216 {
217   if (vec_len (sm->addresses) > 0)
218     {
219       u32 s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses);
220       snat_address_t *a, *ja = 0, *ra = 0, *ba = 0;
221       int i;
222
223       // output feature
224       if (tx_sw_if_index != ~0)
225         {
226           for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
227             {
228               a = sm->addresses + i;
229               if (a->fib_index == rx_fib_index)
230                 {
231                   if (a->sw_if_index == tx_sw_if_index)
232                     {
233                       if ((a->addr_len != ~0) &&
234                           (a->net.as_u32 ==
235                            (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
236
237                         {
238                           return nat_ed_alloc_addr_and_port_with_snat_address (
239                             sm, nat_proto, thread_index, a,
240                             sm->port_per_thread, snat_thread_index, s,
241                             outside_addr, outside_port);
242                         }
243                       ra = a;
244                     }
245                   ja = a;
246                 }
247               else if (a->fib_index == ~0)
248                 {
249                   ba = a;
250                 }
251             }
252           for (i = 0; i < s_addr_offset; ++i)
253             {
254               a = sm->addresses + i;
255               if (a->fib_index == rx_fib_index)
256                 {
257                   if (a->sw_if_index == tx_sw_if_index)
258                     {
259                       if ((a->addr_len != ~0) &&
260                           (a->net.as_u32 ==
261                            (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
262
263                         {
264                           return nat_ed_alloc_addr_and_port_with_snat_address (
265                             sm, nat_proto, thread_index, a,
266                             sm->port_per_thread, snat_thread_index, s,
267                             outside_addr, outside_port);
268                         }
269                       ra = a;
270                     }
271                   ja = a;
272                 }
273               else if (a->fib_index == ~0)
274                 {
275                   ba = a;
276                 }
277             }
278           if (ra)
279             {
280               return nat_ed_alloc_addr_and_port_with_snat_address (
281                 sm, nat_proto, thread_index, ra, sm->port_per_thread,
282                 snat_thread_index, s, outside_addr, outside_port);
283             }
284         }
285       else
286         {
287           // first try nat pool addresses to sw interface addreses mappings
288           for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
289             {
290               a = sm->addresses + i;
291               if (a->fib_index == rx_fib_index)
292                 {
293                   if ((a->addr_len != ~0) &&
294                       (a->net.as_u32 ==
295                        (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
296                     {
297                       return nat_ed_alloc_addr_and_port_with_snat_address (
298                         sm, nat_proto, thread_index, a, sm->port_per_thread,
299                         snat_thread_index, s, outside_addr, outside_port);
300                     }
301                   ja = a;
302                 }
303               else if (a->fib_index == ~0)
304                 {
305                   ba = a;
306                 }
307             }
308           for (i = 0; i < s_addr_offset; ++i)
309             {
310               a = sm->addresses + i;
311               if (a->fib_index == rx_fib_index)
312                 {
313                   if ((a->addr_len != ~0) &&
314                       (a->net.as_u32 ==
315                        (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
316                     {
317                       return nat_ed_alloc_addr_and_port_with_snat_address (
318                         sm, nat_proto, thread_index, a, sm->port_per_thread,
319                         snat_thread_index, s, outside_addr, outside_port);
320                     }
321                   ja = a;
322                 }
323               else if (a->fib_index == ~0)
324                 {
325                   ba = a;
326                 }
327             }
328         }
329
330       if (ja || ba)
331         {
332           a = ja ? ja : ba;
333           return nat_ed_alloc_addr_and_port_with_snat_address (
334             sm, nat_proto, thread_index, a, sm->port_per_thread,
335             snat_thread_index, s, outside_addr, outside_port);
336         }
337     }
338   /* Totally out of translations to use... */
339   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
340   return 1;
341 }
342
343 static_always_inline u32
344 nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr)
345 {
346   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
347   nat_outside_fib_t *outside_fib;
348   fib_prefix_t pfx = {
349     .fp_proto = FIB_PROTOCOL_IP4,
350     .fp_len = 32,
351     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
352     ,
353   };
354   vec_foreach (outside_fib, sm->outside_fibs)
355     {
356       fei = fib_table_lookup (outside_fib->fib_index, &pfx);
357       if (FIB_NODE_INDEX_INVALID != fei)
358         {
359           if (fib_entry_get_resolving_interface (fei) != ~0)
360             {
361               return outside_fib->fib_index;
362             }
363         }
364     }
365   return ~0;
366 }
367
368 static_always_inline int
369 nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
370                              u16 match_port, ip_protocol_t match_protocol,
371                              ip4_address_t *daddr, u16 *dport)
372 {
373   snat_static_mapping_t *m =
374     nat44_ed_sm_o2i_lookup (sm, match_addr, match_port, 0, match_protocol);
375   if (!m)
376     {
377       /* Try address only mapping */
378       m = nat44_ed_sm_o2i_lookup (sm, match_addr, 0, 0, 0);
379       if (!m)
380         return 0;
381     }
382   *daddr = m->local_addr;
383   if (dport)
384     {
385       /* Address only mapping doesn't change port */
386       *dport = is_sm_addr_only (m->flags) ? match_port : m->local_port;
387     }
388   return 1;
389 }
390
391 static u32
392 slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
393               ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
394               u16 r_port, u8 proto, u32 rx_fib_index, u32 tx_sw_if_index,
395               snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
396               u32 thread_index, f64 now)
397 {
398   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
399   ip4_address_t outside_addr;
400   u16 outside_port;
401   u32 outside_fib_index;
402   u8 is_identity_nat = 0;
403
404   snat_session_t *s = NULL;
405   lb_nat_type_t lb = 0;
406   ip4_address_t daddr = r_addr;
407   u16 dport = r_port;
408
409   if (PREDICT_FALSE
410       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
411     {
412       if (!nat_lru_free_one (sm, thread_index, now))
413         {
414           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
415           nat_ipfix_logging_max_sessions (thread_index,
416                                           sm->max_translations_per_thread);
417           nat_elog_notice (sm, "maximum sessions exceeded");
418           return NAT_NEXT_DROP;
419         }
420     }
421
422   outside_fib_index = sm->outside_fib_index;
423
424   switch (vec_len (sm->outside_fibs))
425     {
426     case 0:
427       outside_fib_index = sm->outside_fib_index;
428       break;
429     case 1:
430       outside_fib_index = sm->outside_fibs[0].fib_index;
431       break;
432     default:
433       outside_fib_index = nat_outside_fib_index_lookup (sm, r_addr);
434       break;
435     }
436
437   ip4_address_t sm_addr;
438   u16 sm_port;
439   u32 sm_fib_index;
440   /* First try to match static mapping by local address and port */
441   int is_sm;
442   if (snat_static_mapping_match (vm, sm, l_addr, l_port, rx_fib_index, proto,
443                                  &sm_addr, &sm_port, &sm_fib_index, 0, 0, 0,
444                                  &lb, 0, &is_identity_nat, 0))
445     {
446       is_sm = 0;
447     }
448   else
449     {
450       if (PREDICT_FALSE (is_identity_nat))
451         {
452           *sessionp = NULL;
453           return next;
454         }
455       is_sm = 1;
456     }
457
458   if (PREDICT_TRUE (proto == IP_PROTOCOL_TCP))
459     {
460       if (PREDICT_FALSE (!tcp_flags_is_init (
461             vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
462         {
463           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
464           return NAT_NEXT_DROP;
465         }
466     }
467
468   s = nat_ed_session_alloc (sm, thread_index, now, proto);
469   ASSERT (s);
470
471   if (!is_sm)
472     {
473       s->in2out.addr = l_addr;
474       s->in2out.port = l_port;
475       s->proto = proto;
476       s->in2out.fib_index = rx_fib_index;
477       s->out2in.fib_index = outside_fib_index;
478
479       // suggest using local port to allocation function
480       outside_port = l_port;
481
482       // hairpinning?
483       int is_hairpinning = nat44_ed_external_sm_lookup (sm, r_addr, r_port,
484                                                         proto, &daddr, &dport);
485       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
486
487       // destination addr/port updated with real values in
488       // nat_ed_alloc_addr_and_port
489       nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
490                             s->out2in.fib_index, proto);
491       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
492       if (IP_PROTOCOL_ICMP == proto)
493         {
494           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
495         }
496       else
497         {
498           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
499         }
500       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
501
502       if (nat_ed_alloc_addr_and_port (
503             sm, rx_fib_index, tx_sw_if_index, proto, thread_index, l_addr,
504             r_addr, tsm->snat_thread_index, s, &outside_addr, &outside_port))
505         {
506           nat_elog_notice (sm, "addresses exhausted");
507           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
508           nat_ed_session_delete (sm, s, thread_index, 1);
509           return NAT_NEXT_DROP;
510         }
511       s->out2in.addr = outside_addr;
512       s->out2in.port = outside_port;
513     }
514   else
515     {
516       // static mapping
517       s->out2in.addr = outside_addr = sm_addr;
518       s->out2in.port = outside_port = sm_port;
519       s->in2out.addr = l_addr;
520       s->in2out.port = l_port;
521       s->proto = proto;
522       s->in2out.fib_index = rx_fib_index;
523       s->out2in.fib_index = outside_fib_index;
524       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
525
526       // hairpinning?
527       int is_hairpinning = nat44_ed_external_sm_lookup (sm, r_addr, r_port,
528                                                         proto, &daddr, &dport);
529       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
530
531       if (IP_PROTOCOL_ICMP == proto)
532         {
533           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
534                                 sm_port, s->out2in.fib_index, proto);
535           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
536         }
537       else
538         {
539           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr,
540                                 sm_port, s->out2in.fib_index, proto);
541           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
542         }
543       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
544       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
545       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
546         {
547           nat_elog_notice (sm, "out2in key add failed");
548           goto error;
549         }
550     }
551
552   if (lb)
553     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
554   s->ext_host_addr = r_addr;
555   s->ext_host_port = r_port;
556
557   nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port,
558                         rx_fib_index, proto);
559   nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
560   nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);
561
562   if (IP_PROTOCOL_ICMP == proto)
563     {
564       nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
565     }
566   else
567     {
568       nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
569       nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
570     }
571   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
572
573   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
574     {
575       nat_elog_notice (sm, "in2out key add failed");
576       goto error;
577     }
578
579   /* log NAT event */
580   nat_ipfix_logging_nat44_ses_create (
581     thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
582     s->in2out.port, s->out2in.port, s->in2out.fib_index);
583
584   nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
585                          s->in2out.port, &s->ext_host_nat_addr,
586                          s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
587                          &s->ext_host_addr, s->ext_host_port, s->proto, 0);
588
589   per_vrf_sessions_register_session (s, thread_index);
590
591   *sessionp = s;
592   return next;
593 error:
594   if (s)
595     {
596       nat_ed_session_delete (sm, s, thread_index, 1);
597     }
598   *sessionp = s = NULL;
599   return NAT_NEXT_DROP;
600 }
601
602 static_always_inline int
603 nat44_ed_not_translate (vlib_main_t *vm, snat_main_t *sm,
604                         vlib_node_runtime_t *node, u32 sw_if_index,
605                         vlib_buffer_t *b, ip4_header_t *ip, u32 proto,
606                         u32 rx_fib_index)
607 {
608   clib_bihash_kv_16_8_t kv, value;
609
610   init_ed_k (&kv, ip->dst_address.as_u32,
611              vnet_buffer (b)->ip.reass.l4_dst_port, ip->src_address.as_u32,
612              vnet_buffer (b)->ip.reass.l4_src_port, sm->outside_fib_index,
613              ip->protocol);
614
615   /* NAT packet aimed at external address if has active sessions */
616   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
617     {
618       /* or is static mappings */
619       ip4_address_t placeholder_addr;
620       u16 placeholder_port;
621       u32 placeholder_fib_index;
622       if (!snat_static_mapping_match (
623             vm, sm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
624             sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
625             &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
626         return 0;
627     }
628   else
629     return 0;
630
631   if (sm->forwarding_enabled)
632     return 1;
633
634   return snat_not_translate_fast (sm, node, sw_if_index, ip, rx_fib_index);
635 }
636
637 static_always_inline int
638 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
639                                       u32 thread_index, f64 now,
640                                       vlib_main_t * vm, vlib_buffer_t * b)
641 {
642   clib_bihash_kv_16_8_t kv, value;
643   snat_session_t *s = 0;
644   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
645
646   if (!sm->forwarding_enabled)
647     return 0;
648
649   if (ip->protocol == IP_PROTOCOL_ICMP)
650     {
651       ip4_address_t lookup_saddr, lookup_daddr;
652       u16 lookup_sport, lookup_dport;
653       u8 lookup_protocol;
654       if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
655                                               &lookup_sport, &lookup_daddr,
656                                               &lookup_dport, &lookup_protocol))
657         return 0;
658       init_ed_k (&kv, lookup_saddr.as_u32, lookup_sport, lookup_daddr.as_u32,
659                  lookup_dport, 0, lookup_protocol);
660     }
661   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
662     {
663       init_ed_k (&kv, ip->src_address.as_u32,
664                  vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
665                  vnet_buffer (b)->ip.reass.l4_dst_port, 0, ip->protocol);
666     }
667   else
668     {
669       init_ed_k (&kv, ip->src_address.as_u32, 0, ip->dst_address.as_u32, 0, 0,
670                  ip->protocol);
671     }
672
673   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
674     {
675       ASSERT (thread_index == ed_value_get_thread_index (&value));
676       s =
677         pool_elt_at_index (tsm->sessions,
678                            ed_value_get_session_index (&value));
679
680       if (na44_ed_is_fwd_bypass_session (s))
681         {
682           if (ip->protocol == IP_PROTOCOL_TCP)
683             {
684               nat44_set_tcp_session_state_i2o (
685                 sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags,
686                 thread_index);
687             }
688           /* Accounting */
689           nat44_session_update_counters (s, now,
690                                          vlib_buffer_length_in_chain (vm, b),
691                                          thread_index);
692           /* Per-user LRU list maintenance */
693           nat44_session_update_lru (sm, s, thread_index);
694           return 1;
695         }
696       else
697         return 0;
698     }
699
700   return 0;
701 }
702
703 static_always_inline int
704 nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
705                                        ip4_header_t *ip, u16 src_port,
706                                        u16 dst_port, u32 thread_index,
707                                        u32 rx_sw_if_index, u32 tx_sw_if_index,
708                                        int is_multi_worker)
709 {
710   clib_bihash_kv_16_8_t kv, value;
711   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
712   snat_interface_t *i;
713   snat_session_t *s;
714   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
715   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
716
717   /* src NAT check */
718   init_ed_k (&kv, ip->src_address.as_u32, src_port, ip->dst_address.as_u32,
719              dst_port, tx_fib_index, ip->protocol);
720   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
721     {
722       ASSERT (thread_index == ed_value_get_thread_index (&value));
723       s =
724         pool_elt_at_index (tsm->sessions,
725                            ed_value_get_session_index (&value));
726       return 1;
727     }
728
729   /* dst NAT check */
730   if (is_multi_worker &&
731       PREDICT_TRUE (!pool_is_free_index (
732         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index)))
733     {
734       nat_6t_t lookup;
735       lookup.fib_index = rx_fib_index;
736       lookup.proto = ip->protocol;
737       lookup.daddr.as_u32 = ip->src_address.as_u32;
738       lookup.dport = src_port;
739       lookup.saddr.as_u32 = ip->dst_address.as_u32;
740       lookup.sport = dst_port;
741       s = pool_elt_at_index (
742         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index);
743       if (PREDICT_TRUE (nat_6t_t_eq (&s->i2o.match, &lookup)))
744         {
745           goto skip_dst_nat_lookup;
746         }
747       s = NULL;
748     }
749
750   init_ed_k (&kv, ip->dst_address.as_u32, dst_port, ip->src_address.as_u32,
751              src_port, rx_fib_index, ip->protocol);
752   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
753     {
754       ASSERT (thread_index == ed_value_get_thread_index (&value));
755       s =
756         pool_elt_at_index (tsm->sessions,
757                            ed_value_get_session_index (&value));
758
759     skip_dst_nat_lookup:
760       if (na44_ed_is_fwd_bypass_session (s))
761         return 0;
762
763       /* hairpinning */
764       pool_foreach (i, sm->output_feature_interfaces)
765         {
766           if ((nat44_ed_is_interface_inside (i)) &&
767               (rx_sw_if_index == i->sw_if_index))
768             return 0;
769         }
770       return 1;
771     }
772
773   return 0;
774 }
775
776 static inline u32
777 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
778                           icmp46_header_t *icmp, u32 sw_if_index,
779                           u32 tx_sw_if_index, u32 rx_fib_index,
780                           vlib_node_runtime_t *node, u32 next, f64 now,
781                           u32 thread_index, snat_session_t **s_p,
782                           int is_multi_worker)
783 {
784   vlib_main_t *vm = vlib_get_main ();
785   u16 checksum;
786   int err;
787   snat_session_t *s = NULL;
788   u8 lookup_protocol = ip->protocol;
789   u16 lookup_sport, lookup_dport;
790   ip4_address_t lookup_saddr, lookup_daddr;
791
792   err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
793                                             &lookup_sport, &lookup_daddr,
794                                             &lookup_dport, &lookup_protocol);
795   if (err != 0)
796     {
797       b->error = node->errors[err];
798       return NAT_NEXT_DROP;
799     }
800
801   if (tx_sw_if_index != ~0)
802     {
803       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
804             sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
805             tx_sw_if_index, is_multi_worker)))
806         {
807           return next;
808         }
809     }
810   else
811     {
812       if (PREDICT_FALSE (nat44_ed_not_translate (
813             vm, sm, node, sw_if_index, b, ip, IP_PROTOCOL_ICMP, rx_fib_index)))
814         {
815           return next;
816         }
817     }
818
819   if (PREDICT_FALSE (icmp_type_is_error_message (
820         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
821     {
822       b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
823       return NAT_NEXT_DROP;
824     }
825
826   next =
827     slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address, lookup_sport,
828                   lookup_dport, ip->protocol, rx_fib_index, tx_sw_if_index, &s,
829                   node, next, thread_index, vlib_time_now (vm));
830
831   if (NAT_NEXT_DROP == next)
832     goto out;
833
834   if (PREDICT_TRUE (!ip4_is_fragment (ip)))
835     {
836       ip_csum_t sum = ip_incremental_checksum_buffer (
837         vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b),
838         ntohs (ip->length) - ip4_header_bytes (ip), 0);
839       checksum = ~ip_csum_fold (sum);
840       if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff))
841         {
842           next = NAT_NEXT_DROP;
843           goto out;
844         }
845     }
846
847 out:
848   if (PREDICT_TRUE (next != NAT_NEXT_DROP && s))
849     {
850       /* Accounting */
851       nat44_session_update_counters (
852         s, now, vlib_buffer_length_in_chain (vm, b), thread_index);
853       /* Per-user LRU list maintenance */
854       nat44_session_update_lru (sm, s, thread_index);
855     }
856   *s_p = s;
857   return next;
858 }
859
860 static snat_session_t *
861 nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
862                                         ip4_header_t *ip, u32 rx_fib_index,
863                                         u32 thread_index, f64 now,
864                                         vlib_main_t *vm,
865                                         vlib_node_runtime_t *node)
866 {
867   clib_bihash_kv_16_8_t s_kv, s_value;
868   snat_static_mapping_t *m = NULL;
869   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
870   snat_session_t *s = NULL;
871   u32 outside_fib_index = sm->outside_fib_index;
872   int i;
873   ip4_address_t new_src_addr = { 0 };
874   ip4_address_t new_dst_addr = ip->dst_address;
875
876   if (PREDICT_FALSE (
877         nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
878     {
879       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
880       nat_ipfix_logging_max_sessions (thread_index,
881                                       sm->max_translations_per_thread);
882       nat_elog_notice (sm, "maximum sessions exceeded");
883       return 0;
884     }
885
886   switch (vec_len (sm->outside_fibs))
887     {
888     case 0:
889       outside_fib_index = sm->outside_fib_index;
890       break;
891     case 1:
892       outside_fib_index = sm->outside_fibs[0].fib_index;
893       break;
894     default:
895       outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address);
896       break;
897     }
898
899   /* Try to find static mapping first */
900   m = nat44_ed_sm_i2o_lookup (sm, ip->src_address, 0, rx_fib_index,
901                               ip->protocol);
902   if (m)
903     {
904       new_src_addr = m->external_addr;
905     }
906   else
907     {
908       pool_foreach (s, tsm->sessions)
909         {
910           if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
911             {
912               init_ed_k (&s_kv, s->out2in.addr.as_u32, 0,
913                          ip->dst_address.as_u32, 0, outside_fib_index,
914                          ip->protocol);
915               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
916                 {
917                   new_src_addr = s->out2in.addr;
918                 }
919               break;
920             }
921         }
922
923       if (!new_src_addr.as_u32)
924         {
925           for (i = 0; i < vec_len (sm->addresses); i++)
926             {
927               init_ed_k (&s_kv, sm->addresses[i].addr.as_u32, 0,
928                          ip->dst_address.as_u32, 0, outside_fib_index,
929                          ip->protocol);
930               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
931                 {
932                   new_src_addr = sm->addresses[i].addr;
933                 }
934             }
935         }
936     }
937
938   if (!new_src_addr.as_u32)
939     {
940       // could not allocate address for translation ...
941       return 0;
942     }
943
944   s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
945   if (!s)
946     {
947       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
948       nat_elog_warn (sm, "create NAT session failed");
949       return 0;
950     }
951
952   nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
953                         ip->dst_address, 0, rx_fib_index, ip->protocol);
954   nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
955   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
956
957   // hairpinning?
958   int is_hairpinning = nat44_ed_external_sm_lookup (
959     sm, ip->dst_address, 0, ip->protocol, &new_dst_addr, NULL);
960   s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
961
962   nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
963   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
964
965   nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
966                         outside_fib_index, ip->protocol);
967   nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
968   nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
969   nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
970
971   s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
972   s->out2in.addr.as_u32 = new_src_addr.as_u32;
973   s->out2in.fib_index = outside_fib_index;
974   s->in2out.addr.as_u32 = ip->src_address.as_u32;
975   s->in2out.fib_index = rx_fib_index;
976   s->in2out.port = s->out2in.port = ip->protocol;
977   if (m)
978     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
979
980   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
981     {
982       nat_elog_notice (sm, "in2out flow hash add failed");
983       nat_ed_session_delete (sm, s, thread_index, 1);
984       return NULL;
985     }
986
987   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1))
988     {
989       nat_elog_notice (sm, "out2in flow hash add failed");
990       nat_ed_session_delete (sm, s, thread_index, 1);
991       return NULL;
992     }
993
994   per_vrf_sessions_register_session (s, thread_index);
995
996   /* Accounting */
997   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
998                                  thread_index);
999   /* Per-user LRU list maintenance */
1000   nat44_session_update_lru (sm, s, thread_index);
1001
1002   return s;
1003 }
1004
1005 static inline uword
1006 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
1007                                           vlib_node_runtime_t *node,
1008                                           vlib_frame_t *frame,
1009                                           int is_output_feature,
1010                                           int is_multi_worker)
1011 {
1012   u32 n_left_from, *from;
1013   snat_main_t *sm = &snat_main;
1014   f64 now = vlib_time_now (vm);
1015   u32 thread_index = vm->thread_index;
1016   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1017   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
1018     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
1019
1020   from = vlib_frame_vector_args (frame);
1021   n_left_from = frame->n_vectors;
1022
1023   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1024   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1025   vlib_get_buffers (vm, from, b, n_left_from);
1026
1027   while (n_left_from > 0)
1028     {
1029       vlib_buffer_t *b0;
1030       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1031       u32 tx_sw_if_index0;
1032       u32 cntr_sw_if_index0;
1033       ip_protocol_t proto0;
1034       ip4_header_t *ip0;
1035       snat_session_t *s0 = 0;
1036       clib_bihash_kv_16_8_t kv0, value0;
1037       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1038       nat_6t_flow_t *f = 0;
1039       nat_6t_t lookup;
1040       int lookup_skipped = 0;
1041
1042       b0 = *b;
1043       b++;
1044
1045       /* Prefetch next iteration. */
1046       if (PREDICT_TRUE (n_left_from >= 2))
1047         {
1048           vlib_buffer_t *p2;
1049
1050           p2 = *b;
1051
1052           vlib_prefetch_buffer_header (p2, LOAD);
1053
1054           clib_prefetch_load (p2->data);
1055         }
1056
1057       if (is_output_feature)
1058         {
1059           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1060         }
1061
1062       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1063
1064       ip0 =
1065         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
1066
1067       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1068       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1069       cntr_sw_if_index0 =
1070         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1071       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1072                                                            rx_sw_if_index0);
1073       lookup.fib_index = rx_fib_index0;
1074
1075       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1076         {
1077           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1078           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1079                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1080                                        0);
1081           next[0] = NAT_NEXT_ICMP_ERROR;
1082           goto trace0;
1083         }
1084
1085       proto0 = ip0->protocol;
1086
1087       if (is_output_feature)
1088         {
1089           if (PREDICT_FALSE
1090               (nat_not_translate_output_feature_fwd
1091                (sm, ip0, thread_index, now, vm, b0)))
1092             goto trace0;
1093         }
1094
1095       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
1096         {
1097           if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1098                 ICMP4_echo_request &&
1099               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1100                 ICMP4_echo_reply &&
1101               !icmp_type_is_error_message (
1102                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
1103             {
1104               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
1105               next[0] = NAT_NEXT_DROP;
1106               goto trace0;
1107             }
1108           int err = nat_get_icmp_session_lookup_values (
1109             b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
1110             &lookup.dport, &lookup.proto);
1111           if (err != 0)
1112             {
1113               b0->error = node->errors[err];
1114               next[0] = NAT_NEXT_DROP;
1115               goto trace0;
1116             }
1117         }
1118       else
1119         {
1120           lookup.proto = ip0->protocol;
1121           lookup.saddr.as_u32 = ip0->src_address.as_u32;
1122           lookup.daddr.as_u32 = ip0->dst_address.as_u32;
1123           lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
1124           lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
1125         }
1126
1127       /* there might be a stashed index in vnet_buffer2 from handoff or
1128        * classify node, see if it can be used */
1129       if (is_multi_worker &&
1130           !pool_is_free_index (tsm->sessions,
1131                                vnet_buffer2 (b0)->nat.cached_session_index))
1132         {
1133           s0 = pool_elt_at_index (tsm->sessions,
1134                                   vnet_buffer2 (b0)->nat.cached_session_index);
1135           if (PREDICT_TRUE (
1136                 nat_6t_t_eq (&s0->i2o.match, &lookup)
1137                 // for some hairpinning cases there are two "i2i" flows instead
1138                 // of i2o and o2i as both hosts are on inside
1139                 || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1140                     nat_6t_t_eq (&s0->o2i.match, &lookup))))
1141             {
1142               /* yes, this is the droid we're looking for */
1143               lookup_skipped = 1;
1144               goto skip_lookup;
1145             }
1146           s0 = NULL;
1147         }
1148
1149       init_ed_k (&kv0, lookup.saddr.as_u32, lookup.sport, lookup.daddr.as_u32,
1150                  lookup.dport, lookup.fib_index, lookup.proto);
1151
1152       // lookup flow
1153       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1154         {
1155           // flow does not exist go slow path
1156           next[0] = def_slow;
1157           goto trace0;
1158         }
1159
1160       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1161       s0 =
1162         pool_elt_at_index (tsm->sessions,
1163                            ed_value_get_session_index (&value0));
1164
1165     skip_lookup:
1166
1167       ASSERT (thread_index == s0->thread_index);
1168
1169       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1170         {
1171           // session is closed, go slow path
1172           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1173           nat_ed_session_delete (sm, s0, thread_index, 1);
1174           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
1175           goto trace0;
1176         }
1177
1178       // drop if session expired
1179       u64 sess_timeout_time;
1180       sess_timeout_time =
1181         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1182       if (now >= sess_timeout_time)
1183         {
1184           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1185           nat_ed_session_delete (sm, s0, thread_index, 1);
1186           // session is closed, go slow path
1187           next[0] = def_slow;
1188           goto trace0;
1189         }
1190
1191       b0->flags |= VNET_BUFFER_F_IS_NATED;
1192
1193       if (nat_6t_t_eq (&s0->i2o.match, &lookup))
1194         {
1195           f = &s0->i2o;
1196         }
1197       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1198                nat_6t_t_eq (&s0->o2i.match, &lookup))
1199         {
1200           f = &s0->o2i;
1201         }
1202       else
1203         {
1204           translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
1205           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1206           nat_ed_session_delete (sm, s0, thread_index, 1);
1207           next[0] = NAT_NEXT_DROP;
1208           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1209           goto trace0;
1210         }
1211
1212       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1213           (translation_error = nat_6t_flow_buf_translate_i2o (
1214              vm, sm, b0, ip0, f, proto0, is_output_feature)))
1215         {
1216           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1217           nat_ed_session_delete (sm, s0, thread_index, 1);
1218           next[0] = NAT_NEXT_DROP;
1219           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1220           goto trace0;
1221         }
1222
1223       switch (proto0)
1224         {
1225         case IP_PROTOCOL_TCP:
1226           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
1227                                          thread_index, cntr_sw_if_index0, 1);
1228           nat44_set_tcp_session_state_i2o (
1229             sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
1230             thread_index);
1231           break;
1232         case IP_PROTOCOL_UDP:
1233           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
1234                                          thread_index, cntr_sw_if_index0, 1);
1235           break;
1236         case IP_PROTOCOL_ICMP:
1237           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
1238                                          thread_index, cntr_sw_if_index0, 1);
1239           break;
1240         default:
1241           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
1242                                          thread_index, cntr_sw_if_index0, 1);
1243           break;
1244         }
1245
1246       /* Accounting */
1247       nat44_session_update_counters (s0, now,
1248                                      vlib_buffer_length_in_chain (vm, b0),
1249                                      thread_index);
1250       /* Per-user LRU list maintenance */
1251       nat44_session_update_lru (sm, s0, thread_index);
1252
1253     trace0:
1254       if (PREDICT_FALSE
1255           ((node->flags & VLIB_NODE_FLAG_TRACE)
1256            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1257         {
1258           nat_in2out_ed_trace_t *t =
1259             vlib_add_trace (vm, node, b0, sizeof (*t));
1260           t->sw_if_index = rx_sw_if_index0;
1261           t->next_index = next[0];
1262           t->is_slow_path = 0;
1263           t->translation_error = translation_error;
1264           t->lookup_skipped = lookup_skipped;
1265           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1266
1267           if (s0)
1268             {
1269               t->session_index = s0 - tsm->sessions;
1270               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1271               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1272               t->translation_via_i2of = (&s0->i2o == f);
1273               t->tcp_state = s0->tcp_state;
1274             }
1275           else
1276             {
1277               t->session_index = ~0;
1278             }
1279         }
1280
1281       if (next[0] == NAT_NEXT_DROP)
1282         {
1283           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
1284                                          thread_index, cntr_sw_if_index0, 1);
1285         }
1286
1287       n_left_from--;
1288       next++;
1289     }
1290
1291   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1292                                frame->n_vectors);
1293   return frame->n_vectors;
1294 }
1295
1296 static inline uword
1297 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
1298                                           vlib_node_runtime_t *node,
1299                                           vlib_frame_t *frame,
1300                                           int is_output_feature,
1301                                           int is_multi_worker)
1302 {
1303   u32 n_left_from, *from;
1304   snat_main_t *sm = &snat_main;
1305   f64 now = vlib_time_now (vm);
1306   u32 thread_index = vm->thread_index;
1307   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1308
1309   from = vlib_frame_vector_args (frame);
1310   n_left_from = frame->n_vectors;
1311
1312   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1313   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1314   vlib_get_buffers (vm, from, b, n_left_from);
1315
1316   while (n_left_from > 0)
1317     {
1318       vlib_buffer_t *b0;
1319       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1320       u32 tx_sw_if_index0;
1321       u32 cntr_sw_if_index0;
1322       ip_protocol_t proto0;
1323       ip4_header_t *ip0;
1324       udp_header_t *udp0;
1325       icmp46_header_t *icmp0;
1326       snat_session_t *s0 = 0;
1327       clib_bihash_kv_16_8_t kv0, value0;
1328       int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1329
1330       b0 = *b;
1331
1332       if (is_output_feature)
1333         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1334
1335       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1336
1337       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1338                               iph_offset0);
1339
1340       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1341       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1342       cntr_sw_if_index0 =
1343         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1344       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1345                                                            rx_sw_if_index0);
1346
1347       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1348         {
1349           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1350           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1351                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1352                                        0);
1353           next[0] = NAT_NEXT_ICMP_ERROR;
1354           goto trace0;
1355         }
1356
1357       udp0 = ip4_next_header (ip0);
1358       icmp0 = (icmp46_header_t *) udp0;
1359       proto0 = ip0->protocol;
1360
1361       if (PREDICT_FALSE (nat44_ed_is_unk_proto (proto0)))
1362         {
1363           s0 = nat44_ed_in2out_slowpath_unknown_proto (
1364             sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
1365           if (!s0)
1366             next[0] = NAT_NEXT_DROP;
1367
1368           if (NAT_NEXT_DROP != next[0] && s0 &&
1369               NAT_ED_TRNSL_ERR_SUCCESS !=
1370                 (translation_error = nat_6t_flow_buf_translate_i2o (
1371                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1372             {
1373               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1374               nat_ed_session_delete (sm, s0, thread_index, 1);
1375               next[0] = NAT_NEXT_DROP;
1376               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1377               goto trace0;
1378             }
1379
1380           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
1381                                          thread_index, cntr_sw_if_index0, 1);
1382           goto trace0;
1383         }
1384
1385       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
1386         {
1387           next[0] = icmp_in2out_ed_slow_path (
1388             sm, b0, ip0, icmp0, rx_sw_if_index0, tx_sw_if_index0,
1389             rx_fib_index0, node, next[0], now, thread_index, &s0,
1390             is_multi_worker);
1391           if (NAT_NEXT_DROP != next[0] && s0 &&
1392               NAT_ED_TRNSL_ERR_SUCCESS !=
1393                 (translation_error = nat_6t_flow_buf_translate_i2o (
1394                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1395             {
1396               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1397               nat_ed_session_delete (sm, s0, thread_index, 1);
1398               next[0] = NAT_NEXT_DROP;
1399               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1400               goto trace0;
1401             }
1402
1403           if (NAT_NEXT_DROP != next[0])
1404             {
1405               vlib_increment_simple_counter (
1406                 &sm->counters.slowpath.in2out.icmp, thread_index,
1407                 cntr_sw_if_index0, 1);
1408             }
1409           goto trace0;
1410         }
1411
1412       init_ed_k (
1413         &kv0, ip0->src_address.as_u32, vnet_buffer (b0)->ip.reass.l4_src_port,
1414         ip0->dst_address.as_u32, vnet_buffer (b0)->ip.reass.l4_dst_port,
1415         rx_fib_index0, ip0->protocol);
1416       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1417         {
1418           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1419           s0 =
1420             pool_elt_at_index (tsm->sessions,
1421                                ed_value_get_session_index (&value0));
1422         }
1423
1424       if (!s0)
1425         {
1426           if (is_output_feature)
1427             {
1428               if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
1429                     sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1430                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1431                     rx_sw_if_index0, tx_sw_if_index0, is_multi_worker)))
1432                 goto trace0;
1433
1434               /*
1435                * Send DHCP packets to the ipv4 stack, or we won't
1436                * be able to use dhcp client on the outside interface
1437                */
1438               if (PREDICT_FALSE (
1439                     proto0 == IP_PROTOCOL_UDP &&
1440                     (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1441                      clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)) &&
1442                     ip0->dst_address.as_u32 == 0xffffffff))
1443                 goto trace0;
1444             }
1445           else
1446             {
1447               if (PREDICT_FALSE (
1448                     nat44_ed_not_translate (vm, sm, node, rx_sw_if_index0, b0,
1449                                             ip0, proto0, rx_fib_index0)))
1450                 goto trace0;
1451             }
1452
1453           next[0] =
1454             slow_path_ed (vm, sm, b0, ip0->src_address, ip0->dst_address,
1455                           vnet_buffer (b0)->ip.reass.l4_src_port,
1456                           vnet_buffer (b0)->ip.reass.l4_dst_port,
1457                           ip0->protocol, rx_fib_index0, tx_sw_if_index0, &s0,
1458                           node, next[0], thread_index, now);
1459
1460           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1461             goto trace0;
1462
1463           if (PREDICT_FALSE (!s0))
1464             goto trace0;
1465
1466         }
1467
1468       b0->flags |= VNET_BUFFER_F_IS_NATED;
1469
1470       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1471           (translation_error = nat_6t_flow_buf_translate_i2o (
1472              vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1473         {
1474           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1475           nat_ed_session_delete (sm, s0, thread_index, 1);
1476           next[0] = NAT_NEXT_DROP;
1477           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1478           goto trace0;
1479         }
1480
1481       if (PREDICT_TRUE (proto0 == IP_PROTOCOL_TCP))
1482         {
1483           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
1484                                          thread_index, cntr_sw_if_index0, 1);
1485           nat44_set_tcp_session_state_i2o (
1486             sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
1487             thread_index);
1488         }
1489       else
1490         {
1491           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
1492                                          thread_index, cntr_sw_if_index0, 1);
1493         }
1494
1495       /* Accounting */
1496       nat44_session_update_counters (s0, now,
1497                                      vlib_buffer_length_in_chain
1498                                      (vm, b0), thread_index);
1499       /* Per-user LRU list maintenance */
1500       nat44_session_update_lru (sm, s0, thread_index);
1501
1502     trace0:
1503       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1504                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1505         {
1506           nat_in2out_ed_trace_t *t =
1507             vlib_add_trace (vm, node, b0, sizeof (*t));
1508           t->sw_if_index = rx_sw_if_index0;
1509           t->next_index = next[0];
1510           t->is_slow_path = 1;
1511           t->translation_error = translation_error;
1512           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1513
1514           if (s0)
1515             {
1516               t->session_index = s0 - tsm->sessions;
1517               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1518               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1519               t->translation_via_i2of = 1;
1520               t->tcp_state = s0->tcp_state;
1521             }
1522
1523           else
1524             {
1525               t->session_index = ~0;
1526             }
1527         }
1528
1529       if (next[0] == NAT_NEXT_DROP)
1530         {
1531           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
1532                                          thread_index, cntr_sw_if_index0, 1);
1533         }
1534
1535       n_left_from--;
1536       next++;
1537       b++;
1538     }
1539
1540   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1541                                frame->n_vectors);
1542
1543   return frame->n_vectors;
1544 }
1545
1546 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1547                                      vlib_node_runtime_t * node,
1548                                      vlib_frame_t * frame)
1549 {
1550   if (snat_main.num_workers > 1)
1551     {
1552       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1);
1553     }
1554   else
1555     {
1556       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0);
1557     }
1558 }
1559
1560 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1561   .name = "nat44-ed-in2out",
1562   .vector_size = sizeof (u32),
1563   .sibling_of = "nat-default",
1564   .format_trace = format_nat_in2out_ed_trace,
1565   .type = VLIB_NODE_TYPE_INTERNAL,
1566   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1567   .error_strings = nat_in2out_ed_error_strings,
1568   .runtime_data_bytes = sizeof (snat_runtime_t),
1569 };
1570
1571 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1572                                             vlib_node_runtime_t * node,
1573                                             vlib_frame_t * frame)
1574 {
1575   if (snat_main.num_workers > 1)
1576     {
1577       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1);
1578     }
1579   else
1580     {
1581       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0);
1582     }
1583 }
1584
1585 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1586   .name = "nat44-ed-in2out-output",
1587   .vector_size = sizeof (u32),
1588   .sibling_of = "nat-default",
1589   .format_trace = format_nat_in2out_ed_trace,
1590   .type = VLIB_NODE_TYPE_INTERNAL,
1591   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1592   .error_strings = nat_in2out_ed_error_strings,
1593   .runtime_data_bytes = sizeof (snat_runtime_t),
1594 };
1595
1596 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1597                                               vlib_node_runtime_t *
1598                                               node, vlib_frame_t * frame)
1599 {
1600   if (snat_main.num_workers > 1)
1601     {
1602       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 1);
1603     }
1604   else
1605     {
1606       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 0);
1607     }
1608 }
1609
1610 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1611   .name = "nat44-ed-in2out-slowpath",
1612   .vector_size = sizeof (u32),
1613   .sibling_of = "nat-default",
1614   .format_trace = format_nat_in2out_ed_trace,
1615   .type = VLIB_NODE_TYPE_INTERNAL,
1616   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1617   .error_strings = nat_in2out_ed_error_strings,
1618   .runtime_data_bytes = sizeof (snat_runtime_t),
1619 };
1620
1621 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1622                                                      vlib_node_runtime_t
1623                                                      * node,
1624                                                      vlib_frame_t * frame)
1625 {
1626   if (snat_main.num_workers > 1)
1627     {
1628       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 1);
1629     }
1630   else
1631     {
1632       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 0);
1633     }
1634 }
1635
1636 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1637   .name = "nat44-ed-in2out-output-slowpath",
1638   .vector_size = sizeof (u32),
1639   .sibling_of = "nat-default",
1640   .format_trace = format_nat_in2out_ed_trace,
1641   .type = VLIB_NODE_TYPE_INTERNAL,
1642   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1643   .error_strings = nat_in2out_ed_error_strings,
1644   .runtime_data_bytes = sizeof (snat_runtime_t),
1645 };
1646
1647 static u8 *
1648 format_nat_pre_trace (u8 * s, va_list * args)
1649 {
1650   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1651   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1652   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1653   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1654                  t->arc_next_index);
1655 }
1656
1657 VLIB_NODE_FN (nat_pre_in2out_node)
1658   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1659 {
1660   return nat_pre_node_fn_inline (vm, node, frame,
1661                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1662 }
1663
1664 VLIB_NODE_FN (nat_pre_in2out_output_node)
1665   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1666 {
1667   return nat_pre_node_fn_inline (vm, node, frame,
1668                                  NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
1669 }
1670
1671 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1672   .name = "nat-pre-in2out",
1673   .vector_size = sizeof (u32),
1674   .sibling_of = "nat-default",
1675   .format_trace = format_nat_pre_trace,
1676   .type = VLIB_NODE_TYPE_INTERNAL,
1677   .n_errors = 0,
1678 };
1679
1680 VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
1681   .name = "nat-pre-in2out-output",
1682   .vector_size = sizeof (u32),
1683   .sibling_of = "nat-default",
1684   .format_trace = format_nat_pre_trace,
1685   .type = VLIB_NODE_TYPE_INTERNAL,
1686   .n_errors = 0,
1687 };
1688
1689 /*
1690  * fd.io coding-style-patch-verification: ON
1691  *
1692  * Local Variables:
1693  * eval: (c-set-style "gnu")
1694  * End:
1695  */