23e0957dabe4ae1530845e956a56722203f44b9c
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_in2out.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27
28 #include <nat/lib/nat_inlines.h>
29 #include <nat/lib/ipfix_logging.h>
30
31 #include <nat/nat44-ed/nat44_ed.h>
32 #include <nat/nat44-ed/nat44_ed_inlines.h>
33
/*
 * Error string table generated from the foreach_nat_in2out_ed_error X-macro:
 * every (sym, string) entry of that list contributes its string, in list
 * order.
 */
static char *nat_in2out_ed_error_strings[] = {
#define _(sym,string) string,
  foreach_nat_in2out_ed_error
#undef _
};
39
/* Per-packet trace record printed by format_nat_in2out_ed_trace(). */
typedef struct
{
  /* printed as "sw_if_index" */
  u32 sw_if_index;
  /* printed as "next index" */
  u32 next_index;
  /* session details are printed only when this is not ~0 */
  u32 session_index;
  /* printed as the translation result */
  nat_translation_error_e translation_error;
  /* flows printed under the "i2of" and "o2if" labels */
  nat_6t_flow_t i2of;
  nat_6t_flow_t o2if;
  /* printed on the fast path when the lookup was not skipped */
  clib_bihash_kv_16_8_t search_key;
  /* selects the slow-path or fast-path tag */
  u8 is_slow_path;
  /* non-zero prints "i2of" as the translating flow, zero prints "o2if" */
  u8 translation_via_i2of;
  /* fast path only: non-zero prints the "lookup skipped" note */
  u8 lookup_skipped;
} nat_in2out_ed_trace_t;
53
54 static u8 *
55 format_nat_in2out_ed_trace (u8 * s, va_list * args)
56 {
57   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
58   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
59   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
60   char *tag;
61
62   tag =
63     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
64     "NAT44_IN2OUT_ED_FAST_PATH";
65
66   s = format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
67               t->next_index);
68   if (~0 != t->session_index)
69     {
70       s = format (s, ", session %d, translation result '%U' via %s",
71                   t->session_index, format_nat_ed_translation_error,
72                   t->translation_error,
73                   t->translation_via_i2of ? "i2of" : "o2if");
74       s = format (s, "\n  i2of %U", format_nat_6t_flow, &t->i2of);
75       s = format (s, "\n  o2if %U", format_nat_6t_flow, &t->o2if);
76     }
77   if (!t->is_slow_path)
78     {
79       if (t->lookup_skipped)
80         {
81           s = format (s, "\n lookup skipped - cached session index used");
82         }
83       else
84         {
85           s = format (s, "\n  search key %U", format_ed_session_kvp,
86                       &t->search_key);
87         }
88     }
89
90   return s;
91 }
92
93 /**
94  * @brief Check if packet should be translated
95  *
96  * Packets aimed at outside interface and external address with active session
97  * should be translated.
98  *
99  * @param sm            NAT main
100  * @param rt            NAT runtime data
101  * @param sw_if_index0  index of the inside interface
102  * @param ip0           IPv4 header
103  * @param rx_fib_index0 RX FIB index
104  *
105  * @returns 0 if packet should be translated otherwise 1
106  */
107 static inline int
108 snat_not_translate_fast (snat_main_t *sm, vlib_node_runtime_t *node,
109                          u32 sw_if_index0, ip4_header_t *ip0,
110                          u32 rx_fib_index0)
111 {
112   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
113   nat_outside_fib_t *outside_fib;
114   fib_prefix_t pfx = {
115     .fp_proto = FIB_PROTOCOL_IP4,
116     .fp_len = 32,
117     .fp_addr = {
118                 .ip4.as_u32 = ip0->dst_address.as_u32,
119                 }
120     ,
121   };
122
123   /* Don't NAT packet aimed at the intfc address */
124   if (PREDICT_FALSE (
125         is_interface_addr (sm, node, sw_if_index0, ip0->dst_address.as_u32)))
126     return 1;
127
128   fei = fib_table_lookup (rx_fib_index0, &pfx);
129   if (FIB_NODE_INDEX_INVALID != fei)
130     {
131       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
132       if (sw_if_index == ~0)
133         {
134           vec_foreach (outside_fib, sm->outside_fibs)
135             {
136               fei = fib_table_lookup (outside_fib->fib_index, &pfx);
137               if (FIB_NODE_INDEX_INVALID != fei)
138                 {
139                   sw_if_index = fib_entry_get_resolving_interface (fei);
140                   if (sw_if_index != ~0)
141                     break;
142                 }
143             }
144         }
145       if (sw_if_index == ~0)
146         return 1;
147
148       snat_interface_t *i;
149       pool_foreach (i, sm->interfaces)
150         {
151           /* NAT packet aimed at outside interface */
152           if ((nat44_ed_is_interface_outside (i)) &&
153               (sw_if_index == i->sw_if_index))
154             return 0;
155         }
156     }
157
158   return 1;
159 }
160
161 static int
162 nat_ed_alloc_addr_and_port_with_snat_address (
163   snat_main_t *sm, u8 proto, u32 thread_index, snat_address_t *a,
164   u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
165   ip4_address_t *outside_addr, u16 *outside_port)
166 {
167   const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
168
169   s->o2i.match.daddr = a->addr;
170   /* first try port suggested by caller */
171   u16 port = clib_net_to_host_u16 (*outside_port);
172   u16 port_offset = port - port_thread_offset;
173   if (port <= port_thread_offset ||
174       port > port_thread_offset + port_per_thread)
175     {
176       /* need to pick a different port, suggested port doesn't fit in
177        * this thread's port range */
178       port_offset = snat_random_port (0, port_per_thread - 1);
179       port = port_thread_offset + port_offset;
180     }
181   u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
182   do
183     {
184       if (IP_PROTOCOL_ICMP == proto)
185         {
186           s->o2i.match.sport = clib_host_to_net_u16 (port);
187         }
188       s->o2i.match.dport = clib_host_to_net_u16 (port);
189       if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
190         {
191           *outside_addr = a->addr;
192           *outside_port = clib_host_to_net_u16 (port);
193           return 0;
194         }
195       port_offset = snat_random_port (0, port_per_thread - 1);
196       port = port_thread_offset + port_offset;
197       --attempts;
198     }
199   while (attempts > 0);
200   return 1;
201 }
202
203 static int
204 nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index,
205                             u32 tx_sw_if_index, u32 nat_proto,
206                             u32 thread_index, ip4_address_t s_addr,
207                             ip4_address_t d_addr, u32 snat_thread_index,
208                             snat_session_t *s, ip4_address_t *outside_addr,
209                             u16 *outside_port)
210 {
211   if (vec_len (sm->addresses) > 0)
212     {
213       u32 s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses);
214       snat_address_t *a, *ja = 0, *ra = 0, *ba = 0;
215       int i;
216
217       // output feature
218       if (tx_sw_if_index != ~0)
219         {
220           for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
221             {
222               a = sm->addresses + i;
223               if (a->fib_index == rx_fib_index)
224                 {
225                   if (a->sw_if_index == tx_sw_if_index)
226                     {
227                       if ((a->addr_len != ~0) &&
228                           (a->net.as_u32 ==
229                            (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
230
231                         {
232                           return nat_ed_alloc_addr_and_port_with_snat_address (
233                             sm, nat_proto, thread_index, a,
234                             sm->port_per_thread, snat_thread_index, s,
235                             outside_addr, outside_port);
236                         }
237                       ra = a;
238                     }
239                   ja = a;
240                 }
241               else if (a->fib_index == ~0)
242                 {
243                   ba = a;
244                 }
245             }
246           for (i = 0; i < s_addr_offset; ++i)
247             {
248               a = sm->addresses + i;
249               if (a->fib_index == rx_fib_index)
250                 {
251                   if (a->sw_if_index == tx_sw_if_index)
252                     {
253                       if ((a->addr_len != ~0) &&
254                           (a->net.as_u32 ==
255                            (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
256
257                         {
258                           return nat_ed_alloc_addr_and_port_with_snat_address (
259                             sm, nat_proto, thread_index, a,
260                             sm->port_per_thread, snat_thread_index, s,
261                             outside_addr, outside_port);
262                         }
263                       ra = a;
264                     }
265                   ja = a;
266                 }
267               else if (a->fib_index == ~0)
268                 {
269                   ba = a;
270                 }
271             }
272           if (ra)
273             {
274               return nat_ed_alloc_addr_and_port_with_snat_address (
275                 sm, nat_proto, thread_index, ra, sm->port_per_thread,
276                 snat_thread_index, s, outside_addr, outside_port);
277             }
278         }
279       else
280         {
281           // first try nat pool addresses to sw interface addreses mappings
282           for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
283             {
284               a = sm->addresses + i;
285               if (a->fib_index == rx_fib_index)
286                 {
287                   if ((a->addr_len != ~0) &&
288                       (a->net.as_u32 ==
289                        (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
290                     {
291                       return nat_ed_alloc_addr_and_port_with_snat_address (
292                         sm, nat_proto, thread_index, a, sm->port_per_thread,
293                         snat_thread_index, s, outside_addr, outside_port);
294                     }
295                   ja = a;
296                 }
297               else if (a->fib_index == ~0)
298                 {
299                   ba = a;
300                 }
301             }
302           for (i = 0; i < s_addr_offset; ++i)
303             {
304               a = sm->addresses + i;
305               if (a->fib_index == rx_fib_index)
306                 {
307                   if ((a->addr_len != ~0) &&
308                       (a->net.as_u32 ==
309                        (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
310                     {
311                       return nat_ed_alloc_addr_and_port_with_snat_address (
312                         sm, nat_proto, thread_index, a, sm->port_per_thread,
313                         snat_thread_index, s, outside_addr, outside_port);
314                     }
315                   ja = a;
316                 }
317               else if (a->fib_index == ~0)
318                 {
319                   ba = a;
320                 }
321             }
322         }
323
324       if (ja || ba)
325         {
326           a = ja ? ja : ba;
327           return nat_ed_alloc_addr_and_port_with_snat_address (
328             sm, nat_proto, thread_index, a, sm->port_per_thread,
329             snat_thread_index, s, outside_addr, outside_port);
330         }
331     }
332   /* Totally out of translations to use... */
333   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
334   return 1;
335 }
336
337 static_always_inline u32
338 nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr)
339 {
340   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
341   nat_outside_fib_t *outside_fib;
342   fib_prefix_t pfx = {
343     .fp_proto = FIB_PROTOCOL_IP4,
344     .fp_len = 32,
345     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
346     ,
347   };
348   vec_foreach (outside_fib, sm->outside_fibs)
349     {
350       fei = fib_table_lookup (outside_fib->fib_index, &pfx);
351       if (FIB_NODE_INDEX_INVALID != fei)
352         {
353           if (fib_entry_get_resolving_interface (fei) != ~0)
354             {
355               return outside_fib->fib_index;
356             }
357         }
358     }
359   return ~0;
360 }
361
362 static_always_inline int
363 nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
364                              u16 match_port, ip_protocol_t match_protocol,
365                              ip4_address_t *daddr, u16 *dport)
366 {
367   snat_static_mapping_t *m =
368     nat44_ed_sm_o2i_lookup (sm, match_addr, match_port, 0, match_protocol);
369   if (!m)
370     {
371       /* Try address only mapping */
372       m = nat44_ed_sm_o2i_lookup (sm, match_addr, 0, 0, 0);
373       if (!m)
374         return 0;
375     }
376   *daddr = m->local_addr;
377   if (dport)
378     {
379       /* Address only mapping doesn't change port */
380       *dport = is_sm_addr_only (m->flags) ? match_port : m->local_port;
381     }
382   return 1;
383 }
384
/**
 * @brief Create a new in2out session for a packet that has no session yet
 *
 * Steps, in order:
 *  - when the session limit is exceeded, try to free one session via
 *    nat_lru_free_one(); drop if that fails
 *  - select the outside FIB index
 *  - look for a static mapping of the local address/port; an identity NAT
 *    mapping makes the function return `next` with no session
 *  - for TCP, drop unless tcp_flags_is_init() accepts the packet's flags
 *  - allocate a session and set up its o2i flow, either with a dynamically
 *    allocated outside address/port or with the static mapping's values
 *  - set up and insert the i2o flow, log the session creation and register
 *    the session per VRF
 *
 * @param vm              vlib main, forwarded to snat_static_mapping_match()
 * @param sm              NAT main
 * @param b               packet buffer; b->error is set on drop
 * @param l_addr          local address, stored as the session's in2out addr
 * @param r_addr          remote address, stored as ext_host_addr
 * @param l_port          local port, stored as the session's in2out port
 * @param r_port          remote port, stored as ext_host_port
 * @param proto           IP protocol
 * @param rx_fib_index    RX FIB index
 * @param tx_sw_if_index  forwarded to nat_ed_alloc_addr_and_port()
 * @param sessionp        [out] new session on success; NULL on identity NAT
 *                        and on the `error` path. Not written on the other
 *                        drop paths.
 * @param node            node runtime providing the error counters
 * @param next            next index returned when the packet is not dropped
 * @param thread_index    thread index
 * @param now             current time
 *
 * @returns `next` on success or identity NAT, NAT_NEXT_DROP otherwise
 */
static u32
slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
              ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
              u16 r_port, u8 proto, u32 rx_fib_index, u32 tx_sw_if_index,
              snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
              u32 thread_index, f64 now)
{
  snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
  ip4_address_t outside_addr;
  u16 outside_port;
  u32 outside_fib_index;
  u8 is_identity_nat = 0;

  snat_session_t *s = NULL;
  lb_nat_type_t lb = 0;
  /* rewritten destination; replaced by the mapping's local address/port
   * when the remote endpoint matches a static mapping (see below) */
  ip4_address_t daddr = r_addr;
  u16 dport = r_port;

  /* over the session limit: only continue if one session could be freed */
  if (PREDICT_FALSE
      (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
    {
      if (!nat_lru_free_one (sm, thread_index, now))
        {
          b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
          nat_ipfix_logging_max_sessions (thread_index,
                                          sm->max_translations_per_thread);
          nat_elog_notice (sm, "maximum sessions exceeded");
          return NAT_NEXT_DROP;
        }
    }

  outside_fib_index = sm->outside_fib_index;

  /* outside FIB: the default when none is configured, the only one when
   * exactly one is configured, otherwise a lookup on the remote address */
  switch (vec_len (sm->outside_fibs))
    {
    case 0:
      outside_fib_index = sm->outside_fib_index;
      break;
    case 1:
      outside_fib_index = sm->outside_fibs[0].fib_index;
      break;
    default:
      outside_fib_index = nat_outside_fib_index_lookup (sm, r_addr);
      break;
    }

  ip4_address_t sm_addr;
  u16 sm_port;
  u32 sm_fib_index;
  /* First try to match static mapping by local address and port */
  int is_sm;
  /* a non-zero return is treated as "no static mapping" */
  if (snat_static_mapping_match (vm, sm, l_addr, l_port, rx_fib_index, proto,
                                 &sm_addr, &sm_port, &sm_fib_index, 0, 0, 0,
                                 &lb, 0, &is_identity_nat, 0))
    {
      is_sm = 0;
    }
  else
    {
      /* identity NAT: no session is created, packet continues to `next` */
      if (PREDICT_FALSE (is_identity_nat))
        {
          *sessionp = NULL;
          return next;
        }
      is_sm = 1;
    }

  /* a TCP session is only created when tcp_flags_is_init() accepts the
   * packet's flags */
  if (PREDICT_TRUE (proto == IP_PROTOCOL_TCP))
    {
      if (PREDICT_FALSE (!tcp_flags_is_init (
            vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
        {
          b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
          return NAT_NEXT_DROP;
        }
    }

  s = nat_ed_session_alloc (sm, thread_index, now, proto);
  ASSERT (s);

  if (!is_sm)
    {
      /* dynamic translation */
      s->in2out.addr = l_addr;
      s->in2out.port = l_port;
      s->proto = proto;
      s->in2out.fib_index = rx_fib_index;
      s->out2in.fib_index = outside_fib_index;

      // suggest using local port to allocation function
      outside_port = l_port;

      // hairpinning?
      int is_hairpinning = nat44_ed_external_sm_lookup (sm, r_addr, r_port,
                                                        proto, &daddr, &dport);
      s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;

      // destination addr/port updated with real values in
      // nat_ed_alloc_addr_and_port
      nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
                            s->out2in.fib_index, proto);
      nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
      if (IP_PROTOCOL_ICMP == proto)
        {
          nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
        }
      else
        {
          nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
        }
      nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);

      /* note: d_addr passed here is the original r_addr, not daddr */
      if (nat_ed_alloc_addr_and_port (
            sm, rx_fib_index, tx_sw_if_index, proto, thread_index, l_addr,
            r_addr, tsm->snat_thread_index, s, &outside_addr, &outside_port))
        {
          nat_elog_notice (sm, "addresses exhausted");
          b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
          nat_ed_session_delete (sm, s, thread_index, 1);
          return NAT_NEXT_DROP;
        }
      s->out2in.addr = outside_addr;
      s->out2in.port = outside_port;
    }
  else
    {
      // static mapping
      s->out2in.addr = outside_addr = sm_addr;
      s->out2in.port = outside_port = sm_port;
      s->in2out.addr = l_addr;
      s->in2out.port = l_port;
      s->proto = proto;
      s->in2out.fib_index = rx_fib_index;
      s->out2in.fib_index = outside_fib_index;
      s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;

      // hairpinning?
      int is_hairpinning = nat44_ed_external_sm_lookup (sm, r_addr, r_port,
                                                        proto, &daddr, &dport);
      s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;

      if (IP_PROTOCOL_ICMP == proto)
        {
          /* ICMP: sm_port is passed as both ports of the o2i match */
          nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
                                sm_port, s->out2in.fib_index, proto);
          nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
        }
      else
        {
          nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr,
                                sm_port, s->out2in.fib_index, proto);
          nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
        }
      nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
      nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
      if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
        {
          nat_elog_notice (sm, "out2in key add failed");
          goto error;
        }
    }

  if (lb)
    s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
  s->ext_host_addr = r_addr;
  s->ext_host_port = r_port;

  /* i2o flow: match on the original 5-tuple, rewrite source to the outside
   * address/port and destination to daddr/dport */
  nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port,
                        rx_fib_index, proto);
  nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
  nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);

  if (IP_PROTOCOL_ICMP == proto)
    {
      nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
    }
  else
    {
      nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
      nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
    }
  nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);

  if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
    {
      nat_elog_notice (sm, "in2out key add failed");
      goto error;
    }

  /* log NAT event */
  nat_ipfix_logging_nat44_ses_create (
    thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
    s->in2out.port, s->out2in.port, s->in2out.fib_index);

  nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
                         s->in2out.port, &s->ext_host_nat_addr,
                         s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
                         &s->ext_host_addr, s->ext_host_port, s->proto, 0);

  per_vrf_sessions_register_session (s, thread_index);

  *sessionp = s;
  return next;
error:
  /* flow hash insertion failed: release the session and report no session */
  if (s)
    {
      nat_ed_session_delete (sm, s, thread_index, 1);
    }
  *sessionp = s = NULL;
  return NAT_NEXT_DROP;
}
595
596 static_always_inline int
597 nat44_ed_not_translate (vlib_main_t *vm, snat_main_t *sm,
598                         vlib_node_runtime_t *node, u32 sw_if_index,
599                         vlib_buffer_t *b, ip4_header_t *ip, u32 proto,
600                         u32 rx_fib_index)
601 {
602   clib_bihash_kv_16_8_t kv, value;
603
604   init_ed_k (&kv, ip->dst_address.as_u32,
605              vnet_buffer (b)->ip.reass.l4_dst_port, ip->src_address.as_u32,
606              vnet_buffer (b)->ip.reass.l4_src_port, sm->outside_fib_index,
607              ip->protocol);
608
609   /* NAT packet aimed at external address if has active sessions */
610   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
611     {
612       /* or is static mappings */
613       ip4_address_t placeholder_addr;
614       u16 placeholder_port;
615       u32 placeholder_fib_index;
616       if (!snat_static_mapping_match (
617             vm, sm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
618             sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
619             &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
620         return 0;
621     }
622   else
623     return 0;
624
625   if (sm->forwarding_enabled)
626     return 1;
627
628   return snat_not_translate_fast (sm, node, sw_if_index, ip, rx_fib_index);
629 }
630
631 static_always_inline int
632 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
633                                       u32 thread_index, f64 now,
634                                       vlib_main_t * vm, vlib_buffer_t * b)
635 {
636   clib_bihash_kv_16_8_t kv, value;
637   snat_session_t *s = 0;
638   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
639
640   if (!sm->forwarding_enabled)
641     return 0;
642
643   if (ip->protocol == IP_PROTOCOL_ICMP)
644     {
645       ip4_address_t lookup_saddr, lookup_daddr;
646       u16 lookup_sport, lookup_dport;
647       u8 lookup_protocol;
648       if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
649                                               &lookup_sport, &lookup_daddr,
650                                               &lookup_dport, &lookup_protocol))
651         return 0;
652       init_ed_k (&kv, lookup_saddr.as_u32, lookup_sport, lookup_daddr.as_u32,
653                  lookup_dport, 0, lookup_protocol);
654     }
655   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
656     {
657       init_ed_k (&kv, ip->src_address.as_u32,
658                  vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
659                  vnet_buffer (b)->ip.reass.l4_dst_port, 0, ip->protocol);
660     }
661   else
662     {
663       init_ed_k (&kv, ip->src_address.as_u32, 0, ip->dst_address.as_u32, 0, 0,
664                  ip->protocol);
665     }
666
667   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
668     {
669       ASSERT (thread_index == ed_value_get_thread_index (&value));
670       s =
671         pool_elt_at_index (tsm->sessions,
672                            ed_value_get_session_index (&value));
673
674       if (na44_ed_is_fwd_bypass_session (s))
675         {
676           if (ip->protocol == IP_PROTOCOL_TCP)
677             {
678               nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index);
679             }
680           /* Accounting */
681           nat44_session_update_counters (s, now,
682                                          vlib_buffer_length_in_chain (vm, b),
683                                          thread_index);
684           /* Per-user LRU list maintenance */
685           nat44_session_update_lru (sm, s, thread_index);
686           return 1;
687         }
688       else
689         return 0;
690     }
691
692   return 0;
693 }
694
695 static_always_inline int
696 nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
697                                        ip4_header_t *ip, u16 src_port,
698                                        u16 dst_port, u32 thread_index,
699                                        u32 rx_sw_if_index, u32 tx_sw_if_index,
700                                        f64 now, int is_multi_worker)
701 {
702   clib_bihash_kv_16_8_t kv, value;
703   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
704   snat_interface_t *i;
705   snat_session_t *s;
706   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
707   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
708
709   /* src NAT check */
710   init_ed_k (&kv, ip->src_address.as_u32, src_port, ip->dst_address.as_u32,
711              dst_port, tx_fib_index, ip->protocol);
712   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
713     {
714       ASSERT (thread_index == ed_value_get_thread_index (&value));
715       s =
716         pool_elt_at_index (tsm->sessions,
717                            ed_value_get_session_index (&value));
718       if (nat44_is_ses_closed (s)
719           && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp))
720         {
721           nat44_ed_free_session_data (sm, s, thread_index, 0);
722           nat_ed_session_delete (sm, s, thread_index, 1);
723         }
724       return 1;
725     }
726
727   /* dst NAT check */
728   if (is_multi_worker &&
729       PREDICT_TRUE (!pool_is_free_index (
730         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index)))
731     {
732       nat_6t_t lookup;
733       lookup.fib_index = rx_fib_index;
734       lookup.proto = ip->protocol;
735       lookup.daddr.as_u32 = ip->src_address.as_u32;
736       lookup.dport = src_port;
737       lookup.saddr.as_u32 = ip->dst_address.as_u32;
738       lookup.sport = dst_port;
739       s = pool_elt_at_index (
740         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index);
741       if (PREDICT_TRUE (nat_6t_t_eq (&s->i2o.match, &lookup)))
742         {
743           goto skip_dst_nat_lookup;
744         }
745       s = NULL;
746     }
747
748   init_ed_k (&kv, ip->dst_address.as_u32, dst_port, ip->src_address.as_u32,
749              src_port, rx_fib_index, ip->protocol);
750   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
751     {
752       ASSERT (thread_index == ed_value_get_thread_index (&value));
753       s =
754         pool_elt_at_index (tsm->sessions,
755                            ed_value_get_session_index (&value));
756
757     skip_dst_nat_lookup:
758       if (na44_ed_is_fwd_bypass_session (s))
759         return 0;
760
761       /* hairpinning */
762       pool_foreach (i, sm->output_feature_interfaces)
763         {
764           if ((nat44_ed_is_interface_inside (i)) &&
765               (rx_sw_if_index == i->sw_if_index))
766             return 0;
767         }
768       return 1;
769     }
770
771   return 0;
772 }
773
774 static inline u32
775 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
776                           icmp46_header_t *icmp, u32 sw_if_index,
777                           u32 tx_sw_if_index, u32 rx_fib_index,
778                           vlib_node_runtime_t *node, u32 next, f64 now,
779                           u32 thread_index, snat_session_t **s_p,
780                           int is_multi_worker)
781 {
782   vlib_main_t *vm = vlib_get_main ();
783   u16 checksum;
784   int err;
785   snat_session_t *s = NULL;
786   u8 lookup_protocol = ip->protocol;
787   u16 lookup_sport, lookup_dport;
788   ip4_address_t lookup_saddr, lookup_daddr;
789
790   err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
791                                             &lookup_sport, &lookup_daddr,
792                                             &lookup_dport, &lookup_protocol);
793   if (err != 0)
794     {
795       b->error = node->errors[err];
796       return NAT_NEXT_DROP;
797     }
798
799   if (tx_sw_if_index != ~0)
800     {
801       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
802             sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
803             tx_sw_if_index, now, is_multi_worker)))
804         {
805           return next;
806         }
807     }
808   else
809     {
810       if (PREDICT_FALSE (nat44_ed_not_translate (
811             vm, sm, node, sw_if_index, b, ip, IP_PROTOCOL_ICMP, rx_fib_index)))
812         {
813           return next;
814         }
815     }
816
817   if (PREDICT_FALSE (icmp_type_is_error_message (
818         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
819     {
820       b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
821       return NAT_NEXT_DROP;
822     }
823
824   next =
825     slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address, lookup_sport,
826                   lookup_dport, ip->protocol, rx_fib_index, tx_sw_if_index, &s,
827                   node, next, thread_index, vlib_time_now (vm));
828
829   if (NAT_NEXT_DROP == next)
830     goto out;
831
832   if (PREDICT_TRUE (!ip4_is_fragment (ip)))
833     {
834       ip_csum_t sum = ip_incremental_checksum_buffer (
835         vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b),
836         ntohs (ip->length) - ip4_header_bytes (ip), 0);
837       checksum = ~ip_csum_fold (sum);
838       if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff))
839         {
840           next = NAT_NEXT_DROP;
841           goto out;
842         }
843     }
844
845 out:
846   if (PREDICT_TRUE (next != NAT_NEXT_DROP && s))
847     {
848       /* Accounting */
849       nat44_session_update_counters (
850         s, now, vlib_buffer_length_in_chain (vm, b), thread_index);
851       /* Per-user LRU list maintenance */
852       nat44_session_update_lru (sm, s, thread_index);
853     }
854   *s_p = s;
855   return next;
856 }
857
858 static snat_session_t *
859 nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
860                                         ip4_header_t *ip, u32 rx_fib_index,
861                                         u32 thread_index, f64 now,
862                                         vlib_main_t *vm,
863                                         vlib_node_runtime_t *node)
864 {
865   clib_bihash_kv_16_8_t s_kv, s_value;
866   snat_static_mapping_t *m = NULL;
867   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
868   snat_session_t *s = NULL;
869   u32 outside_fib_index = sm->outside_fib_index;
870   int i;
871   ip4_address_t new_src_addr = { 0 };
872   ip4_address_t new_dst_addr = ip->dst_address;
873
874   if (PREDICT_FALSE (
875         nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
876     {
877       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
878       nat_ipfix_logging_max_sessions (thread_index,
879                                       sm->max_translations_per_thread);
880       nat_elog_notice (sm, "maximum sessions exceeded");
881       return 0;
882     }
883
884   switch (vec_len (sm->outside_fibs))
885     {
886     case 0:
887       outside_fib_index = sm->outside_fib_index;
888       break;
889     case 1:
890       outside_fib_index = sm->outside_fibs[0].fib_index;
891       break;
892     default:
893       outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address);
894       break;
895     }
896
897   /* Try to find static mapping first */
898   m = nat44_ed_sm_i2o_lookup (sm, ip->src_address, 0, rx_fib_index,
899                               ip->protocol);
900   if (m)
901     {
902       new_src_addr = m->external_addr;
903     }
904   else
905     {
906       pool_foreach (s, tsm->sessions)
907         {
908           if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
909             {
910               init_ed_k (&s_kv, s->out2in.addr.as_u32, 0,
911                          ip->dst_address.as_u32, 0, outside_fib_index,
912                          ip->protocol);
913               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
914                 {
915                   new_src_addr = s->out2in.addr;
916                 }
917               break;
918             }
919         }
920
921       if (!new_src_addr.as_u32)
922         {
923           for (i = 0; i < vec_len (sm->addresses); i++)
924             {
925               init_ed_k (&s_kv, sm->addresses[i].addr.as_u32, 0,
926                          ip->dst_address.as_u32, 0, outside_fib_index,
927                          ip->protocol);
928               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
929                 {
930                   new_src_addr = sm->addresses[i].addr;
931                 }
932             }
933         }
934     }
935
936   if (!new_src_addr.as_u32)
937     {
938       // could not allocate address for translation ...
939       return 0;
940     }
941
942   s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
943   if (!s)
944     {
945       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
946       nat_elog_warn (sm, "create NAT session failed");
947       return 0;
948     }
949
950   nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
951                         ip->dst_address, 0, rx_fib_index, ip->protocol);
952   nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
953   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
954
955   // hairpinning?
956   int is_hairpinning = nat44_ed_external_sm_lookup (
957     sm, ip->dst_address, 0, ip->protocol, &new_dst_addr, NULL);
958   s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
959
960   nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
961   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
962
963   nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
964                         outside_fib_index, ip->protocol);
965   nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
966   nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
967   nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
968
969   s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
970   s->out2in.addr.as_u32 = new_src_addr.as_u32;
971   s->out2in.fib_index = outside_fib_index;
972   s->in2out.addr.as_u32 = ip->src_address.as_u32;
973   s->in2out.fib_index = rx_fib_index;
974   s->in2out.port = s->out2in.port = ip->protocol;
975   if (m)
976     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
977
978   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
979     {
980       nat_elog_notice (sm, "in2out flow hash add failed");
981       nat_ed_session_delete (sm, s, thread_index, 1);
982       return NULL;
983     }
984
985   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1))
986     {
987       nat_elog_notice (sm, "out2in flow hash add failed");
988       nat_ed_session_delete (sm, s, thread_index, 1);
989       return NULL;
990     }
991
992   per_vrf_sessions_register_session (s, thread_index);
993
994   /* Accounting */
995   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
996                                  thread_index);
997   /* Per-user LRU list maintenance */
998   nat44_session_update_lru (sm, s, thread_index);
999
1000   return s;
1001 }
1002
1003 static inline uword
1004 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
1005                                           vlib_node_runtime_t *node,
1006                                           vlib_frame_t *frame,
1007                                           int is_output_feature,
1008                                           int is_multi_worker)
1009 {
1010   u32 n_left_from, *from;
1011   snat_main_t *sm = &snat_main;
1012   f64 now = vlib_time_now (vm);
1013   u32 thread_index = vm->thread_index;
1014   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1015   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
1016     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
1017
1018   from = vlib_frame_vector_args (frame);
1019   n_left_from = frame->n_vectors;
1020
1021   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1022   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1023   vlib_get_buffers (vm, from, b, n_left_from);
1024
1025   while (n_left_from > 0)
1026     {
1027       vlib_buffer_t *b0;
1028       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1029       u32 tx_sw_if_index0;
1030       u32 cntr_sw_if_index0;
1031       ip_protocol_t proto0;
1032       ip4_header_t *ip0;
1033       snat_session_t *s0 = 0;
1034       clib_bihash_kv_16_8_t kv0, value0;
1035       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1036       nat_6t_flow_t *f = 0;
1037       nat_6t_t lookup;
1038       int lookup_skipped = 0;
1039
1040       b0 = *b;
1041       b++;
1042
1043       /* Prefetch next iteration. */
1044       if (PREDICT_TRUE (n_left_from >= 2))
1045         {
1046           vlib_buffer_t *p2;
1047
1048           p2 = *b;
1049
1050           vlib_prefetch_buffer_header (p2, LOAD);
1051
1052           clib_prefetch_load (p2->data);
1053         }
1054
1055       if (is_output_feature)
1056         {
1057           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1058         }
1059
1060       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1061
1062       ip0 =
1063         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
1064
1065       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1066       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1067       cntr_sw_if_index0 =
1068         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1069       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1070                                                            rx_sw_if_index0);
1071       lookup.fib_index = rx_fib_index0;
1072
1073       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1074         {
1075           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1076           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1077                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1078                                        0);
1079           next[0] = NAT_NEXT_ICMP_ERROR;
1080           goto trace0;
1081         }
1082
1083       proto0 = ip0->protocol;
1084
1085       if (is_output_feature)
1086         {
1087           if (PREDICT_FALSE
1088               (nat_not_translate_output_feature_fwd
1089                (sm, ip0, thread_index, now, vm, b0)))
1090             goto trace0;
1091         }
1092
1093       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
1094         {
1095           if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1096                 ICMP4_echo_request &&
1097               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1098                 ICMP4_echo_reply &&
1099               !icmp_type_is_error_message (
1100                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
1101             {
1102               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
1103               next[0] = NAT_NEXT_DROP;
1104               goto trace0;
1105             }
1106           int err = nat_get_icmp_session_lookup_values (
1107             b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
1108             &lookup.dport, &lookup.proto);
1109           if (err != 0)
1110             {
1111               b0->error = node->errors[err];
1112               next[0] = NAT_NEXT_DROP;
1113               goto trace0;
1114             }
1115         }
1116       else
1117         {
1118           lookup.proto = ip0->protocol;
1119           lookup.saddr.as_u32 = ip0->src_address.as_u32;
1120           lookup.daddr.as_u32 = ip0->dst_address.as_u32;
1121           lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
1122           lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
1123         }
1124
1125       /* there might be a stashed index in vnet_buffer2 from handoff or
1126        * classify node, see if it can be used */
1127       if (is_multi_worker &&
1128           !pool_is_free_index (tsm->sessions,
1129                                vnet_buffer2 (b0)->nat.cached_session_index))
1130         {
1131           s0 = pool_elt_at_index (tsm->sessions,
1132                                   vnet_buffer2 (b0)->nat.cached_session_index);
1133           if (PREDICT_TRUE (
1134                 nat_6t_t_eq (&s0->i2o.match, &lookup)
1135                 // for some hairpinning cases there are two "i2i" flows instead
1136                 // of i2o and o2i as both hosts are on inside
1137                 || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1138                     nat_6t_t_eq (&s0->o2i.match, &lookup))))
1139             {
1140               /* yes, this is the droid we're looking for */
1141               lookup_skipped = 1;
1142               goto skip_lookup;
1143             }
1144           s0 = NULL;
1145         }
1146
1147       init_ed_k (&kv0, lookup.saddr.as_u32, lookup.sport, lookup.daddr.as_u32,
1148                  lookup.dport, lookup.fib_index, lookup.proto);
1149
1150       // lookup flow
1151       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1152         {
1153           // flow does not exist go slow path
1154           next[0] = def_slow;
1155           goto trace0;
1156         }
1157
1158       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1159       s0 =
1160         pool_elt_at_index (tsm->sessions,
1161                            ed_value_get_session_index (&value0));
1162
1163     skip_lookup:
1164
1165       ASSERT (thread_index == s0->thread_index);
1166
1167       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1168         {
1169           // session is closed, go slow path
1170           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1171           nat_ed_session_delete (sm, s0, thread_index, 1);
1172           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
1173           goto trace0;
1174         }
1175
1176       if (s0->tcp_closed_timestamp)
1177         {
1178           if (now >= s0->tcp_closed_timestamp)
1179             {
1180               // session is closed, go slow path, freed in slow path
1181               next[0] = def_slow;
1182             }
1183           else
1184             {
1185               // session in transitory timeout, drop
1186               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
1187               next[0] = NAT_NEXT_DROP;
1188             }
1189           goto trace0;
1190         }
1191
1192       // drop if session expired
1193       u64 sess_timeout_time;
1194       sess_timeout_time =
1195         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1196       if (now >= sess_timeout_time)
1197         {
1198           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1199           nat_ed_session_delete (sm, s0, thread_index, 1);
1200           // session is closed, go slow path
1201           next[0] = def_slow;
1202           goto trace0;
1203         }
1204
1205       b0->flags |= VNET_BUFFER_F_IS_NATED;
1206
1207       if (nat_6t_t_eq (&s0->i2o.match, &lookup))
1208         {
1209           f = &s0->i2o;
1210         }
1211       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1212                nat_6t_t_eq (&s0->o2i.match, &lookup))
1213         {
1214           f = &s0->o2i;
1215         }
1216       else
1217         {
1218           translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
1219           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1220           nat_ed_session_delete (sm, s0, thread_index, 1);
1221           next[0] = NAT_NEXT_DROP;
1222           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1223           goto trace0;
1224         }
1225
1226       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1227           (translation_error = nat_6t_flow_buf_translate_i2o (
1228              vm, sm, b0, ip0, f, proto0, is_output_feature)))
1229         {
1230           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1231           nat_ed_session_delete (sm, s0, thread_index, 1);
1232           next[0] = NAT_NEXT_DROP;
1233           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1234           goto trace0;
1235         }
1236
1237       switch (proto0)
1238         {
1239         case IP_PROTOCOL_TCP:
1240           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
1241                                          thread_index, cntr_sw_if_index0, 1);
1242           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1243           break;
1244         case IP_PROTOCOL_UDP:
1245           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
1246                                          thread_index, cntr_sw_if_index0, 1);
1247           break;
1248         case IP_PROTOCOL_ICMP:
1249           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
1250                                          thread_index, cntr_sw_if_index0, 1);
1251           break;
1252         default:
1253           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
1254                                          thread_index, cntr_sw_if_index0, 1);
1255           break;
1256         }
1257
1258       /* Accounting */
1259       nat44_session_update_counters (s0, now,
1260                                      vlib_buffer_length_in_chain (vm, b0),
1261                                      thread_index);
1262       /* Per-user LRU list maintenance */
1263       nat44_session_update_lru (sm, s0, thread_index);
1264
1265     trace0:
1266       if (PREDICT_FALSE
1267           ((node->flags & VLIB_NODE_FLAG_TRACE)
1268            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1269         {
1270           nat_in2out_ed_trace_t *t =
1271             vlib_add_trace (vm, node, b0, sizeof (*t));
1272           t->sw_if_index = rx_sw_if_index0;
1273           t->next_index = next[0];
1274           t->is_slow_path = 0;
1275           t->translation_error = translation_error;
1276           t->lookup_skipped = lookup_skipped;
1277           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1278
1279           if (s0)
1280             {
1281               t->session_index = s0 - tsm->sessions;
1282               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1283               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1284               t->translation_via_i2of = (&s0->i2o == f);
1285             }
1286           else
1287             {
1288               t->session_index = ~0;
1289             }
1290         }
1291
1292       if (next[0] == NAT_NEXT_DROP)
1293         {
1294           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
1295                                          thread_index, cntr_sw_if_index0, 1);
1296         }
1297
1298       n_left_from--;
1299       next++;
1300     }
1301
1302   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1303                                frame->n_vectors);
1304   return frame->n_vectors;
1305 }
1306
1307 static inline uword
1308 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
1309                                           vlib_node_runtime_t *node,
1310                                           vlib_frame_t *frame,
1311                                           int is_output_feature,
1312                                           int is_multi_worker)
1313 {
1314   u32 n_left_from, *from;
1315   snat_main_t *sm = &snat_main;
1316   f64 now = vlib_time_now (vm);
1317   u32 thread_index = vm->thread_index;
1318   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1319
1320   from = vlib_frame_vector_args (frame);
1321   n_left_from = frame->n_vectors;
1322
1323   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1324   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1325   vlib_get_buffers (vm, from, b, n_left_from);
1326
1327   while (n_left_from > 0)
1328     {
1329       vlib_buffer_t *b0;
1330       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1331       u32 tx_sw_if_index0;
1332       u32 cntr_sw_if_index0;
1333       ip_protocol_t proto0;
1334       ip4_header_t *ip0;
1335       udp_header_t *udp0;
1336       icmp46_header_t *icmp0;
1337       snat_session_t *s0 = 0;
1338       clib_bihash_kv_16_8_t kv0, value0;
1339       int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1340
1341       b0 = *b;
1342
1343       if (is_output_feature)
1344         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1345
1346       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1347
1348       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1349                               iph_offset0);
1350
1351       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1352       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1353       cntr_sw_if_index0 =
1354         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1355       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1356                                                            rx_sw_if_index0);
1357
1358       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1359         {
1360           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1361           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1362                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1363                                        0);
1364           next[0] = NAT_NEXT_ICMP_ERROR;
1365           goto trace0;
1366         }
1367
1368       udp0 = ip4_next_header (ip0);
1369       icmp0 = (icmp46_header_t *) udp0;
1370       proto0 = ip0->protocol;
1371
1372       if (PREDICT_FALSE (nat44_ed_is_unk_proto (proto0)))
1373         {
1374           s0 = nat44_ed_in2out_slowpath_unknown_proto (
1375             sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
1376           if (!s0)
1377             next[0] = NAT_NEXT_DROP;
1378
1379           if (NAT_NEXT_DROP != next[0] && s0 &&
1380               NAT_ED_TRNSL_ERR_SUCCESS !=
1381                 (translation_error = nat_6t_flow_buf_translate_i2o (
1382                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1383             {
1384               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1385               nat_ed_session_delete (sm, s0, thread_index, 1);
1386               next[0] = NAT_NEXT_DROP;
1387               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1388               goto trace0;
1389             }
1390
1391           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
1392                                          thread_index, cntr_sw_if_index0, 1);
1393           goto trace0;
1394         }
1395
1396       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
1397         {
1398           next[0] = icmp_in2out_ed_slow_path (
1399             sm, b0, ip0, icmp0, rx_sw_if_index0, tx_sw_if_index0,
1400             rx_fib_index0, node, next[0], now, thread_index, &s0,
1401             is_multi_worker);
1402           if (NAT_NEXT_DROP != next[0] && s0 &&
1403               NAT_ED_TRNSL_ERR_SUCCESS !=
1404                 (translation_error = nat_6t_flow_buf_translate_i2o (
1405                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1406             {
1407               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1408               nat_ed_session_delete (sm, s0, thread_index, 1);
1409               next[0] = NAT_NEXT_DROP;
1410               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1411               goto trace0;
1412             }
1413
1414           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.icmp,
1415                                          thread_index, cntr_sw_if_index0, 1);
1416           goto trace0;
1417         }
1418
1419       init_ed_k (
1420         &kv0, ip0->src_address.as_u32, vnet_buffer (b0)->ip.reass.l4_src_port,
1421         ip0->dst_address.as_u32, vnet_buffer (b0)->ip.reass.l4_dst_port,
1422         rx_fib_index0, ip0->protocol);
1423       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1424         {
1425           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1426           s0 =
1427             pool_elt_at_index (tsm->sessions,
1428                                ed_value_get_session_index (&value0));
1429
1430           if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
1431             {
1432               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1433               nat_ed_session_delete (sm, s0, thread_index, 1);
1434               s0 = NULL;
1435             }
1436         }
1437
1438       if (!s0)
1439         {
1440           if (is_output_feature)
1441             {
1442               if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
1443                     sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1444                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1445                     rx_sw_if_index0, tx_sw_if_index0, now, is_multi_worker)))
1446                 goto trace0;
1447
1448               /*
1449                * Send DHCP packets to the ipv4 stack, or we won't
1450                * be able to use dhcp client on the outside interface
1451                */
1452               if (PREDICT_FALSE (
1453                     proto0 == IP_PROTOCOL_UDP &&
1454                     (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1455                      clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)) &&
1456                     ip0->dst_address.as_u32 == 0xffffffff))
1457                 goto trace0;
1458             }
1459           else
1460             {
1461               if (PREDICT_FALSE (
1462                     nat44_ed_not_translate (vm, sm, node, rx_sw_if_index0, b0,
1463                                             ip0, proto0, rx_fib_index0)))
1464                 goto trace0;
1465             }
1466
1467           next[0] =
1468             slow_path_ed (vm, sm, b0, ip0->src_address, ip0->dst_address,
1469                           vnet_buffer (b0)->ip.reass.l4_src_port,
1470                           vnet_buffer (b0)->ip.reass.l4_dst_port,
1471                           ip0->protocol, rx_fib_index0, tx_sw_if_index0, &s0,
1472                           node, next[0], thread_index, now);
1473
1474           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1475             goto trace0;
1476
1477           if (PREDICT_FALSE (!s0))
1478             goto trace0;
1479
1480         }
1481
1482       b0->flags |= VNET_BUFFER_F_IS_NATED;
1483
1484       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1485           (translation_error = nat_6t_flow_buf_translate_i2o (
1486              vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1487         {
1488           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1489           nat_ed_session_delete (sm, s0, thread_index, 1);
1490           next[0] = NAT_NEXT_DROP;
1491           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1492           goto trace0;
1493         }
1494
1495       if (PREDICT_TRUE (proto0 == IP_PROTOCOL_TCP))
1496         {
1497           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
1498                                          thread_index, cntr_sw_if_index0, 1);
1499           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1500         }
1501       else
1502         {
1503           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
1504                                          thread_index, cntr_sw_if_index0, 1);
1505         }
1506
1507       /* Accounting */
1508       nat44_session_update_counters (s0, now,
1509                                      vlib_buffer_length_in_chain
1510                                      (vm, b0), thread_index);
1511       /* Per-user LRU list maintenance */
1512       nat44_session_update_lru (sm, s0, thread_index);
1513
1514     trace0:
1515       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1516                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1517         {
1518           nat_in2out_ed_trace_t *t =
1519             vlib_add_trace (vm, node, b0, sizeof (*t));
1520           t->sw_if_index = rx_sw_if_index0;
1521           t->next_index = next[0];
1522           t->is_slow_path = 1;
1523           t->translation_error = translation_error;
1524           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1525
1526           if (s0)
1527             {
1528               t->session_index = s0 - tsm->sessions;
1529               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1530               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1531               t->translation_via_i2of = 1;
1532             }
1533
1534           else
1535             {
1536               t->session_index = ~0;
1537             }
1538         }
1539
1540       if (next[0] == NAT_NEXT_DROP)
1541         {
1542           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
1543                                          thread_index, cntr_sw_if_index0, 1);
1544         }
1545
1546       n_left_from--;
1547       next++;
1548       b++;
1549     }
1550
1551   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1552                                frame->n_vectors);
1553
1554   return frame->n_vectors;
1555 }
1556
1557 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1558                                      vlib_node_runtime_t * node,
1559                                      vlib_frame_t * frame)
1560 {
1561   if (snat_main.num_workers > 1)
1562     {
1563       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1);
1564     }
1565   else
1566     {
1567       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0);
1568     }
1569 }
1570
1571 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1572   .name = "nat44-ed-in2out",
1573   .vector_size = sizeof (u32),
1574   .sibling_of = "nat-default",
1575   .format_trace = format_nat_in2out_ed_trace,
1576   .type = VLIB_NODE_TYPE_INTERNAL,
1577   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1578   .error_strings = nat_in2out_ed_error_strings,
1579   .runtime_data_bytes = sizeof (snat_runtime_t),
1580 };
1581
1582 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1583                                             vlib_node_runtime_t * node,
1584                                             vlib_frame_t * frame)
1585 {
1586   if (snat_main.num_workers > 1)
1587     {
1588       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1);
1589     }
1590   else
1591     {
1592       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0);
1593     }
1594 }
1595
1596 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1597   .name = "nat44-ed-in2out-output",
1598   .vector_size = sizeof (u32),
1599   .sibling_of = "nat-default",
1600   .format_trace = format_nat_in2out_ed_trace,
1601   .type = VLIB_NODE_TYPE_INTERNAL,
1602   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1603   .error_strings = nat_in2out_ed_error_strings,
1604   .runtime_data_bytes = sizeof (snat_runtime_t),
1605 };
1606
1607 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1608                                               vlib_node_runtime_t *
1609                                               node, vlib_frame_t * frame)
1610 {
1611   if (snat_main.num_workers > 1)
1612     {
1613       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 1);
1614     }
1615   else
1616     {
1617       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 0);
1618     }
1619 }
1620
1621 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1622   .name = "nat44-ed-in2out-slowpath",
1623   .vector_size = sizeof (u32),
1624   .sibling_of = "nat-default",
1625   .format_trace = format_nat_in2out_ed_trace,
1626   .type = VLIB_NODE_TYPE_INTERNAL,
1627   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1628   .error_strings = nat_in2out_ed_error_strings,
1629   .runtime_data_bytes = sizeof (snat_runtime_t),
1630 };
1631
1632 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1633                                                      vlib_node_runtime_t
1634                                                      * node,
1635                                                      vlib_frame_t * frame)
1636 {
1637   if (snat_main.num_workers > 1)
1638     {
1639       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 1);
1640     }
1641   else
1642     {
1643       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 0);
1644     }
1645 }
1646
1647 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1648   .name = "nat44-ed-in2out-output-slowpath",
1649   .vector_size = sizeof (u32),
1650   .sibling_of = "nat-default",
1651   .format_trace = format_nat_in2out_ed_trace,
1652   .type = VLIB_NODE_TYPE_INTERNAL,
1653   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1654   .error_strings = nat_in2out_ed_error_strings,
1655   .runtime_data_bytes = sizeof (snat_runtime_t),
1656 };
1657
1658 static u8 *
1659 format_nat_pre_trace (u8 * s, va_list * args)
1660 {
1661   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1662   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1663   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1664   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1665                  t->arc_next_index);
1666 }
1667
1668 VLIB_NODE_FN (nat_pre_in2out_node)
1669   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1670 {
1671   return nat_pre_node_fn_inline (vm, node, frame,
1672                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1673 }
1674
1675 VLIB_NODE_FN (nat_pre_in2out_output_node)
1676   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1677 {
1678   return nat_pre_node_fn_inline (vm, node, frame,
1679                                  NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
1680 }
1681
1682 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1683   .name = "nat-pre-in2out",
1684   .vector_size = sizeof (u32),
1685   .sibling_of = "nat-default",
1686   .format_trace = format_nat_pre_trace,
1687   .type = VLIB_NODE_TYPE_INTERNAL,
1688   .n_errors = 0,
1689 };
1690
1691 VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
1692   .name = "nat-pre-in2out-output",
1693   .vector_size = sizeof (u32),
1694   .sibling_of = "nat-default",
1695   .format_trace = format_nat_pre_trace,
1696   .type = VLIB_NODE_TYPE_INTERNAL,
1697   .n_errors = 0,
1698 };
1699
1700 /*
1701  * fd.io coding-style-patch-verification: ON
1702  *
1703  * Local Variables:
1704  * eval: (c-set-style "gnu")
1705  * End:
1706  */