nat: NAT44 ED improvements and fixes
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_in2out.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27
28 #include <nat/lib/nat_syslog.h>
29 #include <nat/lib/nat_inlines.h>
30 #include <nat/lib/ipfix_logging.h>
31
32 #include <nat/nat44-ed/nat44_ed.h>
33 #include <nat/nat44-ed/nat44_ed_inlines.h>
34
/* Upper bound on the number of port candidates tried by the ED overloading
 * port allocator. If rolling the dice this many times does not yield a free
 * port, the allocator gives up and reports that no port is available, so
 * that resources are conserved. */
#define ED_PORT_ALLOC_ATTEMPTS (10)
39
40 static char *nat_in2out_ed_error_strings[] = {
41 #define _(sym,string) string,
42   foreach_nat_in2out_ed_error
43 #undef _
44 };
45
46 typedef struct
47 {
48   u32 sw_if_index;
49   u32 next_index;
50   u32 session_index;
51   nat_translation_error_e translation_error;
52   nat_6t_flow_t i2of;
53   nat_6t_flow_t o2if;
54   clib_bihash_kv_16_8_t search_key;
55   u8 is_slow_path;
56   u8 translation_via_i2of;
57   u8 lookup_skipped;
58 } nat_in2out_ed_trace_t;
59
60 static u8 *
61 format_nat_in2out_ed_trace (u8 * s, va_list * args)
62 {
63   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
64   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
65   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
66   char *tag;
67
68   tag =
69     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
70     "NAT44_IN2OUT_ED_FAST_PATH";
71
72   s = format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
73               t->next_index);
74   if (~0 != t->session_index)
75     {
76       s = format (s, ", session %d, translation result '%U' via %s",
77                   t->session_index, format_nat_ed_translation_error,
78                   t->translation_error,
79                   t->translation_via_i2of ? "i2of" : "o2if");
80       s = format (s, "\n  i2of %U", format_nat_6t_flow, &t->i2of);
81       s = format (s, "\n  o2if %U", format_nat_6t_flow, &t->o2if);
82     }
83   if (!t->is_slow_path)
84     {
85       if (t->lookup_skipped)
86         {
87           s = format (s, "\n lookup skipped - cached session index used");
88         }
89       else
90         {
91           s = format (s, "\n  search key %U", format_ed_session_kvp,
92                       &t->search_key);
93         }
94     }
95
96   return s;
97 }
98
99 /**
100  * @brief Check if packet should be translated
101  *
102  * Packets aimed at outside interface and external address with active session
103  * should be translated.
104  *
105  * @param sm            NAT main
106  * @param rt            NAT runtime data
107  * @param sw_if_index0  index of the inside interface
108  * @param ip0           IPv4 header
109  * @param proto0        NAT protocol
110  * @param rx_fib_index0 RX FIB index
111  *
112  * @returns 0 if packet should be translated otherwise 1
113  */
114 static inline int
115 snat_not_translate_fast (snat_main_t *sm, vlib_node_runtime_t *node,
116                          u32 sw_if_index0, ip4_header_t *ip0, u32 proto0,
117                          u32 rx_fib_index0)
118 {
119   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
120   nat_outside_fib_t *outside_fib;
121   fib_prefix_t pfx = {
122     .fp_proto = FIB_PROTOCOL_IP4,
123     .fp_len = 32,
124     .fp_addr = {
125                 .ip4.as_u32 = ip0->dst_address.as_u32,
126                 }
127     ,
128   };
129
130   /* Don't NAT packet aimed at the intfc address */
131   if (PREDICT_FALSE (
132         is_interface_addr (sm, node, sw_if_index0, ip0->dst_address.as_u32)))
133     return 1;
134
135   fei = fib_table_lookup (rx_fib_index0, &pfx);
136   if (FIB_NODE_INDEX_INVALID != fei)
137     {
138       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
139       if (sw_if_index == ~0)
140         {
141           vec_foreach (outside_fib, sm->outside_fibs)
142             {
143               fei = fib_table_lookup (outside_fib->fib_index, &pfx);
144               if (FIB_NODE_INDEX_INVALID != fei)
145                 {
146                   sw_if_index = fib_entry_get_resolving_interface (fei);
147                   if (sw_if_index != ~0)
148                     break;
149                 }
150             }
151         }
152       if (sw_if_index == ~0)
153         return 1;
154
155       snat_interface_t *i;
156       pool_foreach (i, sm->interfaces)
157         {
158           /* NAT packet aimed at outside interface */
159           if ((nat44_ed_is_interface_outside (i)) &&
160               (sw_if_index == i->sw_if_index))
161             return 0;
162         }
163     }
164
165   return 1;
166 }
167
168 static int
169 nat_ed_alloc_addr_and_port_with_snat_address (
170   snat_main_t *sm, u32 nat_proto, u32 thread_index, snat_address_t *a,
171   u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
172   ip4_address_t *outside_addr, u16 *outside_port)
173 {
174   const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
175
176   s->o2i.match.daddr = a->addr;
177   /* first try port suggested by caller */
178   u16 port = clib_net_to_host_u16 (*outside_port);
179   u16 port_offset = port - port_thread_offset;
180   if (port <= port_thread_offset ||
181       port > port_thread_offset + port_per_thread)
182     {
183       /* need to pick a different port, suggested port doesn't fit in
184        * this thread's port range */
185       port_offset = snat_random_port (0, port_per_thread - 1);
186       port = port_thread_offset + port_offset;
187     }
188   u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
189   do
190     {
191       if (NAT_PROTOCOL_ICMP == nat_proto)
192         {
193           s->o2i.match.sport = clib_host_to_net_u16 (port);
194         }
195       s->o2i.match.dport = clib_host_to_net_u16 (port);
196       if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
197         {
198 #define _(N, i, n, s)                                                         \
199   case NAT_PROTOCOL_##N:                                                      \
200     ++a->busy_##n##_port_refcounts[port];                                     \
201     a->busy_##n##_ports_per_thread[thread_index]++;                           \
202     a->busy_##n##_ports++;                                                    \
203     break;
204           switch (nat_proto)
205             {
206               foreach_nat_protocol;
207             default:
208               nat_elog_info (sm, "unknown protocol");
209               return 1;
210             }
211 #undef _
212           *outside_addr = a->addr;
213           *outside_port = clib_host_to_net_u16 (port);
214           return 0;
215         }
216       port_offset = snat_random_port (0, port_per_thread - 1);
217       port = port_thread_offset + port_offset;
218       --attempts;
219     }
220   while (attempts > 0);
221   return 1;
222 }
223
224 static int
225 nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto,
226                             u32 thread_index, ip4_address_t s_addr,
227                             u16 port_per_thread, u32 snat_thread_index,
228                             snat_session_t *s, ip4_address_t *outside_addr,
229                             u16 *outside_port)
230 {
231   int i;
232   snat_address_t *a, *ga = 0;
233
234   if (vec_len (sm->addresses) > 0)
235     {
236       int s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses);
237
238       for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
239         {
240           a = sm->addresses + i;
241           if (a->fib_index == rx_fib_index)
242             {
243               return nat_ed_alloc_addr_and_port_with_snat_address (
244                 sm, nat_proto, thread_index, a, port_per_thread,
245                 snat_thread_index, s, outside_addr, outside_port);
246             }
247           else if (a->fib_index == ~0)
248             {
249               ga = a;
250             }
251         }
252
253       for (i = 0; i < s_addr_offset; ++i)
254         {
255           a = sm->addresses + i;
256           if (a->fib_index == rx_fib_index)
257             {
258               return nat_ed_alloc_addr_and_port_with_snat_address (
259                 sm, nat_proto, thread_index, a, port_per_thread,
260                 snat_thread_index, s, outside_addr, outside_port);
261             }
262           else if (a->fib_index == ~0)
263             {
264               ga = a;
265             }
266         }
267
268       if (ga)
269         {
270           return nat_ed_alloc_addr_and_port_with_snat_address (
271             sm, nat_proto, thread_index, a, port_per_thread, snat_thread_index,
272             s, outside_addr, outside_port);
273         }
274     }
275   /* Totally out of translations to use... */
276   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
277   return 1;
278 }
279
280 static_always_inline u32
281 nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr)
282 {
283   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
284   nat_outside_fib_t *outside_fib;
285   fib_prefix_t pfx = {
286     .fp_proto = FIB_PROTOCOL_IP4,
287     .fp_len = 32,
288     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
289     ,
290   };
291   // TODO: multiple vrfs none can resolve addr
292   vec_foreach (outside_fib, sm->outside_fibs)
293     {
294       fei = fib_table_lookup (outside_fib->fib_index, &pfx);
295       if (FIB_NODE_INDEX_INVALID != fei)
296         {
297           if (fib_entry_get_resolving_interface (fei) != ~0)
298             {
299               return outside_fib->fib_index;
300             }
301         }
302     }
303   return ~0;
304 }
305
306 static_always_inline int
307 nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
308                              u16 match_port, nat_protocol_t match_protocol,
309                              u32 match_fib_index, ip4_address_t *daddr,
310                              u16 *dport)
311 {
312   clib_bihash_kv_8_8_t kv, value;
313   init_nat_k (&kv, match_addr, match_port, match_fib_index, match_protocol);
314   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
315     {
316       /* Try address only mapping */
317       init_nat_k (&kv, match_addr, 0, 0, 0);
318       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv,
319                                   &value))
320         return 0;
321     }
322
323   snat_static_mapping_t *m =
324     pool_elt_at_index (sm->static_mappings, value.value);
325   *daddr = m->local_addr;
326   if (dport)
327     {
328       /* Address only mapping doesn't change port */
329       *dport = is_sm_addr_only (m->flags) ? match_port : m->local_port;
330     }
331   return 1;
332 }
333
334 static u32
335 slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
336               ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
337               u16 r_port, u8 proto, u32 rx_fib_index,
338               snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
339               u32 thread_index, f64 now)
340 {
341   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
342   ip4_address_t outside_addr;
343   u16 outside_port;
344   u32 outside_fib_index;
345   u8 is_identity_nat = 0;
346
347   u32 nat_proto = ip_proto_to_nat_proto (proto);
348   snat_session_t *s = NULL;
349   lb_nat_type_t lb = 0;
350   ip4_address_t daddr = r_addr;
351   u16 dport = r_port;
352
353   if (PREDICT_FALSE
354       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
355     {
356       if (!nat_lru_free_one (sm, thread_index, now))
357         {
358           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
359           nat_ipfix_logging_max_sessions (thread_index,
360                                           sm->max_translations_per_thread);
361           nat_elog_notice (sm, "maximum sessions exceeded");
362           return NAT_NEXT_DROP;
363         }
364     }
365
366   outside_fib_index = sm->outside_fib_index;
367
368   switch (vec_len (sm->outside_fibs))
369     {
370     case 0:
371       outside_fib_index = sm->outside_fib_index;
372       break;
373     case 1:
374       outside_fib_index = sm->outside_fibs[0].fib_index;
375       break;
376     default:
377       outside_fib_index = nat_outside_fib_index_lookup (sm, r_addr);
378       break;
379     }
380
381   ip4_address_t sm_addr;
382   u16 sm_port;
383   u32 sm_fib_index;
384   /* First try to match static mapping by local address and port */
385   int is_sm;
386   if (snat_static_mapping_match (vm, sm, l_addr, l_port, rx_fib_index,
387                                  nat_proto, &sm_addr, &sm_port, &sm_fib_index,
388                                  0, 0, 0, &lb, 0, &is_identity_nat, 0))
389     {
390       is_sm = 0;
391     }
392   else
393     {
394       if (PREDICT_FALSE (is_identity_nat))
395         {
396           *sessionp = NULL;
397           return next;
398         }
399       is_sm = 1;
400     }
401
402   if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP))
403     {
404       if (PREDICT_FALSE (!tcp_flags_is_init (
405             vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
406         {
407           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
408           return NAT_NEXT_DROP;
409         }
410     }
411
412   s = nat_ed_session_alloc (sm, thread_index, now, proto);
413   ASSERT (s);
414
415   if (!is_sm)
416     {
417       s->in2out.addr = l_addr;
418       s->in2out.port = l_port;
419       s->nat_proto = nat_proto;
420       s->in2out.fib_index = rx_fib_index;
421       s->out2in.fib_index = outside_fib_index;
422
423       // suggest using local port to allocation function
424       outside_port = l_port;
425
426       // hairpinning?
427       int is_hairpinning = nat44_ed_external_sm_lookup (
428         sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
429       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
430
431       // destination addr/port updated with real values in
432       // nat_ed_alloc_addr_and_port
433       nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
434                             s->out2in.fib_index, proto);
435       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
436       if (NAT_PROTOCOL_ICMP == nat_proto)
437         {
438           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
439         }
440       else
441         {
442           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
443         }
444       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
445
446       if (nat_ed_alloc_addr_and_port (
447             sm, rx_fib_index, nat_proto, thread_index, l_addr,
448             sm->port_per_thread, tsm->snat_thread_index, s, &outside_addr,
449             &outside_port))
450         {
451           nat_elog_notice (sm, "addresses exhausted");
452           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
453           nat_ed_session_delete (sm, s, thread_index, 1);
454           return NAT_NEXT_DROP;
455         }
456       s->out2in.addr = outside_addr;
457       s->out2in.port = outside_port;
458     }
459   else
460     {
461       // static mapping
462       s->out2in.addr = outside_addr = sm_addr;
463       s->out2in.port = outside_port = sm_port;
464       s->in2out.addr = l_addr;
465       s->in2out.port = l_port;
466       s->nat_proto = nat_proto;
467       s->in2out.fib_index = rx_fib_index;
468       s->out2in.fib_index = outside_fib_index;
469       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
470
471       // hairpinning?
472       int is_hairpinning = nat44_ed_external_sm_lookup (
473         sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
474       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
475
476       if (NAT_PROTOCOL_ICMP == nat_proto)
477         {
478           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
479                                 sm_port, s->out2in.fib_index, proto);
480           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
481         }
482       else
483         {
484           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr,
485                                 sm_port, s->out2in.fib_index, proto);
486           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
487         }
488       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
489       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
490       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
491         {
492           nat_elog_notice (sm, "out2in key add failed");
493           goto error;
494         }
495     }
496
497   if (lb)
498     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
499   s->ext_host_addr = r_addr;
500   s->ext_host_port = r_port;
501
502   nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port,
503                         rx_fib_index, proto);
504   nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
505   nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);
506
507   if (NAT_PROTOCOL_ICMP == nat_proto)
508     {
509       nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
510     }
511   else
512     {
513       nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
514       nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
515     }
516   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
517
518   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
519     {
520       nat_elog_notice (sm, "in2out key add failed");
521       goto error;
522     }
523
524   /* log NAT event */
525   nat_ipfix_logging_nat44_ses_create (thread_index,
526                                       s->in2out.addr.as_u32,
527                                       s->out2in.addr.as_u32,
528                                       s->nat_proto,
529                                       s->in2out.port,
530                                       s->out2in.port, s->in2out.fib_index);
531
532   nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
533                          s->in2out.port, &s->ext_host_nat_addr,
534                          s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
535                          &s->ext_host_addr, s->ext_host_port, s->nat_proto, 0);
536
537   per_vrf_sessions_register_session (s, thread_index);
538
539   *sessionp = s;
540   return next;
541 error:
542   if (s)
543     {
544       if (!is_sm)
545         {
546           snat_free_outside_address_and_port (sm->addresses, thread_index,
547                                               &outside_addr, outside_port,
548                                               nat_proto);
549         }
550       nat_ed_session_delete (sm, s, thread_index, 1);
551     }
552   *sessionp = s = NULL;
553   return NAT_NEXT_DROP;
554 }
555
556 static_always_inline int
557 nat44_ed_not_translate (vlib_main_t *vm, snat_main_t *sm,
558                         vlib_node_runtime_t *node, u32 sw_if_index,
559                         vlib_buffer_t *b, ip4_header_t *ip, u32 proto,
560                         u32 rx_fib_index, u32 thread_index)
561 {
562   clib_bihash_kv_16_8_t kv, value;
563
564   init_ed_k (&kv, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
565              ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
566              sm->outside_fib_index, ip->protocol);
567
568   /* NAT packet aimed at external address if has active sessions */
569   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
570     {
571       /* or is static mappings */
572       ip4_address_t placeholder_addr;
573       u16 placeholder_port;
574       u32 placeholder_fib_index;
575       if (!snat_static_mapping_match (
576             vm, sm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
577             sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
578             &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
579         return 0;
580     }
581   else
582     return 0;
583
584   if (sm->forwarding_enabled)
585     return 1;
586
587   return snat_not_translate_fast (sm, node, sw_if_index, ip, proto,
588                                   rx_fib_index);
589 }
590
591 static_always_inline int
592 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
593                                       u32 thread_index, f64 now,
594                                       vlib_main_t * vm, vlib_buffer_t * b)
595 {
596   clib_bihash_kv_16_8_t kv, value;
597   snat_session_t *s = 0;
598   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
599
600   if (!sm->forwarding_enabled)
601     return 0;
602
603   if (ip->protocol == IP_PROTOCOL_ICMP)
604     {
605       ip4_address_t lookup_saddr, lookup_daddr;
606       u16 lookup_sport, lookup_dport;
607       u8 lookup_protocol;
608       if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
609                                               &lookup_sport, &lookup_daddr,
610                                               &lookup_dport, &lookup_protocol))
611         return 0;
612       init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport,
613                  0, lookup_protocol);
614     }
615   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
616     {
617       init_ed_k (&kv, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
618                  ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port, 0,
619                  ip->protocol);
620     }
621   else
622     {
623       init_ed_k (&kv, ip->src_address, 0, ip->dst_address, 0, 0,
624                  ip->protocol);
625     }
626
627   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
628     {
629       ASSERT (thread_index == ed_value_get_thread_index (&value));
630       s =
631         pool_elt_at_index (tsm->sessions,
632                            ed_value_get_session_index (&value));
633
634       if (na44_ed_is_fwd_bypass_session (s))
635         {
636           if (ip->protocol == IP_PROTOCOL_TCP)
637             {
638               nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index);
639             }
640           /* Accounting */
641           nat44_session_update_counters (s, now,
642                                          vlib_buffer_length_in_chain (vm, b),
643                                          thread_index);
644           /* Per-user LRU list maintenance */
645           nat44_session_update_lru (sm, s, thread_index);
646           return 1;
647         }
648       else
649         return 0;
650     }
651
652   return 0;
653 }
654
655 static_always_inline int
656 nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
657                                        ip4_header_t *ip, u16 src_port,
658                                        u16 dst_port, u32 thread_index,
659                                        u32 rx_sw_if_index, u32 tx_sw_if_index,
660                                        f64 now, int is_multi_worker)
661 {
662   clib_bihash_kv_16_8_t kv, value;
663   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
664   snat_interface_t *i;
665   snat_session_t *s;
666   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
667   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
668
669   /* src NAT check */
670   init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
671              tx_fib_index, ip->protocol);
672   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
673     {
674       ASSERT (thread_index == ed_value_get_thread_index (&value));
675       s =
676         pool_elt_at_index (tsm->sessions,
677                            ed_value_get_session_index (&value));
678       if (nat44_is_ses_closed (s)
679           && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp))
680         {
681           nat_free_session_data (sm, s, thread_index, 0);
682           nat_ed_session_delete (sm, s, thread_index, 1);
683         }
684       return 1;
685     }
686
687   /* dst NAT check */
688   if (is_multi_worker &&
689       PREDICT_TRUE (!pool_is_free_index (
690         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index)))
691     {
692       nat_6t_t lookup;
693       lookup.fib_index = rx_fib_index;
694       lookup.proto = ip->protocol;
695       lookup.daddr.as_u32 = ip->src_address.as_u32;
696       lookup.dport = src_port;
697       lookup.saddr.as_u32 = ip->dst_address.as_u32;
698       lookup.sport = dst_port;
699       s = pool_elt_at_index (
700         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index);
701       if (PREDICT_TRUE (nat_6t_t_eq (&s->i2o.match, &lookup)))
702         {
703           goto skip_dst_nat_lookup;
704         }
705       s = NULL;
706     }
707
708   init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port,
709              rx_fib_index, ip->protocol);
710   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
711     {
712       ASSERT (thread_index == ed_value_get_thread_index (&value));
713       s =
714         pool_elt_at_index (tsm->sessions,
715                            ed_value_get_session_index (&value));
716
717     skip_dst_nat_lookup:
718       if (na44_ed_is_fwd_bypass_session (s))
719         return 0;
720
721       /* hairpinning */
722       pool_foreach (i, sm->output_feature_interfaces)
723        {
724          if ((nat44_ed_is_interface_inside (i)) &&
725              (rx_sw_if_index == i->sw_if_index))
726            return 0;
727       }
728       return 1;
729     }
730
731   return 0;
732 }
733
734 static inline u32
735 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
736                           icmp46_header_t *icmp, u32 sw_if_index,
737                           u32 rx_fib_index, vlib_node_runtime_t *node,
738                           u32 next, f64 now, u32 thread_index,
739                           nat_protocol_t nat_proto, snat_session_t **s_p,
740                           int is_multi_worker)
741 {
742   vlib_main_t *vm = vlib_get_main ();
743   u16 checksum;
744   int err;
745   snat_session_t *s = NULL;
746   u8 lookup_protocol = ip->protocol;
747   u16 lookup_sport, lookup_dport;
748   ip4_address_t lookup_saddr, lookup_daddr;
749
750   err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
751                                             &lookup_sport, &lookup_daddr,
752                                             &lookup_dport, &lookup_protocol);
753   if (err != 0)
754     {
755       b->error = node->errors[err];
756       return NAT_NEXT_DROP;
757     }
758
759   if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
760     {
761       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
762             sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
763             vnet_buffer (b)->sw_if_index[VLIB_TX], now, is_multi_worker)))
764         {
765           return next;
766         }
767     }
768   else
769     {
770       if (PREDICT_FALSE (nat44_ed_not_translate (vm, sm, node, sw_if_index, b,
771                                                  ip, NAT_PROTOCOL_ICMP,
772                                                  rx_fib_index, thread_index)))
773         {
774           return next;
775         }
776     }
777
778   if (PREDICT_FALSE (icmp_type_is_error_message (
779         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
780     {
781       b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
782       return NAT_NEXT_DROP;
783     }
784
785   next = slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address,
786                        lookup_sport, lookup_dport, ip->protocol, rx_fib_index,
787                        &s, node, next, thread_index, vlib_time_now (vm));
788
789   if (NAT_NEXT_DROP == next)
790     goto out;
791
792   if (PREDICT_TRUE (!ip4_is_fragment (ip)))
793     {
794       ip_csum_t sum = ip_incremental_checksum_buffer (
795         vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b),
796         ntohs (ip->length) - ip4_header_bytes (ip), 0);
797       checksum = ~ip_csum_fold (sum);
798       if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff))
799         {
800           next = NAT_NEXT_DROP;
801           goto out;
802         }
803     }
804
805 out:
806   if (PREDICT_TRUE (next != NAT_NEXT_DROP && s))
807     {
808       /* Accounting */
809       nat44_session_update_counters (
810         s, now, vlib_buffer_length_in_chain (vm, b), thread_index);
811       /* Per-user LRU list maintenance */
812       nat44_session_update_lru (sm, s, thread_index);
813     }
814   *s_p = s;
815   return next;
816 }
817
818 static snat_session_t *
819 nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
820                                         ip4_header_t *ip, u32 rx_fib_index,
821                                         u32 thread_index, f64 now,
822                                         vlib_main_t *vm,
823                                         vlib_node_runtime_t *node)
824 {
825   clib_bihash_kv_8_8_t kv, value;
826   clib_bihash_kv_16_8_t s_kv, s_value;
827   snat_static_mapping_t *m = NULL;
828   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
829   snat_session_t *s = NULL;
830   u32 outside_fib_index = sm->outside_fib_index;
831   int i;
832   ip4_address_t new_src_addr = { 0 };
833   ip4_address_t new_dst_addr = ip->dst_address;
834
835   if (PREDICT_FALSE (
836         nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
837     {
838       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
839       nat_ipfix_logging_max_sessions (thread_index,
840                                       sm->max_translations_per_thread);
841       nat_elog_notice (sm, "maximum sessions exceeded");
842       return 0;
843     }
844
845   switch (vec_len (sm->outside_fibs))
846     {
847     case 0:
848       outside_fib_index = sm->outside_fib_index;
849       break;
850     case 1:
851       outside_fib_index = sm->outside_fibs[0].fib_index;
852       break;
853     default:
854       outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address);
855       break;
856     }
857
858   init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0);
859
860   /* Try to find static mapping first */
861   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
862     {
863       m = pool_elt_at_index (sm->static_mappings, value.value);
864       new_src_addr = m->external_addr;
865     }
866   else
867     {
868       pool_foreach (s, tsm->sessions)
869         {
870           if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
871             {
872               init_ed_k (&s_kv, s->out2in.addr, 0, ip->dst_address, 0,
873                          outside_fib_index, ip->protocol);
874               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
875                 {
876                   new_src_addr = s->out2in.addr;
877                 }
878               break;
879             }
880         }
881
882       if (!new_src_addr.as_u32)
883         {
884           for (i = 0; i < vec_len (sm->addresses); i++)
885             {
886               init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0,
887                          outside_fib_index, ip->protocol);
888               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
889                 {
890                   new_src_addr = sm->addresses[i].addr;
891                 }
892             }
893         }
894     }
895
896   if (!new_src_addr.as_u32)
897     {
898       // could not allocate address for translation ...
899       return 0;
900     }
901
902   s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
903   if (!s)
904     {
905       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
906       nat_elog_warn (sm, "create NAT session failed");
907       return 0;
908     }
909
910   nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
911                         ip->dst_address, 0, rx_fib_index, ip->protocol);
912   nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
913   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
914
915   // hairpinning?
916   int is_hairpinning =
917     nat44_ed_external_sm_lookup (sm, ip->dst_address, 0, NAT_PROTOCOL_OTHER,
918                                  outside_fib_index, &new_dst_addr, NULL);
919   s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
920
921   nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
922   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
923
924   nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
925                         outside_fib_index, ip->protocol);
926   nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
927   nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
928   nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
929
930   s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
931   s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
932   s->out2in.addr.as_u32 = new_src_addr.as_u32;
933   s->out2in.fib_index = outside_fib_index;
934   s->in2out.addr.as_u32 = ip->src_address.as_u32;
935   s->in2out.fib_index = rx_fib_index;
936   s->in2out.port = s->out2in.port = ip->protocol;
937   if (m)
938     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
939
940   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
941     {
942       nat_elog_notice (sm, "in2out flow hash add failed");
943       nat_ed_session_delete (sm, s, thread_index, 1);
944       return NULL;
945     }
946
947   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1))
948     {
949       nat_elog_notice (sm, "out2in flow hash add failed");
950       nat_ed_session_delete (sm, s, thread_index, 1);
951       return NULL;
952     }
953
954   per_vrf_sessions_register_session (s, thread_index);
955
956   /* Accounting */
957   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
958                                  thread_index);
959   /* Per-user LRU list maintenance */
960   nat44_session_update_lru (sm, s, thread_index);
961
962   return s;
963 }
964
965 static inline uword
966 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
967                                           vlib_node_runtime_t *node,
968                                           vlib_frame_t *frame,
969                                           int is_output_feature,
970                                           int is_multi_worker)
971 {
972   u32 n_left_from, *from;
973   snat_main_t *sm = &snat_main;
974   f64 now = vlib_time_now (vm);
975   u32 thread_index = vm->thread_index;
976   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
977   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
978     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
979
980   from = vlib_frame_vector_args (frame);
981   n_left_from = frame->n_vectors;
982
983   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
984   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
985   vlib_get_buffers (vm, from, b, n_left_from);
986
987   while (n_left_from > 0)
988     {
989       vlib_buffer_t *b0;
990       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
991       u32 tx_sw_if_index0;
992       u32 cntr_sw_if_index0;
993       nat_protocol_t proto0;
994       ip4_header_t *ip0;
995       snat_session_t *s0 = 0;
996       clib_bihash_kv_16_8_t kv0, value0;
997       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
998       nat_6t_flow_t *f = 0;
999       nat_6t_t lookup;
1000       int lookup_skipped = 0;
1001
1002       b0 = *b;
1003       b++;
1004
1005       /* Prefetch next iteration. */
1006       if (PREDICT_TRUE (n_left_from >= 2))
1007         {
1008           vlib_buffer_t *p2;
1009
1010           p2 = *b;
1011
1012           vlib_prefetch_buffer_header (p2, LOAD);
1013
1014           clib_prefetch_load (p2->data);
1015         }
1016
1017       if (is_output_feature)
1018         {
1019           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1020         }
1021
1022       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1023
1024       ip0 =
1025         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
1026
1027       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1028       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1029       cntr_sw_if_index0 =
1030         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1031       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1032                                                            rx_sw_if_index0);
1033       lookup.fib_index = rx_fib_index0;
1034
1035       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1036         {
1037           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1038           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1039                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1040                                        0);
1041           next[0] = NAT_NEXT_ICMP_ERROR;
1042           goto trace0;
1043         }
1044
1045       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1046
1047       if (is_output_feature)
1048         {
1049           if (PREDICT_FALSE
1050               (nat_not_translate_output_feature_fwd
1051                (sm, ip0, thread_index, now, vm, b0)))
1052             goto trace0;
1053         }
1054
1055       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1056         {
1057           if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1058                 ICMP4_echo_request &&
1059               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1060                 ICMP4_echo_reply &&
1061               !icmp_type_is_error_message (
1062                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
1063             {
1064               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
1065               next[0] = NAT_NEXT_DROP;
1066               goto trace0;
1067             }
1068           int err = nat_get_icmp_session_lookup_values (
1069             b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
1070             &lookup.dport, &lookup.proto);
1071           if (err != 0)
1072             {
1073               b0->error = node->errors[err];
1074               next[0] = NAT_NEXT_DROP;
1075               goto trace0;
1076             }
1077         }
1078       else
1079         {
1080           lookup.proto = ip0->protocol;
1081           lookup.saddr.as_u32 = ip0->src_address.as_u32;
1082           lookup.daddr.as_u32 = ip0->dst_address.as_u32;
1083           lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
1084           lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
1085         }
1086
1087       /* there might be a stashed index in vnet_buffer2 from handoff or
1088        * classify node, see if it can be used */
1089       if (is_multi_worker &&
1090           !pool_is_free_index (tsm->sessions,
1091                                vnet_buffer2 (b0)->nat.cached_session_index))
1092         {
1093           s0 = pool_elt_at_index (tsm->sessions,
1094                                   vnet_buffer2 (b0)->nat.cached_session_index);
1095           if (PREDICT_TRUE (
1096                 nat_6t_t_eq (&s0->i2o.match, &lookup)
1097                 // for some hairpinning cases there are two "i2i" flows instead
1098                 // of i2o and o2i as both hosts are on inside
1099                 || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1100                     nat_6t_t_eq (&s0->o2i.match, &lookup))))
1101             {
1102               /* yes, this is the droid we're looking for */
1103               lookup_skipped = 1;
1104               goto skip_lookup;
1105             }
1106           s0 = NULL;
1107         }
1108
1109       init_ed_k (&kv0, lookup.saddr, lookup.sport, lookup.daddr, lookup.dport,
1110                  lookup.fib_index, lookup.proto);
1111
1112       // lookup flow
1113       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1114         {
1115           // flow does not exist go slow path
1116           next[0] = def_slow;
1117           goto trace0;
1118         }
1119
1120       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1121       s0 =
1122         pool_elt_at_index (tsm->sessions,
1123                            ed_value_get_session_index (&value0));
1124
1125     skip_lookup:
1126
1127       ASSERT (thread_index == s0->thread_index);
1128
1129       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1130         {
1131           // session is closed, go slow path
1132           nat_free_session_data (sm, s0, thread_index, 0);
1133           nat_ed_session_delete (sm, s0, thread_index, 1);
1134           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
1135           goto trace0;
1136         }
1137
1138       if (s0->tcp_closed_timestamp)
1139         {
1140           if (now >= s0->tcp_closed_timestamp)
1141             {
1142               // session is closed, go slow path, freed in slow path
1143               next[0] = def_slow;
1144             }
1145           else
1146             {
1147               // session in transitory timeout, drop
1148               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
1149               next[0] = NAT_NEXT_DROP;
1150             }
1151           goto trace0;
1152         }
1153
1154       // drop if session expired
1155       u64 sess_timeout_time;
1156       sess_timeout_time =
1157         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1158       if (now >= sess_timeout_time)
1159         {
1160           nat_free_session_data (sm, s0, thread_index, 0);
1161           nat_ed_session_delete (sm, s0, thread_index, 1);
1162           // session is closed, go slow path
1163           next[0] = def_slow;
1164           goto trace0;
1165         }
1166
1167       b0->flags |= VNET_BUFFER_F_IS_NATED;
1168
1169       if (nat_6t_t_eq (&s0->i2o.match, &lookup))
1170         {
1171           f = &s0->i2o;
1172         }
1173       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1174                nat_6t_t_eq (&s0->o2i.match, &lookup))
1175         {
1176           f = &s0->o2i;
1177         }
1178       else
1179         {
1180           translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
1181           nat_free_session_data (sm, s0, thread_index, 0);
1182           nat_ed_session_delete (sm, s0, thread_index, 1);
1183           next[0] = NAT_NEXT_DROP;
1184           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1185           goto trace0;
1186         }
1187
1188       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1189           (translation_error = nat_6t_flow_buf_translate_i2o (
1190              vm, sm, b0, ip0, f, proto0, is_output_feature)))
1191         {
1192           nat_free_session_data (sm, s0, thread_index, 0);
1193           nat_ed_session_delete (sm, s0, thread_index, 1);
1194           next[0] = NAT_NEXT_DROP;
1195           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1196           goto trace0;
1197         }
1198
1199       switch (proto0)
1200         {
1201         case NAT_PROTOCOL_TCP:
1202           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
1203                                          thread_index, cntr_sw_if_index0, 1);
1204           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1205           break;
1206         case NAT_PROTOCOL_UDP:
1207           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
1208                                          thread_index, cntr_sw_if_index0, 1);
1209           break;
1210         case NAT_PROTOCOL_ICMP:
1211           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
1212                                          thread_index, cntr_sw_if_index0, 1);
1213           break;
1214         case NAT_PROTOCOL_OTHER:
1215           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
1216                                          thread_index, cntr_sw_if_index0, 1);
1217           break;
1218         }
1219
1220       /* Accounting */
1221       nat44_session_update_counters (s0, now,
1222                                      vlib_buffer_length_in_chain (vm, b0),
1223                                      thread_index);
1224       /* Per-user LRU list maintenance */
1225       nat44_session_update_lru (sm, s0, thread_index);
1226
1227     trace0:
1228       if (PREDICT_FALSE
1229           ((node->flags & VLIB_NODE_FLAG_TRACE)
1230            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1231         {
1232           nat_in2out_ed_trace_t *t =
1233             vlib_add_trace (vm, node, b0, sizeof (*t));
1234           t->sw_if_index = rx_sw_if_index0;
1235           t->next_index = next[0];
1236           t->is_slow_path = 0;
1237           t->translation_error = translation_error;
1238           t->lookup_skipped = lookup_skipped;
1239           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1240
1241           if (s0)
1242             {
1243               t->session_index = s0 - tsm->sessions;
1244               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1245               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1246               t->translation_via_i2of = (&s0->i2o == f);
1247             }
1248           else
1249             {
1250               t->session_index = ~0;
1251             }
1252         }
1253
1254       if (next[0] == NAT_NEXT_DROP)
1255         {
1256           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
1257                                          thread_index, cntr_sw_if_index0, 1);
1258         }
1259
1260       n_left_from--;
1261       next++;
1262     }
1263
1264   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1265                                frame->n_vectors);
1266   return frame->n_vectors;
1267 }
1268
1269 static inline uword
1270 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
1271                                           vlib_node_runtime_t *node,
1272                                           vlib_frame_t *frame,
1273                                           int is_output_feature,
1274                                           int is_multi_worker)
1275 {
1276   u32 n_left_from, *from;
1277   snat_main_t *sm = &snat_main;
1278   f64 now = vlib_time_now (vm);
1279   u32 thread_index = vm->thread_index;
1280   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1281
1282   from = vlib_frame_vector_args (frame);
1283   n_left_from = frame->n_vectors;
1284
1285   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1286   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1287   vlib_get_buffers (vm, from, b, n_left_from);
1288
1289   while (n_left_from > 0)
1290     {
1291       vlib_buffer_t *b0;
1292       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1293       u32 tx_sw_if_index0;
1294       u32 cntr_sw_if_index0;
1295       nat_protocol_t proto0;
1296       ip4_header_t *ip0;
1297       udp_header_t *udp0;
1298       icmp46_header_t *icmp0;
1299       snat_session_t *s0 = 0;
1300       clib_bihash_kv_16_8_t kv0, value0;
1301       int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1302
1303       b0 = *b;
1304
1305       if (is_output_feature)
1306         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1307
1308       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1309
1310       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1311                               iph_offset0);
1312
1313       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1314       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1315       cntr_sw_if_index0 =
1316         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1317       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1318                                                            rx_sw_if_index0);
1319
1320       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1321         {
1322           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1323           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1324                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1325                                        0);
1326           next[0] = NAT_NEXT_ICMP_ERROR;
1327           goto trace0;
1328         }
1329
1330       udp0 = ip4_next_header (ip0);
1331       icmp0 = (icmp46_header_t *) udp0;
1332       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1333
1334       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
1335         {
1336           s0 = nat44_ed_in2out_slowpath_unknown_proto (
1337             sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
1338           if (!s0)
1339             next[0] = NAT_NEXT_DROP;
1340
1341           if (NAT_NEXT_DROP != next[0] && s0 &&
1342               NAT_ED_TRNSL_ERR_SUCCESS !=
1343                 (translation_error = nat_6t_flow_buf_translate_i2o (
1344                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1345             {
1346               nat_free_session_data (sm, s0, thread_index, 0);
1347               nat_ed_session_delete (sm, s0, thread_index, 1);
1348               next[0] = NAT_NEXT_DROP;
1349               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1350               goto trace0;
1351             }
1352
1353           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
1354                                          thread_index, cntr_sw_if_index0, 1);
1355           goto trace0;
1356         }
1357
1358       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1359         {
1360           next[0] = icmp_in2out_ed_slow_path (
1361             sm, b0, ip0, icmp0, rx_sw_if_index0, rx_fib_index0, node, next[0],
1362             now, thread_index, proto0, &s0, is_multi_worker);
1363           if (NAT_NEXT_DROP != next[0] && s0 &&
1364               NAT_ED_TRNSL_ERR_SUCCESS !=
1365                 (translation_error = nat_6t_flow_buf_translate_i2o (
1366                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1367             {
1368               nat_free_session_data (sm, s0, thread_index, 0);
1369               nat_ed_session_delete (sm, s0, thread_index, 1);
1370               next[0] = NAT_NEXT_DROP;
1371               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1372               goto trace0;
1373             }
1374
1375           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.icmp,
1376                                          thread_index, cntr_sw_if_index0, 1);
1377           goto trace0;
1378         }
1379
1380       init_ed_k (&kv0, ip0->src_address,
1381                  vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
1382                  vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
1383                  ip0->protocol);
1384       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1385         {
1386           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1387           s0 =
1388             pool_elt_at_index (tsm->sessions,
1389                                ed_value_get_session_index (&value0));
1390
1391           if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
1392             {
1393               nat_free_session_data (sm, s0, thread_index, 0);
1394               nat_ed_session_delete (sm, s0, thread_index, 1);
1395               s0 = NULL;
1396             }
1397         }
1398
1399       if (!s0)
1400         {
1401           if (is_output_feature)
1402             {
1403               if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
1404                     sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1405                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1406                     rx_sw_if_index0, tx_sw_if_index0, now, is_multi_worker)))
1407                 goto trace0;
1408
1409               /*
1410                * Send DHCP packets to the ipv4 stack, or we won't
1411                * be able to use dhcp client on the outside interface
1412                */
1413               if (PREDICT_FALSE
1414                   (proto0 == NAT_PROTOCOL_UDP
1415                    && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1416                        clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server))
1417                    && ip0->dst_address.as_u32 == 0xffffffff))
1418                 goto trace0;
1419             }
1420           else
1421             {
1422               if (PREDICT_FALSE (nat44_ed_not_translate (
1423                     vm, sm, node, rx_sw_if_index0, b0, ip0, proto0,
1424                     rx_fib_index0, thread_index)))
1425                 goto trace0;
1426             }
1427
1428           next[0] = slow_path_ed (
1429             vm, sm, b0, ip0->src_address, ip0->dst_address,
1430             vnet_buffer (b0)->ip.reass.l4_src_port,
1431             vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->protocol,
1432             rx_fib_index0, &s0, node, next[0], thread_index, now);
1433
1434           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1435             goto trace0;
1436
1437           if (PREDICT_FALSE (!s0))
1438             goto trace0;
1439
1440         }
1441
1442       b0->flags |= VNET_BUFFER_F_IS_NATED;
1443
1444       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1445           (translation_error = nat_6t_flow_buf_translate_i2o (
1446              vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1447         {
1448           nat_free_session_data (sm, s0, thread_index, 0);
1449           nat_ed_session_delete (sm, s0, thread_index, 1);
1450           next[0] = NAT_NEXT_DROP;
1451           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1452           goto trace0;
1453         }
1454
1455       if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
1456         {
1457           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
1458                                          thread_index, cntr_sw_if_index0, 1);
1459           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1460         }
1461       else
1462         {
1463           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
1464                                          thread_index, cntr_sw_if_index0, 1);
1465         }
1466
1467       /* Accounting */
1468       nat44_session_update_counters (s0, now,
1469                                      vlib_buffer_length_in_chain
1470                                      (vm, b0), thread_index);
1471       /* Per-user LRU list maintenance */
1472       nat44_session_update_lru (sm, s0, thread_index);
1473
1474     trace0:
1475       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1476                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1477         {
1478           nat_in2out_ed_trace_t *t =
1479             vlib_add_trace (vm, node, b0, sizeof (*t));
1480           t->sw_if_index = rx_sw_if_index0;
1481           t->next_index = next[0];
1482           t->is_slow_path = 1;
1483           t->translation_error = translation_error;
1484           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1485
1486           if (s0)
1487             {
1488               t->session_index = s0 - tsm->sessions;
1489               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1490               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1491               t->translation_via_i2of = 1;
1492             }
1493
1494           else
1495             {
1496               t->session_index = ~0;
1497             }
1498         }
1499
1500       if (next[0] == NAT_NEXT_DROP)
1501         {
1502           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
1503                                          thread_index, cntr_sw_if_index0, 1);
1504         }
1505
1506       n_left_from--;
1507       next++;
1508       b++;
1509     }
1510
1511   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1512                                frame->n_vectors);
1513
1514   return frame->n_vectors;
1515 }
1516
1517 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1518                                      vlib_node_runtime_t * node,
1519                                      vlib_frame_t * frame)
1520 {
1521   if (snat_main.num_workers > 1)
1522     {
1523       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1);
1524     }
1525   else
1526     {
1527       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0);
1528     }
1529 }
1530
1531 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1532   .name = "nat44-ed-in2out",
1533   .vector_size = sizeof (u32),
1534   .sibling_of = "nat-default",
1535   .format_trace = format_nat_in2out_ed_trace,
1536   .type = VLIB_NODE_TYPE_INTERNAL,
1537   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1538   .error_strings = nat_in2out_ed_error_strings,
1539   .runtime_data_bytes = sizeof (snat_runtime_t),
1540 };
1541
1542 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1543                                             vlib_node_runtime_t * node,
1544                                             vlib_frame_t * frame)
1545 {
1546   if (snat_main.num_workers > 1)
1547     {
1548       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1);
1549     }
1550   else
1551     {
1552       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0);
1553     }
1554 }
1555
1556 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1557   .name = "nat44-ed-in2out-output",
1558   .vector_size = sizeof (u32),
1559   .sibling_of = "nat-default",
1560   .format_trace = format_nat_in2out_ed_trace,
1561   .type = VLIB_NODE_TYPE_INTERNAL,
1562   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1563   .error_strings = nat_in2out_ed_error_strings,
1564   .runtime_data_bytes = sizeof (snat_runtime_t),
1565 };
1566
1567 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1568                                               vlib_node_runtime_t *
1569                                               node, vlib_frame_t * frame)
1570 {
1571   if (snat_main.num_workers > 1)
1572     {
1573       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 1);
1574     }
1575   else
1576     {
1577       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 0);
1578     }
1579 }
1580
1581 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1582   .name = "nat44-ed-in2out-slowpath",
1583   .vector_size = sizeof (u32),
1584   .sibling_of = "nat-default",
1585   .format_trace = format_nat_in2out_ed_trace,
1586   .type = VLIB_NODE_TYPE_INTERNAL,
1587   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1588   .error_strings = nat_in2out_ed_error_strings,
1589   .runtime_data_bytes = sizeof (snat_runtime_t),
1590 };
1591
1592 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1593                                                      vlib_node_runtime_t
1594                                                      * node,
1595                                                      vlib_frame_t * frame)
1596 {
1597   if (snat_main.num_workers > 1)
1598     {
1599       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 1);
1600     }
1601   else
1602     {
1603       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 0);
1604     }
1605 }
1606
1607 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1608   .name = "nat44-ed-in2out-output-slowpath",
1609   .vector_size = sizeof (u32),
1610   .sibling_of = "nat-default",
1611   .format_trace = format_nat_in2out_ed_trace,
1612   .type = VLIB_NODE_TYPE_INTERNAL,
1613   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1614   .error_strings = nat_in2out_ed_error_strings,
1615   .runtime_data_bytes = sizeof (snat_runtime_t),
1616 };
1617
1618 static u8 *
1619 format_nat_pre_trace (u8 * s, va_list * args)
1620 {
1621   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1622   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1623   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1624   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1625                  t->arc_next_index);
1626 }
1627
1628 VLIB_NODE_FN (nat_pre_in2out_node)
1629   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1630 {
1631   return nat_pre_node_fn_inline (vm, node, frame,
1632                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1633 }
1634
1635 VLIB_NODE_FN (nat_pre_in2out_output_node)
1636   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1637 {
1638   return nat_pre_node_fn_inline (vm, node, frame,
1639                                  NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
1640 }
1641
1642 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1643   .name = "nat-pre-in2out",
1644   .vector_size = sizeof (u32),
1645   .sibling_of = "nat-default",
1646   .format_trace = format_nat_pre_trace,
1647   .type = VLIB_NODE_TYPE_INTERNAL,
1648   .n_errors = 0,
1649 };
1650
1651 VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
1652   .name = "nat-pre-in2out-output",
1653   .vector_size = sizeof (u32),
1654   .sibling_of = "nat-default",
1655   .format_trace = format_nat_pre_trace,
1656   .type = VLIB_NODE_TYPE_INTERNAL,
1657   .n_errors = 0,
1658 };
1659
1660 /*
1661  * fd.io coding-style-patch-verification: ON
1662  *
1663  * Local Variables:
1664  * eval: (c-set-style "gnu")
1665  * End:
1666  */