nat: pick outside addr based on local addr
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_in2out.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27
28 #include <nat/lib/nat_syslog.h>
29 #include <nat/lib/nat_inlines.h>
30 #include <nat/lib/ipfix_logging.h>
31
32 #include <nat/nat44-ed/nat44_ed.h>
33 #include <nat/nat44-ed/nat44_ed_inlines.h>
34
/* Number of attempts to get a port for the ED overloading algorithm. If
 * rolling the dice this many times doesn't produce a free port, it is
 * treated as if there were no free ports available, to conserve resources. */
38 #define ED_PORT_ALLOC_ATTEMPTS (10)
39
40 static char *nat_in2out_ed_error_strings[] = {
41 #define _(sym,string) string,
42   foreach_nat_in2out_ed_error
43 #undef _
44 };
45
46 typedef struct
47 {
48   u32 sw_if_index;
49   u32 next_index;
50   u32 session_index;
51   nat_translation_error_e translation_error;
52   nat_6t_flow_t i2of;
53   nat_6t_flow_t o2if;
54   clib_bihash_kv_16_8_t search_key;
55   u8 is_slow_path;
56   u8 translation_via_i2of;
57   u8 lookup_skipped;
58 } nat_in2out_ed_trace_t;
59
60 static u8 *
61 format_nat_in2out_ed_trace (u8 * s, va_list * args)
62 {
63   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
64   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
65   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
66   char *tag;
67
68   tag =
69     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
70     "NAT44_IN2OUT_ED_FAST_PATH";
71
72   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
73               t->sw_if_index, t->next_index, t->session_index);
74   if (~0 != t->session_index)
75     {
76       s = format (s, ", translation result '%U' via %s",
77                   format_nat_ed_translation_error, t->translation_error,
78                   t->translation_via_i2of ? "i2of" : "o2if");
79       s = format (s, "\n  i2of %U", format_nat_6t_flow, &t->i2of);
80       s = format (s, "\n  o2if %U", format_nat_6t_flow, &t->o2if);
81     }
82   if (!t->is_slow_path)
83     {
84       if (t->lookup_skipped)
85         {
86           s = format (s, "\n lookup skipped - cached session index used");
87         }
88       else
89         {
90           s = format (s, "\n  search key %U", format_ed_session_kvp,
91                       &t->search_key);
92         }
93     }
94
95   return s;
96 }
97
98 /**
99  * @brief Check if packet should be translated
100  *
101  * Packets aimed at outside interface and external address with active session
102  * should be translated.
103  *
104  * @param sm            NAT main
105  * @param rt            NAT runtime data
106  * @param sw_if_index0  index of the inside interface
107  * @param ip0           IPv4 header
108  * @param proto0        NAT protocol
109  * @param rx_fib_index0 RX FIB index
110  *
111  * @returns 0 if packet should be translated otherwise 1
112  */
113 static inline int
114 snat_not_translate_fast (snat_main_t *sm, vlib_node_runtime_t *node,
115                          u32 sw_if_index0, ip4_header_t *ip0, u32 proto0,
116                          u32 rx_fib_index0)
117 {
118   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
119   nat_outside_fib_t *outside_fib;
120   fib_prefix_t pfx = {
121     .fp_proto = FIB_PROTOCOL_IP4,
122     .fp_len = 32,
123     .fp_addr = {
124                 .ip4.as_u32 = ip0->dst_address.as_u32,
125                 }
126     ,
127   };
128
129   /* Don't NAT packet aimed at the intfc address */
130   if (PREDICT_FALSE (
131         is_interface_addr (sm, node, sw_if_index0, ip0->dst_address.as_u32)))
132     return 1;
133
134   fei = fib_table_lookup (rx_fib_index0, &pfx);
135   if (FIB_NODE_INDEX_INVALID != fei)
136     {
137       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
138       if (sw_if_index == ~0)
139         {
140           vec_foreach (outside_fib, sm->outside_fibs)
141             {
142               fei = fib_table_lookup (outside_fib->fib_index, &pfx);
143               if (FIB_NODE_INDEX_INVALID != fei)
144                 {
145                   sw_if_index = fib_entry_get_resolving_interface (fei);
146                   if (sw_if_index != ~0)
147                     break;
148                 }
149             }
150         }
151       if (sw_if_index == ~0)
152         return 1;
153
154       snat_interface_t *i;
155       pool_foreach (i, sm->interfaces)
156         {
157           /* NAT packet aimed at outside interface */
158           if ((nat_interface_is_outside (i)) &&
159               (sw_if_index == i->sw_if_index))
160             return 0;
161         }
162     }
163
164   return 1;
165 }
166
167 static int
168 nat_ed_alloc_addr_and_port_with_snat_address (
169   snat_main_t *sm, u32 nat_proto, u32 thread_index, snat_address_t *a,
170   u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
171   ip4_address_t *outside_addr, u16 *outside_port)
172 {
173   const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
174
175   s->o2i.match.daddr = a->addr;
176   /* first try port suggested by caller */
177   u16 port = clib_net_to_host_u16 (*outside_port);
178   u16 port_offset = port - port_thread_offset;
179   if (port <= port_thread_offset ||
180       port > port_thread_offset + port_per_thread)
181     {
182       /* need to pick a different port, suggested port doesn't fit in
183        * this thread's port range */
184       port_offset = snat_random_port (0, port_per_thread - 1);
185       port = port_thread_offset + port_offset;
186     }
187   u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
188   do
189     {
190       if (NAT_PROTOCOL_ICMP == nat_proto)
191         {
192           s->o2i.match.sport = clib_host_to_net_u16 (port);
193         }
194       s->o2i.match.dport = clib_host_to_net_u16 (port);
195       if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
196         {
197 #define _(N, i, n, s)                                                         \
198   case NAT_PROTOCOL_##N:                                                      \
199     ++a->busy_##n##_port_refcounts[port];                                     \
200     a->busy_##n##_ports_per_thread[thread_index]++;                           \
201     a->busy_##n##_ports++;                                                    \
202     break;
203           switch (nat_proto)
204             {
205               foreach_nat_protocol;
206             default:
207               nat_elog_info (sm, "unknown protocol");
208               return 1;
209             }
210 #undef _
211           *outside_addr = a->addr;
212           *outside_port = clib_host_to_net_u16 (port);
213           return 0;
214         }
215       port_offset = snat_random_port (0, port_per_thread - 1);
216       port = port_thread_offset + port_offset;
217       --attempts;
218     }
219   while (attempts > 0);
220   return 1;
221 }
222
223 static int
224 nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto,
225                             u32 thread_index, ip4_address_t s_addr,
226                             u16 port_per_thread, u32 snat_thread_index,
227                             snat_session_t *s, ip4_address_t *outside_addr,
228                             u16 *outside_port)
229 {
230   int i;
231   snat_address_t *a, *ga = 0;
232
233   if (vec_len (sm->addresses) > 0)
234     {
235       int s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses);
236
237       for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
238         {
239           a = sm->addresses + i;
240           if (a->fib_index == rx_fib_index)
241             {
242               return nat_ed_alloc_addr_and_port_with_snat_address (
243                 sm, nat_proto, thread_index, a, port_per_thread,
244                 snat_thread_index, s, outside_addr, outside_port);
245             }
246           else if (a->fib_index == ~0)
247             {
248               ga = a;
249             }
250         }
251
252       for (i = 0; i < s_addr_offset; ++i)
253         {
254           a = sm->addresses + i;
255           if (a->fib_index == rx_fib_index)
256             {
257               return nat_ed_alloc_addr_and_port_with_snat_address (
258                 sm, nat_proto, thread_index, a, port_per_thread,
259                 snat_thread_index, s, outside_addr, outside_port);
260             }
261           else if (a->fib_index == ~0)
262             {
263               ga = a;
264             }
265         }
266
267       if (ga)
268         {
269           return nat_ed_alloc_addr_and_port_with_snat_address (
270             sm, nat_proto, thread_index, a, port_per_thread, snat_thread_index,
271             s, outside_addr, outside_port);
272         }
273     }
274   /* Totally out of translations to use... */
275   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
276   return 1;
277 }
278
279 static_always_inline u32
280 nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr)
281 {
282   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
283   nat_outside_fib_t *outside_fib;
284   fib_prefix_t pfx = {
285     .fp_proto = FIB_PROTOCOL_IP4,
286     .fp_len = 32,
287     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
288     ,
289   };
290   // TODO: multiple vrfs none can resolve addr
291   vec_foreach (outside_fib, sm->outside_fibs)
292     {
293       fei = fib_table_lookup (outside_fib->fib_index, &pfx);
294       if (FIB_NODE_INDEX_INVALID != fei)
295         {
296           if (fib_entry_get_resolving_interface (fei) != ~0)
297             {
298               return outside_fib->fib_index;
299             }
300         }
301     }
302   return ~0;
303 }
304
305 static_always_inline int
306 nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
307                              u16 match_port, nat_protocol_t match_protocol,
308                              u32 match_fib_index, ip4_address_t *daddr,
309                              u16 *dport)
310 {
311   clib_bihash_kv_8_8_t kv, value;
312   init_nat_k (&kv, match_addr, match_port, match_fib_index, match_protocol);
313   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
314     {
315       /* Try address only mapping */
316       init_nat_k (&kv, match_addr, 0, 0, 0);
317       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv,
318                                   &value))
319         return 0;
320     }
321
322   snat_static_mapping_t *m =
323     pool_elt_at_index (sm->static_mappings, value.value);
324   *daddr = m->local_addr;
325   if (dport)
326     {
327       /* Address only mapping doesn't change port */
328       *dport = is_addr_only_static_mapping (m) ? match_port : m->local_port;
329     }
330   return 1;
331 }
332
333 static u32
334 slow_path_ed (snat_main_t *sm, vlib_buffer_t *b, ip4_address_t l_addr,
335               ip4_address_t r_addr, u16 l_port, u16 r_port, u8 proto,
336               u32 rx_fib_index, snat_session_t **sessionp,
337               vlib_node_runtime_t *node, u32 next, u32 thread_index, f64 now)
338 {
339   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
340   ip4_address_t outside_addr;
341   u16 outside_port;
342   u32 outside_fib_index;
343   u8 is_identity_nat;
344
345   u32 nat_proto = ip_proto_to_nat_proto (proto);
346   snat_session_t *s = NULL;
347   lb_nat_type_t lb = 0;
348   ip4_address_t daddr = r_addr;
349   u16 dport = r_port;
350
351   if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP))
352     {
353       if (PREDICT_FALSE
354           (!tcp_flags_is_init
355            (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
356         {
357           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
358           return NAT_NEXT_DROP;
359         }
360     }
361
362   if (PREDICT_FALSE
363       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
364     {
365       if (!nat_lru_free_one (sm, thread_index, now))
366         {
367           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
368           nat_ipfix_logging_max_sessions (thread_index,
369                                           sm->max_translations_per_thread);
370           nat_elog_notice (sm, "maximum sessions exceeded");
371           return NAT_NEXT_DROP;
372         }
373     }
374
375   outside_fib_index = sm->outside_fib_index;
376
377   switch (vec_len (sm->outside_fibs))
378     {
379     case 0:
380       outside_fib_index = sm->outside_fib_index;
381       break;
382     case 1:
383       outside_fib_index = sm->outside_fibs[0].fib_index;
384       break;
385     default:
386       outside_fib_index = nat_outside_fib_index_lookup (sm, r_addr);
387       break;
388     }
389
390   ip4_address_t sm_addr;
391   u16 sm_port;
392   u32 sm_fib_index;
393   /* First try to match static mapping by local address and port */
394   int is_sm;
395   if (snat_static_mapping_match (sm, l_addr, l_port, rx_fib_index, nat_proto,
396                                  &sm_addr, &sm_port, &sm_fib_index, 0, 0, 0,
397                                  &lb, 0, &is_identity_nat, 0))
398     {
399       is_sm = 0;
400     }
401   else
402     {
403       is_sm = 1;
404     }
405
406   if (PREDICT_FALSE (is_sm && is_identity_nat))
407     {
408       *sessionp = NULL;
409       return next;
410     }
411
412   s = nat_ed_session_alloc (sm, thread_index, now, proto);
413   ASSERT (s);
414
415   if (!is_sm)
416     {
417       s->in2out.addr = l_addr;
418       s->in2out.port = l_port;
419       s->nat_proto = nat_proto;
420       s->in2out.fib_index = rx_fib_index;
421       s->out2in.fib_index = outside_fib_index;
422
423       // suggest using local port to allocation function
424       outside_port = l_port;
425
426       // hairpinning?
427       int is_hairpinning = nat44_ed_external_sm_lookup (
428         sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
429       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
430
431       // destination addr/port updated with real values in
432       // nat_ed_alloc_addr_and_port
433       nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
434                             s->out2in.fib_index, proto);
435       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
436       if (NAT_PROTOCOL_ICMP == nat_proto)
437         {
438           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
439         }
440       else
441         {
442           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
443         }
444       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
445
446       if (nat_ed_alloc_addr_and_port (
447             sm, rx_fib_index, nat_proto, thread_index, l_addr,
448             sm->port_per_thread, tsm->snat_thread_index, s, &outside_addr,
449             &outside_port))
450         {
451           nat_elog_notice (sm, "addresses exhausted");
452           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
453           nat_ed_session_delete (sm, s, thread_index, 1);
454           return NAT_NEXT_DROP;
455         }
456       s->out2in.addr = outside_addr;
457       s->out2in.port = outside_port;
458     }
459   else
460     {
461       // static mapping
462       s->out2in.addr = outside_addr = sm_addr;
463       s->out2in.port = outside_port = sm_port;
464       s->in2out.addr = l_addr;
465       s->in2out.port = l_port;
466       s->nat_proto = nat_proto;
467       s->in2out.fib_index = rx_fib_index;
468       s->out2in.fib_index = outside_fib_index;
469       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
470
471       // hairpinning?
472       int is_hairpinning = nat44_ed_external_sm_lookup (
473         sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
474       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
475
476       if (NAT_PROTOCOL_ICMP == nat_proto)
477         {
478           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
479                                 sm_port, s->out2in.fib_index, proto);
480           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
481         }
482       else
483         {
484           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr,
485                                 sm_port, s->out2in.fib_index, proto);
486           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
487         }
488       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
489       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
490       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
491         {
492           nat_elog_notice (sm, "out2in key add failed");
493           goto error;
494         }
495     }
496
497   if (lb)
498     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
499   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
500   s->ext_host_addr = r_addr;
501   s->ext_host_port = r_port;
502
503   nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port,
504                         rx_fib_index, proto);
505   nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
506   nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);
507   if (NAT_PROTOCOL_ICMP == nat_proto)
508     {
509       nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
510     }
511   else
512     {
513       nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
514       nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
515     }
516   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
517
518   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
519     {
520       nat_elog_notice (sm, "in2out key add failed");
521       goto error;
522     }
523
524   /* log NAT event */
525   nat_ipfix_logging_nat44_ses_create (thread_index,
526                                       s->in2out.addr.as_u32,
527                                       s->out2in.addr.as_u32,
528                                       s->nat_proto,
529                                       s->in2out.port,
530                                       s->out2in.port, s->in2out.fib_index);
531
532   nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
533                          s->in2out.port, &s->ext_host_nat_addr,
534                          s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
535                          &s->ext_host_addr, s->ext_host_port, s->nat_proto, 0);
536
537   per_vrf_sessions_register_session (s, thread_index);
538
539   *sessionp = s;
540   return next;
541 error:
542   if (s)
543     {
544       if (!is_sm)
545         {
546           snat_free_outside_address_and_port (sm->addresses, thread_index,
547                                               &outside_addr, outside_port,
548                                               nat_proto);
549         }
550       nat_ed_session_delete (sm, s, thread_index, 1);
551     }
552   *sessionp = s = NULL;
553   return NAT_NEXT_DROP;
554 }
555
556 static_always_inline int
557 nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t * node,
558                         u32 sw_if_index, ip4_header_t * ip, u32 proto,
559                         u32 rx_fib_index, u32 thread_index)
560 {
561   udp_header_t *udp = ip4_next_header (ip);
562   clib_bihash_kv_16_8_t kv, value;
563
564   init_ed_k (&kv, ip->dst_address, udp->dst_port, ip->src_address,
565              udp->src_port, sm->outside_fib_index, ip->protocol);
566
567   /* NAT packet aimed at external address if has active sessions */
568   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
569     {
570       /* or is static mappings */
571       ip4_address_t placeholder_addr;
572       u16 placeholder_port;
573       u32 placeholder_fib_index;
574       if (!snat_static_mapping_match
575           (sm, ip->dst_address, udp->dst_port, sm->outside_fib_index, proto,
576            &placeholder_addr, &placeholder_port, &placeholder_fib_index, 1, 0,
577            0, 0, 0, 0, 0))
578         return 0;
579     }
580   else
581     return 0;
582
583   if (sm->forwarding_enabled)
584     return 1;
585
586   return snat_not_translate_fast (sm, node, sw_if_index, ip, proto,
587                                   rx_fib_index);
588 }
589
590 static_always_inline int
591 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
592                                       u32 thread_index, f64 now,
593                                       vlib_main_t * vm, vlib_buffer_t * b)
594 {
595   clib_bihash_kv_16_8_t kv, value;
596   snat_session_t *s = 0;
597   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
598
599   if (!sm->forwarding_enabled)
600     return 0;
601
602   if (ip->protocol == IP_PROTOCOL_ICMP)
603     {
604       ip4_address_t lookup_saddr, lookup_daddr;
605       u16 lookup_sport, lookup_dport;
606       u8 lookup_protocol;
607       if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
608                                               &lookup_sport, &lookup_daddr,
609                                               &lookup_dport, &lookup_protocol))
610         return 0;
611       init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport,
612                  0, lookup_protocol);
613     }
614   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
615     {
616       init_ed_k (&kv, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
617                  ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port, 0,
618                  ip->protocol);
619     }
620   else
621     {
622       init_ed_k (&kv, ip->src_address, 0, ip->dst_address, 0, 0,
623                  ip->protocol);
624     }
625
626   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
627     {
628       ASSERT (thread_index == ed_value_get_thread_index (&value));
629       s =
630         pool_elt_at_index (tsm->sessions,
631                            ed_value_get_session_index (&value));
632
633       if (is_fwd_bypass_session (s))
634         {
635           if (ip->protocol == IP_PROTOCOL_TCP)
636             {
637               nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index);
638             }
639           /* Accounting */
640           nat44_session_update_counters (s, now,
641                                          vlib_buffer_length_in_chain (vm, b),
642                                          thread_index);
643           /* Per-user LRU list maintenance */
644           nat44_session_update_lru (sm, s, thread_index);
645           return 1;
646         }
647       else
648         return 0;
649     }
650
651   return 0;
652 }
653
654 static_always_inline int
655 nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip,
656                                        u16 src_port, u16 dst_port,
657                                        u32 thread_index, u32 rx_sw_if_index,
658                                        u32 tx_sw_if_index, f64 now)
659 {
660   clib_bihash_kv_16_8_t kv, value;
661   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
662   snat_interface_t *i;
663   snat_session_t *s;
664   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
665   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
666
667   /* src NAT check */
668   init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
669              tx_fib_index, ip->protocol);
670   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
671     {
672       ASSERT (thread_index == ed_value_get_thread_index (&value));
673       s =
674         pool_elt_at_index (tsm->sessions,
675                            ed_value_get_session_index (&value));
676       if (nat44_is_ses_closed (s)
677           && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp))
678         {
679           nat_free_session_data (sm, s, thread_index, 0);
680           nat_ed_session_delete (sm, s, thread_index, 1);
681         }
682       return 1;
683     }
684
685   /* dst NAT check */
686   init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port,
687              rx_fib_index, ip->protocol);
688   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
689     {
690       ASSERT (thread_index == ed_value_get_thread_index (&value));
691       s =
692         pool_elt_at_index (tsm->sessions,
693                            ed_value_get_session_index (&value));
694
695       if (is_fwd_bypass_session (s))
696         return 0;
697
698       /* hairpinning */
699       pool_foreach (i, sm->output_feature_interfaces)
700        {
701         if ((nat_interface_is_inside (i)) && (rx_sw_if_index == i->sw_if_index))
702            return 0;
703       }
704       return 1;
705     }
706
707   return 0;
708 }
709
710 static inline u32
711 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
712                           icmp46_header_t *icmp, u32 sw_if_index,
713                           u32 rx_fib_index, vlib_node_runtime_t *node,
714                           u32 next, f64 now, u32 thread_index,
715                           nat_protocol_t nat_proto, snat_session_t **s_p)
716 {
717   vlib_main_t *vm = vlib_get_main ();
718   u16 checksum;
719   int err;
720   snat_session_t *s = NULL;
721   u8 lookup_protocol = ip->protocol;
722   u16 lookup_sport, lookup_dport;
723   ip4_address_t lookup_saddr, lookup_daddr;
724
725   err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
726                                             &lookup_sport, &lookup_daddr,
727                                             &lookup_dport, &lookup_protocol);
728   if (err != 0)
729     {
730       b->error = node->errors[err];
731       return NAT_NEXT_DROP;
732     }
733
734   if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
735     {
736       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
737             sm, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
738             vnet_buffer (b)->sw_if_index[VLIB_TX], now)))
739         {
740           return next;
741         }
742     }
743   else
744     {
745       if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, sw_if_index, ip,
746                                                  NAT_PROTOCOL_ICMP,
747                                                  rx_fib_index, thread_index)))
748         {
749           return next;
750         }
751     }
752
753   if (PREDICT_FALSE (icmp_type_is_error_message (
754         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
755     {
756       b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
757       return NAT_NEXT_DROP;
758     }
759
760   next = slow_path_ed (sm, b, ip->src_address, ip->dst_address, lookup_sport,
761                        lookup_dport, ip->protocol, rx_fib_index, &s, node,
762                        next, thread_index, vlib_time_now (vm));
763
764   if (NAT_NEXT_DROP == next)
765     goto out;
766
767   if (PREDICT_TRUE (!ip4_is_fragment (ip)))
768     {
769       ip_csum_t sum = ip_incremental_checksum_buffer (
770         vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b),
771         ntohs (ip->length) - ip4_header_bytes (ip), 0);
772       checksum = ~ip_csum_fold (sum);
773       if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff))
774         {
775           next = NAT_NEXT_DROP;
776           goto out;
777         }
778     }
779
780 out:
781   if (PREDICT_TRUE (next != NAT_NEXT_DROP && s))
782     {
783       /* Accounting */
784       nat44_session_update_counters (
785         s, now, vlib_buffer_length_in_chain (vm, b), thread_index);
786       /* Per-user LRU list maintenance */
787       nat44_session_update_lru (sm, s, thread_index);
788     }
789   *s_p = s;
790   return next;
791 }
792
793 static snat_session_t *
794 nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
795                                         ip4_header_t *ip, u32 rx_fib_index,
796                                         u32 thread_index, f64 now,
797                                         vlib_main_t *vm,
798                                         vlib_node_runtime_t *node)
799 {
800   clib_bihash_kv_8_8_t kv, value;
801   clib_bihash_kv_16_8_t s_kv, s_value;
802   snat_static_mapping_t *m = NULL;
803   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
804   snat_session_t *s = NULL;
805   u32 outside_fib_index = sm->outside_fib_index;
806   int i;
807   ip4_address_t new_src_addr = { 0 };
808   ip4_address_t new_dst_addr = ip->dst_address;
809
810   if (PREDICT_FALSE (
811         nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
812     {
813       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
814       nat_ipfix_logging_max_sessions (thread_index,
815                                       sm->max_translations_per_thread);
816       nat_elog_notice (sm, "maximum sessions exceeded");
817       return 0;
818     }
819
820   switch (vec_len (sm->outside_fibs))
821     {
822     case 0:
823       outside_fib_index = sm->outside_fib_index;
824       break;
825     case 1:
826       outside_fib_index = sm->outside_fibs[0].fib_index;
827       break;
828     default:
829       outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address);
830       break;
831     }
832
833   init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0);
834
835   /* Try to find static mapping first */
836   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
837     {
838       m = pool_elt_at_index (sm->static_mappings, value.value);
839       new_src_addr = m->external_addr;
840     }
841   else
842     {
843       pool_foreach (s, tsm->sessions)
844         {
845           if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
846             {
847               init_ed_k (&s_kv, s->out2in.addr, 0, ip->dst_address, 0,
848                          outside_fib_index, ip->protocol);
849               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
850                 {
851                   new_src_addr = s->out2in.addr;
852                 }
853               break;
854             }
855         }
856
857       if (!new_src_addr.as_u32)
858         {
859           for (i = 0; i < vec_len (sm->addresses); i++)
860             {
861               init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0,
862                          outside_fib_index, ip->protocol);
863               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
864                 {
865                   new_src_addr = sm->addresses[i].addr;
866                 }
867             }
868         }
869     }
870
871   if (!new_src_addr.as_u32)
872     {
873       // could not allocate address for translation ...
874       return 0;
875     }
876
877   s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
878   if (!s)
879     {
880       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
881       nat_elog_warn (sm, "create NAT session failed");
882       return 0;
883     }
884
885   nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
886                         ip->dst_address, 0, rx_fib_index, ip->protocol);
887   nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
888   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
889
890   // hairpinning?
891   int is_hairpinning =
892     nat44_ed_external_sm_lookup (sm, ip->dst_address, 0, NAT_PROTOCOL_OTHER,
893                                  outside_fib_index, &new_dst_addr, NULL);
894   s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
895
896   nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
897   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
898
899   nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
900                         outside_fib_index, ip->protocol);
901   nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
902   nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
903   nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
904
905   s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
906   s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
907   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
908   s->out2in.addr.as_u32 = new_src_addr.as_u32;
909   s->out2in.fib_index = outside_fib_index;
910   s->in2out.addr.as_u32 = ip->src_address.as_u32;
911   s->in2out.fib_index = rx_fib_index;
912   s->in2out.port = s->out2in.port = ip->protocol;
913   if (m)
914     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
915
916   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
917     {
918       nat_elog_notice (sm, "in2out flow hash add failed");
919       nat_ed_session_delete (sm, s, thread_index, 1);
920       return NULL;
921     }
922
923   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1))
924     {
925       nat_elog_notice (sm, "out2in flow hash add failed");
926       nat_ed_session_delete (sm, s, thread_index, 1);
927       return NULL;
928     }
929
930   per_vrf_sessions_register_session (s, thread_index);
931
932   /* Accounting */
933   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
934                                  thread_index);
935   /* Per-user LRU list maintenance */
936   nat44_session_update_lru (sm, s, thread_index);
937
938   return s;
939 }
940
941 static inline uword
942 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
943                                           vlib_node_runtime_t *node,
944                                           vlib_frame_t *frame,
945                                           int is_output_feature,
946                                           int is_multi_worker)
947 {
948   u32 n_left_from, *from;
949   snat_main_t *sm = &snat_main;
950   f64 now = vlib_time_now (vm);
951   u32 thread_index = vm->thread_index;
952   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
953   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
954     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
955
956   from = vlib_frame_vector_args (frame);
957   n_left_from = frame->n_vectors;
958
959   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
960   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
961   vlib_get_buffers (vm, from, b, n_left_from);
962
963   while (n_left_from > 0)
964     {
965       vlib_buffer_t *b0;
966       u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0;
967       nat_protocol_t proto0;
968       ip4_header_t *ip0;
969       snat_session_t *s0 = 0;
970       clib_bihash_kv_16_8_t kv0, value0;
971       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
972       nat_6t_flow_t *f = 0;
973       nat_6t_t lookup;
974       int lookup_skipped = 0;
975
976       b0 = *b;
977       b++;
978
979       /* Prefetch next iteration. */
980       if (PREDICT_TRUE (n_left_from >= 2))
981         {
982           vlib_buffer_t *p2;
983
984           p2 = *b;
985
986           vlib_prefetch_buffer_header (p2, LOAD);
987
988           CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
989         }
990
991       if (is_output_feature)
992         {
993           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
994         }
995
996       next[0] = vnet_buffer2 (b0)->nat.arc_next;
997
998       ip0 =
999         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
1000
1001       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1002       rx_fib_index0 =
1003         fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
1004       lookup.fib_index = rx_fib_index0;
1005
1006       if (PREDICT_FALSE (ip0->ttl == 1))
1007         {
1008           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1009           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1010                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1011                                        0);
1012           next[0] = NAT_NEXT_ICMP_ERROR;
1013           goto trace0;
1014         }
1015
1016       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1017
1018       if (is_output_feature)
1019         {
1020           if (PREDICT_FALSE
1021               (nat_not_translate_output_feature_fwd
1022                (sm, ip0, thread_index, now, vm, b0)))
1023             goto trace0;
1024         }
1025
1026       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1027         {
1028           if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1029                 ICMP4_echo_request &&
1030               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1031                 ICMP4_echo_reply &&
1032               !icmp_type_is_error_message (
1033                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
1034             {
1035               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
1036               next[0] = NAT_NEXT_DROP;
1037               goto trace0;
1038             }
1039           int err = nat_get_icmp_session_lookup_values (
1040             b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
1041             &lookup.dport, &lookup.proto);
1042           if (err != 0)
1043             {
1044               b0->error = node->errors[err];
1045               next[0] = NAT_NEXT_DROP;
1046               goto trace0;
1047             }
1048         }
1049       else
1050         {
1051           lookup.proto = ip0->protocol;
1052           lookup.saddr.as_u32 = ip0->src_address.as_u32;
1053           lookup.daddr.as_u32 = ip0->dst_address.as_u32;
1054           lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
1055           lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
1056         }
1057
1058       /* there might be a stashed index in vnet_buffer2 from handoff or
1059        * classify node, see if it can be used */
1060       if (is_multi_worker &&
1061           !pool_is_free_index (tsm->sessions,
1062                                vnet_buffer2 (b0)->nat.cached_session_index))
1063         {
1064           s0 = pool_elt_at_index (tsm->sessions,
1065                                   vnet_buffer2 (b0)->nat.cached_session_index);
1066           if (PREDICT_TRUE (
1067                 nat_6t_t_eq (&s0->i2o.match, &lookup)
1068                 // for some hairpinning cases there are two "i2i" flows instead
1069                 // of i2o and o2i as both hosts are on inside
1070                 || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1071                     nat_6t_t_eq (&s0->o2i.match, &lookup))))
1072             {
1073               /* yes, this is the droid we're looking for */
1074               lookup_skipped = 1;
1075               goto skip_lookup;
1076             }
1077           s0 = NULL;
1078         }
1079
1080       init_ed_k (&kv0, lookup.saddr, lookup.sport, lookup.daddr, lookup.dport,
1081                  lookup.fib_index, lookup.proto);
1082
1083       // lookup flow
1084       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1085         {
1086           // flow does not exist go slow path
1087           next[0] = def_slow;
1088           goto trace0;
1089         }
1090
1091       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1092       s0 =
1093         pool_elt_at_index (tsm->sessions,
1094                            ed_value_get_session_index (&value0));
1095
1096     skip_lookup:
1097
1098       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1099         {
1100           // session is closed, go slow path
1101           nat_free_session_data (sm, s0, thread_index, 0);
1102           nat_ed_session_delete (sm, s0, thread_index, 1);
1103           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
1104           goto trace0;
1105         }
1106
1107       if (s0->tcp_closed_timestamp)
1108         {
1109           if (now >= s0->tcp_closed_timestamp)
1110             {
1111               // session is closed, go slow path, freed in slow path
1112               next[0] = def_slow;
1113             }
1114           else
1115             {
1116               // session in transitory timeout, drop
1117               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
1118               next[0] = NAT_NEXT_DROP;
1119             }
1120           goto trace0;
1121         }
1122
1123       // drop if session expired
1124       u64 sess_timeout_time;
1125       sess_timeout_time =
1126         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1127       if (now >= sess_timeout_time)
1128         {
1129           nat_free_session_data (sm, s0, thread_index, 0);
1130           nat_ed_session_delete (sm, s0, thread_index, 1);
1131           // session is closed, go slow path
1132           next[0] = def_slow;
1133           goto trace0;
1134         }
1135
1136       b0->flags |= VNET_BUFFER_F_IS_NATED;
1137
1138       if (nat_6t_t_eq (&s0->i2o.match, &lookup))
1139         {
1140           f = &s0->i2o;
1141         }
1142       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1143                nat_6t_t_eq (&s0->o2i.match, &lookup))
1144         {
1145           f = &s0->o2i;
1146         }
1147       else
1148         {
1149           translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
1150           nat_free_session_data (sm, s0, thread_index, 0);
1151           nat_ed_session_delete (sm, s0, thread_index, 1);
1152           next[0] = NAT_NEXT_DROP;
1153           goto trace0;
1154         }
1155
1156       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1157           (translation_error = nat_6t_flow_buf_translate (
1158              sm, b0, ip0, f, proto0, is_output_feature)))
1159         {
1160           nat_free_session_data (sm, s0, thread_index, 0);
1161           nat_ed_session_delete (sm, s0, thread_index, 1);
1162           next[0] = NAT_NEXT_DROP;
1163           goto trace0;
1164         }
1165
1166       switch (proto0)
1167         {
1168         case NAT_PROTOCOL_TCP:
1169           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
1170                                          thread_index, sw_if_index0, 1);
1171           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1172           break;
1173         case NAT_PROTOCOL_UDP:
1174           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
1175                                          thread_index, sw_if_index0, 1);
1176           break;
1177         case NAT_PROTOCOL_ICMP:
1178           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
1179                                          thread_index, sw_if_index0, 1);
1180           break;
1181         case NAT_PROTOCOL_OTHER:
1182           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
1183                                          thread_index, sw_if_index0, 1);
1184           break;
1185         }
1186
1187       /* Accounting */
1188       nat44_session_update_counters (s0, now,
1189                                      vlib_buffer_length_in_chain (vm, b0),
1190                                      thread_index);
1191       /* Per-user LRU list maintenance */
1192       nat44_session_update_lru (sm, s0, thread_index);
1193
1194     trace0:
1195       if (PREDICT_FALSE
1196           ((node->flags & VLIB_NODE_FLAG_TRACE)
1197            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1198         {
1199           nat_in2out_ed_trace_t *t =
1200             vlib_add_trace (vm, node, b0, sizeof (*t));
1201           t->sw_if_index = sw_if_index0;
1202           t->next_index = next[0];
1203           t->is_slow_path = 0;
1204           t->translation_error = translation_error;
1205           t->lookup_skipped = lookup_skipped;
1206           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1207
1208           if (s0)
1209             {
1210               t->session_index = s0 - tsm->sessions;
1211               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1212               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1213               t->translation_via_i2of = (&s0->i2o == f);
1214             }
1215           else
1216             {
1217               t->session_index = ~0;
1218             }
1219         }
1220
1221       if (next[0] == NAT_NEXT_DROP)
1222         {
1223           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
1224                                          thread_index, sw_if_index0, 1);
1225         }
1226
1227       n_left_from--;
1228       next++;
1229     }
1230
1231   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1232                                frame->n_vectors);
1233   return frame->n_vectors;
1234 }
1235
1236 static inline uword
1237 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm,
1238                                           vlib_node_runtime_t * node,
1239                                           vlib_frame_t * frame,
1240                                           int is_output_feature)
1241 {
1242   u32 n_left_from, *from;
1243   snat_main_t *sm = &snat_main;
1244   f64 now = vlib_time_now (vm);
1245   u32 thread_index = vm->thread_index;
1246   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1247
1248   from = vlib_frame_vector_args (frame);
1249   n_left_from = frame->n_vectors;
1250
1251   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1252   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1253   vlib_get_buffers (vm, from, b, n_left_from);
1254
1255   while (n_left_from > 0)
1256     {
1257       vlib_buffer_t *b0;
1258       u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1259       nat_protocol_t proto0;
1260       ip4_header_t *ip0;
1261       udp_header_t *udp0;
1262       icmp46_header_t *icmp0;
1263       snat_session_t *s0 = 0;
1264       clib_bihash_kv_16_8_t kv0, value0;
1265       int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1266
1267       b0 = *b;
1268
1269       if (is_output_feature)
1270         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1271
1272       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1273
1274       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1275                               iph_offset0);
1276
1277       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1278       rx_fib_index0 =
1279         fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
1280
1281       if (PREDICT_FALSE (ip0->ttl == 1))
1282         {
1283           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1284           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1285                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1286                                        0);
1287           next[0] = NAT_NEXT_ICMP_ERROR;
1288           goto trace0;
1289         }
1290
1291       udp0 = ip4_next_header (ip0);
1292       icmp0 = (icmp46_header_t *) udp0;
1293       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1294
1295       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
1296         {
1297           s0 = nat44_ed_in2out_slowpath_unknown_proto (
1298             sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
1299           if (!s0)
1300             next[0] = NAT_NEXT_DROP;
1301
1302           if (NAT_ED_TRNSL_ERR_SUCCESS !=
1303               (translation_error = nat_6t_flow_buf_translate (
1304                  sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1305             {
1306               goto trace0;
1307             }
1308
1309           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
1310                                          thread_index, sw_if_index0, 1);
1311           goto trace0;
1312         }
1313
1314       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1315         {
1316           next[0] = icmp_in2out_ed_slow_path (sm, b0, ip0, icmp0, sw_if_index0,
1317                                               rx_fib_index0, node, next[0],
1318                                               now, thread_index, proto0, &s0);
1319           if (NAT_NEXT_DROP != next[0] && s0 &&
1320               NAT_ED_TRNSL_ERR_SUCCESS !=
1321                 (translation_error = nat_6t_flow_buf_translate (
1322                    sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1323             {
1324               goto trace0;
1325             }
1326
1327           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.icmp,
1328                                          thread_index, sw_if_index0, 1);
1329           goto trace0;
1330         }
1331
1332       init_ed_k (&kv0, ip0->src_address,
1333                  vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
1334                  vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
1335                  ip0->protocol);
1336       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1337         {
1338           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1339           s0 =
1340             pool_elt_at_index (tsm->sessions,
1341                                ed_value_get_session_index (&value0));
1342
1343           if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
1344             {
1345               nat_free_session_data (sm, s0, thread_index, 0);
1346               nat_ed_session_delete (sm, s0, thread_index, 1);
1347               s0 = NULL;
1348             }
1349         }
1350
1351       if (!s0)
1352         {
1353           if (is_output_feature)
1354             {
1355               if (PREDICT_FALSE
1356                   (nat44_ed_not_translate_output_feature
1357                    (sm, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1358                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1359                     sw_if_index0, vnet_buffer (b0)->sw_if_index[VLIB_TX],
1360                     now)))
1361                 goto trace0;
1362
1363               /*
1364                * Send DHCP packets to the ipv4 stack, or we won't
1365                * be able to use dhcp client on the outside interface
1366                */
1367               if (PREDICT_FALSE
1368                   (proto0 == NAT_PROTOCOL_UDP
1369                    && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1370                        clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server))
1371                    && ip0->dst_address.as_u32 == 0xffffffff))
1372                 goto trace0;
1373             }
1374           else
1375             {
1376               if (PREDICT_FALSE
1377                   (nat44_ed_not_translate
1378                    (sm, node, sw_if_index0, ip0, proto0, rx_fib_index0,
1379                     thread_index)))
1380                 goto trace0;
1381             }
1382
1383           next[0] =
1384             slow_path_ed (sm, b0, ip0->src_address, ip0->dst_address,
1385                           vnet_buffer (b0)->ip.reass.l4_src_port,
1386                           vnet_buffer (b0)->ip.reass.l4_dst_port,
1387                           ip0->protocol, rx_fib_index0, &s0, node, next[0],
1388                           thread_index, now);
1389
1390           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1391             goto trace0;
1392
1393           if (PREDICT_FALSE (!s0))
1394             goto trace0;
1395
1396         }
1397
1398       b0->flags |= VNET_BUFFER_F_IS_NATED;
1399
1400       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1401           (translation_error = nat_6t_flow_buf_translate (
1402              sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1403         {
1404           nat_free_session_data (sm, s0, thread_index, 0);
1405           nat_ed_session_delete (sm, s0, thread_index, 1);
1406           s0 = NULL;
1407           goto trace0;
1408         }
1409
1410       if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
1411         {
1412           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
1413                                          thread_index, sw_if_index0, 1);
1414           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1415         }
1416       else
1417         {
1418           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
1419                                          thread_index, sw_if_index0, 1);
1420         }
1421
1422       /* Accounting */
1423       nat44_session_update_counters (s0, now,
1424                                      vlib_buffer_length_in_chain
1425                                      (vm, b0), thread_index);
1426       /* Per-user LRU list maintenance */
1427       nat44_session_update_lru (sm, s0, thread_index);
1428
1429     trace0:
1430       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1431                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1432         {
1433           nat_in2out_ed_trace_t *t =
1434             vlib_add_trace (vm, node, b0, sizeof (*t));
1435           t->sw_if_index = sw_if_index0;
1436           t->next_index = next[0];
1437           t->is_slow_path = 1;
1438           t->translation_error = translation_error;
1439           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1440
1441           if (s0)
1442             {
1443               t->session_index = s0 - tsm->sessions;
1444               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1445               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1446               t->translation_via_i2of = 1;
1447             }
1448
1449           else
1450             {
1451               t->session_index = ~0;
1452             }
1453         }
1454
1455       if (next[0] == NAT_NEXT_DROP)
1456         {
1457           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
1458                                          thread_index, sw_if_index0, 1);
1459         }
1460
1461       n_left_from--;
1462       next++;
1463       b++;
1464     }
1465
1466   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1467                                frame->n_vectors);
1468
1469   return frame->n_vectors;
1470 }
1471
1472 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1473                                      vlib_node_runtime_t * node,
1474                                      vlib_frame_t * frame)
1475 {
1476   if (snat_main.num_workers > 1)
1477     {
1478       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1);
1479     }
1480   else
1481     {
1482       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0);
1483     }
1484 }
1485
1486 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1487   .name = "nat44-ed-in2out",
1488   .vector_size = sizeof (u32),
1489   .sibling_of = "nat-default",
1490   .format_trace = format_nat_in2out_ed_trace,
1491   .type = VLIB_NODE_TYPE_INTERNAL,
1492   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1493   .error_strings = nat_in2out_ed_error_strings,
1494   .runtime_data_bytes = sizeof (snat_runtime_t),
1495 };
1496
1497 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1498                                             vlib_node_runtime_t * node,
1499                                             vlib_frame_t * frame)
1500 {
1501   if (snat_main.num_workers > 1)
1502     {
1503       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1);
1504     }
1505   else
1506     {
1507       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0);
1508     }
1509 }
1510
1511 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1512   .name = "nat44-ed-in2out-output",
1513   .vector_size = sizeof (u32),
1514   .sibling_of = "nat-default",
1515   .format_trace = format_nat_in2out_ed_trace,
1516   .type = VLIB_NODE_TYPE_INTERNAL,
1517   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1518   .error_strings = nat_in2out_ed_error_strings,
1519   .runtime_data_bytes = sizeof (snat_runtime_t),
1520 };
1521
1522 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1523                                               vlib_node_runtime_t *
1524                                               node, vlib_frame_t * frame)
1525 {
1526   return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0);
1527 }
1528
1529 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1530   .name = "nat44-ed-in2out-slowpath",
1531   .vector_size = sizeof (u32),
1532   .sibling_of = "nat-default",
1533   .format_trace = format_nat_in2out_ed_trace,
1534   .type = VLIB_NODE_TYPE_INTERNAL,
1535   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1536   .error_strings = nat_in2out_ed_error_strings,
1537   .runtime_data_bytes = sizeof (snat_runtime_t),
1538 };
1539
1540 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1541                                                      vlib_node_runtime_t
1542                                                      * node,
1543                                                      vlib_frame_t * frame)
1544 {
1545   return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1);
1546 }
1547
1548 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1549   .name = "nat44-ed-in2out-output-slowpath",
1550   .vector_size = sizeof (u32),
1551   .sibling_of = "nat-default",
1552   .format_trace = format_nat_in2out_ed_trace,
1553   .type = VLIB_NODE_TYPE_INTERNAL,
1554   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1555   .error_strings = nat_in2out_ed_error_strings,
1556   .runtime_data_bytes = sizeof (snat_runtime_t),
1557 };
1558
1559 static u8 *
1560 format_nat_pre_trace (u8 * s, va_list * args)
1561 {
1562   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1563   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1564   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1565   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1566                  t->arc_next_index);
1567 }
1568
1569 VLIB_NODE_FN (nat_pre_in2out_node)
1570   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1571 {
1572   return nat_pre_node_fn_inline (vm, node, frame,
1573                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1574 }
1575
1576 VLIB_NODE_FN (nat_pre_in2out_output_node)
1577   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1578 {
1579   return nat_pre_node_fn_inline (vm, node, frame,
1580                                  NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
1581 }
1582
1583 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1584   .name = "nat-pre-in2out",
1585   .vector_size = sizeof (u32),
1586   .sibling_of = "nat-default",
1587   .format_trace = format_nat_pre_trace,
1588   .type = VLIB_NODE_TYPE_INTERNAL,
1589   .n_errors = 0,
1590 };
1591
1592 VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
1593   .name = "nat-pre-in2out-output",
1594   .vector_size = sizeof (u32),
1595   .sibling_of = "nat-default",
1596   .format_trace = format_nat_pre_trace,
1597   .type = VLIB_NODE_TYPE_INTERNAL,
1598   .n_errors = 0,
1599 };
1600
1601 /*
1602  * fd.io coding-style-patch-verification: ON
1603  *
1604  * Local Variables:
1605  * eval: (c-set-style "gnu")
1606  * End:
1607  */