6658e5b2b23bf766253ef850f59877888f39b7d0
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_in2out.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27
28 #include <nat/lib/nat_syslog.h>
29 #include <nat/lib/nat_inlines.h>
30 #include <nat/lib/ipfix_logging.h>
31
32 #include <nat/nat44-ed/nat44_ed.h>
33 #include <nat/nat44-ed/nat44_ed_inlines.h>
34
/* Number of attempts to get a port for the ED overloading algorithm. If
 * rolling the dice this many times doesn't produce a free port, it's treated
 * as if there were no free ports available, to conserve resources. */
#define ED_PORT_ALLOC_ATTEMPTS (10)
39
/* Error strings for this node: the X-macro below keeps only the `string`
 * half of every (sym, string) pair listed by foreach_nat_in2out_ed_error.
 * NOTE(review): presumably indexed by the matching NAT_IN2OUT_ED_ERROR_*
 * codes used for node->errors[] in this file - confirm against the enum. */
static char *nat_in2out_ed_error_strings[] = {
#define _(sym,string) string,
  foreach_nat_in2out_ed_error
#undef _
};
45
/* Per-packet trace record rendered by format_nat_in2out_ed_trace(). */
typedef struct
{
  /* printed as "sw_if_index" */
  u32 sw_if_index;
  /* printed as "next index" */
  u32 next_index;
  /* printed as "session"; ~0 suppresses the translation/flow details */
  u32 session_index;
  /* rendered with format_nat_ed_translation_error */
  nat_translation_error_e translation_error;
  /* flows rendered with format_nat_6t_flow as "i2of" / "o2if" */
  nat_6t_flow_t i2of;
  nat_6t_flow_t o2if;
  /* printed on the fast path when the lookup was not skipped */
  clib_bihash_kv_16_8_t search_key;
  /* non-zero selects the SLOW_PATH tag, zero the FAST_PATH tag */
  u8 is_slow_path;
  /* non-zero prints "via i2of", zero prints "via o2if" */
  u8 translation_via_i2of;
  /* fast path only: non-zero prints the "lookup skipped" note instead of
   * the search key */
  u8 lookup_skipped;
} nat_in2out_ed_trace_t;
59
60 static u8 *
61 format_nat_in2out_ed_trace (u8 * s, va_list * args)
62 {
63   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
64   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
65   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
66   char *tag;
67
68   tag =
69     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
70     "NAT44_IN2OUT_ED_FAST_PATH";
71
72   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
73               t->sw_if_index, t->next_index, t->session_index);
74   if (~0 != t->session_index)
75     {
76       s = format (s, ", translation result '%U' via %s",
77                   format_nat_ed_translation_error, t->translation_error,
78                   t->translation_via_i2of ? "i2of" : "o2if");
79       s = format (s, "\n  i2of %U", format_nat_6t_flow, &t->i2of);
80       s = format (s, "\n  o2if %U", format_nat_6t_flow, &t->o2if);
81     }
82   if (!t->is_slow_path)
83     {
84       if (t->lookup_skipped)
85         {
86           s = format (s, "\n lookup skipped - cached session index used");
87         }
88       else
89         {
90           s = format (s, "\n  search key %U", format_ed_session_kvp,
91                       &t->search_key);
92         }
93     }
94
95   return s;
96 }
97
98 /**
99  * @brief Check if packet should be translated
100  *
101  * Packets aimed at outside interface and external address with active session
102  * should be translated.
103  *
104  * @param sm            NAT main
105  * @param rt            NAT runtime data
106  * @param sw_if_index0  index of the inside interface
107  * @param ip0           IPv4 header
108  * @param proto0        NAT protocol
109  * @param rx_fib_index0 RX FIB index
110  *
111  * @returns 0 if packet should be translated otherwise 1
112  */
113 static inline int
114 snat_not_translate_fast (snat_main_t *sm, vlib_node_runtime_t *node,
115                          u32 sw_if_index0, ip4_header_t *ip0, u32 proto0,
116                          u32 rx_fib_index0)
117 {
118   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
119   nat_outside_fib_t *outside_fib;
120   fib_prefix_t pfx = {
121     .fp_proto = FIB_PROTOCOL_IP4,
122     .fp_len = 32,
123     .fp_addr = {
124                 .ip4.as_u32 = ip0->dst_address.as_u32,
125                 }
126     ,
127   };
128
129   /* Don't NAT packet aimed at the intfc address */
130   if (PREDICT_FALSE (
131         is_interface_addr (sm, node, sw_if_index0, ip0->dst_address.as_u32)))
132     return 1;
133
134   fei = fib_table_lookup (rx_fib_index0, &pfx);
135   if (FIB_NODE_INDEX_INVALID != fei)
136     {
137       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
138       if (sw_if_index == ~0)
139         {
140           vec_foreach (outside_fib, sm->outside_fibs)
141             {
142               fei = fib_table_lookup (outside_fib->fib_index, &pfx);
143               if (FIB_NODE_INDEX_INVALID != fei)
144                 {
145                   sw_if_index = fib_entry_get_resolving_interface (fei);
146                   if (sw_if_index != ~0)
147                     break;
148                 }
149             }
150         }
151       if (sw_if_index == ~0)
152         return 1;
153
154       snat_interface_t *i;
155       pool_foreach (i, sm->interfaces)
156         {
157           /* NAT packet aimed at outside interface */
158           if ((nat_interface_is_outside (i)) &&
159               (sw_if_index == i->sw_if_index))
160             return 0;
161         }
162     }
163
164   return 1;
165 }
166
167 static int
168 nat_ed_alloc_addr_and_port_with_snat_address (
169   snat_main_t *sm, u32 nat_proto, u32 thread_index, snat_address_t *a,
170   u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
171   ip4_address_t *outside_addr, u16 *outside_port)
172 {
173   const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
174
175   s->o2i.match.daddr = a->addr;
176   /* first try port suggested by caller */
177   u16 port = clib_net_to_host_u16 (*outside_port);
178   u16 port_offset = port - port_thread_offset;
179   if (port <= port_thread_offset ||
180       port > port_thread_offset + port_per_thread)
181     {
182       /* need to pick a different port, suggested port doesn't fit in
183        * this thread's port range */
184       port_offset = snat_random_port (0, port_per_thread - 1);
185       port = port_thread_offset + port_offset;
186     }
187   u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
188   do
189     {
190       if (NAT_PROTOCOL_ICMP == nat_proto)
191         {
192           s->o2i.match.sport = clib_host_to_net_u16 (port);
193         }
194       s->o2i.match.dport = clib_host_to_net_u16 (port);
195       if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
196         {
197 #define _(N, i, n, s)                                                         \
198   case NAT_PROTOCOL_##N:                                                      \
199     ++a->busy_##n##_port_refcounts[port];                                     \
200     a->busy_##n##_ports_per_thread[thread_index]++;                           \
201     a->busy_##n##_ports++;                                                    \
202     break;
203           switch (nat_proto)
204             {
205               foreach_nat_protocol;
206             default:
207               nat_elog_info (sm, "unknown protocol");
208               return 1;
209             }
210 #undef _
211           *outside_addr = a->addr;
212           *outside_port = clib_host_to_net_u16 (port);
213           return 0;
214         }
215       port_offset = snat_random_port (0, port_per_thread - 1);
216       port = port_thread_offset + port_offset;
217       --attempts;
218     }
219   while (attempts > 0);
220   return 1;
221 }
222
223 static int
224 nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto,
225                             u32 thread_index, ip4_address_t s_addr,
226                             u16 port_per_thread, u32 snat_thread_index,
227                             snat_session_t *s, ip4_address_t *outside_addr,
228                             u16 *outside_port)
229 {
230   int i;
231   snat_address_t *a, *ga = 0;
232
233   if (vec_len (sm->addresses) > 0)
234     {
235       int s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses);
236
237       for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
238         {
239           a = sm->addresses + i;
240           if (a->fib_index == rx_fib_index)
241             {
242               return nat_ed_alloc_addr_and_port_with_snat_address (
243                 sm, nat_proto, thread_index, a, port_per_thread,
244                 snat_thread_index, s, outside_addr, outside_port);
245             }
246           else if (a->fib_index == ~0)
247             {
248               ga = a;
249             }
250         }
251
252       for (i = 0; i < s_addr_offset; ++i)
253         {
254           a = sm->addresses + i;
255           if (a->fib_index == rx_fib_index)
256             {
257               return nat_ed_alloc_addr_and_port_with_snat_address (
258                 sm, nat_proto, thread_index, a, port_per_thread,
259                 snat_thread_index, s, outside_addr, outside_port);
260             }
261           else if (a->fib_index == ~0)
262             {
263               ga = a;
264             }
265         }
266
267       if (ga)
268         {
269           return nat_ed_alloc_addr_and_port_with_snat_address (
270             sm, nat_proto, thread_index, a, port_per_thread, snat_thread_index,
271             s, outside_addr, outside_port);
272         }
273     }
274   /* Totally out of translations to use... */
275   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
276   return 1;
277 }
278
279 static_always_inline u32
280 nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr)
281 {
282   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
283   nat_outside_fib_t *outside_fib;
284   fib_prefix_t pfx = {
285     .fp_proto = FIB_PROTOCOL_IP4,
286     .fp_len = 32,
287     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
288     ,
289   };
290   // TODO: multiple vrfs none can resolve addr
291   vec_foreach (outside_fib, sm->outside_fibs)
292     {
293       fei = fib_table_lookup (outside_fib->fib_index, &pfx);
294       if (FIB_NODE_INDEX_INVALID != fei)
295         {
296           if (fib_entry_get_resolving_interface (fei) != ~0)
297             {
298               return outside_fib->fib_index;
299             }
300         }
301     }
302   return ~0;
303 }
304
305 static_always_inline int
306 nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
307                              u16 match_port, nat_protocol_t match_protocol,
308                              u32 match_fib_index, ip4_address_t *daddr,
309                              u16 *dport)
310 {
311   clib_bihash_kv_8_8_t kv, value;
312   init_nat_k (&kv, match_addr, match_port, match_fib_index, match_protocol);
313   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
314     {
315       /* Try address only mapping */
316       init_nat_k (&kv, match_addr, 0, 0, 0);
317       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv,
318                                   &value))
319         return 0;
320     }
321
322   snat_static_mapping_t *m =
323     pool_elt_at_index (sm->static_mappings, value.value);
324   *daddr = m->local_addr;
325   if (dport)
326     {
327       /* Address only mapping doesn't change port */
328       *dport = is_addr_only_static_mapping (m) ? match_port : m->local_port;
329     }
330   return 1;
331 }
332
333 static u32
334 slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
335               ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
336               u16 r_port, u8 proto, u32 rx_fib_index,
337               snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
338               u32 thread_index, f64 now)
339 {
340   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
341   ip4_address_t outside_addr;
342   u16 outside_port;
343   u32 outside_fib_index;
344   u8 is_identity_nat;
345
346   u32 nat_proto = ip_proto_to_nat_proto (proto);
347   snat_session_t *s = NULL;
348   lb_nat_type_t lb = 0;
349   ip4_address_t daddr = r_addr;
350   u16 dport = r_port;
351
352   if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP))
353     {
354       if (PREDICT_FALSE
355           (!tcp_flags_is_init
356            (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
357         {
358           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
359           return NAT_NEXT_DROP;
360         }
361     }
362
363   if (PREDICT_FALSE
364       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
365     {
366       if (!nat_lru_free_one (sm, thread_index, now))
367         {
368           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
369           nat_ipfix_logging_max_sessions (thread_index,
370                                           sm->max_translations_per_thread);
371           nat_elog_notice (sm, "maximum sessions exceeded");
372           return NAT_NEXT_DROP;
373         }
374     }
375
376   outside_fib_index = sm->outside_fib_index;
377
378   switch (vec_len (sm->outside_fibs))
379     {
380     case 0:
381       outside_fib_index = sm->outside_fib_index;
382       break;
383     case 1:
384       outside_fib_index = sm->outside_fibs[0].fib_index;
385       break;
386     default:
387       outside_fib_index = nat_outside_fib_index_lookup (sm, r_addr);
388       break;
389     }
390
391   ip4_address_t sm_addr;
392   u16 sm_port;
393   u32 sm_fib_index;
394   /* First try to match static mapping by local address and port */
395   int is_sm;
396   if (snat_static_mapping_match (vm, sm, l_addr, l_port, rx_fib_index,
397                                  nat_proto, &sm_addr, &sm_port, &sm_fib_index,
398                                  0, 0, 0, &lb, 0, &is_identity_nat, 0))
399     {
400       is_sm = 0;
401     }
402   else
403     {
404       is_sm = 1;
405     }
406
407   if (PREDICT_FALSE (is_sm && is_identity_nat))
408     {
409       *sessionp = NULL;
410       return next;
411     }
412
413   s = nat_ed_session_alloc (sm, thread_index, now, proto);
414   ASSERT (s);
415
416   if (!is_sm)
417     {
418       s->in2out.addr = l_addr;
419       s->in2out.port = l_port;
420       s->nat_proto = nat_proto;
421       s->in2out.fib_index = rx_fib_index;
422       s->out2in.fib_index = outside_fib_index;
423
424       // suggest using local port to allocation function
425       outside_port = l_port;
426
427       // hairpinning?
428       int is_hairpinning = nat44_ed_external_sm_lookup (
429         sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
430       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
431
432       // destination addr/port updated with real values in
433       // nat_ed_alloc_addr_and_port
434       nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
435                             s->out2in.fib_index, proto);
436       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
437       if (NAT_PROTOCOL_ICMP == nat_proto)
438         {
439           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
440         }
441       else
442         {
443           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
444         }
445       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
446
447       if (nat_ed_alloc_addr_and_port (
448             sm, rx_fib_index, nat_proto, thread_index, l_addr,
449             sm->port_per_thread, tsm->snat_thread_index, s, &outside_addr,
450             &outside_port))
451         {
452           nat_elog_notice (sm, "addresses exhausted");
453           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
454           nat_ed_session_delete (sm, s, thread_index, 1);
455           return NAT_NEXT_DROP;
456         }
457       s->out2in.addr = outside_addr;
458       s->out2in.port = outside_port;
459     }
460   else
461     {
462       // static mapping
463       s->out2in.addr = outside_addr = sm_addr;
464       s->out2in.port = outside_port = sm_port;
465       s->in2out.addr = l_addr;
466       s->in2out.port = l_port;
467       s->nat_proto = nat_proto;
468       s->in2out.fib_index = rx_fib_index;
469       s->out2in.fib_index = outside_fib_index;
470       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
471
472       // hairpinning?
473       int is_hairpinning = nat44_ed_external_sm_lookup (
474         sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
475       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
476
477       if (NAT_PROTOCOL_ICMP == nat_proto)
478         {
479           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
480                                 sm_port, s->out2in.fib_index, proto);
481           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
482         }
483       else
484         {
485           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr,
486                                 sm_port, s->out2in.fib_index, proto);
487           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
488         }
489       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
490       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
491       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
492         {
493           nat_elog_notice (sm, "out2in key add failed");
494           goto error;
495         }
496     }
497
498   if (lb)
499     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
500   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
501   s->ext_host_addr = r_addr;
502   s->ext_host_port = r_port;
503
504   nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port,
505                         rx_fib_index, proto);
506   nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
507   nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);
508
509   if (NAT_PROTOCOL_ICMP == nat_proto)
510     {
511       nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
512     }
513   else
514     {
515       nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
516       nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
517     }
518   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
519
520   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
521     {
522       nat_elog_notice (sm, "in2out key add failed");
523       goto error;
524     }
525
526   /* log NAT event */
527   nat_ipfix_logging_nat44_ses_create (thread_index,
528                                       s->in2out.addr.as_u32,
529                                       s->out2in.addr.as_u32,
530                                       s->nat_proto,
531                                       s->in2out.port,
532                                       s->out2in.port, s->in2out.fib_index);
533
534   nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
535                          s->in2out.port, &s->ext_host_nat_addr,
536                          s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
537                          &s->ext_host_addr, s->ext_host_port, s->nat_proto, 0);
538
539   per_vrf_sessions_register_session (s, thread_index);
540
541   *sessionp = s;
542   return next;
543 error:
544   if (s)
545     {
546       if (!is_sm)
547         {
548           snat_free_outside_address_and_port (sm->addresses, thread_index,
549                                               &outside_addr, outside_port,
550                                               nat_proto);
551         }
552       nat_ed_session_delete (sm, s, thread_index, 1);
553     }
554   *sessionp = s = NULL;
555   return NAT_NEXT_DROP;
556 }
557
558 static_always_inline int
559 nat44_ed_not_translate (vlib_main_t *vm, snat_main_t *sm,
560                         vlib_node_runtime_t *node, u32 sw_if_index,
561                         vlib_buffer_t *b, ip4_header_t *ip, u32 proto,
562                         u32 rx_fib_index, u32 thread_index)
563 {
564   clib_bihash_kv_16_8_t kv, value;
565
566   init_ed_k (&kv, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
567              ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
568              sm->outside_fib_index, ip->protocol);
569
570   /* NAT packet aimed at external address if has active sessions */
571   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
572     {
573       /* or is static mappings */
574       ip4_address_t placeholder_addr;
575       u16 placeholder_port;
576       u32 placeholder_fib_index;
577       if (!snat_static_mapping_match (
578             vm, sm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
579             sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
580             &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
581         return 0;
582     }
583   else
584     return 0;
585
586   if (sm->forwarding_enabled)
587     return 1;
588
589   return snat_not_translate_fast (sm, node, sw_if_index, ip, proto,
590                                   rx_fib_index);
591 }
592
593 static_always_inline int
594 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
595                                       u32 thread_index, f64 now,
596                                       vlib_main_t * vm, vlib_buffer_t * b)
597 {
598   clib_bihash_kv_16_8_t kv, value;
599   snat_session_t *s = 0;
600   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
601
602   if (!sm->forwarding_enabled)
603     return 0;
604
605   if (ip->protocol == IP_PROTOCOL_ICMP)
606     {
607       ip4_address_t lookup_saddr, lookup_daddr;
608       u16 lookup_sport, lookup_dport;
609       u8 lookup_protocol;
610       if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
611                                               &lookup_sport, &lookup_daddr,
612                                               &lookup_dport, &lookup_protocol))
613         return 0;
614       init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport,
615                  0, lookup_protocol);
616     }
617   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
618     {
619       init_ed_k (&kv, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
620                  ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port, 0,
621                  ip->protocol);
622     }
623   else
624     {
625       init_ed_k (&kv, ip->src_address, 0, ip->dst_address, 0, 0,
626                  ip->protocol);
627     }
628
629   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
630     {
631       ASSERT (thread_index == ed_value_get_thread_index (&value));
632       s =
633         pool_elt_at_index (tsm->sessions,
634                            ed_value_get_session_index (&value));
635
636       if (is_fwd_bypass_session (s))
637         {
638           if (ip->protocol == IP_PROTOCOL_TCP)
639             {
640               nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index);
641             }
642           /* Accounting */
643           nat44_session_update_counters (s, now,
644                                          vlib_buffer_length_in_chain (vm, b),
645                                          thread_index);
646           /* Per-user LRU list maintenance */
647           nat44_session_update_lru (sm, s, thread_index);
648           return 1;
649         }
650       else
651         return 0;
652     }
653
654   return 0;
655 }
656
657 static_always_inline int
658 nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
659                                        ip4_header_t *ip, u16 src_port,
660                                        u16 dst_port, u32 thread_index,
661                                        u32 rx_sw_if_index, u32 tx_sw_if_index,
662                                        f64 now, int is_multi_worker)
663 {
664   clib_bihash_kv_16_8_t kv, value;
665   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
666   snat_interface_t *i;
667   snat_session_t *s;
668   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
669   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
670
671   /* src NAT check */
672   init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
673              tx_fib_index, ip->protocol);
674   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
675     {
676       ASSERT (thread_index == ed_value_get_thread_index (&value));
677       s =
678         pool_elt_at_index (tsm->sessions,
679                            ed_value_get_session_index (&value));
680       if (nat44_is_ses_closed (s)
681           && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp))
682         {
683           nat_free_session_data (sm, s, thread_index, 0);
684           nat_ed_session_delete (sm, s, thread_index, 1);
685         }
686       return 1;
687     }
688
689   /* dst NAT check */
690   if (is_multi_worker &&
691       PREDICT_TRUE (!pool_is_free_index (
692         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index)))
693     {
694       nat_6t_t lookup;
695       lookup.fib_index = rx_fib_index;
696       lookup.proto = ip->protocol;
697       lookup.daddr.as_u32 = ip->src_address.as_u32;
698       lookup.dport = src_port;
699       lookup.saddr.as_u32 = ip->dst_address.as_u32;
700       lookup.sport = dst_port;
701       s = pool_elt_at_index (
702         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index);
703       if (PREDICT_TRUE (nat_6t_t_eq (&s->i2o.match, &lookup)))
704         {
705           goto skip_dst_nat_lookup;
706         }
707       s = NULL;
708     }
709
710   init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port,
711              rx_fib_index, ip->protocol);
712   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
713     {
714       ASSERT (thread_index == ed_value_get_thread_index (&value));
715       s =
716         pool_elt_at_index (tsm->sessions,
717                            ed_value_get_session_index (&value));
718
719     skip_dst_nat_lookup:
720       if (is_fwd_bypass_session (s))
721         return 0;
722
723       /* hairpinning */
724       pool_foreach (i, sm->output_feature_interfaces)
725        {
726         if ((nat_interface_is_inside (i)) && (rx_sw_if_index == i->sw_if_index))
727            return 0;
728       }
729       return 1;
730     }
731
732   return 0;
733 }
734
735 static inline u32
736 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
737                           icmp46_header_t *icmp, u32 sw_if_index,
738                           u32 rx_fib_index, vlib_node_runtime_t *node,
739                           u32 next, f64 now, u32 thread_index,
740                           nat_protocol_t nat_proto, snat_session_t **s_p,
741                           int is_multi_worker)
742 {
743   vlib_main_t *vm = vlib_get_main ();
744   u16 checksum;
745   int err;
746   snat_session_t *s = NULL;
747   u8 lookup_protocol = ip->protocol;
748   u16 lookup_sport, lookup_dport;
749   ip4_address_t lookup_saddr, lookup_daddr;
750
751   err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
752                                             &lookup_sport, &lookup_daddr,
753                                             &lookup_dport, &lookup_protocol);
754   if (err != 0)
755     {
756       b->error = node->errors[err];
757       return NAT_NEXT_DROP;
758     }
759
760   if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
761     {
762       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
763             sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
764             vnet_buffer (b)->sw_if_index[VLIB_TX], now, is_multi_worker)))
765         {
766           return next;
767         }
768     }
769   else
770     {
771       if (PREDICT_FALSE (nat44_ed_not_translate (vm, sm, node, sw_if_index, b,
772                                                  ip, NAT_PROTOCOL_ICMP,
773                                                  rx_fib_index, thread_index)))
774         {
775           return next;
776         }
777     }
778
779   if (PREDICT_FALSE (icmp_type_is_error_message (
780         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
781     {
782       b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
783       return NAT_NEXT_DROP;
784     }
785
786   next = slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address,
787                        lookup_sport, lookup_dport, ip->protocol, rx_fib_index,
788                        &s, node, next, thread_index, vlib_time_now (vm));
789
790   if (NAT_NEXT_DROP == next)
791     goto out;
792
793   if (PREDICT_TRUE (!ip4_is_fragment (ip)))
794     {
795       ip_csum_t sum = ip_incremental_checksum_buffer (
796         vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b),
797         ntohs (ip->length) - ip4_header_bytes (ip), 0);
798       checksum = ~ip_csum_fold (sum);
799       if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff))
800         {
801           next = NAT_NEXT_DROP;
802           goto out;
803         }
804     }
805
806 out:
807   if (PREDICT_TRUE (next != NAT_NEXT_DROP && s))
808     {
809       /* Accounting */
810       nat44_session_update_counters (
811         s, now, vlib_buffer_length_in_chain (vm, b), thread_index);
812       /* Per-user LRU list maintenance */
813       nat44_session_update_lru (sm, s, thread_index);
814     }
815   *s_p = s;
816   return next;
817 }
818
819 static snat_session_t *
820 nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
821                                         ip4_header_t *ip, u32 rx_fib_index,
822                                         u32 thread_index, f64 now,
823                                         vlib_main_t *vm,
824                                         vlib_node_runtime_t *node)
825 {
826   clib_bihash_kv_8_8_t kv, value;
827   clib_bihash_kv_16_8_t s_kv, s_value;
828   snat_static_mapping_t *m = NULL;
829   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
830   snat_session_t *s = NULL;
831   u32 outside_fib_index = sm->outside_fib_index;
832   int i;
833   ip4_address_t new_src_addr = { 0 };
834   ip4_address_t new_dst_addr = ip->dst_address;
835
836   if (PREDICT_FALSE (
837         nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
838     {
839       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
840       nat_ipfix_logging_max_sessions (thread_index,
841                                       sm->max_translations_per_thread);
842       nat_elog_notice (sm, "maximum sessions exceeded");
843       return 0;
844     }
845
846   switch (vec_len (sm->outside_fibs))
847     {
848     case 0:
849       outside_fib_index = sm->outside_fib_index;
850       break;
851     case 1:
852       outside_fib_index = sm->outside_fibs[0].fib_index;
853       break;
854     default:
855       outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address);
856       break;
857     }
858
859   init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0);
860
861   /* Try to find static mapping first */
862   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
863     {
864       m = pool_elt_at_index (sm->static_mappings, value.value);
865       new_src_addr = m->external_addr;
866     }
867   else
868     {
869       pool_foreach (s, tsm->sessions)
870         {
871           if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
872             {
873               init_ed_k (&s_kv, s->out2in.addr, 0, ip->dst_address, 0,
874                          outside_fib_index, ip->protocol);
875               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
876                 {
877                   new_src_addr = s->out2in.addr;
878                 }
879               break;
880             }
881         }
882
883       if (!new_src_addr.as_u32)
884         {
885           for (i = 0; i < vec_len (sm->addresses); i++)
886             {
887               init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0,
888                          outside_fib_index, ip->protocol);
889               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
890                 {
891                   new_src_addr = sm->addresses[i].addr;
892                 }
893             }
894         }
895     }
896
897   if (!new_src_addr.as_u32)
898     {
899       // could not allocate address for translation ...
900       return 0;
901     }
902
903   s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
904   if (!s)
905     {
906       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
907       nat_elog_warn (sm, "create NAT session failed");
908       return 0;
909     }
910
911   nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
912                         ip->dst_address, 0, rx_fib_index, ip->protocol);
913   nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
914   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
915
916   // hairpinning?
917   int is_hairpinning =
918     nat44_ed_external_sm_lookup (sm, ip->dst_address, 0, NAT_PROTOCOL_OTHER,
919                                  outside_fib_index, &new_dst_addr, NULL);
920   s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
921
922   nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
923   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
924
925   nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
926                         outside_fib_index, ip->protocol);
927   nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
928   nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
929   nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
930
931   s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
932   s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
933   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
934   s->out2in.addr.as_u32 = new_src_addr.as_u32;
935   s->out2in.fib_index = outside_fib_index;
936   s->in2out.addr.as_u32 = ip->src_address.as_u32;
937   s->in2out.fib_index = rx_fib_index;
938   s->in2out.port = s->out2in.port = ip->protocol;
939   if (m)
940     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
941
942   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
943     {
944       nat_elog_notice (sm, "in2out flow hash add failed");
945       nat_ed_session_delete (sm, s, thread_index, 1);
946       return NULL;
947     }
948
949   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1))
950     {
951       nat_elog_notice (sm, "out2in flow hash add failed");
952       nat_ed_session_delete (sm, s, thread_index, 1);
953       return NULL;
954     }
955
956   per_vrf_sessions_register_session (s, thread_index);
957
958   /* Accounting */
959   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
960                                  thread_index);
961   /* Per-user LRU list maintenance */
962   nat44_session_update_lru (sm, s, thread_index);
963
964   return s;
965 }
966
967 static inline uword
968 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
969                                           vlib_node_runtime_t *node,
970                                           vlib_frame_t *frame,
971                                           int is_output_feature,
972                                           int is_multi_worker)
973 {
974   u32 n_left_from, *from;
975   snat_main_t *sm = &snat_main;
976   f64 now = vlib_time_now (vm);
977   u32 thread_index = vm->thread_index;
978   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
979   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
980     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
981
982   from = vlib_frame_vector_args (frame);
983   n_left_from = frame->n_vectors;
984
985   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
986   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
987   vlib_get_buffers (vm, from, b, n_left_from);
988
989   while (n_left_from > 0)
990     {
991       vlib_buffer_t *b0;
992       u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0;
993       nat_protocol_t proto0;
994       ip4_header_t *ip0;
995       snat_session_t *s0 = 0;
996       clib_bihash_kv_16_8_t kv0, value0;
997       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
998       nat_6t_flow_t *f = 0;
999       nat_6t_t lookup;
1000       int lookup_skipped = 0;
1001
1002       b0 = *b;
1003       b++;
1004
1005       /* Prefetch next iteration. */
1006       if (PREDICT_TRUE (n_left_from >= 2))
1007         {
1008           vlib_buffer_t *p2;
1009
1010           p2 = *b;
1011
1012           vlib_prefetch_buffer_header (p2, LOAD);
1013
1014           CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
1015         }
1016
1017       if (is_output_feature)
1018         {
1019           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1020         }
1021
1022       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1023
1024       ip0 =
1025         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
1026
1027       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1028       rx_fib_index0 =
1029         fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
1030       lookup.fib_index = rx_fib_index0;
1031
1032       if (PREDICT_FALSE (ip0->ttl == 1))
1033         {
1034           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1035           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1036                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1037                                        0);
1038           next[0] = NAT_NEXT_ICMP_ERROR;
1039           goto trace0;
1040         }
1041
1042       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1043
1044       if (is_output_feature)
1045         {
1046           if (PREDICT_FALSE
1047               (nat_not_translate_output_feature_fwd
1048                (sm, ip0, thread_index, now, vm, b0)))
1049             goto trace0;
1050         }
1051
1052       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1053         {
1054           if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1055                 ICMP4_echo_request &&
1056               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1057                 ICMP4_echo_reply &&
1058               !icmp_type_is_error_message (
1059                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
1060             {
1061               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
1062               next[0] = NAT_NEXT_DROP;
1063               goto trace0;
1064             }
1065           int err = nat_get_icmp_session_lookup_values (
1066             b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
1067             &lookup.dport, &lookup.proto);
1068           if (err != 0)
1069             {
1070               b0->error = node->errors[err];
1071               next[0] = NAT_NEXT_DROP;
1072               goto trace0;
1073             }
1074         }
1075       else
1076         {
1077           lookup.proto = ip0->protocol;
1078           lookup.saddr.as_u32 = ip0->src_address.as_u32;
1079           lookup.daddr.as_u32 = ip0->dst_address.as_u32;
1080           lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
1081           lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
1082         }
1083
1084       /* there might be a stashed index in vnet_buffer2 from handoff or
1085        * classify node, see if it can be used */
1086       if (is_multi_worker &&
1087           !pool_is_free_index (tsm->sessions,
1088                                vnet_buffer2 (b0)->nat.cached_session_index))
1089         {
1090           s0 = pool_elt_at_index (tsm->sessions,
1091                                   vnet_buffer2 (b0)->nat.cached_session_index);
1092           if (PREDICT_TRUE (
1093                 nat_6t_t_eq (&s0->i2o.match, &lookup)
1094                 // for some hairpinning cases there are two "i2i" flows instead
1095                 // of i2o and o2i as both hosts are on inside
1096                 || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1097                     nat_6t_t_eq (&s0->o2i.match, &lookup))))
1098             {
1099               /* yes, this is the droid we're looking for */
1100               lookup_skipped = 1;
1101               goto skip_lookup;
1102             }
1103           s0 = NULL;
1104         }
1105
1106       init_ed_k (&kv0, lookup.saddr, lookup.sport, lookup.daddr, lookup.dport,
1107                  lookup.fib_index, lookup.proto);
1108
1109       // lookup flow
1110       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1111         {
1112           // flow does not exist go slow path
1113           next[0] = def_slow;
1114           goto trace0;
1115         }
1116
1117       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1118       s0 =
1119         pool_elt_at_index (tsm->sessions,
1120                            ed_value_get_session_index (&value0));
1121
1122     skip_lookup:
1123
1124       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1125         {
1126           // session is closed, go slow path
1127           nat_free_session_data (sm, s0, thread_index, 0);
1128           nat_ed_session_delete (sm, s0, thread_index, 1);
1129           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
1130           goto trace0;
1131         }
1132
1133       if (s0->tcp_closed_timestamp)
1134         {
1135           if (now >= s0->tcp_closed_timestamp)
1136             {
1137               // session is closed, go slow path, freed in slow path
1138               next[0] = def_slow;
1139             }
1140           else
1141             {
1142               // session in transitory timeout, drop
1143               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
1144               next[0] = NAT_NEXT_DROP;
1145             }
1146           goto trace0;
1147         }
1148
1149       // drop if session expired
1150       u64 sess_timeout_time;
1151       sess_timeout_time =
1152         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1153       if (now >= sess_timeout_time)
1154         {
1155           nat_free_session_data (sm, s0, thread_index, 0);
1156           nat_ed_session_delete (sm, s0, thread_index, 1);
1157           // session is closed, go slow path
1158           next[0] = def_slow;
1159           goto trace0;
1160         }
1161
1162       b0->flags |= VNET_BUFFER_F_IS_NATED;
1163
1164       if (nat_6t_t_eq (&s0->i2o.match, &lookup))
1165         {
1166           f = &s0->i2o;
1167         }
1168       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1169                nat_6t_t_eq (&s0->o2i.match, &lookup))
1170         {
1171           f = &s0->o2i;
1172         }
1173       else
1174         {
1175           translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
1176           nat_free_session_data (sm, s0, thread_index, 0);
1177           nat_ed_session_delete (sm, s0, thread_index, 1);
1178           next[0] = NAT_NEXT_DROP;
1179           goto trace0;
1180         }
1181
1182       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1183           (translation_error = nat_6t_flow_buf_translate (
1184              sm, b0, ip0, f, proto0, is_output_feature)))
1185         {
1186           nat_free_session_data (sm, s0, thread_index, 0);
1187           nat_ed_session_delete (sm, s0, thread_index, 1);
1188           next[0] = NAT_NEXT_DROP;
1189           goto trace0;
1190         }
1191
1192       switch (proto0)
1193         {
1194         case NAT_PROTOCOL_TCP:
1195           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
1196                                          thread_index, sw_if_index0, 1);
1197           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1198           break;
1199         case NAT_PROTOCOL_UDP:
1200           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
1201                                          thread_index, sw_if_index0, 1);
1202           break;
1203         case NAT_PROTOCOL_ICMP:
1204           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
1205                                          thread_index, sw_if_index0, 1);
1206           break;
1207         case NAT_PROTOCOL_OTHER:
1208           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
1209                                          thread_index, sw_if_index0, 1);
1210           break;
1211         }
1212
1213       /* Accounting */
1214       nat44_session_update_counters (s0, now,
1215                                      vlib_buffer_length_in_chain (vm, b0),
1216                                      thread_index);
1217       /* Per-user LRU list maintenance */
1218       nat44_session_update_lru (sm, s0, thread_index);
1219
1220     trace0:
1221       if (PREDICT_FALSE
1222           ((node->flags & VLIB_NODE_FLAG_TRACE)
1223            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1224         {
1225           nat_in2out_ed_trace_t *t =
1226             vlib_add_trace (vm, node, b0, sizeof (*t));
1227           t->sw_if_index = sw_if_index0;
1228           t->next_index = next[0];
1229           t->is_slow_path = 0;
1230           t->translation_error = translation_error;
1231           t->lookup_skipped = lookup_skipped;
1232           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1233
1234           if (s0)
1235             {
1236               t->session_index = s0 - tsm->sessions;
1237               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1238               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1239               t->translation_via_i2of = (&s0->i2o == f);
1240             }
1241           else
1242             {
1243               t->session_index = ~0;
1244             }
1245         }
1246
1247       if (next[0] == NAT_NEXT_DROP)
1248         {
1249           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
1250                                          thread_index, sw_if_index0, 1);
1251         }
1252
1253       n_left_from--;
1254       next++;
1255     }
1256
1257   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1258                                frame->n_vectors);
1259   return frame->n_vectors;
1260 }
1261
1262 static inline uword
1263 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
1264                                           vlib_node_runtime_t *node,
1265                                           vlib_frame_t *frame,
1266                                           int is_output_feature,
1267                                           int is_multi_worker)
1268 {
1269   u32 n_left_from, *from;
1270   snat_main_t *sm = &snat_main;
1271   f64 now = vlib_time_now (vm);
1272   u32 thread_index = vm->thread_index;
1273   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1274
1275   from = vlib_frame_vector_args (frame);
1276   n_left_from = frame->n_vectors;
1277
1278   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1279   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1280   vlib_get_buffers (vm, from, b, n_left_from);
1281
1282   while (n_left_from > 0)
1283     {
1284       vlib_buffer_t *b0;
1285       u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1286       nat_protocol_t proto0;
1287       ip4_header_t *ip0;
1288       udp_header_t *udp0;
1289       icmp46_header_t *icmp0;
1290       snat_session_t *s0 = 0;
1291       clib_bihash_kv_16_8_t kv0, value0;
1292       int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1293
1294       b0 = *b;
1295
1296       if (is_output_feature)
1297         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1298
1299       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1300
1301       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1302                               iph_offset0);
1303
1304       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1305       rx_fib_index0 =
1306         fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
1307
1308       if (PREDICT_FALSE (ip0->ttl == 1))
1309         {
1310           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1311           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1312                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1313                                        0);
1314           next[0] = NAT_NEXT_ICMP_ERROR;
1315           goto trace0;
1316         }
1317
1318       udp0 = ip4_next_header (ip0);
1319       icmp0 = (icmp46_header_t *) udp0;
1320       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1321
1322       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
1323         {
1324           s0 = nat44_ed_in2out_slowpath_unknown_proto (
1325             sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
1326           if (!s0)
1327             next[0] = NAT_NEXT_DROP;
1328
1329           if (NAT_ED_TRNSL_ERR_SUCCESS !=
1330               (translation_error = nat_6t_flow_buf_translate (
1331                  sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1332             {
1333               goto trace0;
1334             }
1335
1336           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
1337                                          thread_index, sw_if_index0, 1);
1338           goto trace0;
1339         }
1340
1341       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1342         {
1343           next[0] = icmp_in2out_ed_slow_path (
1344             sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next[0],
1345             now, thread_index, proto0, &s0, is_multi_worker);
1346           if (NAT_NEXT_DROP != next[0] && s0 &&
1347               NAT_ED_TRNSL_ERR_SUCCESS !=
1348                 (translation_error = nat_6t_flow_buf_translate (
1349                    sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1350             {
1351               goto trace0;
1352             }
1353
1354           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.icmp,
1355                                          thread_index, sw_if_index0, 1);
1356           goto trace0;
1357         }
1358
1359       init_ed_k (&kv0, ip0->src_address,
1360                  vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
1361                  vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
1362                  ip0->protocol);
1363       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1364         {
1365           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1366           s0 =
1367             pool_elt_at_index (tsm->sessions,
1368                                ed_value_get_session_index (&value0));
1369
1370           if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
1371             {
1372               nat_free_session_data (sm, s0, thread_index, 0);
1373               nat_ed_session_delete (sm, s0, thread_index, 1);
1374               s0 = NULL;
1375             }
1376         }
1377
1378       if (!s0)
1379         {
1380           if (is_output_feature)
1381             {
1382               if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
1383                     sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1384                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1385                     sw_if_index0, vnet_buffer (b0)->sw_if_index[VLIB_TX], now,
1386                     is_multi_worker)))
1387                 goto trace0;
1388
1389               /*
1390                * Send DHCP packets to the ipv4 stack, or we won't
1391                * be able to use dhcp client on the outside interface
1392                */
1393               if (PREDICT_FALSE
1394                   (proto0 == NAT_PROTOCOL_UDP
1395                    && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1396                        clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server))
1397                    && ip0->dst_address.as_u32 == 0xffffffff))
1398                 goto trace0;
1399             }
1400           else
1401             {
1402               if (PREDICT_FALSE (nat44_ed_not_translate (
1403                     vm, sm, node, sw_if_index0, b0, ip0, proto0, rx_fib_index0,
1404                     thread_index)))
1405                 goto trace0;
1406             }
1407
1408           next[0] = slow_path_ed (
1409             vm, sm, b0, ip0->src_address, ip0->dst_address,
1410             vnet_buffer (b0)->ip.reass.l4_src_port,
1411             vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->protocol,
1412             rx_fib_index0, &s0, node, next[0], thread_index, now);
1413
1414           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1415             goto trace0;
1416
1417           if (PREDICT_FALSE (!s0))
1418             goto trace0;
1419
1420         }
1421
1422       b0->flags |= VNET_BUFFER_F_IS_NATED;
1423
1424       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1425           (translation_error = nat_6t_flow_buf_translate (
1426              sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1427         {
1428           nat_free_session_data (sm, s0, thread_index, 0);
1429           nat_ed_session_delete (sm, s0, thread_index, 1);
1430           s0 = NULL;
1431           goto trace0;
1432         }
1433
1434       if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
1435         {
1436           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
1437                                          thread_index, sw_if_index0, 1);
1438           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1439         }
1440       else
1441         {
1442           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
1443                                          thread_index, sw_if_index0, 1);
1444         }
1445
1446       /* Accounting */
1447       nat44_session_update_counters (s0, now,
1448                                      vlib_buffer_length_in_chain
1449                                      (vm, b0), thread_index);
1450       /* Per-user LRU list maintenance */
1451       nat44_session_update_lru (sm, s0, thread_index);
1452
1453     trace0:
1454       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1455                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1456         {
1457           nat_in2out_ed_trace_t *t =
1458             vlib_add_trace (vm, node, b0, sizeof (*t));
1459           t->sw_if_index = sw_if_index0;
1460           t->next_index = next[0];
1461           t->is_slow_path = 1;
1462           t->translation_error = translation_error;
1463           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1464
1465           if (s0)
1466             {
1467               t->session_index = s0 - tsm->sessions;
1468               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1469               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1470               t->translation_via_i2of = 1;
1471             }
1472
1473           else
1474             {
1475               t->session_index = ~0;
1476             }
1477         }
1478
1479       if (next[0] == NAT_NEXT_DROP)
1480         {
1481           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
1482                                          thread_index, sw_if_index0, 1);
1483         }
1484
1485       n_left_from--;
1486       next++;
1487       b++;
1488     }
1489
1490   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1491                                frame->n_vectors);
1492
1493   return frame->n_vectors;
1494 }
1495
1496 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1497                                      vlib_node_runtime_t * node,
1498                                      vlib_frame_t * frame)
1499 {
1500   if (snat_main.num_workers > 1)
1501     {
1502       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1);
1503     }
1504   else
1505     {
1506       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0);
1507     }
1508 }
1509
1510 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1511   .name = "nat44-ed-in2out",
1512   .vector_size = sizeof (u32),
1513   .sibling_of = "nat-default",
1514   .format_trace = format_nat_in2out_ed_trace,
1515   .type = VLIB_NODE_TYPE_INTERNAL,
1516   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1517   .error_strings = nat_in2out_ed_error_strings,
1518   .runtime_data_bytes = sizeof (snat_runtime_t),
1519 };
1520
1521 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1522                                             vlib_node_runtime_t * node,
1523                                             vlib_frame_t * frame)
1524 {
1525   if (snat_main.num_workers > 1)
1526     {
1527       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1);
1528     }
1529   else
1530     {
1531       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0);
1532     }
1533 }
1534
1535 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1536   .name = "nat44-ed-in2out-output",
1537   .vector_size = sizeof (u32),
1538   .sibling_of = "nat-default",
1539   .format_trace = format_nat_in2out_ed_trace,
1540   .type = VLIB_NODE_TYPE_INTERNAL,
1541   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1542   .error_strings = nat_in2out_ed_error_strings,
1543   .runtime_data_bytes = sizeof (snat_runtime_t),
1544 };
1545
1546 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1547                                               vlib_node_runtime_t *
1548                                               node, vlib_frame_t * frame)
1549 {
1550   if (snat_main.num_workers > 1)
1551     {
1552       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 1);
1553     }
1554   else
1555     {
1556       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 0);
1557     }
1558 }
1559
1560 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1561   .name = "nat44-ed-in2out-slowpath",
1562   .vector_size = sizeof (u32),
1563   .sibling_of = "nat-default",
1564   .format_trace = format_nat_in2out_ed_trace,
1565   .type = VLIB_NODE_TYPE_INTERNAL,
1566   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1567   .error_strings = nat_in2out_ed_error_strings,
1568   .runtime_data_bytes = sizeof (snat_runtime_t),
1569 };
1570
1571 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1572                                                      vlib_node_runtime_t
1573                                                      * node,
1574                                                      vlib_frame_t * frame)
1575 {
1576   if (snat_main.num_workers > 1)
1577     {
1578       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 1);
1579     }
1580   else
1581     {
1582       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 0);
1583     }
1584 }
1585
1586 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1587   .name = "nat44-ed-in2out-output-slowpath",
1588   .vector_size = sizeof (u32),
1589   .sibling_of = "nat-default",
1590   .format_trace = format_nat_in2out_ed_trace,
1591   .type = VLIB_NODE_TYPE_INTERNAL,
1592   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1593   .error_strings = nat_in2out_ed_error_strings,
1594   .runtime_data_bytes = sizeof (snat_runtime_t),
1595 };
1596
1597 static u8 *
1598 format_nat_pre_trace (u8 * s, va_list * args)
1599 {
1600   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1601   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1602   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1603   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1604                  t->arc_next_index);
1605 }
1606
1607 VLIB_NODE_FN (nat_pre_in2out_node)
1608   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1609 {
1610   return nat_pre_node_fn_inline (vm, node, frame,
1611                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1612 }
1613
1614 VLIB_NODE_FN (nat_pre_in2out_output_node)
1615   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1616 {
1617   return nat_pre_node_fn_inline (vm, node, frame,
1618                                  NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
1619 }
1620
1621 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1622   .name = "nat-pre-in2out",
1623   .vector_size = sizeof (u32),
1624   .sibling_of = "nat-default",
1625   .format_trace = format_nat_pre_trace,
1626   .type = VLIB_NODE_TYPE_INTERNAL,
1627   .n_errors = 0,
1628 };
1629
1630 VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
1631   .name = "nat-pre-in2out-output",
1632   .vector_size = sizeof (u32),
1633   .sibling_of = "nat-default",
1634   .format_trace = format_nat_pre_trace,
1635   .type = VLIB_NODE_TYPE_INTERNAL,
1636   .n_errors = 0,
1637 };
1638
1639 /*
1640  * fd.io coding-style-patch-verification: ON
1641  *
1642  * Local Variables:
1643  * eval: (c-set-style "gnu")
1644  * End:
1645  */