nat: don't drop packet with ttl=1 if output feature
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_in2out.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27
28 #include <nat/lib/nat_syslog.h>
29 #include <nat/lib/nat_inlines.h>
30 #include <nat/lib/ipfix_logging.h>
31
32 #include <nat/nat44-ed/nat44_ed.h>
33 #include <nat/nat44-ed/nat44_ed_inlines.h>
34
35 /* number of attempts to get a port for ED overloading algorithm, if rolling
36  * a dice this many times doesn't produce a free port, it's treated
37  * as if there were no free ports available to conserve resources */
38 #define ED_PORT_ALLOC_ATTEMPTS (10)
39
/* String table generated from foreach_nat_in2out_ed_error: every
 * `_(sym, string)` entry of that list contributes its `string`, in list
 * order. */
static char *nat_in2out_ed_error_strings[] = {
#define _(sym,string) string,
  foreach_nat_in2out_ed_error
#undef _
};
45
/* Trace record rendered by format_nat_in2out_ed_trace(). */
typedef struct
{
  /* printed as "sw_if_index" */
  u32 sw_if_index;
  /* printed as "next index" */
  u32 next_index;
  /* ~0 means there is no session to report; the session details
   * (translation result and both flows) are then left out of the output */
  u32 session_index;
  /* translation result, printed via format_nat_ed_translation_error */
  nat_translation_error_e translation_error;
  /* flows printed via format_nat_6t_flow as "i2of" and "o2if" */
  nat_6t_flow_t i2of;
  nat_6t_flow_t o2if;
  /* printed via format_ed_session_kvp, only for fast-path records where the
   * lookup was not skipped */
  clib_bihash_kv_16_8_t search_key;
  /* non-zero selects the SLOW_PATH tag, zero the FAST_PATH tag */
  u8 is_slow_path;
  /* non-zero is printed as "i2of", zero as "o2if" */
  u8 translation_via_i2of;
  /* fast path only: non-zero is reported as "cached session index used" */
  u8 lookup_skipped;
} nat_in2out_ed_trace_t;
59
60 static u8 *
61 format_nat_in2out_ed_trace (u8 * s, va_list * args)
62 {
63   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
64   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
65   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
66   char *tag;
67
68   tag =
69     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
70     "NAT44_IN2OUT_ED_FAST_PATH";
71
72   s = format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
73               t->next_index);
74   if (~0 != t->session_index)
75     {
76       s = format (s, ", session %d, translation result '%U' via %s",
77                   t->session_index, format_nat_ed_translation_error,
78                   t->translation_error,
79                   t->translation_via_i2of ? "i2of" : "o2if");
80       s = format (s, "\n  i2of %U", format_nat_6t_flow, &t->i2of);
81       s = format (s, "\n  o2if %U", format_nat_6t_flow, &t->o2if);
82     }
83   if (!t->is_slow_path)
84     {
85       if (t->lookup_skipped)
86         {
87           s = format (s, "\n lookup skipped - cached session index used");
88         }
89       else
90         {
91           s = format (s, "\n  search key %U", format_ed_session_kvp,
92                       &t->search_key);
93         }
94     }
95
96   return s;
97 }
98
99 /**
100  * @brief Check if packet should be translated
101  *
102  * Packets aimed at outside interface and external address with active session
103  * should be translated.
104  *
105  * @param sm            NAT main
106  * @param rt            NAT runtime data
107  * @param sw_if_index0  index of the inside interface
108  * @param ip0           IPv4 header
109  * @param proto0        NAT protocol
110  * @param rx_fib_index0 RX FIB index
111  *
112  * @returns 0 if packet should be translated otherwise 1
113  */
114 static inline int
115 snat_not_translate_fast (snat_main_t *sm, vlib_node_runtime_t *node,
116                          u32 sw_if_index0, ip4_header_t *ip0, u32 proto0,
117                          u32 rx_fib_index0)
118 {
119   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
120   nat_outside_fib_t *outside_fib;
121   fib_prefix_t pfx = {
122     .fp_proto = FIB_PROTOCOL_IP4,
123     .fp_len = 32,
124     .fp_addr = {
125                 .ip4.as_u32 = ip0->dst_address.as_u32,
126                 }
127     ,
128   };
129
130   /* Don't NAT packet aimed at the intfc address */
131   if (PREDICT_FALSE (
132         is_interface_addr (sm, node, sw_if_index0, ip0->dst_address.as_u32)))
133     return 1;
134
135   fei = fib_table_lookup (rx_fib_index0, &pfx);
136   if (FIB_NODE_INDEX_INVALID != fei)
137     {
138       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
139       if (sw_if_index == ~0)
140         {
141           vec_foreach (outside_fib, sm->outside_fibs)
142             {
143               fei = fib_table_lookup (outside_fib->fib_index, &pfx);
144               if (FIB_NODE_INDEX_INVALID != fei)
145                 {
146                   sw_if_index = fib_entry_get_resolving_interface (fei);
147                   if (sw_if_index != ~0)
148                     break;
149                 }
150             }
151         }
152       if (sw_if_index == ~0)
153         return 1;
154
155       snat_interface_t *i;
156       pool_foreach (i, sm->interfaces)
157         {
158           /* NAT packet aimed at outside interface */
159           if ((nat_interface_is_outside (i)) &&
160               (sw_if_index == i->sw_if_index))
161             return 0;
162         }
163     }
164
165   return 1;
166 }
167
168 static int
169 nat_ed_alloc_addr_and_port_with_snat_address (
170   snat_main_t *sm, u32 nat_proto, u32 thread_index, snat_address_t *a,
171   u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
172   ip4_address_t *outside_addr, u16 *outside_port)
173 {
174   const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
175
176   s->o2i.match.daddr = a->addr;
177   /* first try port suggested by caller */
178   u16 port = clib_net_to_host_u16 (*outside_port);
179   u16 port_offset = port - port_thread_offset;
180   if (port <= port_thread_offset ||
181       port > port_thread_offset + port_per_thread)
182     {
183       /* need to pick a different port, suggested port doesn't fit in
184        * this thread's port range */
185       port_offset = snat_random_port (0, port_per_thread - 1);
186       port = port_thread_offset + port_offset;
187     }
188   u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
189   do
190     {
191       if (NAT_PROTOCOL_ICMP == nat_proto)
192         {
193           s->o2i.match.sport = clib_host_to_net_u16 (port);
194         }
195       s->o2i.match.dport = clib_host_to_net_u16 (port);
196       if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
197         {
198 #define _(N, i, n, s)                                                         \
199   case NAT_PROTOCOL_##N:                                                      \
200     ++a->busy_##n##_port_refcounts[port];                                     \
201     a->busy_##n##_ports_per_thread[thread_index]++;                           \
202     a->busy_##n##_ports++;                                                    \
203     break;
204           switch (nat_proto)
205             {
206               foreach_nat_protocol;
207             default:
208               nat_elog_info (sm, "unknown protocol");
209               return 1;
210             }
211 #undef _
212           *outside_addr = a->addr;
213           *outside_port = clib_host_to_net_u16 (port);
214           return 0;
215         }
216       port_offset = snat_random_port (0, port_per_thread - 1);
217       port = port_thread_offset + port_offset;
218       --attempts;
219     }
220   while (attempts > 0);
221   return 1;
222 }
223
224 static int
225 nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto,
226                             u32 thread_index, ip4_address_t s_addr,
227                             u16 port_per_thread, u32 snat_thread_index,
228                             snat_session_t *s, ip4_address_t *outside_addr,
229                             u16 *outside_port)
230 {
231   int i;
232   snat_address_t *a, *ga = 0;
233
234   if (vec_len (sm->addresses) > 0)
235     {
236       int s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses);
237
238       for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
239         {
240           a = sm->addresses + i;
241           if (a->fib_index == rx_fib_index)
242             {
243               return nat_ed_alloc_addr_and_port_with_snat_address (
244                 sm, nat_proto, thread_index, a, port_per_thread,
245                 snat_thread_index, s, outside_addr, outside_port);
246             }
247           else if (a->fib_index == ~0)
248             {
249               ga = a;
250             }
251         }
252
253       for (i = 0; i < s_addr_offset; ++i)
254         {
255           a = sm->addresses + i;
256           if (a->fib_index == rx_fib_index)
257             {
258               return nat_ed_alloc_addr_and_port_with_snat_address (
259                 sm, nat_proto, thread_index, a, port_per_thread,
260                 snat_thread_index, s, outside_addr, outside_port);
261             }
262           else if (a->fib_index == ~0)
263             {
264               ga = a;
265             }
266         }
267
268       if (ga)
269         {
270           return nat_ed_alloc_addr_and_port_with_snat_address (
271             sm, nat_proto, thread_index, a, port_per_thread, snat_thread_index,
272             s, outside_addr, outside_port);
273         }
274     }
275   /* Totally out of translations to use... */
276   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
277   return 1;
278 }
279
280 static_always_inline u32
281 nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr)
282 {
283   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
284   nat_outside_fib_t *outside_fib;
285   fib_prefix_t pfx = {
286     .fp_proto = FIB_PROTOCOL_IP4,
287     .fp_len = 32,
288     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
289     ,
290   };
291   // TODO: multiple vrfs none can resolve addr
292   vec_foreach (outside_fib, sm->outside_fibs)
293     {
294       fei = fib_table_lookup (outside_fib->fib_index, &pfx);
295       if (FIB_NODE_INDEX_INVALID != fei)
296         {
297           if (fib_entry_get_resolving_interface (fei) != ~0)
298             {
299               return outside_fib->fib_index;
300             }
301         }
302     }
303   return ~0;
304 }
305
306 static_always_inline int
307 nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
308                              u16 match_port, nat_protocol_t match_protocol,
309                              u32 match_fib_index, ip4_address_t *daddr,
310                              u16 *dport)
311 {
312   clib_bihash_kv_8_8_t kv, value;
313   init_nat_k (&kv, match_addr, match_port, match_fib_index, match_protocol);
314   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
315     {
316       /* Try address only mapping */
317       init_nat_k (&kv, match_addr, 0, 0, 0);
318       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv,
319                                   &value))
320         return 0;
321     }
322
323   snat_static_mapping_t *m =
324     pool_elt_at_index (sm->static_mappings, value.value);
325   *daddr = m->local_addr;
326   if (dport)
327     {
328       /* Address only mapping doesn't change port */
329       *dport = is_addr_only_static_mapping (m) ? match_port : m->local_port;
330     }
331   return 1;
332 }
333
334 static u32
335 slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
336               ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
337               u16 r_port, u8 proto, u32 rx_fib_index,
338               snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
339               u32 thread_index, f64 now)
340 {
341   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
342   ip4_address_t outside_addr;
343   u16 outside_port;
344   u32 outside_fib_index;
345   u8 is_identity_nat;
346
347   u32 nat_proto = ip_proto_to_nat_proto (proto);
348   snat_session_t *s = NULL;
349   lb_nat_type_t lb = 0;
350   ip4_address_t daddr = r_addr;
351   u16 dport = r_port;
352
353   if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP))
354     {
355       if (PREDICT_FALSE
356           (!tcp_flags_is_init
357            (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
358         {
359           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
360           return NAT_NEXT_DROP;
361         }
362     }
363
364   if (PREDICT_FALSE
365       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
366     {
367       if (!nat_lru_free_one (sm, thread_index, now))
368         {
369           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
370           nat_ipfix_logging_max_sessions (thread_index,
371                                           sm->max_translations_per_thread);
372           nat_elog_notice (sm, "maximum sessions exceeded");
373           return NAT_NEXT_DROP;
374         }
375     }
376
377   outside_fib_index = sm->outside_fib_index;
378
379   switch (vec_len (sm->outside_fibs))
380     {
381     case 0:
382       outside_fib_index = sm->outside_fib_index;
383       break;
384     case 1:
385       outside_fib_index = sm->outside_fibs[0].fib_index;
386       break;
387     default:
388       outside_fib_index = nat_outside_fib_index_lookup (sm, r_addr);
389       break;
390     }
391
392   ip4_address_t sm_addr;
393   u16 sm_port;
394   u32 sm_fib_index;
395   /* First try to match static mapping by local address and port */
396   int is_sm;
397   if (snat_static_mapping_match (vm, sm, l_addr, l_port, rx_fib_index,
398                                  nat_proto, &sm_addr, &sm_port, &sm_fib_index,
399                                  0, 0, 0, &lb, 0, &is_identity_nat, 0))
400     {
401       is_sm = 0;
402     }
403   else
404     {
405       is_sm = 1;
406     }
407
408   if (PREDICT_FALSE (is_sm && is_identity_nat))
409     {
410       *sessionp = NULL;
411       return next;
412     }
413
414   s = nat_ed_session_alloc (sm, thread_index, now, proto);
415   ASSERT (s);
416
417   if (!is_sm)
418     {
419       s->in2out.addr = l_addr;
420       s->in2out.port = l_port;
421       s->nat_proto = nat_proto;
422       s->in2out.fib_index = rx_fib_index;
423       s->out2in.fib_index = outside_fib_index;
424
425       // suggest using local port to allocation function
426       outside_port = l_port;
427
428       // hairpinning?
429       int is_hairpinning = nat44_ed_external_sm_lookup (
430         sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
431       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
432
433       // destination addr/port updated with real values in
434       // nat_ed_alloc_addr_and_port
435       nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
436                             s->out2in.fib_index, proto);
437       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
438       if (NAT_PROTOCOL_ICMP == nat_proto)
439         {
440           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
441         }
442       else
443         {
444           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
445         }
446       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
447
448       if (nat_ed_alloc_addr_and_port (
449             sm, rx_fib_index, nat_proto, thread_index, l_addr,
450             sm->port_per_thread, tsm->snat_thread_index, s, &outside_addr,
451             &outside_port))
452         {
453           nat_elog_notice (sm, "addresses exhausted");
454           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
455           nat_ed_session_delete (sm, s, thread_index, 1);
456           return NAT_NEXT_DROP;
457         }
458       s->out2in.addr = outside_addr;
459       s->out2in.port = outside_port;
460     }
461   else
462     {
463       // static mapping
464       s->out2in.addr = outside_addr = sm_addr;
465       s->out2in.port = outside_port = sm_port;
466       s->in2out.addr = l_addr;
467       s->in2out.port = l_port;
468       s->nat_proto = nat_proto;
469       s->in2out.fib_index = rx_fib_index;
470       s->out2in.fib_index = outside_fib_index;
471       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
472
473       // hairpinning?
474       int is_hairpinning = nat44_ed_external_sm_lookup (
475         sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
476       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
477
478       if (NAT_PROTOCOL_ICMP == nat_proto)
479         {
480           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
481                                 sm_port, s->out2in.fib_index, proto);
482           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
483         }
484       else
485         {
486           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr,
487                                 sm_port, s->out2in.fib_index, proto);
488           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
489         }
490       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
491       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
492       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
493         {
494           nat_elog_notice (sm, "out2in key add failed");
495           goto error;
496         }
497     }
498
499   if (lb)
500     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
501   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
502   s->ext_host_addr = r_addr;
503   s->ext_host_port = r_port;
504
505   nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port,
506                         rx_fib_index, proto);
507   nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
508   nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);
509
510   if (NAT_PROTOCOL_ICMP == nat_proto)
511     {
512       nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
513     }
514   else
515     {
516       nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
517       nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
518     }
519   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
520
521   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
522     {
523       nat_elog_notice (sm, "in2out key add failed");
524       goto error;
525     }
526
527   /* log NAT event */
528   nat_ipfix_logging_nat44_ses_create (thread_index,
529                                       s->in2out.addr.as_u32,
530                                       s->out2in.addr.as_u32,
531                                       s->nat_proto,
532                                       s->in2out.port,
533                                       s->out2in.port, s->in2out.fib_index);
534
535   nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
536                          s->in2out.port, &s->ext_host_nat_addr,
537                          s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
538                          &s->ext_host_addr, s->ext_host_port, s->nat_proto, 0);
539
540   per_vrf_sessions_register_session (s, thread_index);
541
542   *sessionp = s;
543   return next;
544 error:
545   if (s)
546     {
547       if (!is_sm)
548         {
549           snat_free_outside_address_and_port (sm->addresses, thread_index,
550                                               &outside_addr, outside_port,
551                                               nat_proto);
552         }
553       nat_ed_session_delete (sm, s, thread_index, 1);
554     }
555   *sessionp = s = NULL;
556   return NAT_NEXT_DROP;
557 }
558
559 static_always_inline int
560 nat44_ed_not_translate (vlib_main_t *vm, snat_main_t *sm,
561                         vlib_node_runtime_t *node, u32 sw_if_index,
562                         vlib_buffer_t *b, ip4_header_t *ip, u32 proto,
563                         u32 rx_fib_index, u32 thread_index)
564 {
565   clib_bihash_kv_16_8_t kv, value;
566
567   init_ed_k (&kv, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
568              ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
569              sm->outside_fib_index, ip->protocol);
570
571   /* NAT packet aimed at external address if has active sessions */
572   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
573     {
574       /* or is static mappings */
575       ip4_address_t placeholder_addr;
576       u16 placeholder_port;
577       u32 placeholder_fib_index;
578       if (!snat_static_mapping_match (
579             vm, sm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
580             sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
581             &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
582         return 0;
583     }
584   else
585     return 0;
586
587   if (sm->forwarding_enabled)
588     return 1;
589
590   return snat_not_translate_fast (sm, node, sw_if_index, ip, proto,
591                                   rx_fib_index);
592 }
593
594 static_always_inline int
595 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
596                                       u32 thread_index, f64 now,
597                                       vlib_main_t * vm, vlib_buffer_t * b)
598 {
599   clib_bihash_kv_16_8_t kv, value;
600   snat_session_t *s = 0;
601   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
602
603   if (!sm->forwarding_enabled)
604     return 0;
605
606   if (ip->protocol == IP_PROTOCOL_ICMP)
607     {
608       ip4_address_t lookup_saddr, lookup_daddr;
609       u16 lookup_sport, lookup_dport;
610       u8 lookup_protocol;
611       if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
612                                               &lookup_sport, &lookup_daddr,
613                                               &lookup_dport, &lookup_protocol))
614         return 0;
615       init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport,
616                  0, lookup_protocol);
617     }
618   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
619     {
620       init_ed_k (&kv, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
621                  ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port, 0,
622                  ip->protocol);
623     }
624   else
625     {
626       init_ed_k (&kv, ip->src_address, 0, ip->dst_address, 0, 0,
627                  ip->protocol);
628     }
629
630   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
631     {
632       ASSERT (thread_index == ed_value_get_thread_index (&value));
633       s =
634         pool_elt_at_index (tsm->sessions,
635                            ed_value_get_session_index (&value));
636
637       if (is_fwd_bypass_session (s))
638         {
639           if (ip->protocol == IP_PROTOCOL_TCP)
640             {
641               nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index);
642             }
643           /* Accounting */
644           nat44_session_update_counters (s, now,
645                                          vlib_buffer_length_in_chain (vm, b),
646                                          thread_index);
647           /* Per-user LRU list maintenance */
648           nat44_session_update_lru (sm, s, thread_index);
649           return 1;
650         }
651       else
652         return 0;
653     }
654
655   return 0;
656 }
657
658 static_always_inline int
659 nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
660                                        ip4_header_t *ip, u16 src_port,
661                                        u16 dst_port, u32 thread_index,
662                                        u32 rx_sw_if_index, u32 tx_sw_if_index,
663                                        f64 now, int is_multi_worker)
664 {
665   clib_bihash_kv_16_8_t kv, value;
666   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
667   snat_interface_t *i;
668   snat_session_t *s;
669   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
670   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
671
672   /* src NAT check */
673   init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
674              tx_fib_index, ip->protocol);
675   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
676     {
677       ASSERT (thread_index == ed_value_get_thread_index (&value));
678       s =
679         pool_elt_at_index (tsm->sessions,
680                            ed_value_get_session_index (&value));
681       if (nat44_is_ses_closed (s)
682           && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp))
683         {
684           nat_free_session_data (sm, s, thread_index, 0);
685           nat_ed_session_delete (sm, s, thread_index, 1);
686         }
687       return 1;
688     }
689
690   /* dst NAT check */
691   if (is_multi_worker &&
692       PREDICT_TRUE (!pool_is_free_index (
693         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index)))
694     {
695       nat_6t_t lookup;
696       lookup.fib_index = rx_fib_index;
697       lookup.proto = ip->protocol;
698       lookup.daddr.as_u32 = ip->src_address.as_u32;
699       lookup.dport = src_port;
700       lookup.saddr.as_u32 = ip->dst_address.as_u32;
701       lookup.sport = dst_port;
702       s = pool_elt_at_index (
703         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index);
704       if (PREDICT_TRUE (nat_6t_t_eq (&s->i2o.match, &lookup)))
705         {
706           goto skip_dst_nat_lookup;
707         }
708       s = NULL;
709     }
710
711   init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port,
712              rx_fib_index, ip->protocol);
713   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
714     {
715       ASSERT (thread_index == ed_value_get_thread_index (&value));
716       s =
717         pool_elt_at_index (tsm->sessions,
718                            ed_value_get_session_index (&value));
719
720     skip_dst_nat_lookup:
721       if (is_fwd_bypass_session (s))
722         return 0;
723
724       /* hairpinning */
725       pool_foreach (i, sm->output_feature_interfaces)
726        {
727         if ((nat_interface_is_inside (i)) && (rx_sw_if_index == i->sw_if_index))
728            return 0;
729       }
730       return 1;
731     }
732
733   return 0;
734 }
735
736 static inline u32
737 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
738                           icmp46_header_t *icmp, u32 sw_if_index,
739                           u32 rx_fib_index, vlib_node_runtime_t *node,
740                           u32 next, f64 now, u32 thread_index,
741                           nat_protocol_t nat_proto, snat_session_t **s_p,
742                           int is_multi_worker)
743 {
744   vlib_main_t *vm = vlib_get_main ();
745   u16 checksum;
746   int err;
747   snat_session_t *s = NULL;
748   u8 lookup_protocol = ip->protocol;
749   u16 lookup_sport, lookup_dport;
750   ip4_address_t lookup_saddr, lookup_daddr;
751
752   err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
753                                             &lookup_sport, &lookup_daddr,
754                                             &lookup_dport, &lookup_protocol);
755   if (err != 0)
756     {
757       b->error = node->errors[err];
758       return NAT_NEXT_DROP;
759     }
760
761   if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
762     {
763       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
764             sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
765             vnet_buffer (b)->sw_if_index[VLIB_TX], now, is_multi_worker)))
766         {
767           return next;
768         }
769     }
770   else
771     {
772       if (PREDICT_FALSE (nat44_ed_not_translate (vm, sm, node, sw_if_index, b,
773                                                  ip, NAT_PROTOCOL_ICMP,
774                                                  rx_fib_index, thread_index)))
775         {
776           return next;
777         }
778     }
779
780   if (PREDICT_FALSE (icmp_type_is_error_message (
781         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
782     {
783       b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
784       return NAT_NEXT_DROP;
785     }
786
787   next = slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address,
788                        lookup_sport, lookup_dport, ip->protocol, rx_fib_index,
789                        &s, node, next, thread_index, vlib_time_now (vm));
790
791   if (NAT_NEXT_DROP == next)
792     goto out;
793
794   if (PREDICT_TRUE (!ip4_is_fragment (ip)))
795     {
796       ip_csum_t sum = ip_incremental_checksum_buffer (
797         vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b),
798         ntohs (ip->length) - ip4_header_bytes (ip), 0);
799       checksum = ~ip_csum_fold (sum);
800       if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff))
801         {
802           next = NAT_NEXT_DROP;
803           goto out;
804         }
805     }
806
807 out:
808   if (PREDICT_TRUE (next != NAT_NEXT_DROP && s))
809     {
810       /* Accounting */
811       nat44_session_update_counters (
812         s, now, vlib_buffer_length_in_chain (vm, b), thread_index);
813       /* Per-user LRU list maintenance */
814       nat44_session_update_lru (sm, s, thread_index);
815     }
816   *s_p = s;
817   return next;
818 }
819
820 static snat_session_t *
821 nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
822                                         ip4_header_t *ip, u32 rx_fib_index,
823                                         u32 thread_index, f64 now,
824                                         vlib_main_t *vm,
825                                         vlib_node_runtime_t *node)
826 {
827   clib_bihash_kv_8_8_t kv, value;
828   clib_bihash_kv_16_8_t s_kv, s_value;
829   snat_static_mapping_t *m = NULL;
830   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
831   snat_session_t *s = NULL;
832   u32 outside_fib_index = sm->outside_fib_index;
833   int i;
834   ip4_address_t new_src_addr = { 0 };
835   ip4_address_t new_dst_addr = ip->dst_address;
836
837   if (PREDICT_FALSE (
838         nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
839     {
840       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
841       nat_ipfix_logging_max_sessions (thread_index,
842                                       sm->max_translations_per_thread);
843       nat_elog_notice (sm, "maximum sessions exceeded");
844       return 0;
845     }
846
847   switch (vec_len (sm->outside_fibs))
848     {
849     case 0:
850       outside_fib_index = sm->outside_fib_index;
851       break;
852     case 1:
853       outside_fib_index = sm->outside_fibs[0].fib_index;
854       break;
855     default:
856       outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address);
857       break;
858     }
859
860   init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0);
861
862   /* Try to find static mapping first */
863   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
864     {
865       m = pool_elt_at_index (sm->static_mappings, value.value);
866       new_src_addr = m->external_addr;
867     }
868   else
869     {
870       pool_foreach (s, tsm->sessions)
871         {
872           if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
873             {
874               init_ed_k (&s_kv, s->out2in.addr, 0, ip->dst_address, 0,
875                          outside_fib_index, ip->protocol);
876               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
877                 {
878                   new_src_addr = s->out2in.addr;
879                 }
880               break;
881             }
882         }
883
884       if (!new_src_addr.as_u32)
885         {
886           for (i = 0; i < vec_len (sm->addresses); i++)
887             {
888               init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0,
889                          outside_fib_index, ip->protocol);
890               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
891                 {
892                   new_src_addr = sm->addresses[i].addr;
893                 }
894             }
895         }
896     }
897
898   if (!new_src_addr.as_u32)
899     {
900       // could not allocate address for translation ...
901       return 0;
902     }
903
904   s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
905   if (!s)
906     {
907       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
908       nat_elog_warn (sm, "create NAT session failed");
909       return 0;
910     }
911
912   nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
913                         ip->dst_address, 0, rx_fib_index, ip->protocol);
914   nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
915   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
916
917   // hairpinning?
918   int is_hairpinning =
919     nat44_ed_external_sm_lookup (sm, ip->dst_address, 0, NAT_PROTOCOL_OTHER,
920                                  outside_fib_index, &new_dst_addr, NULL);
921   s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
922
923   nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
924   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
925
926   nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
927                         outside_fib_index, ip->protocol);
928   nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
929   nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
930   nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
931
932   s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
933   s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
934   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
935   s->out2in.addr.as_u32 = new_src_addr.as_u32;
936   s->out2in.fib_index = outside_fib_index;
937   s->in2out.addr.as_u32 = ip->src_address.as_u32;
938   s->in2out.fib_index = rx_fib_index;
939   s->in2out.port = s->out2in.port = ip->protocol;
940   if (m)
941     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
942
943   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
944     {
945       nat_elog_notice (sm, "in2out flow hash add failed");
946       nat_ed_session_delete (sm, s, thread_index, 1);
947       return NULL;
948     }
949
950   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1))
951     {
952       nat_elog_notice (sm, "out2in flow hash add failed");
953       nat_ed_session_delete (sm, s, thread_index, 1);
954       return NULL;
955     }
956
957   per_vrf_sessions_register_session (s, thread_index);
958
959   /* Accounting */
960   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
961                                  thread_index);
962   /* Per-user LRU list maintenance */
963   nat44_session_update_lru (sm, s, thread_index);
964
965   return s;
966 }
967
968 static inline uword
969 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
970                                           vlib_node_runtime_t *node,
971                                           vlib_frame_t *frame,
972                                           int is_output_feature,
973                                           int is_multi_worker)
974 {
975   u32 n_left_from, *from;
976   snat_main_t *sm = &snat_main;
977   f64 now = vlib_time_now (vm);
978   u32 thread_index = vm->thread_index;
979   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
980   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
981     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
982
983   from = vlib_frame_vector_args (frame);
984   n_left_from = frame->n_vectors;
985
986   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
987   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
988   vlib_get_buffers (vm, from, b, n_left_from);
989
990   while (n_left_from > 0)
991     {
992       vlib_buffer_t *b0;
993       u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0;
994       nat_protocol_t proto0;
995       ip4_header_t *ip0;
996       snat_session_t *s0 = 0;
997       clib_bihash_kv_16_8_t kv0, value0;
998       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
999       nat_6t_flow_t *f = 0;
1000       nat_6t_t lookup;
1001       int lookup_skipped = 0;
1002
1003       b0 = *b;
1004       b++;
1005
1006       /* Prefetch next iteration. */
1007       if (PREDICT_TRUE (n_left_from >= 2))
1008         {
1009           vlib_buffer_t *p2;
1010
1011           p2 = *b;
1012
1013           vlib_prefetch_buffer_header (p2, LOAD);
1014
1015           CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
1016         }
1017
1018       if (is_output_feature)
1019         {
1020           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1021         }
1022
1023       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1024
1025       ip0 =
1026         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
1027
1028       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1029       rx_fib_index0 =
1030         fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
1031       lookup.fib_index = rx_fib_index0;
1032
1033       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1034         {
1035           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1036           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1037                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1038                                        0);
1039           next[0] = NAT_NEXT_ICMP_ERROR;
1040           goto trace0;
1041         }
1042
1043       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1044
1045       if (is_output_feature)
1046         {
1047           if (PREDICT_FALSE
1048               (nat_not_translate_output_feature_fwd
1049                (sm, ip0, thread_index, now, vm, b0)))
1050             goto trace0;
1051         }
1052
1053       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1054         {
1055           if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1056                 ICMP4_echo_request &&
1057               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1058                 ICMP4_echo_reply &&
1059               !icmp_type_is_error_message (
1060                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
1061             {
1062               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
1063               next[0] = NAT_NEXT_DROP;
1064               goto trace0;
1065             }
1066           int err = nat_get_icmp_session_lookup_values (
1067             b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
1068             &lookup.dport, &lookup.proto);
1069           if (err != 0)
1070             {
1071               b0->error = node->errors[err];
1072               next[0] = NAT_NEXT_DROP;
1073               goto trace0;
1074             }
1075         }
1076       else
1077         {
1078           lookup.proto = ip0->protocol;
1079           lookup.saddr.as_u32 = ip0->src_address.as_u32;
1080           lookup.daddr.as_u32 = ip0->dst_address.as_u32;
1081           lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
1082           lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
1083         }
1084
1085       /* there might be a stashed index in vnet_buffer2 from handoff or
1086        * classify node, see if it can be used */
1087       if (is_multi_worker &&
1088           !pool_is_free_index (tsm->sessions,
1089                                vnet_buffer2 (b0)->nat.cached_session_index))
1090         {
1091           s0 = pool_elt_at_index (tsm->sessions,
1092                                   vnet_buffer2 (b0)->nat.cached_session_index);
1093           if (PREDICT_TRUE (
1094                 nat_6t_t_eq (&s0->i2o.match, &lookup)
1095                 // for some hairpinning cases there are two "i2i" flows instead
1096                 // of i2o and o2i as both hosts are on inside
1097                 || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1098                     nat_6t_t_eq (&s0->o2i.match, &lookup))))
1099             {
1100               /* yes, this is the droid we're looking for */
1101               lookup_skipped = 1;
1102               goto skip_lookup;
1103             }
1104           s0 = NULL;
1105         }
1106
1107       init_ed_k (&kv0, lookup.saddr, lookup.sport, lookup.daddr, lookup.dport,
1108                  lookup.fib_index, lookup.proto);
1109
1110       // lookup flow
1111       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1112         {
1113           // flow does not exist go slow path
1114           next[0] = def_slow;
1115           goto trace0;
1116         }
1117
1118       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1119       s0 =
1120         pool_elt_at_index (tsm->sessions,
1121                            ed_value_get_session_index (&value0));
1122
1123     skip_lookup:
1124
1125       ASSERT (thread_index == s0->thread_index);
1126
1127       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1128         {
1129           // session is closed, go slow path
1130           nat_free_session_data (sm, s0, thread_index, 0);
1131           nat_ed_session_delete (sm, s0, thread_index, 1);
1132           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
1133           goto trace0;
1134         }
1135
1136       if (s0->tcp_closed_timestamp)
1137         {
1138           if (now >= s0->tcp_closed_timestamp)
1139             {
1140               // session is closed, go slow path, freed in slow path
1141               next[0] = def_slow;
1142             }
1143           else
1144             {
1145               // session in transitory timeout, drop
1146               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
1147               next[0] = NAT_NEXT_DROP;
1148             }
1149           goto trace0;
1150         }
1151
1152       // drop if session expired
1153       u64 sess_timeout_time;
1154       sess_timeout_time =
1155         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1156       if (now >= sess_timeout_time)
1157         {
1158           nat_free_session_data (sm, s0, thread_index, 0);
1159           nat_ed_session_delete (sm, s0, thread_index, 1);
1160           // session is closed, go slow path
1161           next[0] = def_slow;
1162           goto trace0;
1163         }
1164
1165       b0->flags |= VNET_BUFFER_F_IS_NATED;
1166
1167       if (nat_6t_t_eq (&s0->i2o.match, &lookup))
1168         {
1169           f = &s0->i2o;
1170         }
1171       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1172                nat_6t_t_eq (&s0->o2i.match, &lookup))
1173         {
1174           f = &s0->o2i;
1175         }
1176       else
1177         {
1178           translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
1179           nat_free_session_data (sm, s0, thread_index, 0);
1180           nat_ed_session_delete (sm, s0, thread_index, 1);
1181           next[0] = NAT_NEXT_DROP;
1182           goto trace0;
1183         }
1184
1185       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1186           (translation_error = nat_6t_flow_buf_translate (
1187              sm, b0, ip0, f, proto0, is_output_feature)))
1188         {
1189           nat_free_session_data (sm, s0, thread_index, 0);
1190           nat_ed_session_delete (sm, s0, thread_index, 1);
1191           next[0] = NAT_NEXT_DROP;
1192           goto trace0;
1193         }
1194
1195       switch (proto0)
1196         {
1197         case NAT_PROTOCOL_TCP:
1198           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
1199                                          thread_index, sw_if_index0, 1);
1200           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1201           break;
1202         case NAT_PROTOCOL_UDP:
1203           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
1204                                          thread_index, sw_if_index0, 1);
1205           break;
1206         case NAT_PROTOCOL_ICMP:
1207           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
1208                                          thread_index, sw_if_index0, 1);
1209           break;
1210         case NAT_PROTOCOL_OTHER:
1211           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
1212                                          thread_index, sw_if_index0, 1);
1213           break;
1214         }
1215
1216       /* Accounting */
1217       nat44_session_update_counters (s0, now,
1218                                      vlib_buffer_length_in_chain (vm, b0),
1219                                      thread_index);
1220       /* Per-user LRU list maintenance */
1221       nat44_session_update_lru (sm, s0, thread_index);
1222
1223     trace0:
1224       if (PREDICT_FALSE
1225           ((node->flags & VLIB_NODE_FLAG_TRACE)
1226            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1227         {
1228           nat_in2out_ed_trace_t *t =
1229             vlib_add_trace (vm, node, b0, sizeof (*t));
1230           t->sw_if_index = sw_if_index0;
1231           t->next_index = next[0];
1232           t->is_slow_path = 0;
1233           t->translation_error = translation_error;
1234           t->lookup_skipped = lookup_skipped;
1235           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1236
1237           if (s0)
1238             {
1239               t->session_index = s0 - tsm->sessions;
1240               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1241               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1242               t->translation_via_i2of = (&s0->i2o == f);
1243             }
1244           else
1245             {
1246               t->session_index = ~0;
1247             }
1248         }
1249
1250       if (next[0] == NAT_NEXT_DROP)
1251         {
1252           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
1253                                          thread_index, sw_if_index0, 1);
1254         }
1255
1256       n_left_from--;
1257       next++;
1258     }
1259
1260   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1261                                frame->n_vectors);
1262   return frame->n_vectors;
1263 }
1264
1265 static inline uword
1266 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
1267                                           vlib_node_runtime_t *node,
1268                                           vlib_frame_t *frame,
1269                                           int is_output_feature,
1270                                           int is_multi_worker)
1271 {
1272   u32 n_left_from, *from;
1273   snat_main_t *sm = &snat_main;
1274   f64 now = vlib_time_now (vm);
1275   u32 thread_index = vm->thread_index;
1276   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1277
1278   from = vlib_frame_vector_args (frame);
1279   n_left_from = frame->n_vectors;
1280
1281   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1282   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1283   vlib_get_buffers (vm, from, b, n_left_from);
1284
1285   while (n_left_from > 0)
1286     {
1287       vlib_buffer_t *b0;
1288       u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1289       nat_protocol_t proto0;
1290       ip4_header_t *ip0;
1291       udp_header_t *udp0;
1292       icmp46_header_t *icmp0;
1293       snat_session_t *s0 = 0;
1294       clib_bihash_kv_16_8_t kv0, value0;
1295       int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1296
1297       b0 = *b;
1298
1299       if (is_output_feature)
1300         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1301
1302       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1303
1304       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1305                               iph_offset0);
1306
1307       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1308       rx_fib_index0 =
1309         fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
1310
1311       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1312         {
1313           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1314           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1315                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1316                                        0);
1317           next[0] = NAT_NEXT_ICMP_ERROR;
1318           goto trace0;
1319         }
1320
1321       udp0 = ip4_next_header (ip0);
1322       icmp0 = (icmp46_header_t *) udp0;
1323       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1324
1325       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
1326         {
1327           s0 = nat44_ed_in2out_slowpath_unknown_proto (
1328             sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
1329           if (!s0)
1330             next[0] = NAT_NEXT_DROP;
1331
1332           if (NAT_NEXT_DROP != next[0] && s0 &&
1333               NAT_ED_TRNSL_ERR_SUCCESS !=
1334                 (translation_error = nat_6t_flow_buf_translate (
1335                    sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1336             {
1337               goto trace0;
1338             }
1339
1340           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
1341                                          thread_index, sw_if_index0, 1);
1342           goto trace0;
1343         }
1344
1345       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1346         {
1347           next[0] = icmp_in2out_ed_slow_path (
1348             sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next[0],
1349             now, thread_index, proto0, &s0, is_multi_worker);
1350           if (NAT_NEXT_DROP != next[0] && s0 &&
1351               NAT_ED_TRNSL_ERR_SUCCESS !=
1352                 (translation_error = nat_6t_flow_buf_translate (
1353                    sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1354             {
1355               goto trace0;
1356             }
1357
1358           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.icmp,
1359                                          thread_index, sw_if_index0, 1);
1360           goto trace0;
1361         }
1362
1363       init_ed_k (&kv0, ip0->src_address,
1364                  vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
1365                  vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
1366                  ip0->protocol);
1367       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1368         {
1369           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1370           s0 =
1371             pool_elt_at_index (tsm->sessions,
1372                                ed_value_get_session_index (&value0));
1373
1374           if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
1375             {
1376               nat_free_session_data (sm, s0, thread_index, 0);
1377               nat_ed_session_delete (sm, s0, thread_index, 1);
1378               s0 = NULL;
1379             }
1380         }
1381
1382       if (!s0)
1383         {
1384           if (is_output_feature)
1385             {
1386               if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
1387                     sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1388                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1389                     sw_if_index0, vnet_buffer (b0)->sw_if_index[VLIB_TX], now,
1390                     is_multi_worker)))
1391                 goto trace0;
1392
1393               /*
1394                * Send DHCP packets to the ipv4 stack, or we won't
1395                * be able to use dhcp client on the outside interface
1396                */
1397               if (PREDICT_FALSE
1398                   (proto0 == NAT_PROTOCOL_UDP
1399                    && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1400                        clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server))
1401                    && ip0->dst_address.as_u32 == 0xffffffff))
1402                 goto trace0;
1403             }
1404           else
1405             {
1406               if (PREDICT_FALSE (nat44_ed_not_translate (
1407                     vm, sm, node, sw_if_index0, b0, ip0, proto0, rx_fib_index0,
1408                     thread_index)))
1409                 goto trace0;
1410             }
1411
1412           next[0] = slow_path_ed (
1413             vm, sm, b0, ip0->src_address, ip0->dst_address,
1414             vnet_buffer (b0)->ip.reass.l4_src_port,
1415             vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->protocol,
1416             rx_fib_index0, &s0, node, next[0], thread_index, now);
1417
1418           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1419             goto trace0;
1420
1421           if (PREDICT_FALSE (!s0))
1422             goto trace0;
1423
1424         }
1425
1426       b0->flags |= VNET_BUFFER_F_IS_NATED;
1427
1428       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1429           (translation_error = nat_6t_flow_buf_translate (
1430              sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1431         {
1432           nat_free_session_data (sm, s0, thread_index, 0);
1433           nat_ed_session_delete (sm, s0, thread_index, 1);
1434           s0 = NULL;
1435           goto trace0;
1436         }
1437
1438       if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
1439         {
1440           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
1441                                          thread_index, sw_if_index0, 1);
1442           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1443         }
1444       else
1445         {
1446           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
1447                                          thread_index, sw_if_index0, 1);
1448         }
1449
1450       /* Accounting */
1451       nat44_session_update_counters (s0, now,
1452                                      vlib_buffer_length_in_chain
1453                                      (vm, b0), thread_index);
1454       /* Per-user LRU list maintenance */
1455       nat44_session_update_lru (sm, s0, thread_index);
1456
1457     trace0:
1458       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1459                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1460         {
1461           nat_in2out_ed_trace_t *t =
1462             vlib_add_trace (vm, node, b0, sizeof (*t));
1463           t->sw_if_index = sw_if_index0;
1464           t->next_index = next[0];
1465           t->is_slow_path = 1;
1466           t->translation_error = translation_error;
1467           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1468
1469           if (s0)
1470             {
1471               t->session_index = s0 - tsm->sessions;
1472               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1473               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1474               t->translation_via_i2of = 1;
1475             }
1476
1477           else
1478             {
1479               t->session_index = ~0;
1480             }
1481         }
1482
1483       if (next[0] == NAT_NEXT_DROP)
1484         {
1485           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
1486                                          thread_index, sw_if_index0, 1);
1487         }
1488
1489       n_left_from--;
1490       next++;
1491       b++;
1492     }
1493
1494   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1495                                frame->n_vectors);
1496
1497   return frame->n_vectors;
1498 }
1499
1500 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1501                                      vlib_node_runtime_t * node,
1502                                      vlib_frame_t * frame)
1503 {
1504   if (snat_main.num_workers > 1)
1505     {
1506       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1);
1507     }
1508   else
1509     {
1510       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0);
1511     }
1512 }
1513
1514 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1515   .name = "nat44-ed-in2out",
1516   .vector_size = sizeof (u32),
1517   .sibling_of = "nat-default",
1518   .format_trace = format_nat_in2out_ed_trace,
1519   .type = VLIB_NODE_TYPE_INTERNAL,
1520   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1521   .error_strings = nat_in2out_ed_error_strings,
1522   .runtime_data_bytes = sizeof (snat_runtime_t),
1523 };
1524
1525 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1526                                             vlib_node_runtime_t * node,
1527                                             vlib_frame_t * frame)
1528 {
1529   if (snat_main.num_workers > 1)
1530     {
1531       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1);
1532     }
1533   else
1534     {
1535       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0);
1536     }
1537 }
1538
1539 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1540   .name = "nat44-ed-in2out-output",
1541   .vector_size = sizeof (u32),
1542   .sibling_of = "nat-default",
1543   .format_trace = format_nat_in2out_ed_trace,
1544   .type = VLIB_NODE_TYPE_INTERNAL,
1545   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1546   .error_strings = nat_in2out_ed_error_strings,
1547   .runtime_data_bytes = sizeof (snat_runtime_t),
1548 };
1549
1550 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1551                                               vlib_node_runtime_t *
1552                                               node, vlib_frame_t * frame)
1553 {
1554   if (snat_main.num_workers > 1)
1555     {
1556       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 1);
1557     }
1558   else
1559     {
1560       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 0);
1561     }
1562 }
1563
1564 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1565   .name = "nat44-ed-in2out-slowpath",
1566   .vector_size = sizeof (u32),
1567   .sibling_of = "nat-default",
1568   .format_trace = format_nat_in2out_ed_trace,
1569   .type = VLIB_NODE_TYPE_INTERNAL,
1570   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1571   .error_strings = nat_in2out_ed_error_strings,
1572   .runtime_data_bytes = sizeof (snat_runtime_t),
1573 };
1574
1575 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1576                                                      vlib_node_runtime_t
1577                                                      * node,
1578                                                      vlib_frame_t * frame)
1579 {
1580   if (snat_main.num_workers > 1)
1581     {
1582       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 1);
1583     }
1584   else
1585     {
1586       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 0);
1587     }
1588 }
1589
1590 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1591   .name = "nat44-ed-in2out-output-slowpath",
1592   .vector_size = sizeof (u32),
1593   .sibling_of = "nat-default",
1594   .format_trace = format_nat_in2out_ed_trace,
1595   .type = VLIB_NODE_TYPE_INTERNAL,
1596   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1597   .error_strings = nat_in2out_ed_error_strings,
1598   .runtime_data_bytes = sizeof (snat_runtime_t),
1599 };
1600
1601 static u8 *
1602 format_nat_pre_trace (u8 * s, va_list * args)
1603 {
1604   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1605   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1606   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1607   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1608                  t->arc_next_index);
1609 }
1610
1611 VLIB_NODE_FN (nat_pre_in2out_node)
1612   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1613 {
1614   return nat_pre_node_fn_inline (vm, node, frame,
1615                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1616 }
1617
1618 VLIB_NODE_FN (nat_pre_in2out_output_node)
1619   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1620 {
1621   return nat_pre_node_fn_inline (vm, node, frame,
1622                                  NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
1623 }
1624
1625 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1626   .name = "nat-pre-in2out",
1627   .vector_size = sizeof (u32),
1628   .sibling_of = "nat-default",
1629   .format_trace = format_nat_pre_trace,
1630   .type = VLIB_NODE_TYPE_INTERNAL,
1631   .n_errors = 0,
1632 };
1633
1634 VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
1635   .name = "nat-pre-in2out-output",
1636   .vector_size = sizeof (u32),
1637   .sibling_of = "nat-default",
1638   .format_trace = format_nat_pre_trace,
1639   .type = VLIB_NODE_TYPE_INTERNAL,
1640   .n_errors = 0,
1641 };
1642
1643 /*
1644  * fd.io coding-style-patch-verification: ON
1645  *
1646  * Local Variables:
1647  * eval: (c-set-style "gnu")
1648  * End:
1649  */