79c03bd87e529917687ef711d4dac0e1a339537f
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_in2out.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27
28 #include <nat/lib/nat_syslog.h>
29 #include <nat/lib/nat_inlines.h>
30 #include <nat/lib/ipfix_logging.h>
31
32 #include <nat/nat44-ed/nat44_ed.h>
33 #include <nat/nat44-ed/nat44_ed_inlines.h>
34
35 /* number of attempts to get a port for ED overloading algorithm, if rolling
36  * a dice this many times doesn't produce a free port, it's treated
37  * as if there were no free ports available to conserve resources */
38 #define ED_PORT_ALLOC_ATTEMPTS (10)
39
40 static char *nat_in2out_ed_error_strings[] = {
41 #define _(sym,string) string,
42   foreach_nat_in2out_ed_error
43 #undef _
44 };
45
46 typedef struct
47 {
48   u32 sw_if_index;
49   u32 next_index;
50   u32 session_index;
51   nat_translation_error_e translation_error;
52   nat_6t_flow_t i2of;
53   nat_6t_flow_t o2if;
54   clib_bihash_kv_16_8_t search_key;
55   u8 is_slow_path;
56   u8 translation_via_i2of;
57   u8 lookup_skipped;
58 } nat_in2out_ed_trace_t;
59
60 static u8 *
61 format_nat_in2out_ed_trace (u8 * s, va_list * args)
62 {
63   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
64   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
65   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
66   char *tag;
67
68   tag =
69     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
70     "NAT44_IN2OUT_ED_FAST_PATH";
71
72   s = format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
73               t->next_index);
74   if (~0 != t->session_index)
75     {
76       s = format (s, ", session %d, translation result '%U' via %s",
77                   t->session_index, format_nat_ed_translation_error,
78                   t->translation_error,
79                   t->translation_via_i2of ? "i2of" : "o2if");
80       s = format (s, "\n  i2of %U", format_nat_6t_flow, &t->i2of);
81       s = format (s, "\n  o2if %U", format_nat_6t_flow, &t->o2if);
82     }
83   if (!t->is_slow_path)
84     {
85       if (t->lookup_skipped)
86         {
87           s = format (s, "\n lookup skipped - cached session index used");
88         }
89       else
90         {
91           s = format (s, "\n  search key %U", format_ed_session_kvp,
92                       &t->search_key);
93         }
94     }
95
96   return s;
97 }
98
99 /**
100  * @brief Check if packet should be translated
101  *
102  * Packets aimed at outside interface and external address with active session
103  * should be translated.
104  *
105  * @param sm            NAT main
106  * @param rt            NAT runtime data
107  * @param sw_if_index0  index of the inside interface
108  * @param ip0           IPv4 header
109  * @param proto0        NAT protocol
110  * @param rx_fib_index0 RX FIB index
111  *
112  * @returns 0 if packet should be translated otherwise 1
113  */
114 static inline int
115 snat_not_translate_fast (snat_main_t *sm, vlib_node_runtime_t *node,
116                          u32 sw_if_index0, ip4_header_t *ip0, u32 proto0,
117                          u32 rx_fib_index0)
118 {
119   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
120   nat_outside_fib_t *outside_fib;
121   fib_prefix_t pfx = {
122     .fp_proto = FIB_PROTOCOL_IP4,
123     .fp_len = 32,
124     .fp_addr = {
125                 .ip4.as_u32 = ip0->dst_address.as_u32,
126                 }
127     ,
128   };
129
130   /* Don't NAT packet aimed at the intfc address */
131   if (PREDICT_FALSE (
132         is_interface_addr (sm, node, sw_if_index0, ip0->dst_address.as_u32)))
133     return 1;
134
135   fei = fib_table_lookup (rx_fib_index0, &pfx);
136   if (FIB_NODE_INDEX_INVALID != fei)
137     {
138       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
139       if (sw_if_index == ~0)
140         {
141           vec_foreach (outside_fib, sm->outside_fibs)
142             {
143               fei = fib_table_lookup (outside_fib->fib_index, &pfx);
144               if (FIB_NODE_INDEX_INVALID != fei)
145                 {
146                   sw_if_index = fib_entry_get_resolving_interface (fei);
147                   if (sw_if_index != ~0)
148                     break;
149                 }
150             }
151         }
152       if (sw_if_index == ~0)
153         return 1;
154
155       snat_interface_t *i;
156       pool_foreach (i, sm->interfaces)
157         {
158           /* NAT packet aimed at outside interface */
159           if ((nat_interface_is_outside (i)) &&
160               (sw_if_index == i->sw_if_index))
161             return 0;
162         }
163     }
164
165   return 1;
166 }
167
168 static int
169 nat_ed_alloc_addr_and_port_with_snat_address (
170   snat_main_t *sm, u32 nat_proto, u32 thread_index, snat_address_t *a,
171   u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
172   ip4_address_t *outside_addr, u16 *outside_port)
173 {
174   const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
175
176   s->o2i.match.daddr = a->addr;
177   /* first try port suggested by caller */
178   u16 port = clib_net_to_host_u16 (*outside_port);
179   u16 port_offset = port - port_thread_offset;
180   if (port <= port_thread_offset ||
181       port > port_thread_offset + port_per_thread)
182     {
183       /* need to pick a different port, suggested port doesn't fit in
184        * this thread's port range */
185       port_offset = snat_random_port (0, port_per_thread - 1);
186       port = port_thread_offset + port_offset;
187     }
188   u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
189   do
190     {
191       if (NAT_PROTOCOL_ICMP == nat_proto)
192         {
193           s->o2i.match.sport = clib_host_to_net_u16 (port);
194         }
195       s->o2i.match.dport = clib_host_to_net_u16 (port);
196       if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
197         {
198 #define _(N, i, n, s)                                                         \
199   case NAT_PROTOCOL_##N:                                                      \
200     ++a->busy_##n##_port_refcounts[port];                                     \
201     a->busy_##n##_ports_per_thread[thread_index]++;                           \
202     a->busy_##n##_ports++;                                                    \
203     break;
204           switch (nat_proto)
205             {
206               foreach_nat_protocol;
207             default:
208               nat_elog_info (sm, "unknown protocol");
209               return 1;
210             }
211 #undef _
212           *outside_addr = a->addr;
213           *outside_port = clib_host_to_net_u16 (port);
214           return 0;
215         }
216       port_offset = snat_random_port (0, port_per_thread - 1);
217       port = port_thread_offset + port_offset;
218       --attempts;
219     }
220   while (attempts > 0);
221   return 1;
222 }
223
224 static int
225 nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto,
226                             u32 thread_index, ip4_address_t s_addr,
227                             u16 port_per_thread, u32 snat_thread_index,
228                             snat_session_t *s, ip4_address_t *outside_addr,
229                             u16 *outside_port)
230 {
231   int i;
232   snat_address_t *a, *ga = 0;
233
234   if (vec_len (sm->addresses) > 0)
235     {
236       int s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses);
237
238       for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
239         {
240           a = sm->addresses + i;
241           if (a->fib_index == rx_fib_index)
242             {
243               return nat_ed_alloc_addr_and_port_with_snat_address (
244                 sm, nat_proto, thread_index, a, port_per_thread,
245                 snat_thread_index, s, outside_addr, outside_port);
246             }
247           else if (a->fib_index == ~0)
248             {
249               ga = a;
250             }
251         }
252
253       for (i = 0; i < s_addr_offset; ++i)
254         {
255           a = sm->addresses + i;
256           if (a->fib_index == rx_fib_index)
257             {
258               return nat_ed_alloc_addr_and_port_with_snat_address (
259                 sm, nat_proto, thread_index, a, port_per_thread,
260                 snat_thread_index, s, outside_addr, outside_port);
261             }
262           else if (a->fib_index == ~0)
263             {
264               ga = a;
265             }
266         }
267
268       if (ga)
269         {
270           return nat_ed_alloc_addr_and_port_with_snat_address (
271             sm, nat_proto, thread_index, a, port_per_thread, snat_thread_index,
272             s, outside_addr, outside_port);
273         }
274     }
275   /* Totally out of translations to use... */
276   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
277   return 1;
278 }
279
280 static_always_inline u32
281 nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr)
282 {
283   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
284   nat_outside_fib_t *outside_fib;
285   fib_prefix_t pfx = {
286     .fp_proto = FIB_PROTOCOL_IP4,
287     .fp_len = 32,
288     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
289     ,
290   };
291   // TODO: multiple vrfs none can resolve addr
292   vec_foreach (outside_fib, sm->outside_fibs)
293     {
294       fei = fib_table_lookup (outside_fib->fib_index, &pfx);
295       if (FIB_NODE_INDEX_INVALID != fei)
296         {
297           if (fib_entry_get_resolving_interface (fei) != ~0)
298             {
299               return outside_fib->fib_index;
300             }
301         }
302     }
303   return ~0;
304 }
305
306 static_always_inline int
307 nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
308                              u16 match_port, nat_protocol_t match_protocol,
309                              u32 match_fib_index, ip4_address_t *daddr,
310                              u16 *dport)
311 {
312   clib_bihash_kv_8_8_t kv, value;
313   init_nat_k (&kv, match_addr, match_port, match_fib_index, match_protocol);
314   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
315     {
316       /* Try address only mapping */
317       init_nat_k (&kv, match_addr, 0, 0, 0);
318       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv,
319                                   &value))
320         return 0;
321     }
322
323   snat_static_mapping_t *m =
324     pool_elt_at_index (sm->static_mappings, value.value);
325   *daddr = m->local_addr;
326   if (dport)
327     {
328       /* Address only mapping doesn't change port */
329       *dport = is_sm_addr_only (m->flags) ? match_port : m->local_port;
330     }
331   return 1;
332 }
333
334 static u32
335 slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
336               ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
337               u16 r_port, u8 proto, u32 rx_fib_index,
338               snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
339               u32 thread_index, f64 now)
340 {
341   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
342   ip4_address_t outside_addr;
343   u16 outside_port;
344   u32 outside_fib_index;
345   u8 is_identity_nat = 0;
346
347   u32 nat_proto = ip_proto_to_nat_proto (proto);
348   snat_session_t *s = NULL;
349   lb_nat_type_t lb = 0;
350   ip4_address_t daddr = r_addr;
351   u16 dport = r_port;
352
353   if (PREDICT_FALSE
354       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
355     {
356       if (!nat_lru_free_one (sm, thread_index, now))
357         {
358           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
359           nat_ipfix_logging_max_sessions (thread_index,
360                                           sm->max_translations_per_thread);
361           nat_elog_notice (sm, "maximum sessions exceeded");
362           return NAT_NEXT_DROP;
363         }
364     }
365
366   outside_fib_index = sm->outside_fib_index;
367
368   switch (vec_len (sm->outside_fibs))
369     {
370     case 0:
371       outside_fib_index = sm->outside_fib_index;
372       break;
373     case 1:
374       outside_fib_index = sm->outside_fibs[0].fib_index;
375       break;
376     default:
377       outside_fib_index = nat_outside_fib_index_lookup (sm, r_addr);
378       break;
379     }
380
381   ip4_address_t sm_addr;
382   u16 sm_port;
383   u32 sm_fib_index;
384   /* First try to match static mapping by local address and port */
385   int is_sm;
386   if (snat_static_mapping_match (vm, sm, l_addr, l_port, rx_fib_index,
387                                  nat_proto, &sm_addr, &sm_port, &sm_fib_index,
388                                  0, 0, 0, &lb, 0, &is_identity_nat, 0))
389     {
390       is_sm = 0;
391     }
392   else
393     {
394       if (PREDICT_FALSE (is_identity_nat))
395         {
396           *sessionp = NULL;
397           return next;
398         }
399       is_sm = 1;
400     }
401
402   if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP))
403     {
404       if (PREDICT_FALSE (!tcp_flags_is_init (
405             vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
406         {
407           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
408           return NAT_NEXT_DROP;
409         }
410     }
411
412   s = nat_ed_session_alloc (sm, thread_index, now, proto);
413   ASSERT (s);
414
415   if (!is_sm)
416     {
417       s->in2out.addr = l_addr;
418       s->in2out.port = l_port;
419       s->nat_proto = nat_proto;
420       s->in2out.fib_index = rx_fib_index;
421       s->out2in.fib_index = outside_fib_index;
422
423       // suggest using local port to allocation function
424       outside_port = l_port;
425
426       // hairpinning?
427       int is_hairpinning = nat44_ed_external_sm_lookup (
428         sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
429       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
430
431       // destination addr/port updated with real values in
432       // nat_ed_alloc_addr_and_port
433       nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
434                             s->out2in.fib_index, proto);
435       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
436       if (NAT_PROTOCOL_ICMP == nat_proto)
437         {
438           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
439         }
440       else
441         {
442           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
443         }
444       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
445
446       if (nat_ed_alloc_addr_and_port (
447             sm, rx_fib_index, nat_proto, thread_index, l_addr,
448             sm->port_per_thread, tsm->snat_thread_index, s, &outside_addr,
449             &outside_port))
450         {
451           nat_elog_notice (sm, "addresses exhausted");
452           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
453           nat_ed_session_delete (sm, s, thread_index, 1);
454           return NAT_NEXT_DROP;
455         }
456       s->out2in.addr = outside_addr;
457       s->out2in.port = outside_port;
458     }
459   else
460     {
461       // static mapping
462       s->out2in.addr = outside_addr = sm_addr;
463       s->out2in.port = outside_port = sm_port;
464       s->in2out.addr = l_addr;
465       s->in2out.port = l_port;
466       s->nat_proto = nat_proto;
467       s->in2out.fib_index = rx_fib_index;
468       s->out2in.fib_index = outside_fib_index;
469       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
470
471       // hairpinning?
472       int is_hairpinning = nat44_ed_external_sm_lookup (
473         sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
474       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
475
476       if (NAT_PROTOCOL_ICMP == nat_proto)
477         {
478           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
479                                 sm_port, s->out2in.fib_index, proto);
480           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
481         }
482       else
483         {
484           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr,
485                                 sm_port, s->out2in.fib_index, proto);
486           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
487         }
488       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
489       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
490       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
491         {
492           nat_elog_notice (sm, "out2in key add failed");
493           goto error;
494         }
495     }
496
497   if (lb)
498     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
499   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
500   s->ext_host_addr = r_addr;
501   s->ext_host_port = r_port;
502
503   nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port,
504                         rx_fib_index, proto);
505   nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
506   nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);
507
508   if (NAT_PROTOCOL_ICMP == nat_proto)
509     {
510       nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
511     }
512   else
513     {
514       nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
515       nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
516     }
517   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
518
519   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
520     {
521       nat_elog_notice (sm, "in2out key add failed");
522       goto error;
523     }
524
525   /* log NAT event */
526   nat_ipfix_logging_nat44_ses_create (thread_index,
527                                       s->in2out.addr.as_u32,
528                                       s->out2in.addr.as_u32,
529                                       s->nat_proto,
530                                       s->in2out.port,
531                                       s->out2in.port, s->in2out.fib_index);
532
533   nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
534                          s->in2out.port, &s->ext_host_nat_addr,
535                          s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
536                          &s->ext_host_addr, s->ext_host_port, s->nat_proto, 0);
537
538   per_vrf_sessions_register_session (s, thread_index);
539
540   *sessionp = s;
541   return next;
542 error:
543   if (s)
544     {
545       if (!is_sm)
546         {
547           snat_free_outside_address_and_port (sm->addresses, thread_index,
548                                               &outside_addr, outside_port,
549                                               nat_proto);
550         }
551       nat_ed_session_delete (sm, s, thread_index, 1);
552     }
553   *sessionp = s = NULL;
554   return NAT_NEXT_DROP;
555 }
556
557 static_always_inline int
558 nat44_ed_not_translate (vlib_main_t *vm, snat_main_t *sm,
559                         vlib_node_runtime_t *node, u32 sw_if_index,
560                         vlib_buffer_t *b, ip4_header_t *ip, u32 proto,
561                         u32 rx_fib_index, u32 thread_index)
562 {
563   clib_bihash_kv_16_8_t kv, value;
564
565   init_ed_k (&kv, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
566              ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
567              sm->outside_fib_index, ip->protocol);
568
569   /* NAT packet aimed at external address if has active sessions */
570   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
571     {
572       /* or is static mappings */
573       ip4_address_t placeholder_addr;
574       u16 placeholder_port;
575       u32 placeholder_fib_index;
576       if (!snat_static_mapping_match (
577             vm, sm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
578             sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
579             &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
580         return 0;
581     }
582   else
583     return 0;
584
585   if (sm->forwarding_enabled)
586     return 1;
587
588   return snat_not_translate_fast (sm, node, sw_if_index, ip, proto,
589                                   rx_fib_index);
590 }
591
592 static_always_inline int
593 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
594                                       u32 thread_index, f64 now,
595                                       vlib_main_t * vm, vlib_buffer_t * b)
596 {
597   clib_bihash_kv_16_8_t kv, value;
598   snat_session_t *s = 0;
599   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
600
601   if (!sm->forwarding_enabled)
602     return 0;
603
604   if (ip->protocol == IP_PROTOCOL_ICMP)
605     {
606       ip4_address_t lookup_saddr, lookup_daddr;
607       u16 lookup_sport, lookup_dport;
608       u8 lookup_protocol;
609       if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
610                                               &lookup_sport, &lookup_daddr,
611                                               &lookup_dport, &lookup_protocol))
612         return 0;
613       init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport,
614                  0, lookup_protocol);
615     }
616   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
617     {
618       init_ed_k (&kv, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
619                  ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port, 0,
620                  ip->protocol);
621     }
622   else
623     {
624       init_ed_k (&kv, ip->src_address, 0, ip->dst_address, 0, 0,
625                  ip->protocol);
626     }
627
628   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
629     {
630       ASSERT (thread_index == ed_value_get_thread_index (&value));
631       s =
632         pool_elt_at_index (tsm->sessions,
633                            ed_value_get_session_index (&value));
634
635       if (is_fwd_bypass_session (s))
636         {
637           if (ip->protocol == IP_PROTOCOL_TCP)
638             {
639               nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index);
640             }
641           /* Accounting */
642           nat44_session_update_counters (s, now,
643                                          vlib_buffer_length_in_chain (vm, b),
644                                          thread_index);
645           /* Per-user LRU list maintenance */
646           nat44_session_update_lru (sm, s, thread_index);
647           return 1;
648         }
649       else
650         return 0;
651     }
652
653   return 0;
654 }
655
656 static_always_inline int
657 nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
658                                        ip4_header_t *ip, u16 src_port,
659                                        u16 dst_port, u32 thread_index,
660                                        u32 rx_sw_if_index, u32 tx_sw_if_index,
661                                        f64 now, int is_multi_worker)
662 {
663   clib_bihash_kv_16_8_t kv, value;
664   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
665   snat_interface_t *i;
666   snat_session_t *s;
667   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
668   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
669
670   /* src NAT check */
671   init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
672              tx_fib_index, ip->protocol);
673   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
674     {
675       ASSERT (thread_index == ed_value_get_thread_index (&value));
676       s =
677         pool_elt_at_index (tsm->sessions,
678                            ed_value_get_session_index (&value));
679       if (nat44_is_ses_closed (s)
680           && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp))
681         {
682           nat_free_session_data (sm, s, thread_index, 0);
683           nat_ed_session_delete (sm, s, thread_index, 1);
684         }
685       return 1;
686     }
687
688   /* dst NAT check */
689   if (is_multi_worker &&
690       PREDICT_TRUE (!pool_is_free_index (
691         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index)))
692     {
693       nat_6t_t lookup;
694       lookup.fib_index = rx_fib_index;
695       lookup.proto = ip->protocol;
696       lookup.daddr.as_u32 = ip->src_address.as_u32;
697       lookup.dport = src_port;
698       lookup.saddr.as_u32 = ip->dst_address.as_u32;
699       lookup.sport = dst_port;
700       s = pool_elt_at_index (
701         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index);
702       if (PREDICT_TRUE (nat_6t_t_eq (&s->i2o.match, &lookup)))
703         {
704           goto skip_dst_nat_lookup;
705         }
706       s = NULL;
707     }
708
709   init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port,
710              rx_fib_index, ip->protocol);
711   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
712     {
713       ASSERT (thread_index == ed_value_get_thread_index (&value));
714       s =
715         pool_elt_at_index (tsm->sessions,
716                            ed_value_get_session_index (&value));
717
718     skip_dst_nat_lookup:
719       if (is_fwd_bypass_session (s))
720         return 0;
721
722       /* hairpinning */
723       pool_foreach (i, sm->output_feature_interfaces)
724        {
725         if ((nat_interface_is_inside (i)) && (rx_sw_if_index == i->sw_if_index))
726            return 0;
727       }
728       return 1;
729     }
730
731   return 0;
732 }
733
734 static inline u32
735 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
736                           icmp46_header_t *icmp, u32 sw_if_index,
737                           u32 rx_fib_index, vlib_node_runtime_t *node,
738                           u32 next, f64 now, u32 thread_index,
739                           nat_protocol_t nat_proto, snat_session_t **s_p,
740                           int is_multi_worker)
741 {
742   vlib_main_t *vm = vlib_get_main ();
743   u16 checksum;
744   int err;
745   snat_session_t *s = NULL;
746   u8 lookup_protocol = ip->protocol;
747   u16 lookup_sport, lookup_dport;
748   ip4_address_t lookup_saddr, lookup_daddr;
749
750   err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
751                                             &lookup_sport, &lookup_daddr,
752                                             &lookup_dport, &lookup_protocol);
753   if (err != 0)
754     {
755       b->error = node->errors[err];
756       return NAT_NEXT_DROP;
757     }
758
759   if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
760     {
761       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
762             sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
763             vnet_buffer (b)->sw_if_index[VLIB_TX], now, is_multi_worker)))
764         {
765           return next;
766         }
767     }
768   else
769     {
770       if (PREDICT_FALSE (nat44_ed_not_translate (vm, sm, node, sw_if_index, b,
771                                                  ip, NAT_PROTOCOL_ICMP,
772                                                  rx_fib_index, thread_index)))
773         {
774           return next;
775         }
776     }
777
778   if (PREDICT_FALSE (icmp_type_is_error_message (
779         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
780     {
781       b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
782       return NAT_NEXT_DROP;
783     }
784
785   next = slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address,
786                        lookup_sport, lookup_dport, ip->protocol, rx_fib_index,
787                        &s, node, next, thread_index, vlib_time_now (vm));
788
789   if (NAT_NEXT_DROP == next)
790     goto out;
791
792   if (PREDICT_TRUE (!ip4_is_fragment (ip)))
793     {
794       ip_csum_t sum = ip_incremental_checksum_buffer (
795         vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b),
796         ntohs (ip->length) - ip4_header_bytes (ip), 0);
797       checksum = ~ip_csum_fold (sum);
798       if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff))
799         {
800           next = NAT_NEXT_DROP;
801           goto out;
802         }
803     }
804
805 out:
806   if (PREDICT_TRUE (next != NAT_NEXT_DROP && s))
807     {
808       /* Accounting */
809       nat44_session_update_counters (
810         s, now, vlib_buffer_length_in_chain (vm, b), thread_index);
811       /* Per-user LRU list maintenance */
812       nat44_session_update_lru (sm, s, thread_index);
813     }
814   *s_p = s;
815   return next;
816 }
817
818 static snat_session_t *
819 nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
820                                         ip4_header_t *ip, u32 rx_fib_index,
821                                         u32 thread_index, f64 now,
822                                         vlib_main_t *vm,
823                                         vlib_node_runtime_t *node)
824 {
825   clib_bihash_kv_8_8_t kv, value;
826   clib_bihash_kv_16_8_t s_kv, s_value;
827   snat_static_mapping_t *m = NULL;
828   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
829   snat_session_t *s = NULL;
830   u32 outside_fib_index = sm->outside_fib_index;
831   int i;
832   ip4_address_t new_src_addr = { 0 };
833   ip4_address_t new_dst_addr = ip->dst_address;
834
835   if (PREDICT_FALSE (
836         nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
837     {
838       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
839       nat_ipfix_logging_max_sessions (thread_index,
840                                       sm->max_translations_per_thread);
841       nat_elog_notice (sm, "maximum sessions exceeded");
842       return 0;
843     }
844
845   switch (vec_len (sm->outside_fibs))
846     {
847     case 0:
848       outside_fib_index = sm->outside_fib_index;
849       break;
850     case 1:
851       outside_fib_index = sm->outside_fibs[0].fib_index;
852       break;
853     default:
854       outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address);
855       break;
856     }
857
858   init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0);
859
860   /* Try to find static mapping first */
861   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
862     {
863       m = pool_elt_at_index (sm->static_mappings, value.value);
864       new_src_addr = m->external_addr;
865     }
866   else
867     {
868       pool_foreach (s, tsm->sessions)
869         {
870           if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
871             {
872               init_ed_k (&s_kv, s->out2in.addr, 0, ip->dst_address, 0,
873                          outside_fib_index, ip->protocol);
874               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
875                 {
876                   new_src_addr = s->out2in.addr;
877                 }
878               break;
879             }
880         }
881
882       if (!new_src_addr.as_u32)
883         {
884           for (i = 0; i < vec_len (sm->addresses); i++)
885             {
886               init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0,
887                          outside_fib_index, ip->protocol);
888               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
889                 {
890                   new_src_addr = sm->addresses[i].addr;
891                 }
892             }
893         }
894     }
895
896   if (!new_src_addr.as_u32)
897     {
898       // could not allocate address for translation ...
899       return 0;
900     }
901
902   s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
903   if (!s)
904     {
905       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
906       nat_elog_warn (sm, "create NAT session failed");
907       return 0;
908     }
909
910   nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
911                         ip->dst_address, 0, rx_fib_index, ip->protocol);
912   nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
913   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
914
915   // hairpinning?
916   int is_hairpinning =
917     nat44_ed_external_sm_lookup (sm, ip->dst_address, 0, NAT_PROTOCOL_OTHER,
918                                  outside_fib_index, &new_dst_addr, NULL);
919   s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
920
921   nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
922   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
923
924   nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
925                         outside_fib_index, ip->protocol);
926   nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
927   nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
928   nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
929
930   s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
931   s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
932   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
933   s->out2in.addr.as_u32 = new_src_addr.as_u32;
934   s->out2in.fib_index = outside_fib_index;
935   s->in2out.addr.as_u32 = ip->src_address.as_u32;
936   s->in2out.fib_index = rx_fib_index;
937   s->in2out.port = s->out2in.port = ip->protocol;
938   if (m)
939     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
940
941   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
942     {
943       nat_elog_notice (sm, "in2out flow hash add failed");
944       nat_ed_session_delete (sm, s, thread_index, 1);
945       return NULL;
946     }
947
948   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1))
949     {
950       nat_elog_notice (sm, "out2in flow hash add failed");
951       nat_ed_session_delete (sm, s, thread_index, 1);
952       return NULL;
953     }
954
955   per_vrf_sessions_register_session (s, thread_index);
956
957   /* Accounting */
958   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
959                                  thread_index);
960   /* Per-user LRU list maintenance */
961   nat44_session_update_lru (sm, s, thread_index);
962
963   return s;
964 }
965
966 static inline uword
967 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
968                                           vlib_node_runtime_t *node,
969                                           vlib_frame_t *frame,
970                                           int is_output_feature,
971                                           int is_multi_worker)
972 {
973   u32 n_left_from, *from;
974   snat_main_t *sm = &snat_main;
975   f64 now = vlib_time_now (vm);
976   u32 thread_index = vm->thread_index;
977   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
978   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
979     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
980
981   from = vlib_frame_vector_args (frame);
982   n_left_from = frame->n_vectors;
983
984   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
985   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
986   vlib_get_buffers (vm, from, b, n_left_from);
987
988   while (n_left_from > 0)
989     {
990       vlib_buffer_t *b0;
991       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
992       u32 tx_sw_if_index0;
993       u32 cntr_sw_if_index0;
994       nat_protocol_t proto0;
995       ip4_header_t *ip0;
996       snat_session_t *s0 = 0;
997       clib_bihash_kv_16_8_t kv0, value0;
998       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
999       nat_6t_flow_t *f = 0;
1000       nat_6t_t lookup;
1001       int lookup_skipped = 0;
1002
1003       b0 = *b;
1004       b++;
1005
1006       /* Prefetch next iteration. */
1007       if (PREDICT_TRUE (n_left_from >= 2))
1008         {
1009           vlib_buffer_t *p2;
1010
1011           p2 = *b;
1012
1013           vlib_prefetch_buffer_header (p2, LOAD);
1014
1015           clib_prefetch_load (p2->data);
1016         }
1017
1018       if (is_output_feature)
1019         {
1020           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1021         }
1022
1023       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1024
1025       ip0 =
1026         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
1027
1028       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1029       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1030       cntr_sw_if_index0 =
1031         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1032       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1033                                                            rx_sw_if_index0);
1034       lookup.fib_index = rx_fib_index0;
1035
1036       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1037         {
1038           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1039           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1040                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1041                                        0);
1042           next[0] = NAT_NEXT_ICMP_ERROR;
1043           goto trace0;
1044         }
1045
1046       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1047
1048       if (is_output_feature)
1049         {
1050           if (PREDICT_FALSE
1051               (nat_not_translate_output_feature_fwd
1052                (sm, ip0, thread_index, now, vm, b0)))
1053             goto trace0;
1054         }
1055
1056       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1057         {
1058           if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1059                 ICMP4_echo_request &&
1060               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1061                 ICMP4_echo_reply &&
1062               !icmp_type_is_error_message (
1063                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
1064             {
1065               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
1066               next[0] = NAT_NEXT_DROP;
1067               goto trace0;
1068             }
1069           int err = nat_get_icmp_session_lookup_values (
1070             b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
1071             &lookup.dport, &lookup.proto);
1072           if (err != 0)
1073             {
1074               b0->error = node->errors[err];
1075               next[0] = NAT_NEXT_DROP;
1076               goto trace0;
1077             }
1078         }
1079       else
1080         {
1081           lookup.proto = ip0->protocol;
1082           lookup.saddr.as_u32 = ip0->src_address.as_u32;
1083           lookup.daddr.as_u32 = ip0->dst_address.as_u32;
1084           lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
1085           lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
1086         }
1087
1088       /* there might be a stashed index in vnet_buffer2 from handoff or
1089        * classify node, see if it can be used */
1090       if (is_multi_worker &&
1091           !pool_is_free_index (tsm->sessions,
1092                                vnet_buffer2 (b0)->nat.cached_session_index))
1093         {
1094           s0 = pool_elt_at_index (tsm->sessions,
1095                                   vnet_buffer2 (b0)->nat.cached_session_index);
1096           if (PREDICT_TRUE (
1097                 nat_6t_t_eq (&s0->i2o.match, &lookup)
1098                 // for some hairpinning cases there are two "i2i" flows instead
1099                 // of i2o and o2i as both hosts are on inside
1100                 || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1101                     nat_6t_t_eq (&s0->o2i.match, &lookup))))
1102             {
1103               /* yes, this is the droid we're looking for */
1104               lookup_skipped = 1;
1105               goto skip_lookup;
1106             }
1107           s0 = NULL;
1108         }
1109
1110       init_ed_k (&kv0, lookup.saddr, lookup.sport, lookup.daddr, lookup.dport,
1111                  lookup.fib_index, lookup.proto);
1112
1113       // lookup flow
1114       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1115         {
1116           // flow does not exist go slow path
1117           next[0] = def_slow;
1118           goto trace0;
1119         }
1120
1121       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1122       s0 =
1123         pool_elt_at_index (tsm->sessions,
1124                            ed_value_get_session_index (&value0));
1125
1126     skip_lookup:
1127
1128       ASSERT (thread_index == s0->thread_index);
1129
1130       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1131         {
1132           // session is closed, go slow path
1133           nat_free_session_data (sm, s0, thread_index, 0);
1134           nat_ed_session_delete (sm, s0, thread_index, 1);
1135           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
1136           goto trace0;
1137         }
1138
1139       if (s0->tcp_closed_timestamp)
1140         {
1141           if (now >= s0->tcp_closed_timestamp)
1142             {
1143               // session is closed, go slow path, freed in slow path
1144               next[0] = def_slow;
1145             }
1146           else
1147             {
1148               // session in transitory timeout, drop
1149               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
1150               next[0] = NAT_NEXT_DROP;
1151             }
1152           goto trace0;
1153         }
1154
1155       // drop if session expired
1156       u64 sess_timeout_time;
1157       sess_timeout_time =
1158         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1159       if (now >= sess_timeout_time)
1160         {
1161           nat_free_session_data (sm, s0, thread_index, 0);
1162           nat_ed_session_delete (sm, s0, thread_index, 1);
1163           // session is closed, go slow path
1164           next[0] = def_slow;
1165           goto trace0;
1166         }
1167
1168       b0->flags |= VNET_BUFFER_F_IS_NATED;
1169
1170       if (nat_6t_t_eq (&s0->i2o.match, &lookup))
1171         {
1172           f = &s0->i2o;
1173         }
1174       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1175                nat_6t_t_eq (&s0->o2i.match, &lookup))
1176         {
1177           f = &s0->o2i;
1178         }
1179       else
1180         {
1181           translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
1182           nat_free_session_data (sm, s0, thread_index, 0);
1183           nat_ed_session_delete (sm, s0, thread_index, 1);
1184           next[0] = NAT_NEXT_DROP;
1185           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1186           goto trace0;
1187         }
1188
1189       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1190           (translation_error = nat_6t_flow_buf_translate_i2o (
1191              vm, sm, b0, ip0, f, proto0, is_output_feature)))
1192         {
1193           nat_free_session_data (sm, s0, thread_index, 0);
1194           nat_ed_session_delete (sm, s0, thread_index, 1);
1195           next[0] = NAT_NEXT_DROP;
1196           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1197           goto trace0;
1198         }
1199
1200       switch (proto0)
1201         {
1202         case NAT_PROTOCOL_TCP:
1203           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
1204                                          thread_index, cntr_sw_if_index0, 1);
1205           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1206           break;
1207         case NAT_PROTOCOL_UDP:
1208           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
1209                                          thread_index, cntr_sw_if_index0, 1);
1210           break;
1211         case NAT_PROTOCOL_ICMP:
1212           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
1213                                          thread_index, cntr_sw_if_index0, 1);
1214           break;
1215         case NAT_PROTOCOL_OTHER:
1216           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
1217                                          thread_index, cntr_sw_if_index0, 1);
1218           break;
1219         }
1220
1221       /* Accounting */
1222       nat44_session_update_counters (s0, now,
1223                                      vlib_buffer_length_in_chain (vm, b0),
1224                                      thread_index);
1225       /* Per-user LRU list maintenance */
1226       nat44_session_update_lru (sm, s0, thread_index);
1227
1228     trace0:
1229       if (PREDICT_FALSE
1230           ((node->flags & VLIB_NODE_FLAG_TRACE)
1231            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1232         {
1233           nat_in2out_ed_trace_t *t =
1234             vlib_add_trace (vm, node, b0, sizeof (*t));
1235           t->sw_if_index = rx_sw_if_index0;
1236           t->next_index = next[0];
1237           t->is_slow_path = 0;
1238           t->translation_error = translation_error;
1239           t->lookup_skipped = lookup_skipped;
1240           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1241
1242           if (s0)
1243             {
1244               t->session_index = s0 - tsm->sessions;
1245               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1246               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1247               t->translation_via_i2of = (&s0->i2o == f);
1248             }
1249           else
1250             {
1251               t->session_index = ~0;
1252             }
1253         }
1254
1255       if (next[0] == NAT_NEXT_DROP)
1256         {
1257           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
1258                                          thread_index, cntr_sw_if_index0, 1);
1259         }
1260
1261       n_left_from--;
1262       next++;
1263     }
1264
1265   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1266                                frame->n_vectors);
1267   return frame->n_vectors;
1268 }
1269
1270 static inline uword
1271 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
1272                                           vlib_node_runtime_t *node,
1273                                           vlib_frame_t *frame,
1274                                           int is_output_feature,
1275                                           int is_multi_worker)
1276 {
1277   u32 n_left_from, *from;
1278   snat_main_t *sm = &snat_main;
1279   f64 now = vlib_time_now (vm);
1280   u32 thread_index = vm->thread_index;
1281   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1282
1283   from = vlib_frame_vector_args (frame);
1284   n_left_from = frame->n_vectors;
1285
1286   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1287   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1288   vlib_get_buffers (vm, from, b, n_left_from);
1289
1290   while (n_left_from > 0)
1291     {
1292       vlib_buffer_t *b0;
1293       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1294       u32 tx_sw_if_index0;
1295       u32 cntr_sw_if_index0;
1296       nat_protocol_t proto0;
1297       ip4_header_t *ip0;
1298       udp_header_t *udp0;
1299       icmp46_header_t *icmp0;
1300       snat_session_t *s0 = 0;
1301       clib_bihash_kv_16_8_t kv0, value0;
1302       int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1303
1304       b0 = *b;
1305
1306       if (is_output_feature)
1307         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1308
1309       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1310
1311       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1312                               iph_offset0);
1313
1314       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1315       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1316       cntr_sw_if_index0 =
1317         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1318       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1319                                                            rx_sw_if_index0);
1320
1321       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1322         {
1323           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1324           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1325                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1326                                        0);
1327           next[0] = NAT_NEXT_ICMP_ERROR;
1328           goto trace0;
1329         }
1330
1331       udp0 = ip4_next_header (ip0);
1332       icmp0 = (icmp46_header_t *) udp0;
1333       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1334
1335       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
1336         {
1337           s0 = nat44_ed_in2out_slowpath_unknown_proto (
1338             sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
1339           if (!s0)
1340             next[0] = NAT_NEXT_DROP;
1341
1342           if (NAT_NEXT_DROP != next[0] && s0 &&
1343               NAT_ED_TRNSL_ERR_SUCCESS !=
1344                 (translation_error = nat_6t_flow_buf_translate_i2o (
1345                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1346             {
1347               nat_free_session_data (sm, s0, thread_index, 0);
1348               nat_ed_session_delete (sm, s0, thread_index, 1);
1349               next[0] = NAT_NEXT_DROP;
1350               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1351               goto trace0;
1352             }
1353
1354           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
1355                                          thread_index, cntr_sw_if_index0, 1);
1356           goto trace0;
1357         }
1358
1359       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1360         {
1361           next[0] = icmp_in2out_ed_slow_path (
1362             sm, b0, ip0, icmp0, rx_sw_if_index0, rx_fib_index0, node, next[0],
1363             now, thread_index, proto0, &s0, is_multi_worker);
1364           if (NAT_NEXT_DROP != next[0] && s0 &&
1365               NAT_ED_TRNSL_ERR_SUCCESS !=
1366                 (translation_error = nat_6t_flow_buf_translate_i2o (
1367                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1368             {
1369               nat_free_session_data (sm, s0, thread_index, 0);
1370               nat_ed_session_delete (sm, s0, thread_index, 1);
1371               next[0] = NAT_NEXT_DROP;
1372               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1373               goto trace0;
1374             }
1375
1376           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.icmp,
1377                                          thread_index, cntr_sw_if_index0, 1);
1378           goto trace0;
1379         }
1380
1381       init_ed_k (&kv0, ip0->src_address,
1382                  vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
1383                  vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
1384                  ip0->protocol);
1385       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1386         {
1387           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1388           s0 =
1389             pool_elt_at_index (tsm->sessions,
1390                                ed_value_get_session_index (&value0));
1391
1392           if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
1393             {
1394               nat_free_session_data (sm, s0, thread_index, 0);
1395               nat_ed_session_delete (sm, s0, thread_index, 1);
1396               s0 = NULL;
1397             }
1398         }
1399
1400       if (!s0)
1401         {
1402           if (is_output_feature)
1403             {
1404               if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
1405                     sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1406                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1407                     rx_sw_if_index0, tx_sw_if_index0, now, is_multi_worker)))
1408                 goto trace0;
1409
1410               /*
1411                * Send DHCP packets to the ipv4 stack, or we won't
1412                * be able to use dhcp client on the outside interface
1413                */
1414               if (PREDICT_FALSE
1415                   (proto0 == NAT_PROTOCOL_UDP
1416                    && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1417                        clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server))
1418                    && ip0->dst_address.as_u32 == 0xffffffff))
1419                 goto trace0;
1420             }
1421           else
1422             {
1423               if (PREDICT_FALSE (nat44_ed_not_translate (
1424                     vm, sm, node, rx_sw_if_index0, b0, ip0, proto0,
1425                     rx_fib_index0, thread_index)))
1426                 goto trace0;
1427             }
1428
1429           next[0] = slow_path_ed (
1430             vm, sm, b0, ip0->src_address, ip0->dst_address,
1431             vnet_buffer (b0)->ip.reass.l4_src_port,
1432             vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->protocol,
1433             rx_fib_index0, &s0, node, next[0], thread_index, now);
1434
1435           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1436             goto trace0;
1437
1438           if (PREDICT_FALSE (!s0))
1439             goto trace0;
1440
1441         }
1442
1443       b0->flags |= VNET_BUFFER_F_IS_NATED;
1444
1445       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1446           (translation_error = nat_6t_flow_buf_translate_i2o (
1447              vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1448         {
1449           nat_free_session_data (sm, s0, thread_index, 0);
1450           nat_ed_session_delete (sm, s0, thread_index, 1);
1451           next[0] = NAT_NEXT_DROP;
1452           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1453           goto trace0;
1454         }
1455
1456       if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
1457         {
1458           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
1459                                          thread_index, cntr_sw_if_index0, 1);
1460           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1461         }
1462       else
1463         {
1464           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
1465                                          thread_index, cntr_sw_if_index0, 1);
1466         }
1467
1468       /* Accounting */
1469       nat44_session_update_counters (s0, now,
1470                                      vlib_buffer_length_in_chain
1471                                      (vm, b0), thread_index);
1472       /* Per-user LRU list maintenance */
1473       nat44_session_update_lru (sm, s0, thread_index);
1474
1475     trace0:
1476       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1477                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1478         {
1479           nat_in2out_ed_trace_t *t =
1480             vlib_add_trace (vm, node, b0, sizeof (*t));
1481           t->sw_if_index = rx_sw_if_index0;
1482           t->next_index = next[0];
1483           t->is_slow_path = 1;
1484           t->translation_error = translation_error;
1485           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1486
1487           if (s0)
1488             {
1489               t->session_index = s0 - tsm->sessions;
1490               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1491               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1492               t->translation_via_i2of = 1;
1493             }
1494
1495           else
1496             {
1497               t->session_index = ~0;
1498             }
1499         }
1500
1501       if (next[0] == NAT_NEXT_DROP)
1502         {
1503           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
1504                                          thread_index, cntr_sw_if_index0, 1);
1505         }
1506
1507       n_left_from--;
1508       next++;
1509       b++;
1510     }
1511
1512   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1513                                frame->n_vectors);
1514
1515   return frame->n_vectors;
1516 }
1517
1518 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1519                                      vlib_node_runtime_t * node,
1520                                      vlib_frame_t * frame)
1521 {
1522   if (snat_main.num_workers > 1)
1523     {
1524       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1);
1525     }
1526   else
1527     {
1528       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0);
1529     }
1530 }
1531
1532 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1533   .name = "nat44-ed-in2out",
1534   .vector_size = sizeof (u32),
1535   .sibling_of = "nat-default",
1536   .format_trace = format_nat_in2out_ed_trace,
1537   .type = VLIB_NODE_TYPE_INTERNAL,
1538   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1539   .error_strings = nat_in2out_ed_error_strings,
1540   .runtime_data_bytes = sizeof (snat_runtime_t),
1541 };
1542
1543 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1544                                             vlib_node_runtime_t * node,
1545                                             vlib_frame_t * frame)
1546 {
1547   if (snat_main.num_workers > 1)
1548     {
1549       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1);
1550     }
1551   else
1552     {
1553       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0);
1554     }
1555 }
1556
1557 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1558   .name = "nat44-ed-in2out-output",
1559   .vector_size = sizeof (u32),
1560   .sibling_of = "nat-default",
1561   .format_trace = format_nat_in2out_ed_trace,
1562   .type = VLIB_NODE_TYPE_INTERNAL,
1563   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1564   .error_strings = nat_in2out_ed_error_strings,
1565   .runtime_data_bytes = sizeof (snat_runtime_t),
1566 };
1567
1568 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1569                                               vlib_node_runtime_t *
1570                                               node, vlib_frame_t * frame)
1571 {
1572   if (snat_main.num_workers > 1)
1573     {
1574       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 1);
1575     }
1576   else
1577     {
1578       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 0);
1579     }
1580 }
1581
1582 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1583   .name = "nat44-ed-in2out-slowpath",
1584   .vector_size = sizeof (u32),
1585   .sibling_of = "nat-default",
1586   .format_trace = format_nat_in2out_ed_trace,
1587   .type = VLIB_NODE_TYPE_INTERNAL,
1588   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1589   .error_strings = nat_in2out_ed_error_strings,
1590   .runtime_data_bytes = sizeof (snat_runtime_t),
1591 };
1592
1593 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1594                                                      vlib_node_runtime_t
1595                                                      * node,
1596                                                      vlib_frame_t * frame)
1597 {
1598   if (snat_main.num_workers > 1)
1599     {
1600       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 1);
1601     }
1602   else
1603     {
1604       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 0);
1605     }
1606 }
1607
1608 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1609   .name = "nat44-ed-in2out-output-slowpath",
1610   .vector_size = sizeof (u32),
1611   .sibling_of = "nat-default",
1612   .format_trace = format_nat_in2out_ed_trace,
1613   .type = VLIB_NODE_TYPE_INTERNAL,
1614   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1615   .error_strings = nat_in2out_ed_error_strings,
1616   .runtime_data_bytes = sizeof (snat_runtime_t),
1617 };
1618
1619 static u8 *
1620 format_nat_pre_trace (u8 * s, va_list * args)
1621 {
1622   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1623   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1624   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1625   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1626                  t->arc_next_index);
1627 }
1628
1629 VLIB_NODE_FN (nat_pre_in2out_node)
1630   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1631 {
1632   return nat_pre_node_fn_inline (vm, node, frame,
1633                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1634 }
1635
1636 VLIB_NODE_FN (nat_pre_in2out_output_node)
1637   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1638 {
1639   return nat_pre_node_fn_inline (vm, node, frame,
1640                                  NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
1641 }
1642
1643 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1644   .name = "nat-pre-in2out",
1645   .vector_size = sizeof (u32),
1646   .sibling_of = "nat-default",
1647   .format_trace = format_nat_pre_trace,
1648   .type = VLIB_NODE_TYPE_INTERNAL,
1649   .n_errors = 0,
1650 };
1651
1652 VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
1653   .name = "nat-pre-in2out-output",
1654   .vector_size = sizeof (u32),
1655   .sibling_of = "nat-default",
1656   .format_trace = format_nat_pre_trace,
1657   .type = VLIB_NODE_TYPE_INTERNAL,
1658   .n_errors = 0,
1659 };
1660
1661 /*
1662  * fd.io coding-style-patch-verification: ON
1663  *
1664  * Local Variables:
1665  * eval: (c-set-style "gnu")
1666  * End:
1667  */