nat: harden ICMP handling
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_in2out.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27
28 #include <nat/lib/nat_syslog.h>
29 #include <nat/lib/nat_inlines.h>
30 #include <nat/lib/ipfix_logging.h>
31
32 #include <nat/nat44-ed/nat44_ed.h>
33 #include <nat/nat44-ed/nat44_ed_inlines.h>
34
35 /* Number of attempts to get a port for the ED overloading algorithm. If
36  * rolling the dice this many times doesn't produce a free port, it's treated
37  * as if there were no free ports available, to conserve resources. */
38 #define ED_PORT_ALLOC_ATTEMPTS (10)
39
/* Error string table: the X-macro below expands every (sym, string) entry of
 * foreach_nat_in2out_ed_error to its string, in declaration order. */
40 static char *nat_in2out_ed_error_strings[] = {
41 #define _(sym,string) string,
42   foreach_nat_in2out_ed_error
43 #undef _
44 };
45
/* Trace record rendered by format_nat_in2out_ed_trace (). */
46 typedef struct
47 {
48   u32 sw_if_index;		/* printed as "sw_if_index" */
49   u32 next_index;		/* printed as "next index" */
50   u32 session_index;		/* ~0 suppresses all session output */
51   nat_translation_error_e translation_error;	/* printed as "translation result" */
52   nat_6t_flow_t i2of;		/* printed on the "i2of" line */
53   nat_6t_flow_t o2if;		/* printed on the "o2if" line */
54   clib_bihash_kv_16_8_t search_key;	/* printed for fast path when lookup was not skipped */
55   u8 is_slow_path;		/* selects the SLOW_PATH / FAST_PATH tag */
56   u8 translation_via_i2of;	/* non-zero prints "i2of", zero prints "o2if" */
57   u8 lookup_skipped;		/* fast path only: cached session index was used */
58 } nat_in2out_ed_trace_t;
59
60 static u8 *
61 format_nat_in2out_ed_trace (u8 * s, va_list * args)
62 {
63   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
64   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
65   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
66   char *tag;
67
68   tag =
69     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
70     "NAT44_IN2OUT_ED_FAST_PATH";
71
72   s = format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
73               t->next_index);
74   if (~0 != t->session_index)
75     {
76       s = format (s, ", session %d, translation result '%U' via %s",
77                   t->session_index, format_nat_ed_translation_error,
78                   t->translation_error,
79                   t->translation_via_i2of ? "i2of" : "o2if");
80       s = format (s, "\n  i2of %U", format_nat_6t_flow, &t->i2of);
81       s = format (s, "\n  o2if %U", format_nat_6t_flow, &t->o2if);
82     }
83   if (!t->is_slow_path)
84     {
85       if (t->lookup_skipped)
86         {
87           s = format (s, "\n lookup skipped - cached session index used");
88         }
89       else
90         {
91           s = format (s, "\n  search key %U", format_ed_session_kvp,
92                       &t->search_key);
93         }
94     }
95
96   return s;
97 }
98
99 /**
100  * @brief Check if packet should be translated
101  *
102  * Packets aimed at outside interface and external address with active session
103  * should be translated.
104  *
105  * @param sm            NAT main
106  * @param rt            NAT runtime data
107  * @param sw_if_index0  index of the inside interface
108  * @param ip0           IPv4 header
109  * @param proto0        NAT protocol
110  * @param rx_fib_index0 RX FIB index
111  *
112  * @returns 0 if packet should be translated otherwise 1
113  */
114 static inline int
115 snat_not_translate_fast (snat_main_t *sm, vlib_node_runtime_t *node,
116                          u32 sw_if_index0, ip4_header_t *ip0, u32 proto0,
117                          u32 rx_fib_index0)
118 {
119   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
120   nat_outside_fib_t *outside_fib;
121   fib_prefix_t pfx = {
122     .fp_proto = FIB_PROTOCOL_IP4,
123     .fp_len = 32,
124     .fp_addr = {
125                 .ip4.as_u32 = ip0->dst_address.as_u32,
126                 }
127     ,
128   };
129
130   /* Don't NAT packet aimed at the intfc address */
131   if (PREDICT_FALSE (
132         is_interface_addr (sm, node, sw_if_index0, ip0->dst_address.as_u32)))
133     return 1;
134
135   fei = fib_table_lookup (rx_fib_index0, &pfx);
136   if (FIB_NODE_INDEX_INVALID != fei)
137     {
138       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
139       if (sw_if_index == ~0)
140         {
141           vec_foreach (outside_fib, sm->outside_fibs)
142             {
143               fei = fib_table_lookup (outside_fib->fib_index, &pfx);
144               if (FIB_NODE_INDEX_INVALID != fei)
145                 {
146                   sw_if_index = fib_entry_get_resolving_interface (fei);
147                   if (sw_if_index != ~0)
148                     break;
149                 }
150             }
151         }
152       if (sw_if_index == ~0)
153         return 1;
154
155       snat_interface_t *i;
156       pool_foreach (i, sm->interfaces)
157         {
158           /* NAT packet aimed at outside interface */
159           if ((nat_interface_is_outside (i)) &&
160               (sw_if_index == i->sw_if_index))
161             return 0;
162         }
163     }
164
165   return 1;
166 }
167
168 static int
169 nat_ed_alloc_addr_and_port_with_snat_address (
170   snat_main_t *sm, u32 nat_proto, u32 thread_index, snat_address_t *a,
171   u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
172   ip4_address_t *outside_addr, u16 *outside_port)
173 {
174   const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
175
176   s->o2i.match.daddr = a->addr;
177   /* first try port suggested by caller */
178   u16 port = clib_net_to_host_u16 (*outside_port);
179   u16 port_offset = port - port_thread_offset;
180   if (port <= port_thread_offset ||
181       port > port_thread_offset + port_per_thread)
182     {
183       /* need to pick a different port, suggested port doesn't fit in
184        * this thread's port range */
185       port_offset = snat_random_port (0, port_per_thread - 1);
186       port = port_thread_offset + port_offset;
187     }
188   u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
189   do
190     {
191       if (NAT_PROTOCOL_ICMP == nat_proto)
192         {
193           s->o2i.match.sport = clib_host_to_net_u16 (port);
194         }
195       s->o2i.match.dport = clib_host_to_net_u16 (port);
196       if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
197         {
198 #define _(N, i, n, s)                                                         \
199   case NAT_PROTOCOL_##N:                                                      \
200     ++a->busy_##n##_port_refcounts[port];                                     \
201     a->busy_##n##_ports_per_thread[thread_index]++;                           \
202     a->busy_##n##_ports++;                                                    \
203     break;
204           switch (nat_proto)
205             {
206               foreach_nat_protocol;
207             default:
208               nat_elog_info (sm, "unknown protocol");
209               return 1;
210             }
211 #undef _
212           *outside_addr = a->addr;
213           *outside_port = clib_host_to_net_u16 (port);
214           return 0;
215         }
216       port_offset = snat_random_port (0, port_per_thread - 1);
217       port = port_thread_offset + port_offset;
218       --attempts;
219     }
220   while (attempts > 0);
221   return 1;
222 }
223
224 static int
225 nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto,
226                             u32 thread_index, ip4_address_t s_addr,
227                             u16 port_per_thread, u32 snat_thread_index,
228                             snat_session_t *s, ip4_address_t *outside_addr,
229                             u16 *outside_port)
230 {
231   int i;
232   snat_address_t *a, *ga = 0;
233
234   if (vec_len (sm->addresses) > 0)
235     {
236       int s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses);
237
238       for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
239         {
240           a = sm->addresses + i;
241           if (a->fib_index == rx_fib_index)
242             {
243               return nat_ed_alloc_addr_and_port_with_snat_address (
244                 sm, nat_proto, thread_index, a, port_per_thread,
245                 snat_thread_index, s, outside_addr, outside_port);
246             }
247           else if (a->fib_index == ~0)
248             {
249               ga = a;
250             }
251         }
252
253       for (i = 0; i < s_addr_offset; ++i)
254         {
255           a = sm->addresses + i;
256           if (a->fib_index == rx_fib_index)
257             {
258               return nat_ed_alloc_addr_and_port_with_snat_address (
259                 sm, nat_proto, thread_index, a, port_per_thread,
260                 snat_thread_index, s, outside_addr, outside_port);
261             }
262           else if (a->fib_index == ~0)
263             {
264               ga = a;
265             }
266         }
267
268       if (ga)
269         {
270           return nat_ed_alloc_addr_and_port_with_snat_address (
271             sm, nat_proto, thread_index, a, port_per_thread, snat_thread_index,
272             s, outside_addr, outside_port);
273         }
274     }
275   /* Totally out of translations to use... */
276   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
277   return 1;
278 }
279
280 static_always_inline u32
281 nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr)
282 {
283   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
284   nat_outside_fib_t *outside_fib;
285   fib_prefix_t pfx = {
286     .fp_proto = FIB_PROTOCOL_IP4,
287     .fp_len = 32,
288     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
289     ,
290   };
291   // TODO: multiple vrfs none can resolve addr
292   vec_foreach (outside_fib, sm->outside_fibs)
293     {
294       fei = fib_table_lookup (outside_fib->fib_index, &pfx);
295       if (FIB_NODE_INDEX_INVALID != fei)
296         {
297           if (fib_entry_get_resolving_interface (fei) != ~0)
298             {
299               return outside_fib->fib_index;
300             }
301         }
302     }
303   return ~0;
304 }
305
306 static_always_inline int
307 nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
308                              u16 match_port, nat_protocol_t match_protocol,
309                              u32 match_fib_index, ip4_address_t *daddr,
310                              u16 *dport)
311 {
312   clib_bihash_kv_8_8_t kv, value;
313   init_nat_k (&kv, match_addr, match_port, match_fib_index, match_protocol);
314   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
315     {
316       /* Try address only mapping */
317       init_nat_k (&kv, match_addr, 0, 0, 0);
318       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv,
319                                   &value))
320         return 0;
321     }
322
323   snat_static_mapping_t *m =
324     pool_elt_at_index (sm->static_mappings, value.value);
325   *daddr = m->local_addr;
326   if (dport)
327     {
328       /* Address only mapping doesn't change port */
329       *dport = is_addr_only_static_mapping (m) ? match_port : m->local_port;
330     }
331   return 1;
332 }
333
334 static u32
335 slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
336               ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
337               u16 r_port, u8 proto, u32 rx_fib_index,
338               snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
339               u32 thread_index, f64 now)
340 {
341   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
342   ip4_address_t outside_addr;
343   u16 outside_port;
344   u32 outside_fib_index;
345   u8 is_identity_nat = 0;
346
347   u32 nat_proto = ip_proto_to_nat_proto (proto);
348   snat_session_t *s = NULL;
349   lb_nat_type_t lb = 0;
350   ip4_address_t daddr = r_addr;
351   u16 dport = r_port;
352
353   if (PREDICT_FALSE
354       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
355     {
356       if (!nat_lru_free_one (sm, thread_index, now))
357         {
358           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
359           nat_ipfix_logging_max_sessions (thread_index,
360                                           sm->max_translations_per_thread);
361           nat_elog_notice (sm, "maximum sessions exceeded");
362           return NAT_NEXT_DROP;
363         }
364     }
365
366   outside_fib_index = sm->outside_fib_index;
367
368   switch (vec_len (sm->outside_fibs))
369     {
370     case 0:
371       outside_fib_index = sm->outside_fib_index;
372       break;
373     case 1:
374       outside_fib_index = sm->outside_fibs[0].fib_index;
375       break;
376     default:
377       outside_fib_index = nat_outside_fib_index_lookup (sm, r_addr);
378       break;
379     }
380
381   ip4_address_t sm_addr;
382   u16 sm_port;
383   u32 sm_fib_index;
384   /* First try to match static mapping by local address and port */
385   int is_sm;
386   if (snat_static_mapping_match (vm, sm, l_addr, l_port, rx_fib_index,
387                                  nat_proto, &sm_addr, &sm_port, &sm_fib_index,
388                                  0, 0, 0, &lb, 0, &is_identity_nat, 0))
389     {
390       is_sm = 0;
391     }
392   else
393     {
394       if (PREDICT_FALSE (is_identity_nat))
395         {
396           *sessionp = NULL;
397           return next;
398         }
399       is_sm = 1;
400     }
401
402   if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP))
403     {
404       if (PREDICT_FALSE (!tcp_flags_is_init (
405             vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
406         {
407           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
408           return NAT_NEXT_DROP;
409         }
410     }
411
412   s = nat_ed_session_alloc (sm, thread_index, now, proto);
413   ASSERT (s);
414
415   if (!is_sm)
416     {
417       s->in2out.addr = l_addr;
418       s->in2out.port = l_port;
419       s->nat_proto = nat_proto;
420       s->in2out.fib_index = rx_fib_index;
421       s->out2in.fib_index = outside_fib_index;
422
423       // suggest using local port to allocation function
424       outside_port = l_port;
425
426       // hairpinning?
427       int is_hairpinning = nat44_ed_external_sm_lookup (
428         sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
429       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
430
431       // destination addr/port updated with real values in
432       // nat_ed_alloc_addr_and_port
433       nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
434                             s->out2in.fib_index, proto);
435       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
436       if (NAT_PROTOCOL_ICMP == nat_proto)
437         {
438           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
439         }
440       else
441         {
442           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
443         }
444       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
445
446       if (nat_ed_alloc_addr_and_port (
447             sm, rx_fib_index, nat_proto, thread_index, l_addr,
448             sm->port_per_thread, tsm->snat_thread_index, s, &outside_addr,
449             &outside_port))
450         {
451           nat_elog_notice (sm, "addresses exhausted");
452           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
453           nat_ed_session_delete (sm, s, thread_index, 1);
454           return NAT_NEXT_DROP;
455         }
456       s->out2in.addr = outside_addr;
457       s->out2in.port = outside_port;
458     }
459   else
460     {
461       // static mapping
462       s->out2in.addr = outside_addr = sm_addr;
463       s->out2in.port = outside_port = sm_port;
464       s->in2out.addr = l_addr;
465       s->in2out.port = l_port;
466       s->nat_proto = nat_proto;
467       s->in2out.fib_index = rx_fib_index;
468       s->out2in.fib_index = outside_fib_index;
469       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
470
471       // hairpinning?
472       int is_hairpinning = nat44_ed_external_sm_lookup (
473         sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
474       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
475
476       if (NAT_PROTOCOL_ICMP == nat_proto)
477         {
478           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
479                                 sm_port, s->out2in.fib_index, proto);
480           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
481         }
482       else
483         {
484           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr,
485                                 sm_port, s->out2in.fib_index, proto);
486           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
487         }
488       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
489       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
490       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
491         {
492           nat_elog_notice (sm, "out2in key add failed");
493           goto error;
494         }
495     }
496
497   if (lb)
498     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
499   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
500   s->ext_host_addr = r_addr;
501   s->ext_host_port = r_port;
502
503   nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port,
504                         rx_fib_index, proto);
505   nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
506   nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);
507
508   if (NAT_PROTOCOL_ICMP == nat_proto)
509     {
510       nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
511     }
512   else
513     {
514       nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
515       nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
516     }
517   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
518
519   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
520     {
521       nat_elog_notice (sm, "in2out key add failed");
522       goto error;
523     }
524
525   /* log NAT event */
526   nat_ipfix_logging_nat44_ses_create (thread_index,
527                                       s->in2out.addr.as_u32,
528                                       s->out2in.addr.as_u32,
529                                       s->nat_proto,
530                                       s->in2out.port,
531                                       s->out2in.port, s->in2out.fib_index);
532
533   nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
534                          s->in2out.port, &s->ext_host_nat_addr,
535                          s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
536                          &s->ext_host_addr, s->ext_host_port, s->nat_proto, 0);
537
538   per_vrf_sessions_register_session (s, thread_index);
539
540   *sessionp = s;
541   return next;
542 error:
543   if (s)
544     {
545       if (!is_sm)
546         {
547           snat_free_outside_address_and_port (sm->addresses, thread_index,
548                                               &outside_addr, outside_port,
549                                               nat_proto);
550         }
551       nat_ed_session_delete (sm, s, thread_index, 1);
552     }
553   *sessionp = s = NULL;
554   return NAT_NEXT_DROP;
555 }
556
557 static_always_inline int
558 nat44_ed_not_translate (vlib_main_t *vm, snat_main_t *sm,
559                         vlib_node_runtime_t *node, u32 sw_if_index,
560                         vlib_buffer_t *b, ip4_header_t *ip, u32 proto,
561                         u32 rx_fib_index, u32 thread_index)
562 {
563   clib_bihash_kv_16_8_t kv, value;
564
565   init_ed_k (&kv, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
566              ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
567              sm->outside_fib_index, ip->protocol);
568
569   /* NAT packet aimed at external address if has active sessions */
570   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
571     {
572       /* or is static mappings */
573       ip4_address_t placeholder_addr;
574       u16 placeholder_port;
575       u32 placeholder_fib_index;
576       if (!snat_static_mapping_match (
577             vm, sm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
578             sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
579             &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
580         return 0;
581     }
582   else
583     return 0;
584
585   if (sm->forwarding_enabled)
586     return 1;
587
588   return snat_not_translate_fast (sm, node, sw_if_index, ip, proto,
589                                   rx_fib_index);
590 }
591
592 static_always_inline int
593 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
594                                       u32 thread_index, f64 now,
595                                       vlib_main_t * vm, vlib_buffer_t * b)
596 {
597   clib_bihash_kv_16_8_t kv, value;
598   snat_session_t *s = 0;
599   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
600
601   if (!sm->forwarding_enabled)
602     return 0;
603
604   if (ip->protocol == IP_PROTOCOL_ICMP)
605     {
606       ip4_address_t lookup_saddr, lookup_daddr;
607       u16 lookup_sport, lookup_dport;
608       u8 lookup_protocol;
609       if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
610                                               &lookup_sport, &lookup_daddr,
611                                               &lookup_dport, &lookup_protocol))
612         return 0;
613       init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport,
614                  0, lookup_protocol);
615     }
616   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
617     {
618       init_ed_k (&kv, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
619                  ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port, 0,
620                  ip->protocol);
621     }
622   else
623     {
624       init_ed_k (&kv, ip->src_address, 0, ip->dst_address, 0, 0,
625                  ip->protocol);
626     }
627
628   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
629     {
630       ASSERT (thread_index == ed_value_get_thread_index (&value));
631       s =
632         pool_elt_at_index (tsm->sessions,
633                            ed_value_get_session_index (&value));
634
635       if (is_fwd_bypass_session (s))
636         {
637           if (ip->protocol == IP_PROTOCOL_TCP)
638             {
639               nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index);
640             }
641           /* Accounting */
642           nat44_session_update_counters (s, now,
643                                          vlib_buffer_length_in_chain (vm, b),
644                                          thread_index);
645           /* Per-user LRU list maintenance */
646           nat44_session_update_lru (sm, s, thread_index);
647           return 1;
648         }
649       else
650         return 0;
651     }
652
653   return 0;
654 }
655
656 static_always_inline int
657 nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
658                                        ip4_header_t *ip, u16 src_port,
659                                        u16 dst_port, u32 thread_index,
660                                        u32 rx_sw_if_index, u32 tx_sw_if_index,
661                                        f64 now, int is_multi_worker)
662 {
663   clib_bihash_kv_16_8_t kv, value;
664   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
665   snat_interface_t *i;
666   snat_session_t *s;
667   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
668   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
669
670   /* src NAT check */
671   init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
672              tx_fib_index, ip->protocol);
673   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
674     {
675       ASSERT (thread_index == ed_value_get_thread_index (&value));
676       s =
677         pool_elt_at_index (tsm->sessions,
678                            ed_value_get_session_index (&value));
679       if (nat44_is_ses_closed (s)
680           && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp))
681         {
682           nat_free_session_data (sm, s, thread_index, 0);
683           nat_ed_session_delete (sm, s, thread_index, 1);
684         }
685       return 1;
686     }
687
688   /* dst NAT check */
689   if (is_multi_worker &&
690       PREDICT_TRUE (!pool_is_free_index (
691         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index)))
692     {
693       nat_6t_t lookup;
694       lookup.fib_index = rx_fib_index;
695       lookup.proto = ip->protocol;
696       lookup.daddr.as_u32 = ip->src_address.as_u32;
697       lookup.dport = src_port;
698       lookup.saddr.as_u32 = ip->dst_address.as_u32;
699       lookup.sport = dst_port;
700       s = pool_elt_at_index (
701         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index);
702       if (PREDICT_TRUE (nat_6t_t_eq (&s->i2o.match, &lookup)))
703         {
704           goto skip_dst_nat_lookup;
705         }
706       s = NULL;
707     }
708
709   init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port,
710              rx_fib_index, ip->protocol);
711   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
712     {
713       ASSERT (thread_index == ed_value_get_thread_index (&value));
714       s =
715         pool_elt_at_index (tsm->sessions,
716                            ed_value_get_session_index (&value));
717
718     skip_dst_nat_lookup:
719       if (is_fwd_bypass_session (s))
720         return 0;
721
722       /* hairpinning */
723       pool_foreach (i, sm->output_feature_interfaces)
724        {
725         if ((nat_interface_is_inside (i)) && (rx_sw_if_index == i->sw_if_index))
726            return 0;
727       }
728       return 1;
729     }
730
731   return 0;
732 }
733
734 static inline u32
735 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
736                           icmp46_header_t *icmp, u32 sw_if_index,
737                           u32 rx_fib_index, vlib_node_runtime_t *node,
738                           u32 next, f64 now, u32 thread_index,
739                           nat_protocol_t nat_proto, snat_session_t **s_p,
740                           int is_multi_worker)
741 {
742   vlib_main_t *vm = vlib_get_main ();
743   u16 checksum;
744   int err;
745   snat_session_t *s = NULL;
746   u8 lookup_protocol = ip->protocol;
747   u16 lookup_sport, lookup_dport;
748   ip4_address_t lookup_saddr, lookup_daddr;
749
750   err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
751                                             &lookup_sport, &lookup_daddr,
752                                             &lookup_dport, &lookup_protocol);
753   if (err != 0)
754     {
755       b->error = node->errors[err];
756       return NAT_NEXT_DROP;
757     }
758
759   if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
760     {
761       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
762             sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
763             vnet_buffer (b)->sw_if_index[VLIB_TX], now, is_multi_worker)))
764         {
765           return next;
766         }
767     }
768   else
769     {
770       if (PREDICT_FALSE (nat44_ed_not_translate (vm, sm, node, sw_if_index, b,
771                                                  ip, NAT_PROTOCOL_ICMP,
772                                                  rx_fib_index, thread_index)))
773         {
774           return next;
775         }
776     }
777
778   if (PREDICT_FALSE (icmp_type_is_error_message (
779         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
780     {
781       b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
782       return NAT_NEXT_DROP;
783     }
784
785   next = slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address,
786                        lookup_sport, lookup_dport, ip->protocol, rx_fib_index,
787                        &s, node, next, thread_index, vlib_time_now (vm));
788
789   if (NAT_NEXT_DROP == next)
790     goto out;
791
792   if (PREDICT_TRUE (!ip4_is_fragment (ip)))
793     {
794       ip_csum_t sum = ip_incremental_checksum_buffer (
795         vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b),
796         ntohs (ip->length) - ip4_header_bytes (ip), 0);
797       checksum = ~ip_csum_fold (sum);
798       if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff))
799         {
800           next = NAT_NEXT_DROP;
801           goto out;
802         }
803     }
804
805 out:
806   if (PREDICT_TRUE (next != NAT_NEXT_DROP && s))
807     {
808       /* Accounting */
809       nat44_session_update_counters (
810         s, now, vlib_buffer_length_in_chain (vm, b), thread_index);
811       /* Per-user LRU list maintenance */
812       nat44_session_update_lru (sm, s, thread_index);
813     }
814   *s_p = s;
815   return next;
816 }
817
818 static snat_session_t *
819 nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
820                                         ip4_header_t *ip, u32 rx_fib_index,
821                                         u32 thread_index, f64 now,
822                                         vlib_main_t *vm,
823                                         vlib_node_runtime_t *node)
824 {
825   clib_bihash_kv_8_8_t kv, value;
826   clib_bihash_kv_16_8_t s_kv, s_value;
827   snat_static_mapping_t *m = NULL;
828   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
829   snat_session_t *s = NULL;
830   u32 outside_fib_index = sm->outside_fib_index;
831   int i;
832   ip4_address_t new_src_addr = { 0 };
833   ip4_address_t new_dst_addr = ip->dst_address;
834
835   if (PREDICT_FALSE (
836         nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
837     {
838       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
839       nat_ipfix_logging_max_sessions (thread_index,
840                                       sm->max_translations_per_thread);
841       nat_elog_notice (sm, "maximum sessions exceeded");
842       return 0;
843     }
844
845   switch (vec_len (sm->outside_fibs))
846     {
847     case 0:
848       outside_fib_index = sm->outside_fib_index;
849       break;
850     case 1:
851       outside_fib_index = sm->outside_fibs[0].fib_index;
852       break;
853     default:
854       outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address);
855       break;
856     }
857
858   init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0);
859
860   /* Try to find static mapping first */
861   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
862     {
863       m = pool_elt_at_index (sm->static_mappings, value.value);
864       new_src_addr = m->external_addr;
865     }
866   else
867     {
868       pool_foreach (s, tsm->sessions)
869         {
870           if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
871             {
872               init_ed_k (&s_kv, s->out2in.addr, 0, ip->dst_address, 0,
873                          outside_fib_index, ip->protocol);
874               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
875                 {
876                   new_src_addr = s->out2in.addr;
877                 }
878               break;
879             }
880         }
881
882       if (!new_src_addr.as_u32)
883         {
884           for (i = 0; i < vec_len (sm->addresses); i++)
885             {
886               init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0,
887                          outside_fib_index, ip->protocol);
888               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
889                 {
890                   new_src_addr = sm->addresses[i].addr;
891                 }
892             }
893         }
894     }
895
896   if (!new_src_addr.as_u32)
897     {
898       // could not allocate address for translation ...
899       return 0;
900     }
901
902   s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
903   if (!s)
904     {
905       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
906       nat_elog_warn (sm, "create NAT session failed");
907       return 0;
908     }
909
910   nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
911                         ip->dst_address, 0, rx_fib_index, ip->protocol);
912   nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
913   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
914
915   // hairpinning?
916   int is_hairpinning =
917     nat44_ed_external_sm_lookup (sm, ip->dst_address, 0, NAT_PROTOCOL_OTHER,
918                                  outside_fib_index, &new_dst_addr, NULL);
919   s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
920
921   nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
922   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
923
924   nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
925                         outside_fib_index, ip->protocol);
926   nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
927   nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
928   nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
929
930   s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
931   s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
932   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
933   s->out2in.addr.as_u32 = new_src_addr.as_u32;
934   s->out2in.fib_index = outside_fib_index;
935   s->in2out.addr.as_u32 = ip->src_address.as_u32;
936   s->in2out.fib_index = rx_fib_index;
937   s->in2out.port = s->out2in.port = ip->protocol;
938   if (m)
939     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
940
941   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
942     {
943       nat_elog_notice (sm, "in2out flow hash add failed");
944       nat_ed_session_delete (sm, s, thread_index, 1);
945       return NULL;
946     }
947
948   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1))
949     {
950       nat_elog_notice (sm, "out2in flow hash add failed");
951       nat_ed_session_delete (sm, s, thread_index, 1);
952       return NULL;
953     }
954
955   per_vrf_sessions_register_session (s, thread_index);
956
957   /* Accounting */
958   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
959                                  thread_index);
960   /* Per-user LRU list maintenance */
961   nat44_session_update_lru (sm, s, thread_index);
962
963   return s;
964 }
965
966 static inline uword
967 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
968                                           vlib_node_runtime_t *node,
969                                           vlib_frame_t *frame,
970                                           int is_output_feature,
971                                           int is_multi_worker)
972 {
973   u32 n_left_from, *from;
974   snat_main_t *sm = &snat_main;
975   f64 now = vlib_time_now (vm);
976   u32 thread_index = vm->thread_index;
977   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
978   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
979     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
980
981   from = vlib_frame_vector_args (frame);
982   n_left_from = frame->n_vectors;
983
984   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
985   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
986   vlib_get_buffers (vm, from, b, n_left_from);
987
988   while (n_left_from > 0)
989     {
990       vlib_buffer_t *b0;
991       u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0;
992       nat_protocol_t proto0;
993       ip4_header_t *ip0;
994       snat_session_t *s0 = 0;
995       clib_bihash_kv_16_8_t kv0, value0;
996       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
997       nat_6t_flow_t *f = 0;
998       nat_6t_t lookup;
999       int lookup_skipped = 0;
1000
1001       b0 = *b;
1002       b++;
1003
1004       /* Prefetch next iteration. */
1005       if (PREDICT_TRUE (n_left_from >= 2))
1006         {
1007           vlib_buffer_t *p2;
1008
1009           p2 = *b;
1010
1011           vlib_prefetch_buffer_header (p2, LOAD);
1012
1013           clib_prefetch_load (p2->data);
1014         }
1015
1016       if (is_output_feature)
1017         {
1018           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1019         }
1020
1021       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1022
1023       ip0 =
1024         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
1025
1026       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1027       rx_fib_index0 =
1028         fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
1029       lookup.fib_index = rx_fib_index0;
1030
1031       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1032         {
1033           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1034           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1035                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1036                                        0);
1037           next[0] = NAT_NEXT_ICMP_ERROR;
1038           goto trace0;
1039         }
1040
1041       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1042
1043       if (is_output_feature)
1044         {
1045           if (PREDICT_FALSE
1046               (nat_not_translate_output_feature_fwd
1047                (sm, ip0, thread_index, now, vm, b0)))
1048             goto trace0;
1049         }
1050
1051       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1052         {
1053           if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1054                 ICMP4_echo_request &&
1055               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1056                 ICMP4_echo_reply &&
1057               !icmp_type_is_error_message (
1058                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
1059             {
1060               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
1061               next[0] = NAT_NEXT_DROP;
1062               goto trace0;
1063             }
1064           int err = nat_get_icmp_session_lookup_values (
1065             b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
1066             &lookup.dport, &lookup.proto);
1067           if (err != 0)
1068             {
1069               b0->error = node->errors[err];
1070               next[0] = NAT_NEXT_DROP;
1071               goto trace0;
1072             }
1073         }
1074       else
1075         {
1076           lookup.proto = ip0->protocol;
1077           lookup.saddr.as_u32 = ip0->src_address.as_u32;
1078           lookup.daddr.as_u32 = ip0->dst_address.as_u32;
1079           lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
1080           lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
1081         }
1082
1083       /* there might be a stashed index in vnet_buffer2 from handoff or
1084        * classify node, see if it can be used */
1085       if (is_multi_worker &&
1086           !pool_is_free_index (tsm->sessions,
1087                                vnet_buffer2 (b0)->nat.cached_session_index))
1088         {
1089           s0 = pool_elt_at_index (tsm->sessions,
1090                                   vnet_buffer2 (b0)->nat.cached_session_index);
1091           if (PREDICT_TRUE (
1092                 nat_6t_t_eq (&s0->i2o.match, &lookup)
1093                 // for some hairpinning cases there are two "i2i" flows instead
1094                 // of i2o and o2i as both hosts are on inside
1095                 || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1096                     nat_6t_t_eq (&s0->o2i.match, &lookup))))
1097             {
1098               /* yes, this is the droid we're looking for */
1099               lookup_skipped = 1;
1100               goto skip_lookup;
1101             }
1102           s0 = NULL;
1103         }
1104
1105       init_ed_k (&kv0, lookup.saddr, lookup.sport, lookup.daddr, lookup.dport,
1106                  lookup.fib_index, lookup.proto);
1107
1108       // lookup flow
1109       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1110         {
1111           // flow does not exist go slow path
1112           next[0] = def_slow;
1113           goto trace0;
1114         }
1115
1116       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1117       s0 =
1118         pool_elt_at_index (tsm->sessions,
1119                            ed_value_get_session_index (&value0));
1120
1121     skip_lookup:
1122
1123       ASSERT (thread_index == s0->thread_index);
1124
1125       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1126         {
1127           // session is closed, go slow path
1128           nat_free_session_data (sm, s0, thread_index, 0);
1129           nat_ed_session_delete (sm, s0, thread_index, 1);
1130           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
1131           goto trace0;
1132         }
1133
1134       if (s0->tcp_closed_timestamp)
1135         {
1136           if (now >= s0->tcp_closed_timestamp)
1137             {
1138               // session is closed, go slow path, freed in slow path
1139               next[0] = def_slow;
1140             }
1141           else
1142             {
1143               // session in transitory timeout, drop
1144               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
1145               next[0] = NAT_NEXT_DROP;
1146             }
1147           goto trace0;
1148         }
1149
1150       // drop if session expired
1151       u64 sess_timeout_time;
1152       sess_timeout_time =
1153         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1154       if (now >= sess_timeout_time)
1155         {
1156           nat_free_session_data (sm, s0, thread_index, 0);
1157           nat_ed_session_delete (sm, s0, thread_index, 1);
1158           // session is closed, go slow path
1159           next[0] = def_slow;
1160           goto trace0;
1161         }
1162
1163       b0->flags |= VNET_BUFFER_F_IS_NATED;
1164
1165       if (nat_6t_t_eq (&s0->i2o.match, &lookup))
1166         {
1167           f = &s0->i2o;
1168         }
1169       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1170                nat_6t_t_eq (&s0->o2i.match, &lookup))
1171         {
1172           f = &s0->o2i;
1173         }
1174       else
1175         {
1176           translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
1177           nat_free_session_data (sm, s0, thread_index, 0);
1178           nat_ed_session_delete (sm, s0, thread_index, 1);
1179           next[0] = NAT_NEXT_DROP;
1180           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1181           goto trace0;
1182         }
1183
1184       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1185           (translation_error = nat_6t_flow_buf_translate_i2o (
1186              vm, sm, b0, ip0, f, proto0, is_output_feature)))
1187         {
1188           nat_free_session_data (sm, s0, thread_index, 0);
1189           nat_ed_session_delete (sm, s0, thread_index, 1);
1190           next[0] = NAT_NEXT_DROP;
1191           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1192           goto trace0;
1193         }
1194
1195       switch (proto0)
1196         {
1197         case NAT_PROTOCOL_TCP:
1198           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
1199                                          thread_index, sw_if_index0, 1);
1200           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1201           break;
1202         case NAT_PROTOCOL_UDP:
1203           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
1204                                          thread_index, sw_if_index0, 1);
1205           break;
1206         case NAT_PROTOCOL_ICMP:
1207           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
1208                                          thread_index, sw_if_index0, 1);
1209           break;
1210         case NAT_PROTOCOL_OTHER:
1211           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
1212                                          thread_index, sw_if_index0, 1);
1213           break;
1214         }
1215
1216       /* Accounting */
1217       nat44_session_update_counters (s0, now,
1218                                      vlib_buffer_length_in_chain (vm, b0),
1219                                      thread_index);
1220       /* Per-user LRU list maintenance */
1221       nat44_session_update_lru (sm, s0, thread_index);
1222
1223     trace0:
1224       if (PREDICT_FALSE
1225           ((node->flags & VLIB_NODE_FLAG_TRACE)
1226            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1227         {
1228           nat_in2out_ed_trace_t *t =
1229             vlib_add_trace (vm, node, b0, sizeof (*t));
1230           t->sw_if_index = sw_if_index0;
1231           t->next_index = next[0];
1232           t->is_slow_path = 0;
1233           t->translation_error = translation_error;
1234           t->lookup_skipped = lookup_skipped;
1235           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1236
1237           if (s0)
1238             {
1239               t->session_index = s0 - tsm->sessions;
1240               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1241               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1242               t->translation_via_i2of = (&s0->i2o == f);
1243             }
1244           else
1245             {
1246               t->session_index = ~0;
1247             }
1248         }
1249
1250       if (next[0] == NAT_NEXT_DROP)
1251         {
1252           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
1253                                          thread_index, sw_if_index0, 1);
1254         }
1255
1256       n_left_from--;
1257       next++;
1258     }
1259
1260   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1261                                frame->n_vectors);
1262   return frame->n_vectors;
1263 }
1264
1265 static inline uword
1266 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
1267                                           vlib_node_runtime_t *node,
1268                                           vlib_frame_t *frame,
1269                                           int is_output_feature,
1270                                           int is_multi_worker)
1271 {
1272   u32 n_left_from, *from;
1273   snat_main_t *sm = &snat_main;
1274   f64 now = vlib_time_now (vm);
1275   u32 thread_index = vm->thread_index;
1276   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1277
1278   from = vlib_frame_vector_args (frame);
1279   n_left_from = frame->n_vectors;
1280
1281   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1282   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1283   vlib_get_buffers (vm, from, b, n_left_from);
1284
1285   while (n_left_from > 0)
1286     {
1287       vlib_buffer_t *b0;
1288       u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1289       nat_protocol_t proto0;
1290       ip4_header_t *ip0;
1291       udp_header_t *udp0;
1292       icmp46_header_t *icmp0;
1293       snat_session_t *s0 = 0;
1294       clib_bihash_kv_16_8_t kv0, value0;
1295       int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1296
1297       b0 = *b;
1298
1299       if (is_output_feature)
1300         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1301
1302       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1303
1304       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1305                               iph_offset0);
1306
1307       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1308       rx_fib_index0 =
1309         fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
1310
1311       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1312         {
1313           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1314           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1315                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1316                                        0);
1317           next[0] = NAT_NEXT_ICMP_ERROR;
1318           goto trace0;
1319         }
1320
1321       udp0 = ip4_next_header (ip0);
1322       icmp0 = (icmp46_header_t *) udp0;
1323       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1324
1325       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
1326         {
1327           s0 = nat44_ed_in2out_slowpath_unknown_proto (
1328             sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
1329           if (!s0)
1330             next[0] = NAT_NEXT_DROP;
1331
1332           if (NAT_NEXT_DROP != next[0] && s0 &&
1333               NAT_ED_TRNSL_ERR_SUCCESS !=
1334                 (translation_error = nat_6t_flow_buf_translate_i2o (
1335                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1336             {
1337               nat_free_session_data (sm, s0, thread_index, 0);
1338               nat_ed_session_delete (sm, s0, thread_index, 1);
1339               next[0] = NAT_NEXT_DROP;
1340               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1341               goto trace0;
1342             }
1343
1344           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
1345                                          thread_index, sw_if_index0, 1);
1346           goto trace0;
1347         }
1348
1349       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1350         {
1351           next[0] = icmp_in2out_ed_slow_path (
1352             sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next[0],
1353             now, thread_index, proto0, &s0, is_multi_worker);
1354           if (NAT_NEXT_DROP != next[0] && s0 &&
1355               NAT_ED_TRNSL_ERR_SUCCESS !=
1356                 (translation_error = nat_6t_flow_buf_translate_i2o (
1357                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1358             {
1359               nat_free_session_data (sm, s0, thread_index, 0);
1360               nat_ed_session_delete (sm, s0, thread_index, 1);
1361               next[0] = NAT_NEXT_DROP;
1362               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1363               goto trace0;
1364             }
1365
1366           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.icmp,
1367                                          thread_index, sw_if_index0, 1);
1368           goto trace0;
1369         }
1370
1371       init_ed_k (&kv0, ip0->src_address,
1372                  vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
1373                  vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
1374                  ip0->protocol);
1375       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1376         {
1377           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1378           s0 =
1379             pool_elt_at_index (tsm->sessions,
1380                                ed_value_get_session_index (&value0));
1381
1382           if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
1383             {
1384               nat_free_session_data (sm, s0, thread_index, 0);
1385               nat_ed_session_delete (sm, s0, thread_index, 1);
1386               s0 = NULL;
1387             }
1388         }
1389
1390       if (!s0)
1391         {
1392           if (is_output_feature)
1393             {
1394               if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
1395                     sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1396                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1397                     sw_if_index0, vnet_buffer (b0)->sw_if_index[VLIB_TX], now,
1398                     is_multi_worker)))
1399                 goto trace0;
1400
1401               /*
1402                * Send DHCP packets to the ipv4 stack, or we won't
1403                * be able to use dhcp client on the outside interface
1404                */
1405               if (PREDICT_FALSE
1406                   (proto0 == NAT_PROTOCOL_UDP
1407                    && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1408                        clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server))
1409                    && ip0->dst_address.as_u32 == 0xffffffff))
1410                 goto trace0;
1411             }
1412           else
1413             {
1414               if (PREDICT_FALSE (nat44_ed_not_translate (
1415                     vm, sm, node, sw_if_index0, b0, ip0, proto0, rx_fib_index0,
1416                     thread_index)))
1417                 goto trace0;
1418             }
1419
1420           next[0] = slow_path_ed (
1421             vm, sm, b0, ip0->src_address, ip0->dst_address,
1422             vnet_buffer (b0)->ip.reass.l4_src_port,
1423             vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->protocol,
1424             rx_fib_index0, &s0, node, next[0], thread_index, now);
1425
1426           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1427             goto trace0;
1428
1429           if (PREDICT_FALSE (!s0))
1430             goto trace0;
1431
1432         }
1433
1434       b0->flags |= VNET_BUFFER_F_IS_NATED;
1435
1436       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1437           (translation_error = nat_6t_flow_buf_translate_i2o (
1438              vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1439         {
1440           nat_free_session_data (sm, s0, thread_index, 0);
1441           nat_ed_session_delete (sm, s0, thread_index, 1);
1442           next[0] = NAT_NEXT_DROP;
1443           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1444           goto trace0;
1445         }
1446
1447       if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
1448         {
1449           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
1450                                          thread_index, sw_if_index0, 1);
1451           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1452         }
1453       else
1454         {
1455           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
1456                                          thread_index, sw_if_index0, 1);
1457         }
1458
1459       /* Accounting */
1460       nat44_session_update_counters (s0, now,
1461                                      vlib_buffer_length_in_chain
1462                                      (vm, b0), thread_index);
1463       /* Per-user LRU list maintenance */
1464       nat44_session_update_lru (sm, s0, thread_index);
1465
1466     trace0:
1467       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1468                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1469         {
1470           nat_in2out_ed_trace_t *t =
1471             vlib_add_trace (vm, node, b0, sizeof (*t));
1472           t->sw_if_index = sw_if_index0;
1473           t->next_index = next[0];
1474           t->is_slow_path = 1;
1475           t->translation_error = translation_error;
1476           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1477
1478           if (s0)
1479             {
1480               t->session_index = s0 - tsm->sessions;
1481               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1482               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1483               t->translation_via_i2of = 1;
1484             }
1485
1486           else
1487             {
1488               t->session_index = ~0;
1489             }
1490         }
1491
1492       if (next[0] == NAT_NEXT_DROP)
1493         {
1494           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
1495                                          thread_index, sw_if_index0, 1);
1496         }
1497
1498       n_left_from--;
1499       next++;
1500       b++;
1501     }
1502
1503   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1504                                frame->n_vectors);
1505
1506   return frame->n_vectors;
1507 }
1508
1509 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1510                                      vlib_node_runtime_t * node,
1511                                      vlib_frame_t * frame)
1512 {
1513   if (snat_main.num_workers > 1)
1514     {
1515       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1);
1516     }
1517   else
1518     {
1519       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0);
1520     }
1521 }
1522
1523 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1524   .name = "nat44-ed-in2out",
1525   .vector_size = sizeof (u32),
1526   .sibling_of = "nat-default",
1527   .format_trace = format_nat_in2out_ed_trace,
1528   .type = VLIB_NODE_TYPE_INTERNAL,
1529   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1530   .error_strings = nat_in2out_ed_error_strings,
1531   .runtime_data_bytes = sizeof (snat_runtime_t),
1532 };
1533
1534 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1535                                             vlib_node_runtime_t * node,
1536                                             vlib_frame_t * frame)
1537 {
1538   if (snat_main.num_workers > 1)
1539     {
1540       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1);
1541     }
1542   else
1543     {
1544       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0);
1545     }
1546 }
1547
1548 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1549   .name = "nat44-ed-in2out-output",
1550   .vector_size = sizeof (u32),
1551   .sibling_of = "nat-default",
1552   .format_trace = format_nat_in2out_ed_trace,
1553   .type = VLIB_NODE_TYPE_INTERNAL,
1554   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1555   .error_strings = nat_in2out_ed_error_strings,
1556   .runtime_data_bytes = sizeof (snat_runtime_t),
1557 };
1558
1559 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1560                                               vlib_node_runtime_t *
1561                                               node, vlib_frame_t * frame)
1562 {
1563   if (snat_main.num_workers > 1)
1564     {
1565       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 1);
1566     }
1567   else
1568     {
1569       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 0);
1570     }
1571 }
1572
1573 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1574   .name = "nat44-ed-in2out-slowpath",
1575   .vector_size = sizeof (u32),
1576   .sibling_of = "nat-default",
1577   .format_trace = format_nat_in2out_ed_trace,
1578   .type = VLIB_NODE_TYPE_INTERNAL,
1579   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1580   .error_strings = nat_in2out_ed_error_strings,
1581   .runtime_data_bytes = sizeof (snat_runtime_t),
1582 };
1583
1584 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1585                                                      vlib_node_runtime_t
1586                                                      * node,
1587                                                      vlib_frame_t * frame)
1588 {
1589   if (snat_main.num_workers > 1)
1590     {
1591       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 1);
1592     }
1593   else
1594     {
1595       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 0);
1596     }
1597 }
1598
1599 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1600   .name = "nat44-ed-in2out-output-slowpath",
1601   .vector_size = sizeof (u32),
1602   .sibling_of = "nat-default",
1603   .format_trace = format_nat_in2out_ed_trace,
1604   .type = VLIB_NODE_TYPE_INTERNAL,
1605   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1606   .error_strings = nat_in2out_ed_error_strings,
1607   .runtime_data_bytes = sizeof (snat_runtime_t),
1608 };
1609
1610 static u8 *
1611 format_nat_pre_trace (u8 * s, va_list * args)
1612 {
1613   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1614   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1615   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1616   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1617                  t->arc_next_index);
1618 }
1619
1620 VLIB_NODE_FN (nat_pre_in2out_node)
1621   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1622 {
1623   return nat_pre_node_fn_inline (vm, node, frame,
1624                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1625 }
1626
1627 VLIB_NODE_FN (nat_pre_in2out_output_node)
1628   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1629 {
1630   return nat_pre_node_fn_inline (vm, node, frame,
1631                                  NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
1632 }
1633
1634 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1635   .name = "nat-pre-in2out",
1636   .vector_size = sizeof (u32),
1637   .sibling_of = "nat-default",
1638   .format_trace = format_nat_pre_trace,
1639   .type = VLIB_NODE_TYPE_INTERNAL,
1640   .n_errors = 0,
1641 };
1642
1643 VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
1644   .name = "nat-pre-in2out-output",
1645   .vector_size = sizeof (u32),
1646   .sibling_of = "nat-default",
1647   .format_trace = format_nat_pre_trace,
1648   .type = VLIB_NODE_TYPE_INTERNAL,
1649   .n_errors = 0,
1650 };
1651
1652 /*
1653  * fd.io coding-style-patch-verification: ON
1654  *
1655  * Local Variables:
1656  * eval: (c-set-style "gnu")
1657  * End:
1658  */