nat: replace main vlib with per-thread vlib
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_in2out.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27
28 #include <nat/lib/nat_syslog.h>
29 #include <nat/lib/nat_inlines.h>
30 #include <nat/lib/ipfix_logging.h>
31
32 #include <nat/nat44-ed/nat44_ed.h>
33 #include <nat/nat44-ed/nat44_ed_inlines.h>
34
/* Upper bound on how many candidate ports the ED port-overloading allocator
 * tries for one request. If that many picks in a row fail to yield a usable
 * port, the request is handled as if no free port existed at all, which
 * keeps the cost of a nearly exhausted pool bounded. */
#define ED_PORT_ALLOC_ATTEMPTS (10)
39
40 static char *nat_in2out_ed_error_strings[] = {
41 #define _(sym,string) string,
42   foreach_nat_in2out_ed_error
43 #undef _
44 };
45
46 typedef struct
47 {
48   u32 sw_if_index;
49   u32 next_index;
50   u32 session_index;
51   nat_translation_error_e translation_error;
52   nat_6t_flow_t i2of;
53   nat_6t_flow_t o2if;
54   clib_bihash_kv_16_8_t search_key;
55   u8 is_slow_path;
56   u8 translation_via_i2of;
57   u8 lookup_skipped;
58 } nat_in2out_ed_trace_t;
59
60 static u8 *
61 format_nat_in2out_ed_trace (u8 * s, va_list * args)
62 {
63   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
64   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
65   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
66   char *tag;
67
68   tag =
69     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
70     "NAT44_IN2OUT_ED_FAST_PATH";
71
72   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
73               t->sw_if_index, t->next_index, t->session_index);
74   if (~0 != t->session_index)
75     {
76       s = format (s, ", translation result '%U' via %s",
77                   format_nat_ed_translation_error, t->translation_error,
78                   t->translation_via_i2of ? "i2of" : "o2if");
79       s = format (s, "\n  i2of %U", format_nat_6t_flow, &t->i2of);
80       s = format (s, "\n  o2if %U", format_nat_6t_flow, &t->o2if);
81     }
82   if (!t->is_slow_path)
83     {
84       if (t->lookup_skipped)
85         {
86           s = format (s, "\n lookup skipped - cached session index used");
87         }
88       else
89         {
90           s = format (s, "\n  search key %U", format_ed_session_kvp,
91                       &t->search_key);
92         }
93     }
94
95   return s;
96 }
97
98 /**
99  * @brief Check if packet should be translated
100  *
101  * Packets aimed at outside interface and external address with active session
102  * should be translated.
103  *
104  * @param sm            NAT main
105  * @param rt            NAT runtime data
106  * @param sw_if_index0  index of the inside interface
107  * @param ip0           IPv4 header
108  * @param proto0        NAT protocol
109  * @param rx_fib_index0 RX FIB index
110  *
111  * @returns 0 if packet should be translated otherwise 1
112  */
113 static inline int
114 snat_not_translate_fast (snat_main_t *sm, vlib_node_runtime_t *node,
115                          u32 sw_if_index0, ip4_header_t *ip0, u32 proto0,
116                          u32 rx_fib_index0)
117 {
118   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
119   nat_outside_fib_t *outside_fib;
120   fib_prefix_t pfx = {
121     .fp_proto = FIB_PROTOCOL_IP4,
122     .fp_len = 32,
123     .fp_addr = {
124                 .ip4.as_u32 = ip0->dst_address.as_u32,
125                 }
126     ,
127   };
128
129   /* Don't NAT packet aimed at the intfc address */
130   if (PREDICT_FALSE (
131         is_interface_addr (sm, node, sw_if_index0, ip0->dst_address.as_u32)))
132     return 1;
133
134   fei = fib_table_lookup (rx_fib_index0, &pfx);
135   if (FIB_NODE_INDEX_INVALID != fei)
136     {
137       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
138       if (sw_if_index == ~0)
139         {
140           vec_foreach (outside_fib, sm->outside_fibs)
141             {
142               fei = fib_table_lookup (outside_fib->fib_index, &pfx);
143               if (FIB_NODE_INDEX_INVALID != fei)
144                 {
145                   sw_if_index = fib_entry_get_resolving_interface (fei);
146                   if (sw_if_index != ~0)
147                     break;
148                 }
149             }
150         }
151       if (sw_if_index == ~0)
152         return 1;
153
154       snat_interface_t *i;
155       pool_foreach (i, sm->interfaces)
156         {
157           /* NAT packet aimed at outside interface */
158           if ((nat_interface_is_outside (i)) &&
159               (sw_if_index == i->sw_if_index))
160             return 0;
161         }
162     }
163
164   return 1;
165 }
166
167 static int
168 nat_ed_alloc_addr_and_port_with_snat_address (
169   snat_main_t *sm, u32 nat_proto, u32 thread_index, snat_address_t *a,
170   u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
171   ip4_address_t *outside_addr, u16 *outside_port)
172 {
173   const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
174
175   s->o2i.match.daddr = a->addr;
176   /* first try port suggested by caller */
177   u16 port = clib_net_to_host_u16 (*outside_port);
178   u16 port_offset = port - port_thread_offset;
179   if (port <= port_thread_offset ||
180       port > port_thread_offset + port_per_thread)
181     {
182       /* need to pick a different port, suggested port doesn't fit in
183        * this thread's port range */
184       port_offset = snat_random_port (0, port_per_thread - 1);
185       port = port_thread_offset + port_offset;
186     }
187   u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
188   do
189     {
190       if (NAT_PROTOCOL_ICMP == nat_proto)
191         {
192           s->o2i.match.sport = clib_host_to_net_u16 (port);
193         }
194       s->o2i.match.dport = clib_host_to_net_u16 (port);
195       if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
196         {
197 #define _(N, i, n, s)                                                         \
198   case NAT_PROTOCOL_##N:                                                      \
199     ++a->busy_##n##_port_refcounts[port];                                     \
200     a->busy_##n##_ports_per_thread[thread_index]++;                           \
201     a->busy_##n##_ports++;                                                    \
202     break;
203           switch (nat_proto)
204             {
205               foreach_nat_protocol;
206             default:
207               nat_elog_info (sm, "unknown protocol");
208               return 1;
209             }
210 #undef _
211           *outside_addr = a->addr;
212           *outside_port = clib_host_to_net_u16 (port);
213           return 0;
214         }
215       port_offset = snat_random_port (0, port_per_thread - 1);
216       port = port_thread_offset + port_offset;
217       --attempts;
218     }
219   while (attempts > 0);
220   return 1;
221 }
222
223 static int
224 nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto,
225                             u32 thread_index, ip4_address_t s_addr,
226                             u16 port_per_thread, u32 snat_thread_index,
227                             snat_session_t *s, ip4_address_t *outside_addr,
228                             u16 *outside_port)
229 {
230   int i;
231   snat_address_t *a, *ga = 0;
232
233   if (vec_len (sm->addresses) > 0)
234     {
235       int s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses);
236
237       for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
238         {
239           a = sm->addresses + i;
240           if (a->fib_index == rx_fib_index)
241             {
242               return nat_ed_alloc_addr_and_port_with_snat_address (
243                 sm, nat_proto, thread_index, a, port_per_thread,
244                 snat_thread_index, s, outside_addr, outside_port);
245             }
246           else if (a->fib_index == ~0)
247             {
248               ga = a;
249             }
250         }
251
252       for (i = 0; i < s_addr_offset; ++i)
253         {
254           a = sm->addresses + i;
255           if (a->fib_index == rx_fib_index)
256             {
257               return nat_ed_alloc_addr_and_port_with_snat_address (
258                 sm, nat_proto, thread_index, a, port_per_thread,
259                 snat_thread_index, s, outside_addr, outside_port);
260             }
261           else if (a->fib_index == ~0)
262             {
263               ga = a;
264             }
265         }
266
267       if (ga)
268         {
269           return nat_ed_alloc_addr_and_port_with_snat_address (
270             sm, nat_proto, thread_index, a, port_per_thread, snat_thread_index,
271             s, outside_addr, outside_port);
272         }
273     }
274   /* Totally out of translations to use... */
275   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
276   return 1;
277 }
278
279 static_always_inline u32
280 nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr)
281 {
282   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
283   nat_outside_fib_t *outside_fib;
284   fib_prefix_t pfx = {
285     .fp_proto = FIB_PROTOCOL_IP4,
286     .fp_len = 32,
287     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
288     ,
289   };
290   // TODO: multiple vrfs none can resolve addr
291   vec_foreach (outside_fib, sm->outside_fibs)
292     {
293       fei = fib_table_lookup (outside_fib->fib_index, &pfx);
294       if (FIB_NODE_INDEX_INVALID != fei)
295         {
296           if (fib_entry_get_resolving_interface (fei) != ~0)
297             {
298               return outside_fib->fib_index;
299             }
300         }
301     }
302   return ~0;
303 }
304
305 static_always_inline int
306 nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
307                              u16 match_port, nat_protocol_t match_protocol,
308                              u32 match_fib_index, ip4_address_t *daddr,
309                              u16 *dport)
310 {
311   clib_bihash_kv_8_8_t kv, value;
312   init_nat_k (&kv, match_addr, match_port, match_fib_index, match_protocol);
313   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
314     {
315       /* Try address only mapping */
316       init_nat_k (&kv, match_addr, 0, 0, 0);
317       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv,
318                                   &value))
319         return 0;
320     }
321
322   snat_static_mapping_t *m =
323     pool_elt_at_index (sm->static_mappings, value.value);
324   *daddr = m->local_addr;
325   if (dport)
326     {
327       /* Address only mapping doesn't change port */
328       *dport = is_addr_only_static_mapping (m) ? match_port : m->local_port;
329     }
330   return 1;
331 }
332
333 static u32
334 slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
335               ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
336               u16 r_port, u8 proto, u32 rx_fib_index,
337               snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
338               u32 thread_index, f64 now)
339 {
340   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
341   ip4_address_t outside_addr;
342   u16 outside_port;
343   u32 outside_fib_index;
344   u8 is_identity_nat;
345
346   u32 nat_proto = ip_proto_to_nat_proto (proto);
347   snat_session_t *s = NULL;
348   lb_nat_type_t lb = 0;
349   ip4_address_t daddr = r_addr;
350   u16 dport = r_port;
351
352   if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP))
353     {
354       if (PREDICT_FALSE
355           (!tcp_flags_is_init
356            (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
357         {
358           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
359           return NAT_NEXT_DROP;
360         }
361     }
362
363   if (PREDICT_FALSE
364       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
365     {
366       if (!nat_lru_free_one (sm, thread_index, now))
367         {
368           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
369           nat_ipfix_logging_max_sessions (thread_index,
370                                           sm->max_translations_per_thread);
371           nat_elog_notice (sm, "maximum sessions exceeded");
372           return NAT_NEXT_DROP;
373         }
374     }
375
376   outside_fib_index = sm->outside_fib_index;
377
378   switch (vec_len (sm->outside_fibs))
379     {
380     case 0:
381       outside_fib_index = sm->outside_fib_index;
382       break;
383     case 1:
384       outside_fib_index = sm->outside_fibs[0].fib_index;
385       break;
386     default:
387       outside_fib_index = nat_outside_fib_index_lookup (sm, r_addr);
388       break;
389     }
390
391   ip4_address_t sm_addr;
392   u16 sm_port;
393   u32 sm_fib_index;
394   /* First try to match static mapping by local address and port */
395   int is_sm;
396   if (snat_static_mapping_match (vm, sm, l_addr, l_port, rx_fib_index,
397                                  nat_proto, &sm_addr, &sm_port, &sm_fib_index,
398                                  0, 0, 0, &lb, 0, &is_identity_nat, 0))
399     {
400       is_sm = 0;
401     }
402   else
403     {
404       is_sm = 1;
405     }
406
407   if (PREDICT_FALSE (is_sm && is_identity_nat))
408     {
409       *sessionp = NULL;
410       return next;
411     }
412
413   s = nat_ed_session_alloc (sm, thread_index, now, proto);
414   ASSERT (s);
415
416   if (!is_sm)
417     {
418       s->in2out.addr = l_addr;
419       s->in2out.port = l_port;
420       s->nat_proto = nat_proto;
421       s->in2out.fib_index = rx_fib_index;
422       s->out2in.fib_index = outside_fib_index;
423
424       // suggest using local port to allocation function
425       outside_port = l_port;
426
427       // hairpinning?
428       int is_hairpinning = nat44_ed_external_sm_lookup (
429         sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
430       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
431
432       // destination addr/port updated with real values in
433       // nat_ed_alloc_addr_and_port
434       nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
435                             s->out2in.fib_index, proto);
436       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
437       if (NAT_PROTOCOL_ICMP == nat_proto)
438         {
439           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
440         }
441       else
442         {
443           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
444         }
445       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
446
447       if (nat_ed_alloc_addr_and_port (
448             sm, rx_fib_index, nat_proto, thread_index, l_addr,
449             sm->port_per_thread, tsm->snat_thread_index, s, &outside_addr,
450             &outside_port))
451         {
452           nat_elog_notice (sm, "addresses exhausted");
453           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
454           nat_ed_session_delete (sm, s, thread_index, 1);
455           return NAT_NEXT_DROP;
456         }
457       s->out2in.addr = outside_addr;
458       s->out2in.port = outside_port;
459     }
460   else
461     {
462       // static mapping
463       s->out2in.addr = outside_addr = sm_addr;
464       s->out2in.port = outside_port = sm_port;
465       s->in2out.addr = l_addr;
466       s->in2out.port = l_port;
467       s->nat_proto = nat_proto;
468       s->in2out.fib_index = rx_fib_index;
469       s->out2in.fib_index = outside_fib_index;
470       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
471
472       // hairpinning?
473       int is_hairpinning = nat44_ed_external_sm_lookup (
474         sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
475       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
476
477       if (NAT_PROTOCOL_ICMP == nat_proto)
478         {
479           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
480                                 sm_port, s->out2in.fib_index, proto);
481           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
482         }
483       else
484         {
485           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr,
486                                 sm_port, s->out2in.fib_index, proto);
487           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
488         }
489       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
490       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
491       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
492         {
493           nat_elog_notice (sm, "out2in key add failed");
494           goto error;
495         }
496     }
497
498   if (lb)
499     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
500   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
501   s->ext_host_addr = r_addr;
502   s->ext_host_port = r_port;
503
504   nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port,
505                         rx_fib_index, proto);
506   nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
507   nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);
508   if (NAT_PROTOCOL_ICMP == nat_proto)
509     {
510       nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
511     }
512   else
513     {
514       nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
515       nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
516     }
517   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
518
519   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
520     {
521       nat_elog_notice (sm, "in2out key add failed");
522       goto error;
523     }
524
525   /* log NAT event */
526   nat_ipfix_logging_nat44_ses_create (thread_index,
527                                       s->in2out.addr.as_u32,
528                                       s->out2in.addr.as_u32,
529                                       s->nat_proto,
530                                       s->in2out.port,
531                                       s->out2in.port, s->in2out.fib_index);
532
533   nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
534                          s->in2out.port, &s->ext_host_nat_addr,
535                          s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
536                          &s->ext_host_addr, s->ext_host_port, s->nat_proto, 0);
537
538   per_vrf_sessions_register_session (s, thread_index);
539
540   *sessionp = s;
541   return next;
542 error:
543   if (s)
544     {
545       if (!is_sm)
546         {
547           snat_free_outside_address_and_port (sm->addresses, thread_index,
548                                               &outside_addr, outside_port,
549                                               nat_proto);
550         }
551       nat_ed_session_delete (sm, s, thread_index, 1);
552     }
553   *sessionp = s = NULL;
554   return NAT_NEXT_DROP;
555 }
556
557 static_always_inline int
558 nat44_ed_not_translate (vlib_main_t *vm, snat_main_t *sm,
559                         vlib_node_runtime_t *node, u32 sw_if_index,
560                         vlib_buffer_t *b, ip4_header_t *ip, u32 proto,
561                         u32 rx_fib_index, u32 thread_index)
562 {
563   clib_bihash_kv_16_8_t kv, value;
564
565   init_ed_k (&kv, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
566              ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
567              sm->outside_fib_index, ip->protocol);
568
569   /* NAT packet aimed at external address if has active sessions */
570   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
571     {
572       /* or is static mappings */
573       ip4_address_t placeholder_addr;
574       u16 placeholder_port;
575       u32 placeholder_fib_index;
576       if (!snat_static_mapping_match (
577             vm, sm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
578             sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
579             &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
580         return 0;
581     }
582   else
583     return 0;
584
585   if (sm->forwarding_enabled)
586     return 1;
587
588   return snat_not_translate_fast (sm, node, sw_if_index, ip, proto,
589                                   rx_fib_index);
590 }
591
592 static_always_inline int
593 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
594                                       u32 thread_index, f64 now,
595                                       vlib_main_t * vm, vlib_buffer_t * b)
596 {
597   clib_bihash_kv_16_8_t kv, value;
598   snat_session_t *s = 0;
599   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
600
601   if (!sm->forwarding_enabled)
602     return 0;
603
604   if (ip->protocol == IP_PROTOCOL_ICMP)
605     {
606       ip4_address_t lookup_saddr, lookup_daddr;
607       u16 lookup_sport, lookup_dport;
608       u8 lookup_protocol;
609       if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
610                                               &lookup_sport, &lookup_daddr,
611                                               &lookup_dport, &lookup_protocol))
612         return 0;
613       init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport,
614                  0, lookup_protocol);
615     }
616   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
617     {
618       init_ed_k (&kv, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
619                  ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port, 0,
620                  ip->protocol);
621     }
622   else
623     {
624       init_ed_k (&kv, ip->src_address, 0, ip->dst_address, 0, 0,
625                  ip->protocol);
626     }
627
628   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
629     {
630       ASSERT (thread_index == ed_value_get_thread_index (&value));
631       s =
632         pool_elt_at_index (tsm->sessions,
633                            ed_value_get_session_index (&value));
634
635       if (is_fwd_bypass_session (s))
636         {
637           if (ip->protocol == IP_PROTOCOL_TCP)
638             {
639               nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index);
640             }
641           /* Accounting */
642           nat44_session_update_counters (s, now,
643                                          vlib_buffer_length_in_chain (vm, b),
644                                          thread_index);
645           /* Per-user LRU list maintenance */
646           nat44_session_update_lru (sm, s, thread_index);
647           return 1;
648         }
649       else
650         return 0;
651     }
652
653   return 0;
654 }
655
656 static_always_inline int
657 nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip,
658                                        u16 src_port, u16 dst_port,
659                                        u32 thread_index, u32 rx_sw_if_index,
660                                        u32 tx_sw_if_index, f64 now)
661 {
662   clib_bihash_kv_16_8_t kv, value;
663   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
664   snat_interface_t *i;
665   snat_session_t *s;
666   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
667   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
668
669   /* src NAT check */
670   init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
671              tx_fib_index, ip->protocol);
672   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
673     {
674       ASSERT (thread_index == ed_value_get_thread_index (&value));
675       s =
676         pool_elt_at_index (tsm->sessions,
677                            ed_value_get_session_index (&value));
678       if (nat44_is_ses_closed (s)
679           && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp))
680         {
681           nat_free_session_data (sm, s, thread_index, 0);
682           nat_ed_session_delete (sm, s, thread_index, 1);
683         }
684       return 1;
685     }
686
687   /* dst NAT check */
688   init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port,
689              rx_fib_index, ip->protocol);
690   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
691     {
692       ASSERT (thread_index == ed_value_get_thread_index (&value));
693       s =
694         pool_elt_at_index (tsm->sessions,
695                            ed_value_get_session_index (&value));
696
697       if (is_fwd_bypass_session (s))
698         return 0;
699
700       /* hairpinning */
701       pool_foreach (i, sm->output_feature_interfaces)
702        {
703         if ((nat_interface_is_inside (i)) && (rx_sw_if_index == i->sw_if_index))
704            return 0;
705       }
706       return 1;
707     }
708
709   return 0;
710 }
711
712 static inline u32
713 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
714                           icmp46_header_t *icmp, u32 sw_if_index,
715                           u32 rx_fib_index, vlib_node_runtime_t *node,
716                           u32 next, f64 now, u32 thread_index,
717                           nat_protocol_t nat_proto, snat_session_t **s_p)
718 {
719   vlib_main_t *vm = vlib_get_main ();
720   u16 checksum;
721   int err;
722   snat_session_t *s = NULL;
723   u8 lookup_protocol = ip->protocol;
724   u16 lookup_sport, lookup_dport;
725   ip4_address_t lookup_saddr, lookup_daddr;
726
727   err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
728                                             &lookup_sport, &lookup_daddr,
729                                             &lookup_dport, &lookup_protocol);
730   if (err != 0)
731     {
732       b->error = node->errors[err];
733       return NAT_NEXT_DROP;
734     }
735
736   if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
737     {
738       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
739             sm, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
740             vnet_buffer (b)->sw_if_index[VLIB_TX], now)))
741         {
742           return next;
743         }
744     }
745   else
746     {
747       if (PREDICT_FALSE (nat44_ed_not_translate (vm, sm, node, sw_if_index, b,
748                                                  ip, NAT_PROTOCOL_ICMP,
749                                                  rx_fib_index, thread_index)))
750         {
751           return next;
752         }
753     }
754
755   if (PREDICT_FALSE (icmp_type_is_error_message (
756         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
757     {
758       b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
759       return NAT_NEXT_DROP;
760     }
761
762   next = slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address,
763                        lookup_sport, lookup_dport, ip->protocol, rx_fib_index,
764                        &s, node, next, thread_index, vlib_time_now (vm));
765
766   if (NAT_NEXT_DROP == next)
767     goto out;
768
769   if (PREDICT_TRUE (!ip4_is_fragment (ip)))
770     {
771       ip_csum_t sum = ip_incremental_checksum_buffer (
772         vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b),
773         ntohs (ip->length) - ip4_header_bytes (ip), 0);
774       checksum = ~ip_csum_fold (sum);
775       if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff))
776         {
777           next = NAT_NEXT_DROP;
778           goto out;
779         }
780     }
781
782 out:
783   if (PREDICT_TRUE (next != NAT_NEXT_DROP && s))
784     {
785       /* Accounting */
786       nat44_session_update_counters (
787         s, now, vlib_buffer_length_in_chain (vm, b), thread_index);
788       /* Per-user LRU list maintenance */
789       nat44_session_update_lru (sm, s, thread_index);
790     }
791   *s_p = s;
792   return next;
793 }
794
795 static snat_session_t *
796 nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
797                                         ip4_header_t *ip, u32 rx_fib_index,
798                                         u32 thread_index, f64 now,
799                                         vlib_main_t *vm,
800                                         vlib_node_runtime_t *node)
801 {
802   clib_bihash_kv_8_8_t kv, value;
803   clib_bihash_kv_16_8_t s_kv, s_value;
804   snat_static_mapping_t *m = NULL;
805   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
806   snat_session_t *s = NULL;
807   u32 outside_fib_index = sm->outside_fib_index;
808   int i;
809   ip4_address_t new_src_addr = { 0 };
810   ip4_address_t new_dst_addr = ip->dst_address;
811
812   if (PREDICT_FALSE (
813         nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
814     {
815       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
816       nat_ipfix_logging_max_sessions (thread_index,
817                                       sm->max_translations_per_thread);
818       nat_elog_notice (sm, "maximum sessions exceeded");
819       return 0;
820     }
821
822   switch (vec_len (sm->outside_fibs))
823     {
824     case 0:
825       outside_fib_index = sm->outside_fib_index;
826       break;
827     case 1:
828       outside_fib_index = sm->outside_fibs[0].fib_index;
829       break;
830     default:
831       outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address);
832       break;
833     }
834
835   init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0);
836
837   /* Try to find static mapping first */
838   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
839     {
840       m = pool_elt_at_index (sm->static_mappings, value.value);
841       new_src_addr = m->external_addr;
842     }
843   else
844     {
845       pool_foreach (s, tsm->sessions)
846         {
847           if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
848             {
849               init_ed_k (&s_kv, s->out2in.addr, 0, ip->dst_address, 0,
850                          outside_fib_index, ip->protocol);
851               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
852                 {
853                   new_src_addr = s->out2in.addr;
854                 }
855               break;
856             }
857         }
858
859       if (!new_src_addr.as_u32)
860         {
861           for (i = 0; i < vec_len (sm->addresses); i++)
862             {
863               init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0,
864                          outside_fib_index, ip->protocol);
865               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
866                 {
867                   new_src_addr = sm->addresses[i].addr;
868                 }
869             }
870         }
871     }
872
873   if (!new_src_addr.as_u32)
874     {
875       // could not allocate address for translation ...
876       return 0;
877     }
878
879   s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
880   if (!s)
881     {
882       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
883       nat_elog_warn (sm, "create NAT session failed");
884       return 0;
885     }
886
887   nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
888                         ip->dst_address, 0, rx_fib_index, ip->protocol);
889   nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
890   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
891
892   // hairpinning?
893   int is_hairpinning =
894     nat44_ed_external_sm_lookup (sm, ip->dst_address, 0, NAT_PROTOCOL_OTHER,
895                                  outside_fib_index, &new_dst_addr, NULL);
896   s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
897
898   nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
899   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
900
901   nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
902                         outside_fib_index, ip->protocol);
903   nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
904   nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
905   nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
906
907   s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
908   s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
909   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
910   s->out2in.addr.as_u32 = new_src_addr.as_u32;
911   s->out2in.fib_index = outside_fib_index;
912   s->in2out.addr.as_u32 = ip->src_address.as_u32;
913   s->in2out.fib_index = rx_fib_index;
914   s->in2out.port = s->out2in.port = ip->protocol;
915   if (m)
916     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
917
918   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
919     {
920       nat_elog_notice (sm, "in2out flow hash add failed");
921       nat_ed_session_delete (sm, s, thread_index, 1);
922       return NULL;
923     }
924
925   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1))
926     {
927       nat_elog_notice (sm, "out2in flow hash add failed");
928       nat_ed_session_delete (sm, s, thread_index, 1);
929       return NULL;
930     }
931
932   per_vrf_sessions_register_session (s, thread_index);
933
934   /* Accounting */
935   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
936                                  thread_index);
937   /* Per-user LRU list maintenance */
938   nat44_session_update_lru (sm, s, thread_index);
939
940   return s;
941 }
942
943 static inline uword
944 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
945                                           vlib_node_runtime_t *node,
946                                           vlib_frame_t *frame,
947                                           int is_output_feature,
948                                           int is_multi_worker)
949 {
950   u32 n_left_from, *from;
951   snat_main_t *sm = &snat_main;
952   f64 now = vlib_time_now (vm);
953   u32 thread_index = vm->thread_index;
954   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
955   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
956     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
957
958   from = vlib_frame_vector_args (frame);
959   n_left_from = frame->n_vectors;
960
961   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
962   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
963   vlib_get_buffers (vm, from, b, n_left_from);
964
965   while (n_left_from > 0)
966     {
967       vlib_buffer_t *b0;
968       u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0;
969       nat_protocol_t proto0;
970       ip4_header_t *ip0;
971       snat_session_t *s0 = 0;
972       clib_bihash_kv_16_8_t kv0, value0;
973       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
974       nat_6t_flow_t *f = 0;
975       nat_6t_t lookup;
976       int lookup_skipped = 0;
977
978       b0 = *b;
979       b++;
980
981       /* Prefetch next iteration. */
982       if (PREDICT_TRUE (n_left_from >= 2))
983         {
984           vlib_buffer_t *p2;
985
986           p2 = *b;
987
988           vlib_prefetch_buffer_header (p2, LOAD);
989
990           CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
991         }
992
993       if (is_output_feature)
994         {
995           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
996         }
997
998       next[0] = vnet_buffer2 (b0)->nat.arc_next;
999
1000       ip0 =
1001         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
1002
1003       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1004       rx_fib_index0 =
1005         fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
1006       lookup.fib_index = rx_fib_index0;
1007
1008       if (PREDICT_FALSE (ip0->ttl == 1))
1009         {
1010           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1011           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1012                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1013                                        0);
1014           next[0] = NAT_NEXT_ICMP_ERROR;
1015           goto trace0;
1016         }
1017
1018       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1019
1020       if (is_output_feature)
1021         {
1022           if (PREDICT_FALSE
1023               (nat_not_translate_output_feature_fwd
1024                (sm, ip0, thread_index, now, vm, b0)))
1025             goto trace0;
1026         }
1027
1028       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1029         {
1030           if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1031                 ICMP4_echo_request &&
1032               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1033                 ICMP4_echo_reply &&
1034               !icmp_type_is_error_message (
1035                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
1036             {
1037               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
1038               next[0] = NAT_NEXT_DROP;
1039               goto trace0;
1040             }
1041           int err = nat_get_icmp_session_lookup_values (
1042             b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
1043             &lookup.dport, &lookup.proto);
1044           if (err != 0)
1045             {
1046               b0->error = node->errors[err];
1047               next[0] = NAT_NEXT_DROP;
1048               goto trace0;
1049             }
1050         }
1051       else
1052         {
1053           lookup.proto = ip0->protocol;
1054           lookup.saddr.as_u32 = ip0->src_address.as_u32;
1055           lookup.daddr.as_u32 = ip0->dst_address.as_u32;
1056           lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
1057           lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
1058         }
1059
1060       /* there might be a stashed index in vnet_buffer2 from handoff or
1061        * classify node, see if it can be used */
1062       if (is_multi_worker &&
1063           !pool_is_free_index (tsm->sessions,
1064                                vnet_buffer2 (b0)->nat.cached_session_index))
1065         {
1066           s0 = pool_elt_at_index (tsm->sessions,
1067                                   vnet_buffer2 (b0)->nat.cached_session_index);
1068           if (PREDICT_TRUE (
1069                 nat_6t_t_eq (&s0->i2o.match, &lookup)
1070                 // for some hairpinning cases there are two "i2i" flows instead
1071                 // of i2o and o2i as both hosts are on inside
1072                 || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1073                     nat_6t_t_eq (&s0->o2i.match, &lookup))))
1074             {
1075               /* yes, this is the droid we're looking for */
1076               lookup_skipped = 1;
1077               goto skip_lookup;
1078             }
1079           s0 = NULL;
1080         }
1081
1082       init_ed_k (&kv0, lookup.saddr, lookup.sport, lookup.daddr, lookup.dport,
1083                  lookup.fib_index, lookup.proto);
1084
1085       // lookup flow
1086       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1087         {
1088           // flow does not exist go slow path
1089           next[0] = def_slow;
1090           goto trace0;
1091         }
1092
1093       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1094       s0 =
1095         pool_elt_at_index (tsm->sessions,
1096                            ed_value_get_session_index (&value0));
1097
1098     skip_lookup:
1099
1100       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1101         {
1102           // session is closed, go slow path
1103           nat_free_session_data (sm, s0, thread_index, 0);
1104           nat_ed_session_delete (sm, s0, thread_index, 1);
1105           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
1106           goto trace0;
1107         }
1108
1109       if (s0->tcp_closed_timestamp)
1110         {
1111           if (now >= s0->tcp_closed_timestamp)
1112             {
1113               // session is closed, go slow path, freed in slow path
1114               next[0] = def_slow;
1115             }
1116           else
1117             {
1118               // session in transitory timeout, drop
1119               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
1120               next[0] = NAT_NEXT_DROP;
1121             }
1122           goto trace0;
1123         }
1124
1125       // drop if session expired
1126       u64 sess_timeout_time;
1127       sess_timeout_time =
1128         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1129       if (now >= sess_timeout_time)
1130         {
1131           nat_free_session_data (sm, s0, thread_index, 0);
1132           nat_ed_session_delete (sm, s0, thread_index, 1);
1133           // session is closed, go slow path
1134           next[0] = def_slow;
1135           goto trace0;
1136         }
1137
1138       b0->flags |= VNET_BUFFER_F_IS_NATED;
1139
1140       if (nat_6t_t_eq (&s0->i2o.match, &lookup))
1141         {
1142           f = &s0->i2o;
1143         }
1144       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1145                nat_6t_t_eq (&s0->o2i.match, &lookup))
1146         {
1147           f = &s0->o2i;
1148         }
1149       else
1150         {
1151           translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
1152           nat_free_session_data (sm, s0, thread_index, 0);
1153           nat_ed_session_delete (sm, s0, thread_index, 1);
1154           next[0] = NAT_NEXT_DROP;
1155           goto trace0;
1156         }
1157
1158       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1159           (translation_error = nat_6t_flow_buf_translate (
1160              sm, b0, ip0, f, proto0, is_output_feature)))
1161         {
1162           nat_free_session_data (sm, s0, thread_index, 0);
1163           nat_ed_session_delete (sm, s0, thread_index, 1);
1164           next[0] = NAT_NEXT_DROP;
1165           goto trace0;
1166         }
1167
1168       switch (proto0)
1169         {
1170         case NAT_PROTOCOL_TCP:
1171           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
1172                                          thread_index, sw_if_index0, 1);
1173           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1174           break;
1175         case NAT_PROTOCOL_UDP:
1176           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
1177                                          thread_index, sw_if_index0, 1);
1178           break;
1179         case NAT_PROTOCOL_ICMP:
1180           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
1181                                          thread_index, sw_if_index0, 1);
1182           break;
1183         case NAT_PROTOCOL_OTHER:
1184           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
1185                                          thread_index, sw_if_index0, 1);
1186           break;
1187         }
1188
1189       /* Accounting */
1190       nat44_session_update_counters (s0, now,
1191                                      vlib_buffer_length_in_chain (vm, b0),
1192                                      thread_index);
1193       /* Per-user LRU list maintenance */
1194       nat44_session_update_lru (sm, s0, thread_index);
1195
1196     trace0:
1197       if (PREDICT_FALSE
1198           ((node->flags & VLIB_NODE_FLAG_TRACE)
1199            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1200         {
1201           nat_in2out_ed_trace_t *t =
1202             vlib_add_trace (vm, node, b0, sizeof (*t));
1203           t->sw_if_index = sw_if_index0;
1204           t->next_index = next[0];
1205           t->is_slow_path = 0;
1206           t->translation_error = translation_error;
1207           t->lookup_skipped = lookup_skipped;
1208           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1209
1210           if (s0)
1211             {
1212               t->session_index = s0 - tsm->sessions;
1213               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1214               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1215               t->translation_via_i2of = (&s0->i2o == f);
1216             }
1217           else
1218             {
1219               t->session_index = ~0;
1220             }
1221         }
1222
1223       if (next[0] == NAT_NEXT_DROP)
1224         {
1225           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
1226                                          thread_index, sw_if_index0, 1);
1227         }
1228
1229       n_left_from--;
1230       next++;
1231     }
1232
1233   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1234                                frame->n_vectors);
1235   return frame->n_vectors;
1236 }
1237
1238 static inline uword
1239 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm,
1240                                           vlib_node_runtime_t * node,
1241                                           vlib_frame_t * frame,
1242                                           int is_output_feature)
1243 {
1244   u32 n_left_from, *from;
1245   snat_main_t *sm = &snat_main;
1246   f64 now = vlib_time_now (vm);
1247   u32 thread_index = vm->thread_index;
1248   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1249
1250   from = vlib_frame_vector_args (frame);
1251   n_left_from = frame->n_vectors;
1252
1253   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1254   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1255   vlib_get_buffers (vm, from, b, n_left_from);
1256
1257   while (n_left_from > 0)
1258     {
1259       vlib_buffer_t *b0;
1260       u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1261       nat_protocol_t proto0;
1262       ip4_header_t *ip0;
1263       udp_header_t *udp0;
1264       icmp46_header_t *icmp0;
1265       snat_session_t *s0 = 0;
1266       clib_bihash_kv_16_8_t kv0, value0;
1267       int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1268
1269       b0 = *b;
1270
1271       if (is_output_feature)
1272         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1273
1274       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1275
1276       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1277                               iph_offset0);
1278
1279       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1280       rx_fib_index0 =
1281         fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
1282
1283       if (PREDICT_FALSE (ip0->ttl == 1))
1284         {
1285           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1286           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1287                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1288                                        0);
1289           next[0] = NAT_NEXT_ICMP_ERROR;
1290           goto trace0;
1291         }
1292
1293       udp0 = ip4_next_header (ip0);
1294       icmp0 = (icmp46_header_t *) udp0;
1295       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1296
1297       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
1298         {
1299           s0 = nat44_ed_in2out_slowpath_unknown_proto (
1300             sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
1301           if (!s0)
1302             next[0] = NAT_NEXT_DROP;
1303
1304           if (NAT_ED_TRNSL_ERR_SUCCESS !=
1305               (translation_error = nat_6t_flow_buf_translate (
1306                  sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1307             {
1308               goto trace0;
1309             }
1310
1311           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
1312                                          thread_index, sw_if_index0, 1);
1313           goto trace0;
1314         }
1315
1316       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1317         {
1318           next[0] = icmp_in2out_ed_slow_path (sm, b0, ip0, icmp0, sw_if_index0,
1319                                               rx_fib_index0, node, next[0],
1320                                               now, thread_index, proto0, &s0);
1321           if (NAT_NEXT_DROP != next[0] && s0 &&
1322               NAT_ED_TRNSL_ERR_SUCCESS !=
1323                 (translation_error = nat_6t_flow_buf_translate (
1324                    sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1325             {
1326               goto trace0;
1327             }
1328
1329           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.icmp,
1330                                          thread_index, sw_if_index0, 1);
1331           goto trace0;
1332         }
1333
1334       init_ed_k (&kv0, ip0->src_address,
1335                  vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
1336                  vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
1337                  ip0->protocol);
1338       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1339         {
1340           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1341           s0 =
1342             pool_elt_at_index (tsm->sessions,
1343                                ed_value_get_session_index (&value0));
1344
1345           if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
1346             {
1347               nat_free_session_data (sm, s0, thread_index, 0);
1348               nat_ed_session_delete (sm, s0, thread_index, 1);
1349               s0 = NULL;
1350             }
1351         }
1352
1353       if (!s0)
1354         {
1355           if (is_output_feature)
1356             {
1357               if (PREDICT_FALSE
1358                   (nat44_ed_not_translate_output_feature
1359                    (sm, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1360                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1361                     sw_if_index0, vnet_buffer (b0)->sw_if_index[VLIB_TX],
1362                     now)))
1363                 goto trace0;
1364
1365               /*
1366                * Send DHCP packets to the ipv4 stack, or we won't
1367                * be able to use dhcp client on the outside interface
1368                */
1369               if (PREDICT_FALSE
1370                   (proto0 == NAT_PROTOCOL_UDP
1371                    && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1372                        clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server))
1373                    && ip0->dst_address.as_u32 == 0xffffffff))
1374                 goto trace0;
1375             }
1376           else
1377             {
1378               if (PREDICT_FALSE (nat44_ed_not_translate (
1379                     vm, sm, node, sw_if_index0, b0, ip0, proto0, rx_fib_index0,
1380                     thread_index)))
1381                 goto trace0;
1382             }
1383
1384           next[0] = slow_path_ed (
1385             vm, sm, b0, ip0->src_address, ip0->dst_address,
1386             vnet_buffer (b0)->ip.reass.l4_src_port,
1387             vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->protocol,
1388             rx_fib_index0, &s0, node, next[0], thread_index, now);
1389
1390           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1391             goto trace0;
1392
1393           if (PREDICT_FALSE (!s0))
1394             goto trace0;
1395
1396         }
1397
1398       b0->flags |= VNET_BUFFER_F_IS_NATED;
1399
1400       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1401           (translation_error = nat_6t_flow_buf_translate (
1402              sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1403         {
1404           nat_free_session_data (sm, s0, thread_index, 0);
1405           nat_ed_session_delete (sm, s0, thread_index, 1);
1406           s0 = NULL;
1407           goto trace0;
1408         }
1409
1410       if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
1411         {
1412           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
1413                                          thread_index, sw_if_index0, 1);
1414           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1415         }
1416       else
1417         {
1418           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
1419                                          thread_index, sw_if_index0, 1);
1420         }
1421
1422       /* Accounting */
1423       nat44_session_update_counters (s0, now,
1424                                      vlib_buffer_length_in_chain
1425                                      (vm, b0), thread_index);
1426       /* Per-user LRU list maintenance */
1427       nat44_session_update_lru (sm, s0, thread_index);
1428
1429     trace0:
1430       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1431                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1432         {
1433           nat_in2out_ed_trace_t *t =
1434             vlib_add_trace (vm, node, b0, sizeof (*t));
1435           t->sw_if_index = sw_if_index0;
1436           t->next_index = next[0];
1437           t->is_slow_path = 1;
1438           t->translation_error = translation_error;
1439           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1440
1441           if (s0)
1442             {
1443               t->session_index = s0 - tsm->sessions;
1444               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1445               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1446               t->translation_via_i2of = 1;
1447             }
1448
1449           else
1450             {
1451               t->session_index = ~0;
1452             }
1453         }
1454
1455       if (next[0] == NAT_NEXT_DROP)
1456         {
1457           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
1458                                          thread_index, sw_if_index0, 1);
1459         }
1460
1461       n_left_from--;
1462       next++;
1463       b++;
1464     }
1465
1466   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1467                                frame->n_vectors);
1468
1469   return frame->n_vectors;
1470 }
1471
1472 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1473                                      vlib_node_runtime_t * node,
1474                                      vlib_frame_t * frame)
1475 {
1476   if (snat_main.num_workers > 1)
1477     {
1478       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1);
1479     }
1480   else
1481     {
1482       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0);
1483     }
1484 }
1485
1486 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1487   .name = "nat44-ed-in2out",
1488   .vector_size = sizeof (u32),
1489   .sibling_of = "nat-default",
1490   .format_trace = format_nat_in2out_ed_trace,
1491   .type = VLIB_NODE_TYPE_INTERNAL,
1492   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1493   .error_strings = nat_in2out_ed_error_strings,
1494   .runtime_data_bytes = sizeof (snat_runtime_t),
1495 };
1496
1497 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1498                                             vlib_node_runtime_t * node,
1499                                             vlib_frame_t * frame)
1500 {
1501   if (snat_main.num_workers > 1)
1502     {
1503       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1);
1504     }
1505   else
1506     {
1507       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0);
1508     }
1509 }
1510
1511 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1512   .name = "nat44-ed-in2out-output",
1513   .vector_size = sizeof (u32),
1514   .sibling_of = "nat-default",
1515   .format_trace = format_nat_in2out_ed_trace,
1516   .type = VLIB_NODE_TYPE_INTERNAL,
1517   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1518   .error_strings = nat_in2out_ed_error_strings,
1519   .runtime_data_bytes = sizeof (snat_runtime_t),
1520 };
1521
1522 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1523                                               vlib_node_runtime_t *
1524                                               node, vlib_frame_t * frame)
1525 {
1526   return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0);
1527 }
1528
1529 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1530   .name = "nat44-ed-in2out-slowpath",
1531   .vector_size = sizeof (u32),
1532   .sibling_of = "nat-default",
1533   .format_trace = format_nat_in2out_ed_trace,
1534   .type = VLIB_NODE_TYPE_INTERNAL,
1535   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1536   .error_strings = nat_in2out_ed_error_strings,
1537   .runtime_data_bytes = sizeof (snat_runtime_t),
1538 };
1539
1540 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1541                                                      vlib_node_runtime_t
1542                                                      * node,
1543                                                      vlib_frame_t * frame)
1544 {
1545   return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1);
1546 }
1547
1548 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1549   .name = "nat44-ed-in2out-output-slowpath",
1550   .vector_size = sizeof (u32),
1551   .sibling_of = "nat-default",
1552   .format_trace = format_nat_in2out_ed_trace,
1553   .type = VLIB_NODE_TYPE_INTERNAL,
1554   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1555   .error_strings = nat_in2out_ed_error_strings,
1556   .runtime_data_bytes = sizeof (snat_runtime_t),
1557 };
1558
1559 static u8 *
1560 format_nat_pre_trace (u8 * s, va_list * args)
1561 {
1562   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1563   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1564   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1565   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1566                  t->arc_next_index);
1567 }
1568
1569 VLIB_NODE_FN (nat_pre_in2out_node)
1570   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1571 {
1572   return nat_pre_node_fn_inline (vm, node, frame,
1573                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1574 }
1575
1576 VLIB_NODE_FN (nat_pre_in2out_output_node)
1577   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1578 {
1579   return nat_pre_node_fn_inline (vm, node, frame,
1580                                  NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
1581 }
1582
1583 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1584   .name = "nat-pre-in2out",
1585   .vector_size = sizeof (u32),
1586   .sibling_of = "nat-default",
1587   .format_trace = format_nat_pre_trace,
1588   .type = VLIB_NODE_TYPE_INTERNAL,
1589   .n_errors = 0,
1590 };
1591
1592 VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
1593   .name = "nat-pre-in2out-output",
1594   .vector_size = sizeof (u32),
1595   .sibling_of = "nat-default",
1596   .format_trace = format_nat_pre_trace,
1597   .type = VLIB_NODE_TYPE_INTERNAL,
1598   .n_errors = 0,
1599 };
1600
1601 /*
1602  * fd.io coding-style-patch-verification: ON
1603  *
1604  * Local Variables:
1605  * eval: (c-set-style "gnu")
1606  * End:
1607  */