nat: do not use nat session object after deletion
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_in2out.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27
28 #include <nat/lib/nat_inlines.h>
29 #include <nat/lib/ipfix_logging.h>
30
31 #include <nat/nat44-ed/nat44_ed.h>
32 #include <nat/nat44-ed/nat44_ed_inlines.h>
33
34 static char *nat_in2out_ed_error_strings[] = {
35 #define _(sym,string) string,
36   foreach_nat_in2out_ed_error
37 #undef _
38 };
39
40 typedef struct
41 {
42   u32 sw_if_index;
43   u32 next_index;
44   u32 session_index;
45   nat_translation_error_e translation_error;
46   nat_6t_flow_t i2of;
47   nat_6t_flow_t o2if;
48   clib_bihash_kv_16_8_t search_key;
49   u8 is_slow_path;
50   u8 translation_via_i2of;
51   u8 lookup_skipped;
52   u8 tcp_state;
53 } nat_in2out_ed_trace_t;
54
55 static u8 *
56 format_nat_in2out_ed_trace (u8 * s, va_list * args)
57 {
58   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
59   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
60   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
61   char *tag;
62
63   tag =
64     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
65     "NAT44_IN2OUT_ED_FAST_PATH";
66
67   s = format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
68               t->next_index);
69   if (~0 != t->session_index)
70     {
71       s = format (s, ", session %d, translation result '%U' via %s",
72                   t->session_index, format_nat_ed_translation_error,
73                   t->translation_error,
74                   t->translation_via_i2of ? "i2of" : "o2if");
75       s = format (s, "\n  i2of %U", format_nat_6t_flow, &t->i2of);
76       s = format (s, "\n  o2if %U", format_nat_6t_flow, &t->o2if);
77     }
78   if (!t->is_slow_path)
79     {
80       if (t->lookup_skipped)
81         {
82           s = format (s, "\n  lookup skipped - cached session index used");
83         }
84       else
85         {
86           s = format (s, "\n  search key %U", format_ed_session_kvp,
87                       &t->search_key);
88         }
89     }
90   if (IP_PROTOCOL_TCP == t->i2of.match.proto)
91     {
92       s = format (s, "\n  TCP state: %U", format_nat44_ed_tcp_state,
93                   t->tcp_state);
94     }
95
96   return s;
97 }
98
99 static int
100 nat_ed_alloc_addr_and_port_with_snat_address (
101   snat_main_t *sm, u8 proto, u32 thread_index, snat_address_t *a,
102   u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
103   ip4_address_t *outside_addr, u16 *outside_port)
104 {
105   const u16 port_thread_offset =
106     (port_per_thread * snat_thread_index) + ED_USER_PORT_OFFSET;
107
108   s->o2i.match.daddr = a->addr;
109   /* first try port suggested by caller */
110   u16 port = clib_net_to_host_u16 (*outside_port);
111   u16 port_offset = port - port_thread_offset;
112   if (port < port_thread_offset ||
113       port >= port_thread_offset + port_per_thread)
114     {
115       /* need to pick a different port, suggested port doesn't fit in
116        * this thread's port range */
117       port_offset = snat_random_port (0, port_per_thread - 1);
118       port = port_thread_offset + port_offset;
119     }
120   u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
121   do
122     {
123       if (IP_PROTOCOL_ICMP == proto)
124         {
125           s->o2i.match.sport = clib_host_to_net_u16 (port);
126         }
127       s->o2i.match.dport = clib_host_to_net_u16 (port);
128       if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
129         {
130           *outside_addr = a->addr;
131           *outside_port = clib_host_to_net_u16 (port);
132           return 0;
133         }
134       port_offset = snat_random_port (0, port_per_thread - 1);
135       port = port_thread_offset + port_offset;
136       --attempts;
137     }
138   while (attempts > 0);
139   return 1;
140 }
141
142 static int
143 nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index,
144                             u32 tx_sw_if_index, u32 nat_proto,
145                             u32 thread_index, ip4_address_t s_addr,
146                             ip4_address_t d_addr, u32 snat_thread_index,
147                             snat_session_t *s, ip4_address_t *outside_addr,
148                             u16 *outside_port)
149 {
150   if (vec_len (sm->addresses) > 0)
151     {
152       u32 s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses);
153       snat_address_t *a, *ja = 0, *ra = 0, *ba = 0;
154       int i;
155
156       // output feature
157       if (tx_sw_if_index != ~0)
158         {
159           for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
160             {
161               a = sm->addresses + i;
162               if (a->fib_index == rx_fib_index)
163                 {
164                   if (a->sw_if_index == tx_sw_if_index)
165                     {
166                       if ((a->addr_len != ~0) &&
167                           (a->net.as_u32 ==
168                            (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
169
170                         {
171                           return nat_ed_alloc_addr_and_port_with_snat_address (
172                             sm, nat_proto, thread_index, a,
173                             sm->port_per_thread, snat_thread_index, s,
174                             outside_addr, outside_port);
175                         }
176                       ra = a;
177                     }
178                   ja = a;
179                 }
180               else if (a->fib_index == ~0)
181                 {
182                   ba = a;
183                 }
184             }
185           for (i = 0; i < s_addr_offset; ++i)
186             {
187               a = sm->addresses + i;
188               if (a->fib_index == rx_fib_index)
189                 {
190                   if (a->sw_if_index == tx_sw_if_index)
191                     {
192                       if ((a->addr_len != ~0) &&
193                           (a->net.as_u32 ==
194                            (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
195
196                         {
197                           return nat_ed_alloc_addr_and_port_with_snat_address (
198                             sm, nat_proto, thread_index, a,
199                             sm->port_per_thread, snat_thread_index, s,
200                             outside_addr, outside_port);
201                         }
202                       ra = a;
203                     }
204                   ja = a;
205                 }
206               else if (a->fib_index == ~0)
207                 {
208                   ba = a;
209                 }
210             }
211           if (ra)
212             {
213               return nat_ed_alloc_addr_and_port_with_snat_address (
214                 sm, nat_proto, thread_index, ra, sm->port_per_thread,
215                 snat_thread_index, s, outside_addr, outside_port);
216             }
217         }
218       else
219         {
220           // first try nat pool addresses to sw interface addreses mappings
221           for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
222             {
223               a = sm->addresses + i;
224               if (a->fib_index == rx_fib_index)
225                 {
226                   if ((a->addr_len != ~0) &&
227                       (a->net.as_u32 ==
228                        (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
229                     {
230                       return nat_ed_alloc_addr_and_port_with_snat_address (
231                         sm, nat_proto, thread_index, a, sm->port_per_thread,
232                         snat_thread_index, s, outside_addr, outside_port);
233                     }
234                   ja = a;
235                 }
236               else if (a->fib_index == ~0)
237                 {
238                   ba = a;
239                 }
240             }
241           for (i = 0; i < s_addr_offset; ++i)
242             {
243               a = sm->addresses + i;
244               if (a->fib_index == rx_fib_index)
245                 {
246                   if ((a->addr_len != ~0) &&
247                       (a->net.as_u32 ==
248                        (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
249                     {
250                       return nat_ed_alloc_addr_and_port_with_snat_address (
251                         sm, nat_proto, thread_index, a, sm->port_per_thread,
252                         snat_thread_index, s, outside_addr, outside_port);
253                     }
254                   ja = a;
255                 }
256               else if (a->fib_index == ~0)
257                 {
258                   ba = a;
259                 }
260             }
261         }
262
263       if (ja || ba)
264         {
265           a = ja ? ja : ba;
266           return nat_ed_alloc_addr_and_port_with_snat_address (
267             sm, nat_proto, thread_index, a, sm->port_per_thread,
268             snat_thread_index, s, outside_addr, outside_port);
269         }
270     }
271   /* Totally out of translations to use... */
272   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
273   return 1;
274 }
275
276 static_always_inline int
277 nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
278                              u16 match_port, ip_protocol_t match_protocol,
279                              ip4_address_t *daddr, u16 *dport)
280 {
281   snat_static_mapping_t *m =
282     nat44_ed_sm_o2i_lookup (sm, match_addr, match_port, 0, match_protocol);
283   if (!m)
284     {
285       /* Try address only mapping */
286       m = nat44_ed_sm_o2i_lookup (sm, match_addr, 0, 0, 0);
287       if (!m)
288         return 0;
289     }
290   *daddr = m->local_addr;
291   if (dport)
292     {
293       /* Address only mapping doesn't change port */
294       *dport = is_sm_addr_only (m->flags) ? match_port : m->local_port;
295     }
296   return 1;
297 }
298
299 static_always_inline vrf_table_t *
300 get_vrf_table_by_fib (u32 fib_index)
301 {
302   snat_main_t *sm = &snat_main;
303   vrf_table_t *t;
304
305   pool_foreach (t, sm->vrf_tables)
306     {
307       if (fib_index == t->table_fib_index)
308         {
309           return t;
310         }
311     }
312
313   return 0;
314 }
315
316 static_always_inline u32
317 get_tx_fib_index (u32 rx_fib_index, ip4_address_t addr)
318 {
319   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
320   fib_prefix_t pfx = {
321     .fp_proto = FIB_PROTOCOL_IP4,
322     .fp_len = 32,
323     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
324     ,
325   };
326
327   snat_main_t *sm = &snat_main;
328   vrf_table_t *t = get_vrf_table_by_fib (rx_fib_index);
329   // default to rx fib
330   u32 tx_fib_index = rx_fib_index;
331
332   if (0 != t)
333     {
334       // managed routes to other fibs
335       vrf_route_t *r;
336       pool_foreach (r, t->routes)
337         {
338           fei = fib_table_lookup (r->fib_index, &pfx);
339           if ((FIB_NODE_INDEX_INVALID != fei) &&
340               (~0 != fib_entry_get_resolving_interface (fei)))
341             {
342               tx_fib_index = r->fib_index;
343               break;
344             }
345         }
346     }
347   else
348     {
349       // default to configured fib
350       tx_fib_index = sm->outside_fib_index;
351
352       // default routes to other fibs
353       nat_fib_t *f;
354       vec_foreach (f, sm->outside_fibs)
355         {
356           fei = fib_table_lookup (f->fib_index, &pfx);
357           if ((FIB_NODE_INDEX_INVALID != fei) &&
358               (~0 != fib_entry_get_resolving_interface (fei)))
359             {
360               tx_fib_index = f->fib_index;
361               break;
362             }
363         }
364     }
365
366   return tx_fib_index;
367 }
368
369 static_always_inline int
370 is_destination_resolvable (u32 rx_fib_index, ip4_address_t addr)
371 {
372   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
373   fib_prefix_t pfx = {
374     .fp_proto = FIB_PROTOCOL_IP4,
375     .fp_len = 32,
376     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
377     ,
378   };
379
380   snat_main_t *sm = &snat_main;
381   vrf_table_t *t = get_vrf_table_by_fib (rx_fib_index);
382   u32 ii;
383
384   if (0 != t)
385     {
386       // managed routes to other fibs
387       vrf_route_t *r;
388       pool_foreach (r, t->routes)
389         {
390           fei = fib_table_lookup (r->fib_index, &pfx);
391           if ((FIB_NODE_INDEX_INVALID != fei) &&
392               (~0 != (ii = fib_entry_get_resolving_interface (fei))))
393             {
394               return 1;
395             }
396         }
397     }
398   else
399     {
400       // default routes to other fibs
401       nat_fib_t *f;
402       vec_foreach (f, sm->outside_fibs)
403         {
404           fei = fib_table_lookup (f->fib_index, &pfx);
405           if ((FIB_NODE_INDEX_INVALID != fei) &&
406               (~0 != (ii = fib_entry_get_resolving_interface (fei))))
407             {
408               snat_interface_t *i;
409               pool_foreach (i, sm->interfaces)
410                 {
411                   if ((nat44_ed_is_interface_outside (i)) &&
412                       (ii == i->sw_if_index))
413                     {
414                       return 1;
415                     }
416                 }
417             }
418         }
419     }
420
421   return 0;
422 }
423
424 static u32
425 slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
426               ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
427               u16 r_port, u8 proto, u32 rx_fib_index, u32 tx_sw_if_index,
428               snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
429               u32 thread_index, f64 now)
430 {
431   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
432   ip4_address_t outside_addr;
433   u16 outside_port;
434   u32 tx_fib_index;
435   u8 is_identity_nat = 0;
436
437   snat_session_t *s = NULL;
438   lb_nat_type_t lb = 0;
439   ip4_address_t daddr = r_addr;
440   u16 dport = r_port;
441
442   if (PREDICT_FALSE
443       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
444     {
445       if (!nat_lru_free_one (sm, thread_index, now))
446         {
447           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
448           nat_ipfix_logging_max_sessions (thread_index,
449                                           sm->max_translations_per_thread);
450           nat_elog_notice (sm, "maximum sessions exceeded");
451           return NAT_NEXT_DROP;
452         }
453     }
454
455   ip4_address_t sm_addr;
456   u16 sm_port;
457   u32 sm_fib_index;
458   int is_sm = 0;
459   // First try to match static mapping by local address and port
460   if (!snat_static_mapping_match (vm, l_addr, l_port, rx_fib_index, proto,
461                                   &sm_addr, &sm_port, &sm_fib_index, 0, 0, 0,
462                                   &lb, 0, &is_identity_nat, 0))
463     {
464       if (PREDICT_FALSE (is_identity_nat))
465         {
466           *sessionp = NULL;
467           return next;
468         }
469       is_sm = 1;
470     }
471
472   if (PREDICT_TRUE (proto == IP_PROTOCOL_TCP))
473     {
474       if (PREDICT_FALSE (!tcp_flags_is_init (
475             vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
476         {
477           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
478           return NAT_NEXT_DROP;
479         }
480     }
481
482   s = nat_ed_session_alloc (sm, thread_index, now, proto);
483   ASSERT (s);
484
485   tx_fib_index = get_tx_fib_index (rx_fib_index, r_addr);
486
487   if (!is_sm)
488     {
489       s->in2out.addr = l_addr;
490       s->in2out.port = l_port;
491       s->proto = proto;
492       s->in2out.fib_index = rx_fib_index;
493       s->out2in.fib_index = tx_fib_index;
494
495       // suggest using local port to allocation function
496       outside_port = l_port;
497
498       if (PREDICT_FALSE (nat44_ed_external_sm_lookup (sm, r_addr, r_port,
499                                                       proto, &daddr, &dport)))
500         {
501           s->flags |= SNAT_SESSION_FLAG_HAIRPINNING;
502         }
503
504       // destination addr/port updated with real values in
505       // nat_ed_alloc_addr_and_port
506       nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
507                             s->out2in.fib_index, proto);
508       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
509       if (IP_PROTOCOL_ICMP == proto)
510         {
511           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
512         }
513       else
514         {
515           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
516         }
517       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
518
519       if (nat_ed_alloc_addr_and_port (
520             sm, rx_fib_index, tx_sw_if_index, proto, thread_index, l_addr,
521             r_addr, tsm->snat_thread_index, s, &outside_addr, &outside_port))
522         {
523           nat_elog_notice (sm, "addresses exhausted");
524           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
525           nat_ed_session_delete (sm, s, thread_index, 1);
526           return NAT_NEXT_DROP;
527         }
528       s->out2in.addr = outside_addr;
529       s->out2in.port = outside_port;
530     }
531   else
532     {
533       // static mapping
534       s->out2in.addr = outside_addr = sm_addr;
535       s->out2in.port = outside_port = sm_port;
536       s->in2out.addr = l_addr;
537       s->in2out.port = l_port;
538       s->proto = proto;
539       s->in2out.fib_index = rx_fib_index;
540       s->out2in.fib_index = tx_fib_index;
541       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
542
543       // hairpinning?
544       int is_hairpinning = nat44_ed_external_sm_lookup (sm, r_addr, r_port,
545                                                         proto, &daddr, &dport);
546       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
547
548       if (IP_PROTOCOL_ICMP == proto)
549         {
550           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
551                                 sm_port, s->out2in.fib_index, proto);
552           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
553         }
554       else
555         {
556           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr,
557                                 sm_port, s->out2in.fib_index, proto);
558           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
559         }
560       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
561       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
562       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
563         {
564           nat_elog_notice (sm, "out2in key add failed");
565           goto error;
566         }
567     }
568
569   if (lb)
570     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
571   s->ext_host_addr = r_addr;
572   s->ext_host_port = r_port;
573
574   nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port,
575                         rx_fib_index, proto);
576   nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
577   nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);
578
579   if (IP_PROTOCOL_ICMP == proto)
580     {
581       nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
582     }
583   else
584     {
585       nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
586       nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
587     }
588   nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
589
590   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
591     {
592       nat_elog_notice (sm, "in2out key add failed");
593       goto error;
594     }
595
596   /* log NAT event */
597   nat_ipfix_logging_nat44_ses_create (
598     thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
599     s->in2out.port, s->out2in.port, s->in2out.fib_index);
600
601   nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
602                          s->in2out.port, &s->ext_host_nat_addr,
603                          s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
604                          &s->ext_host_addr, s->ext_host_port, s->proto, 0);
605
606   per_vrf_sessions_register_session (s, thread_index);
607
608   *sessionp = s;
609   return next;
610 error:
611   if (s)
612     {
613       nat_ed_session_delete (sm, s, thread_index, 1);
614     }
615   *sessionp = s = NULL;
616   return NAT_NEXT_DROP;
617 }
618
619 static_always_inline int
620 nat44_ed_not_translate (vlib_main_t *vm, vlib_node_runtime_t *node,
621                         u32 sw_if_index, vlib_buffer_t *b, ip4_header_t *ip,
622                         u32 proto, u32 rx_fib_index)
623 {
624   snat_main_t *sm = &snat_main;
625
626   clib_bihash_kv_16_8_t kv, value;
627   ip4_address_t placeholder_addr;
628   u32 placeholder_fib_index;
629   u16 placeholder_port;
630
631   init_ed_k (&kv, ip->dst_address.as_u32,
632              vnet_buffer (b)->ip.reass.l4_dst_port, ip->src_address.as_u32,
633              vnet_buffer (b)->ip.reass.l4_src_port, sm->outside_fib_index,
634              ip->protocol);
635
636   // do nat if active session or is static mapping
637   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value) ||
638       !snat_static_mapping_match (
639         vm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
640         sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
641         &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
642     {
643       return 0;
644     }
645
646   // do not nat if forwarding enabled
647   if (sm->forwarding_enabled)
648     {
649       return 1;
650     }
651
652   // do not nat packet aimed at the interface address
653   if (PREDICT_FALSE (
654         is_interface_addr (sm, node, sw_if_index, ip->dst_address.as_u32)))
655     {
656       return 1;
657     }
658
659   // do nat packets with resolvable destination
660   // destination can be resolved either by:
661   // a) vrf routing table entry
662   // b) (non output feature) outside interface fib
663   if (is_destination_resolvable (rx_fib_index, ip->dst_address))
664     {
665       return 0;
666     }
667
668   return 1;
669 }
670
671 static_always_inline int
672 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
673                                       u32 thread_index, f64 now,
674                                       vlib_main_t * vm, vlib_buffer_t * b)
675 {
676   clib_bihash_kv_16_8_t kv, value;
677   snat_session_t *s = 0;
678   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
679
680   if (!sm->forwarding_enabled)
681     return 0;
682
683   if (ip->protocol == IP_PROTOCOL_ICMP)
684     {
685       ip4_address_t lookup_saddr, lookup_daddr;
686       u16 lookup_sport, lookup_dport;
687       u8 lookup_protocol;
688       if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
689                                               &lookup_sport, &lookup_daddr,
690                                               &lookup_dport, &lookup_protocol))
691         return 0;
692       init_ed_k (&kv, lookup_saddr.as_u32, lookup_sport, lookup_daddr.as_u32,
693                  lookup_dport, 0, lookup_protocol);
694     }
695   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
696     {
697       init_ed_k (&kv, ip->src_address.as_u32,
698                  vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
699                  vnet_buffer (b)->ip.reass.l4_dst_port, 0, ip->protocol);
700     }
701   else
702     {
703       init_ed_k (&kv, ip->src_address.as_u32, 0, ip->dst_address.as_u32, 0, 0,
704                  ip->protocol);
705     }
706
707   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
708     {
709       ASSERT (thread_index == ed_value_get_thread_index (&value));
710       s =
711         pool_elt_at_index (tsm->sessions,
712                            ed_value_get_session_index (&value));
713
714       if (na44_ed_is_fwd_bypass_session (s))
715         {
716           if (ip->protocol == IP_PROTOCOL_TCP)
717             {
718               nat44_set_tcp_session_state_i2o (
719                 sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags,
720                 thread_index);
721             }
722           /* Accounting */
723           nat44_session_update_counters (s, now,
724                                          vlib_buffer_length_in_chain (vm, b),
725                                          thread_index);
726           /* Per-user LRU list maintenance */
727           nat44_session_update_lru (sm, s, thread_index);
728           return 1;
729         }
730       else
731         return 0;
732     }
733
734   return 0;
735 }
736
737 static_always_inline int
738 nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
739                                        ip4_header_t *ip, u16 src_port,
740                                        u16 dst_port, u32 thread_index,
741                                        u32 rx_sw_if_index, u32 tx_sw_if_index,
742                                        int is_multi_worker)
743 {
744   clib_bihash_kv_16_8_t kv, value;
745   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
746   snat_interface_t *i;
747   snat_session_t *s;
748   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
749   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
750
751   /* src NAT check */
752   init_ed_k (&kv, ip->src_address.as_u32, src_port, ip->dst_address.as_u32,
753              dst_port, tx_fib_index, ip->protocol);
754   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
755     {
756       ASSERT (thread_index == ed_value_get_thread_index (&value));
757       s =
758         pool_elt_at_index (tsm->sessions,
759                            ed_value_get_session_index (&value));
760       return 1;
761     }
762
763   /* dst NAT check */
764   if (is_multi_worker &&
765       PREDICT_TRUE (!pool_is_free_index (
766         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index)))
767     {
768       nat_6t_t lookup;
769       lookup.fib_index = rx_fib_index;
770       lookup.proto = ip->protocol;
771       lookup.daddr.as_u32 = ip->src_address.as_u32;
772       lookup.dport = src_port;
773       lookup.saddr.as_u32 = ip->dst_address.as_u32;
774       lookup.sport = dst_port;
775       s = pool_elt_at_index (
776         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index);
777       if (PREDICT_TRUE (nat_6t_t_eq (&s->i2o.match, &lookup)))
778         {
779           goto skip_dst_nat_lookup;
780         }
781       s = NULL;
782     }
783
784   init_ed_k (&kv, ip->dst_address.as_u32, dst_port, ip->src_address.as_u32,
785              src_port, rx_fib_index, ip->protocol);
786   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
787     {
788       ASSERT (thread_index == ed_value_get_thread_index (&value));
789       s =
790         pool_elt_at_index (tsm->sessions,
791                            ed_value_get_session_index (&value));
792
793     skip_dst_nat_lookup:
794       if (na44_ed_is_fwd_bypass_session (s))
795         return 0;
796
797       /* hairpinning */
798       pool_foreach (i, sm->output_feature_interfaces)
799         {
800           if ((nat44_ed_is_interface_inside (i)) &&
801               (rx_sw_if_index == i->sw_if_index))
802             return 0;
803         }
804       return 1;
805     }
806
807   return 0;
808 }
809
810 static inline u32
811 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
812                           icmp46_header_t *icmp, u32 sw_if_index,
813                           u32 tx_sw_if_index, u32 rx_fib_index,
814                           vlib_node_runtime_t *node, u32 next, f64 now,
815                           u32 thread_index, snat_session_t **s_p,
816                           int is_multi_worker)
817 {
818   vlib_main_t *vm = vlib_get_main ();
819   u16 checksum;
820   int err;
821   snat_session_t *s = NULL;
822   u8 lookup_protocol = ip->protocol;
823   u16 lookup_sport, lookup_dport;
824   ip4_address_t lookup_saddr, lookup_daddr;
825
826   err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
827                                             &lookup_sport, &lookup_daddr,
828                                             &lookup_dport, &lookup_protocol);
829   if (err != 0)
830     {
831       b->error = node->errors[err];
832       return NAT_NEXT_DROP;
833     }
834
835   if (tx_sw_if_index != ~0)
836     {
837       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
838             sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
839             tx_sw_if_index, is_multi_worker)))
840         {
841           return next;
842         }
843     }
844   else
845     {
846       if (PREDICT_FALSE (nat44_ed_not_translate (
847             vm, node, sw_if_index, b, ip, IP_PROTOCOL_ICMP, rx_fib_index)))
848         {
849           return next;
850         }
851     }
852
853   if (PREDICT_FALSE (icmp_type_is_error_message (
854         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
855     {
856       b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
857       return NAT_NEXT_DROP;
858     }
859
860   next =
861     slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address, lookup_sport,
862                   lookup_dport, ip->protocol, rx_fib_index, tx_sw_if_index, &s,
863                   node, next, thread_index, vlib_time_now (vm));
864
865   if (NAT_NEXT_DROP == next)
866     goto out;
867
868   if (PREDICT_TRUE (!ip4_is_fragment (ip)))
869     {
870       ip_csum_t sum = ip_incremental_checksum_buffer (
871         vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b),
872         ntohs (ip->length) - ip4_header_bytes (ip), 0);
873       checksum = ~ip_csum_fold (sum);
874       if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff))
875         {
876           next = NAT_NEXT_DROP;
877           goto out;
878         }
879     }
880
881 out:
882   if (PREDICT_TRUE (next != NAT_NEXT_DROP && s))
883     {
884       /* Accounting */
885       nat44_session_update_counters (
886         s, now, vlib_buffer_length_in_chain (vm, b), thread_index);
887       /* Per-user LRU list maintenance */
888       nat44_session_update_lru (sm, s, thread_index);
889     }
890   *s_p = s;
891   return next;
892 }
893
894 static snat_session_t *
895 nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
896                                         ip4_header_t *ip, u32 rx_fib_index,
897                                         u32 thread_index, f64 now,
898                                         vlib_main_t *vm,
899                                         vlib_node_runtime_t *node)
900 {
901   clib_bihash_kv_16_8_t s_kv, s_value;
902   snat_static_mapping_t *m = NULL;
903   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
904   snat_session_t *s = NULL;
905   u32 tx_fib_index;
906   int i;
907   ip4_address_t new_src_addr = { 0 };
908   ip4_address_t new_dst_addr = ip->dst_address;
909
910   if (PREDICT_FALSE (
911         nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
912     {
913       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
914       nat_ipfix_logging_max_sessions (thread_index,
915                                       sm->max_translations_per_thread);
916       nat_elog_notice (sm, "maximum sessions exceeded");
917       return 0;
918     }
919
920   tx_fib_index = get_tx_fib_index (rx_fib_index, ip->dst_address);
921
922   // Try to find static mapping first
923   m = nat44_ed_sm_i2o_lookup (sm, ip->src_address, 0, rx_fib_index,
924                               ip->protocol);
925   if (m)
926     {
927       new_src_addr = m->external_addr;
928     }
929   else
930     {
931       pool_foreach (s, tsm->sessions)
932         {
933           if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
934             {
935               init_ed_k (&s_kv, s->out2in.addr.as_u32, 0,
936                          ip->dst_address.as_u32, 0, tx_fib_index,
937                          ip->protocol);
938               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
939                 {
940                   new_src_addr = s->out2in.addr;
941                 }
942               break;
943             }
944         }
945
946       if (!new_src_addr.as_u32)
947         {
948           for (i = 0; i < vec_len (sm->addresses); i++)
949             {
950               init_ed_k (&s_kv, sm->addresses[i].addr.as_u32, 0,
951                          ip->dst_address.as_u32, 0, tx_fib_index,
952                          ip->protocol);
953               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
954                 {
955                   new_src_addr = sm->addresses[i].addr;
956                 }
957             }
958         }
959     }
960
961   if (!new_src_addr.as_u32)
962     {
963       // could not allocate address for translation ...
964       return 0;
965     }
966
967   s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
968   if (!s)
969     {
970       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
971       nat_elog_warn (sm, "create NAT session failed");
972       return 0;
973     }
974
975   nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
976                         ip->dst_address, 0, rx_fib_index, ip->protocol);
977   nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
978   nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
979
980   // hairpinning?
981   int is_hairpinning = nat44_ed_external_sm_lookup (
982     sm, ip->dst_address, 0, ip->protocol, &new_dst_addr, NULL);
983   s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
984
985   nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
986   nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
987
988   nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
989                         tx_fib_index, ip->protocol);
990   nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
991   nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
992   nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
993
994   s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
995   s->out2in.addr.as_u32 = new_src_addr.as_u32;
996   s->out2in.fib_index = tx_fib_index;
997   s->in2out.addr.as_u32 = ip->src_address.as_u32;
998   s->in2out.fib_index = rx_fib_index;
999   s->in2out.port = s->out2in.port = ip->protocol;
1000   if (m)
1001     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1002
1003   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
1004     {
1005       nat_elog_notice (sm, "in2out flow hash add failed");
1006       nat_ed_session_delete (sm, s, thread_index, 1);
1007       return NULL;
1008     }
1009
1010   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1))
1011     {
1012       nat_elog_notice (sm, "out2in flow hash add failed");
1013       nat_ed_session_delete (sm, s, thread_index, 1);
1014       return NULL;
1015     }
1016
1017   per_vrf_sessions_register_session (s, thread_index);
1018
1019   /* Accounting */
1020   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
1021                                  thread_index);
1022   /* Per-user LRU list maintenance */
1023   nat44_session_update_lru (sm, s, thread_index);
1024
1025   return s;
1026 }
1027
1028 static inline uword
1029 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
1030                                           vlib_node_runtime_t *node,
1031                                           vlib_frame_t *frame,
1032                                           int is_output_feature,
1033                                           int is_multi_worker)
1034 {
1035   u32 n_left_from, *from;
1036   snat_main_t *sm = &snat_main;
1037   f64 now = vlib_time_now (vm);
1038   u32 thread_index = vm->thread_index;
1039   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1040   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
1041     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
1042
1043   from = vlib_frame_vector_args (frame);
1044   n_left_from = frame->n_vectors;
1045
1046   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1047   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1048   vlib_get_buffers (vm, from, b, n_left_from);
1049
1050   while (n_left_from > 0)
1051     {
1052       vlib_buffer_t *b0;
1053       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1054       u32 tx_sw_if_index0;
1055       u32 cntr_sw_if_index0;
1056       ip_protocol_t proto0;
1057       ip4_header_t *ip0;
1058       snat_session_t *s0 = 0;
1059       clib_bihash_kv_16_8_t kv0 = { 0 }, value0;
1060       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1061       nat_6t_flow_t *f = 0;
1062       nat_6t_t lookup;
1063       int lookup_skipped = 0;
1064
1065       b0 = *b;
1066       b++;
1067
1068       /* Prefetch next iteration. */
1069       if (PREDICT_TRUE (n_left_from >= 2))
1070         {
1071           vlib_buffer_t *p2;
1072
1073           p2 = *b;
1074
1075           vlib_prefetch_buffer_header (p2, LOAD);
1076
1077           clib_prefetch_load (p2->data);
1078         }
1079
1080       if (is_output_feature)
1081         {
1082           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1083         }
1084
1085       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1086
1087       ip0 =
1088         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
1089
1090       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1091       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1092       cntr_sw_if_index0 =
1093         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1094       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1095                                                            rx_sw_if_index0);
1096       lookup.fib_index = rx_fib_index0;
1097
1098       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1099         {
1100           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1101           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1102                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1103                                        0);
1104           next[0] = NAT_NEXT_ICMP_ERROR;
1105           goto trace0;
1106         }
1107
1108       proto0 = ip0->protocol;
1109
1110       if (is_output_feature)
1111         {
1112           if (PREDICT_FALSE
1113               (nat_not_translate_output_feature_fwd
1114                (sm, ip0, thread_index, now, vm, b0)))
1115             goto trace0;
1116         }
1117
1118       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
1119         {
1120           if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1121                 ICMP4_echo_request &&
1122               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1123                 ICMP4_echo_reply &&
1124               !icmp_type_is_error_message (
1125                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
1126             {
1127               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
1128               next[0] = NAT_NEXT_DROP;
1129               goto trace0;
1130             }
1131           int err = nat_get_icmp_session_lookup_values (
1132             b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
1133             &lookup.dport, &lookup.proto);
1134           if (err != 0)
1135             {
1136               b0->error = node->errors[err];
1137               next[0] = NAT_NEXT_DROP;
1138               goto trace0;
1139             }
1140         }
1141       else
1142         {
1143           lookup.proto = ip0->protocol;
1144           lookup.saddr.as_u32 = ip0->src_address.as_u32;
1145           lookup.daddr.as_u32 = ip0->dst_address.as_u32;
1146           lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
1147           lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
1148         }
1149
1150       /* there might be a stashed index in vnet_buffer2 from handoff or
1151        * classify node, see if it can be used */
1152       if (is_multi_worker &&
1153           !pool_is_free_index (tsm->sessions,
1154                                vnet_buffer2 (b0)->nat.cached_session_index))
1155         {
1156           s0 = pool_elt_at_index (tsm->sessions,
1157                                   vnet_buffer2 (b0)->nat.cached_session_index);
1158           if (PREDICT_TRUE (
1159                 nat_6t_t_eq (&s0->i2o.match, &lookup)
1160                 // for some hairpinning cases there are two "i2i" flows instead
1161                 // of i2o and o2i as both hosts are on inside
1162                 || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1163                     nat_6t_t_eq (&s0->o2i.match, &lookup))))
1164             {
1165               /* yes, this is the droid we're looking for */
1166               lookup_skipped = 1;
1167               goto skip_lookup;
1168             }
1169           s0 = NULL;
1170         }
1171
1172       init_ed_k (&kv0, lookup.saddr.as_u32, lookup.sport, lookup.daddr.as_u32,
1173                  lookup.dport, lookup.fib_index, lookup.proto);
1174
1175       // lookup flow
1176       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1177         {
1178           // flow does not exist go slow path
1179           next[0] = def_slow;
1180           goto trace0;
1181         }
1182
1183       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1184       s0 =
1185         pool_elt_at_index (tsm->sessions,
1186                            ed_value_get_session_index (&value0));
1187
1188     skip_lookup:
1189
1190       ASSERT (thread_index == s0->thread_index);
1191
1192       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1193         {
1194           // session is closed, go slow path
1195           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1196           nat_ed_session_delete (sm, s0, thread_index, 1);
1197           s0 = 0;
1198           next[0] = def_slow;
1199           goto trace0;
1200         }
1201
1202       // drop if session expired
1203       u64 sess_timeout_time;
1204       sess_timeout_time =
1205         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1206       if (now >= sess_timeout_time)
1207         {
1208           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1209           nat_ed_session_delete (sm, s0, thread_index, 1);
1210           s0 = 0;
1211           // session is closed, go slow path
1212           next[0] = def_slow;
1213           goto trace0;
1214         }
1215
1216       b0->flags |= VNET_BUFFER_F_IS_NATED;
1217
1218       if (nat_6t_t_eq (&s0->i2o.match, &lookup))
1219         {
1220           f = &s0->i2o;
1221         }
1222       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1223                nat_6t_t_eq (&s0->o2i.match, &lookup))
1224         {
1225           f = &s0->o2i;
1226         }
1227       else
1228         {
1229           translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
1230           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1231           nat_ed_session_delete (sm, s0, thread_index, 1);
1232           s0 = 0;
1233           next[0] = NAT_NEXT_DROP;
1234           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1235           goto trace0;
1236         }
1237
1238       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1239           (translation_error = nat_6t_flow_buf_translate_i2o (
1240              vm, sm, b0, ip0, f, proto0, is_output_feature)))
1241         {
1242           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1243           nat_ed_session_delete (sm, s0, thread_index, 1);
1244           s0 = 0;
1245           next[0] = NAT_NEXT_DROP;
1246           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1247           goto trace0;
1248         }
1249
1250       switch (proto0)
1251         {
1252         case IP_PROTOCOL_TCP:
1253           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
1254                                          thread_index, cntr_sw_if_index0, 1);
1255           nat44_set_tcp_session_state_i2o (
1256             sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
1257             thread_index);
1258           break;
1259         case IP_PROTOCOL_UDP:
1260           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
1261                                          thread_index, cntr_sw_if_index0, 1);
1262           break;
1263         case IP_PROTOCOL_ICMP:
1264           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
1265                                          thread_index, cntr_sw_if_index0, 1);
1266           break;
1267         default:
1268           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
1269                                          thread_index, cntr_sw_if_index0, 1);
1270           break;
1271         }
1272
1273       /* Accounting */
1274       nat44_session_update_counters (s0, now,
1275                                      vlib_buffer_length_in_chain (vm, b0),
1276                                      thread_index);
1277       /* Per-user LRU list maintenance */
1278       nat44_session_update_lru (sm, s0, thread_index);
1279
1280     trace0:
1281       if (PREDICT_FALSE
1282           ((node->flags & VLIB_NODE_FLAG_TRACE)
1283            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1284         {
1285           nat_in2out_ed_trace_t *t =
1286             vlib_add_trace (vm, node, b0, sizeof (*t));
1287           t->sw_if_index = rx_sw_if_index0;
1288           t->next_index = next[0];
1289           t->is_slow_path = 0;
1290           t->translation_error = translation_error;
1291           t->lookup_skipped = lookup_skipped;
1292           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1293
1294           if (s0)
1295             {
1296               t->session_index = s0 - tsm->sessions;
1297               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1298               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1299               t->translation_via_i2of = (&s0->i2o == f);
1300               t->tcp_state = s0->tcp_state;
1301             }
1302           else
1303             {
1304               t->session_index = ~0;
1305             }
1306         }
1307
1308       if (next[0] == NAT_NEXT_DROP)
1309         {
1310           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
1311                                          thread_index, cntr_sw_if_index0, 1);
1312         }
1313
1314       n_left_from--;
1315       next++;
1316     }
1317
1318   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1319                                frame->n_vectors);
1320   return frame->n_vectors;
1321 }
1322
1323 static inline uword
1324 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
1325                                           vlib_node_runtime_t *node,
1326                                           vlib_frame_t *frame,
1327                                           int is_output_feature,
1328                                           int is_multi_worker)
1329 {
1330   u32 n_left_from, *from;
1331   snat_main_t *sm = &snat_main;
1332   f64 now = vlib_time_now (vm);
1333   u32 thread_index = vm->thread_index;
1334   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1335
1336   from = vlib_frame_vector_args (frame);
1337   n_left_from = frame->n_vectors;
1338
1339   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1340   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1341   vlib_get_buffers (vm, from, b, n_left_from);
1342
1343   while (n_left_from > 0)
1344     {
1345       vlib_buffer_t *b0;
1346       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1347       u32 tx_sw_if_index0;
1348       u32 cntr_sw_if_index0;
1349       ip_protocol_t proto0;
1350       ip4_header_t *ip0;
1351       udp_header_t *udp0;
1352       icmp46_header_t *icmp0;
1353       snat_session_t *s0 = 0;
1354       clib_bihash_kv_16_8_t kv0 = { 0 }, value0;
1355       int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1356
1357       b0 = *b;
1358
1359       if (is_output_feature)
1360         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1361
1362       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1363
1364       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1365                               iph_offset0);
1366
1367       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1368       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1369       cntr_sw_if_index0 =
1370         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1371       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1372                                                            rx_sw_if_index0);
1373
1374       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1375         {
1376           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1377           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1378                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1379                                        0);
1380           next[0] = NAT_NEXT_ICMP_ERROR;
1381           goto trace0;
1382         }
1383
1384       udp0 = ip4_next_header (ip0);
1385       icmp0 = (icmp46_header_t *) udp0;
1386       proto0 = ip0->protocol;
1387
1388       if (PREDICT_FALSE (nat44_ed_is_unk_proto (proto0)))
1389         {
1390           s0 = nat44_ed_in2out_slowpath_unknown_proto (
1391             sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
1392           if (!s0)
1393             next[0] = NAT_NEXT_DROP;
1394
1395           if (NAT_NEXT_DROP != next[0] && s0 &&
1396               NAT_ED_TRNSL_ERR_SUCCESS !=
1397                 (translation_error = nat_6t_flow_buf_translate_i2o (
1398                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1399             {
1400               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1401               nat_ed_session_delete (sm, s0, thread_index, 1);
1402               s0 = 0;
1403               next[0] = NAT_NEXT_DROP;
1404               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1405               goto trace0;
1406             }
1407
1408           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
1409                                          thread_index, cntr_sw_if_index0, 1);
1410           goto trace0;
1411         }
1412
1413       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
1414         {
1415           next[0] = icmp_in2out_ed_slow_path (
1416             sm, b0, ip0, icmp0, rx_sw_if_index0, tx_sw_if_index0,
1417             rx_fib_index0, node, next[0], now, thread_index, &s0,
1418             is_multi_worker);
1419           if (NAT_NEXT_DROP != next[0] && s0 &&
1420               NAT_ED_TRNSL_ERR_SUCCESS !=
1421                 (translation_error = nat_6t_flow_buf_translate_i2o (
1422                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1423             {
1424               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1425               nat_ed_session_delete (sm, s0, thread_index, 1);
1426               s0 = 0;
1427               next[0] = NAT_NEXT_DROP;
1428               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1429               goto trace0;
1430             }
1431
1432           if (NAT_NEXT_DROP != next[0])
1433             {
1434               vlib_increment_simple_counter (
1435                 &sm->counters.slowpath.in2out.icmp, thread_index,
1436                 cntr_sw_if_index0, 1);
1437             }
1438           goto trace0;
1439         }
1440
1441       init_ed_k (
1442         &kv0, ip0->src_address.as_u32, vnet_buffer (b0)->ip.reass.l4_src_port,
1443         ip0->dst_address.as_u32, vnet_buffer (b0)->ip.reass.l4_dst_port,
1444         rx_fib_index0, ip0->protocol);
1445       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1446         {
1447           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1448           s0 =
1449             pool_elt_at_index (tsm->sessions,
1450                                ed_value_get_session_index (&value0));
1451         }
1452
1453       if (!s0)
1454         {
1455           if (is_output_feature)
1456             {
1457               if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
1458                     sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1459                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1460                     rx_sw_if_index0, tx_sw_if_index0, is_multi_worker)))
1461                 goto trace0;
1462
1463               /*
1464                * Send DHCP packets to the ipv4 stack, or we won't
1465                * be able to use dhcp client on the outside interface
1466                */
1467               if (PREDICT_FALSE (
1468                     proto0 == IP_PROTOCOL_UDP &&
1469                     (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1470                      clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)) &&
1471                     ip0->dst_address.as_u32 == 0xffffffff))
1472                 goto trace0;
1473             }
1474           else
1475             {
1476               if (PREDICT_FALSE (
1477                     nat44_ed_not_translate (vm, node, rx_sw_if_index0, b0, ip0,
1478                                             proto0, rx_fib_index0)))
1479                 goto trace0;
1480             }
1481
1482           next[0] =
1483             slow_path_ed (vm, sm, b0, ip0->src_address, ip0->dst_address,
1484                           vnet_buffer (b0)->ip.reass.l4_src_port,
1485                           vnet_buffer (b0)->ip.reass.l4_dst_port,
1486                           ip0->protocol, rx_fib_index0, tx_sw_if_index0, &s0,
1487                           node, next[0], thread_index, now);
1488
1489           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1490             goto trace0;
1491
1492           if (PREDICT_FALSE (!s0))
1493             goto trace0;
1494
1495         }
1496
1497       b0->flags |= VNET_BUFFER_F_IS_NATED;
1498
1499       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1500           (translation_error = nat_6t_flow_buf_translate_i2o (
1501              vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1502         {
1503           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1504           nat_ed_session_delete (sm, s0, thread_index, 1);
1505           s0 = 0;
1506           next[0] = NAT_NEXT_DROP;
1507           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1508           goto trace0;
1509         }
1510
1511       if (PREDICT_TRUE (proto0 == IP_PROTOCOL_TCP))
1512         {
1513           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
1514                                          thread_index, cntr_sw_if_index0, 1);
1515           nat44_set_tcp_session_state_i2o (
1516             sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
1517             thread_index);
1518         }
1519       else
1520         {
1521           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
1522                                          thread_index, cntr_sw_if_index0, 1);
1523         }
1524
1525       /* Accounting */
1526       nat44_session_update_counters (s0, now,
1527                                      vlib_buffer_length_in_chain
1528                                      (vm, b0), thread_index);
1529       /* Per-user LRU list maintenance */
1530       nat44_session_update_lru (sm, s0, thread_index);
1531
1532     trace0:
1533       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1534                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1535         {
1536           nat_in2out_ed_trace_t *t =
1537             vlib_add_trace (vm, node, b0, sizeof (*t));
1538           t->sw_if_index = rx_sw_if_index0;
1539           t->next_index = next[0];
1540           t->is_slow_path = 1;
1541           t->translation_error = translation_error;
1542           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1543
1544           if (s0)
1545             {
1546               t->session_index = s0 - tsm->sessions;
1547               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1548               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1549               t->translation_via_i2of = 1;
1550               t->tcp_state = s0->tcp_state;
1551             }
1552
1553           else
1554             {
1555               t->session_index = ~0;
1556             }
1557         }
1558
1559       if (next[0] == NAT_NEXT_DROP)
1560         {
1561           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
1562                                          thread_index, cntr_sw_if_index0, 1);
1563         }
1564
1565       n_left_from--;
1566       next++;
1567       b++;
1568     }
1569
1570   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1571                                frame->n_vectors);
1572
1573   return frame->n_vectors;
1574 }
1575
1576 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1577                                      vlib_node_runtime_t * node,
1578                                      vlib_frame_t * frame)
1579 {
1580   if (snat_main.num_workers > 1)
1581     {
1582       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1);
1583     }
1584   else
1585     {
1586       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0);
1587     }
1588 }
1589
1590 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1591   .name = "nat44-ed-in2out",
1592   .vector_size = sizeof (u32),
1593   .sibling_of = "nat-default",
1594   .format_trace = format_nat_in2out_ed_trace,
1595   .type = VLIB_NODE_TYPE_INTERNAL,
1596   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1597   .error_strings = nat_in2out_ed_error_strings,
1598   .runtime_data_bytes = sizeof (snat_runtime_t),
1599 };
1600
1601 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1602                                             vlib_node_runtime_t * node,
1603                                             vlib_frame_t * frame)
1604 {
1605   if (snat_main.num_workers > 1)
1606     {
1607       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1);
1608     }
1609   else
1610     {
1611       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0);
1612     }
1613 }
1614
1615 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1616   .name = "nat44-ed-in2out-output",
1617   .vector_size = sizeof (u32),
1618   .sibling_of = "nat-default",
1619   .format_trace = format_nat_in2out_ed_trace,
1620   .type = VLIB_NODE_TYPE_INTERNAL,
1621   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1622   .error_strings = nat_in2out_ed_error_strings,
1623   .runtime_data_bytes = sizeof (snat_runtime_t),
1624 };
1625
1626 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1627                                               vlib_node_runtime_t *
1628                                               node, vlib_frame_t * frame)
1629 {
1630   if (snat_main.num_workers > 1)
1631     {
1632       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 1);
1633     }
1634   else
1635     {
1636       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 0);
1637     }
1638 }
1639
1640 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1641   .name = "nat44-ed-in2out-slowpath",
1642   .vector_size = sizeof (u32),
1643   .sibling_of = "nat-default",
1644   .format_trace = format_nat_in2out_ed_trace,
1645   .type = VLIB_NODE_TYPE_INTERNAL,
1646   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1647   .error_strings = nat_in2out_ed_error_strings,
1648   .runtime_data_bytes = sizeof (snat_runtime_t),
1649 };
1650
1651 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1652                                                      vlib_node_runtime_t
1653                                                      * node,
1654                                                      vlib_frame_t * frame)
1655 {
1656   if (snat_main.num_workers > 1)
1657     {
1658       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 1);
1659     }
1660   else
1661     {
1662       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 0);
1663     }
1664 }
1665
1666 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1667   .name = "nat44-ed-in2out-output-slowpath",
1668   .vector_size = sizeof (u32),
1669   .sibling_of = "nat-default",
1670   .format_trace = format_nat_in2out_ed_trace,
1671   .type = VLIB_NODE_TYPE_INTERNAL,
1672   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1673   .error_strings = nat_in2out_ed_error_strings,
1674   .runtime_data_bytes = sizeof (snat_runtime_t),
1675 };
1676
1677 static u8 *
1678 format_nat_pre_trace (u8 * s, va_list * args)
1679 {
1680   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1681   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1682   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1683   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1684                  t->arc_next_index);
1685 }
1686
1687 VLIB_NODE_FN (nat_pre_in2out_node)
1688   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1689 {
1690   return nat_pre_node_fn_inline (vm, node, frame,
1691                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1692 }
1693
1694 VLIB_NODE_FN (nat_pre_in2out_output_node)
1695   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1696 {
1697   return nat_pre_node_fn_inline (vm, node, frame,
1698                                  NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
1699 }
1700
1701 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1702   .name = "nat-pre-in2out",
1703   .vector_size = sizeof (u32),
1704   .sibling_of = "nat-default",
1705   .format_trace = format_nat_pre_trace,
1706   .type = VLIB_NODE_TYPE_INTERNAL,
1707   .n_errors = 0,
1708 };
1709
1710 VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
1711   .name = "nat-pre-in2out-output",
1712   .vector_size = sizeof (u32),
1713   .sibling_of = "nat-default",
1714   .format_trace = format_nat_pre_trace,
1715   .type = VLIB_NODE_TYPE_INTERNAL,
1716   .n_errors = 0,
1717 };
1718
1719 /*
1720  * fd.io coding-style-patch-verification: ON
1721  *
1722  * Local Variables:
1723  * eval: (c-set-style "gnu")
1724  * End:
1725  */