nat: Final NAT44 EI/ED split patch
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_in2out.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27
28 #include <nat/lib/nat_syslog.h>
29 #include <nat/lib/nat_inlines.h>
30 #include <nat/lib/ipfix_logging.h>
31
32 #include <nat/nat44-ed/nat44_ed.h>
33 #include <nat/nat44-ed/nat44_ed_inlines.h>
34
/* Number of attempts to get a port for the ED overloading algorithm. If
 * rolling the dice this many times doesn't produce a free port, it's treated
 * as if there were no free ports available, to conserve resources. */
38 #define ED_PORT_ALLOC_ATTEMPTS (10)
39
40 static char *nat_in2out_ed_error_strings[] = {
41 #define _(sym,string) string,
42   foreach_nat_in2out_ed_error
43 #undef _
44 };
45
46 typedef struct
47 {
48   u32 sw_if_index;
49   u32 next_index;
50   u32 session_index;
51   nat_translation_error_e translation_error;
52   nat_6t_flow_t i2of;
53   nat_6t_flow_t o2if;
54   clib_bihash_kv_16_8_t search_key;
55   u8 is_slow_path;
56   u8 translation_via_i2of;
57   u8 lookup_skipped;
58 } nat_in2out_ed_trace_t;
59
60 static u8 *
61 format_nat_in2out_ed_trace (u8 * s, va_list * args)
62 {
63   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
64   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
65   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
66   char *tag;
67
68   tag =
69     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
70     "NAT44_IN2OUT_ED_FAST_PATH";
71
72   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
73               t->sw_if_index, t->next_index, t->session_index);
74   if (~0 != t->session_index)
75     {
76       s = format (s, ", translation result '%U' via %s",
77                   format_nat_ed_translation_error, t->translation_error,
78                   t->translation_via_i2of ? "i2of" : "o2if");
79       s = format (s, "\n  i2of %U", format_nat_6t_flow, &t->i2of);
80       s = format (s, "\n  o2if %U", format_nat_6t_flow, &t->o2if);
81     }
82   if (!t->is_slow_path)
83     {
84       if (t->lookup_skipped)
85         {
86           s = format (s, "\n lookup skipped - cached session index used");
87         }
88       else
89         {
90           s = format (s, "\n  search key %U", format_ed_session_kvp,
91                       &t->search_key);
92         }
93     }
94
95   return s;
96 }
97
98 /**
99  * @brief Check if packet should be translated
100  *
101  * Packets aimed at outside interface and external address with active session
102  * should be translated.
103  *
104  * @param sm            NAT main
105  * @param rt            NAT runtime data
106  * @param sw_if_index0  index of the inside interface
107  * @param ip0           IPv4 header
108  * @param proto0        NAT protocol
109  * @param rx_fib_index0 RX FIB index
110  *
111  * @returns 0 if packet should be translated otherwise 1
112  */
113 static inline int
114 snat_not_translate_fast (snat_main_t *sm, vlib_node_runtime_t *node,
115                          u32 sw_if_index0, ip4_header_t *ip0, u32 proto0,
116                          u32 rx_fib_index0)
117 {
118   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
119   nat_outside_fib_t *outside_fib;
120   fib_prefix_t pfx = {
121     .fp_proto = FIB_PROTOCOL_IP4,
122     .fp_len = 32,
123     .fp_addr = {
124                 .ip4.as_u32 = ip0->dst_address.as_u32,
125                 }
126     ,
127   };
128
129   /* Don't NAT packet aimed at the intfc address */
130   if (PREDICT_FALSE (
131         is_interface_addr (sm, node, sw_if_index0, ip0->dst_address.as_u32)))
132     return 1;
133
134   fei = fib_table_lookup (rx_fib_index0, &pfx);
135   if (FIB_NODE_INDEX_INVALID != fei)
136     {
137       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
138       if (sw_if_index == ~0)
139         {
140           vec_foreach (outside_fib, sm->outside_fibs)
141             {
142               fei = fib_table_lookup (outside_fib->fib_index, &pfx);
143               if (FIB_NODE_INDEX_INVALID != fei)
144                 {
145                   sw_if_index = fib_entry_get_resolving_interface (fei);
146                   if (sw_if_index != ~0)
147                     break;
148                 }
149             }
150         }
151       if (sw_if_index == ~0)
152         return 1;
153
154       snat_interface_t *i;
155       pool_foreach (i, sm->interfaces)
156         {
157           /* NAT packet aimed at outside interface */
158           if ((nat_interface_is_outside (i)) &&
159               (sw_if_index == i->sw_if_index))
160             return 0;
161         }
162     }
163
164   return 1;
165 }
166
167 static int
168 nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto,
169                             u32 thread_index, ip4_address_t r_addr, u16 r_port,
170                             u8 proto, u16 port_per_thread,
171                             u32 snat_thread_index, snat_session_t *s,
172                             ip4_address_t *outside_addr, u16 *outside_port)
173 {
174   int i;
175   snat_address_t *a, *ga = 0;
176
177   const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
178
179   for (i = 0; i < vec_len (sm->addresses); i++)
180     {
181       a = sm->addresses + i;
182       switch (nat_proto)
183         {
184 #define _(N, j, n, unused)                                                    \
185   case NAT_PROTOCOL_##N:                                                      \
186     if (a->fib_index == rx_fib_index)                                         \
187       {                                                                       \
188         s->o2i.match.daddr = a->addr;                                         \
189         /* first try port suggested by caller */                              \
190         u16 port = clib_net_to_host_u16 (*outside_port);                      \
191         u16 port_offset = port - port_thread_offset;                          \
192         if (port <= port_thread_offset ||                                     \
193             port > port_thread_offset + port_per_thread)                      \
194           {                                                                   \
195             /* need to pick a different port, suggested port doesn't fit in   \
196              * this thread's port range */                                    \
197             port_offset = snat_random_port (0, port_per_thread - 1);          \
198             port = port_thread_offset + port_offset;                          \
199           }                                                                   \
200         u16 attempts = ED_PORT_ALLOC_ATTEMPTS;                                \
201         do                                                                    \
202           {                                                                   \
203             if (NAT_PROTOCOL_ICMP == nat_proto)                               \
204               {                                                               \
205                 s->o2i.match.sport = clib_host_to_net_u16 (port);             \
206               }                                                               \
207             s->o2i.match.dport = clib_host_to_net_u16 (port);                 \
208             if (0 ==                                                          \
209                 nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))    \
210               {                                                               \
211                 ++a->busy_##n##_port_refcounts[port];                         \
212                 a->busy_##n##_ports_per_thread[thread_index]++;               \
213                 a->busy_##n##_ports++;                                        \
214                 *outside_addr = a->addr;                                      \
215                 *outside_port = clib_host_to_net_u16 (port);                  \
216                 return 0;                                                     \
217               }                                                               \
218             port_offset = snat_random_port (0, port_per_thread - 1);          \
219             port = port_thread_offset + port_offset;                          \
220             --attempts;                                                       \
221           }                                                                   \
222         while (attempts > 0);                                                 \
223       }                                                                       \
224     else if (a->fib_index == ~0)                                              \
225       {                                                                       \
226         ga = a;                                                               \
227       }                                                                       \
228     break;
229
230           foreach_nat_protocol;
231         default:
232           nat_elog_info (sm, "unknown protocol");
233           return 1;
234         }
235     }
236
237   if (ga)
238     {
239       /* fake fib_index to reuse macro */
240       rx_fib_index = ~0;
241       a = ga;
242       switch (nat_proto)
243         {
244           foreach_nat_protocol;
245         default:
246           nat_elog_info (sm, "unknown protocol");
247           return 1;
248         }
249     }
250
251 #undef _
252
253   /* Totally out of translations to use... */
254   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
255   return 1;
256 }
257
258 static_always_inline u32
259 nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr)
260 {
261   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
262   nat_outside_fib_t *outside_fib;
263   fib_prefix_t pfx = {
264     .fp_proto = FIB_PROTOCOL_IP4,
265     .fp_len = 32,
266     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
267     ,
268   };
269   // TODO: multiple vrfs none can resolve addr
270   vec_foreach (outside_fib, sm->outside_fibs)
271     {
272       fei = fib_table_lookup (outside_fib->fib_index, &pfx);
273       if (FIB_NODE_INDEX_INVALID != fei)
274         {
275           if (fib_entry_get_resolving_interface (fei) != ~0)
276             {
277               return outside_fib->fib_index;
278             }
279         }
280     }
281   return ~0;
282 }
283
284 static_always_inline int
285 nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
286                              u16 match_port, nat_protocol_t match_protocol,
287                              u32 match_fib_index, ip4_address_t *daddr,
288                              u16 *dport)
289 {
290   clib_bihash_kv_8_8_t kv, value;
291   init_nat_k (&kv, match_addr, match_port, match_fib_index, match_protocol);
292   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
293     {
294       /* Try address only mapping */
295       init_nat_k (&kv, match_addr, 0, 0, 0);
296       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv,
297                                   &value))
298         return 0;
299     }
300
301   snat_static_mapping_t *m =
302     pool_elt_at_index (sm->static_mappings, value.value);
303   *daddr = m->local_addr;
304   if (dport)
305     {
306       /* Address only mapping doesn't change port */
307       *dport = is_addr_only_static_mapping (m) ? match_port : m->local_port;
308     }
309   return 1;
310 }
311
312 static u32
313 slow_path_ed (snat_main_t *sm, vlib_buffer_t *b, ip4_address_t l_addr,
314               ip4_address_t r_addr, u16 l_port, u16 r_port, u8 proto,
315               u32 rx_fib_index, snat_session_t **sessionp,
316               vlib_node_runtime_t *node, u32 next, u32 thread_index, f64 now)
317 {
318   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
319   ip4_address_t outside_addr;
320   u16 outside_port;
321   u32 outside_fib_index;
322   u8 is_identity_nat;
323
324   u32 nat_proto = ip_proto_to_nat_proto (proto);
325   snat_session_t *s = NULL;
326   lb_nat_type_t lb = 0;
327   ip4_address_t daddr = r_addr;
328   u16 dport = r_port;
329
330   if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP))
331     {
332       if (PREDICT_FALSE
333           (!tcp_flags_is_init
334            (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
335         {
336           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
337           return NAT_NEXT_DROP;
338         }
339     }
340
341   if (PREDICT_FALSE
342       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
343     {
344       if (!nat_lru_free_one (sm, thread_index, now))
345         {
346           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
347           nat_ipfix_logging_max_sessions (thread_index,
348                                           sm->max_translations_per_thread);
349           nat_elog_notice (sm, "maximum sessions exceeded");
350           return NAT_NEXT_DROP;
351         }
352     }
353
354   outside_fib_index = sm->outside_fib_index;
355
356   switch (vec_len (sm->outside_fibs))
357     {
358     case 0:
359       outside_fib_index = sm->outside_fib_index;
360       break;
361     case 1:
362       outside_fib_index = sm->outside_fibs[0].fib_index;
363       break;
364     default:
365       outside_fib_index = nat_outside_fib_index_lookup (sm, r_addr);
366       break;
367     }
368
369   ip4_address_t sm_addr;
370   u16 sm_port;
371   u32 sm_fib_index;
372   /* First try to match static mapping by local address and port */
373   int is_sm;
374   if (snat_static_mapping_match (sm, l_addr, l_port, rx_fib_index, nat_proto,
375                                  &sm_addr, &sm_port, &sm_fib_index, 0, 0, 0,
376                                  &lb, 0, &is_identity_nat, 0))
377     {
378       is_sm = 0;
379     }
380   else
381     {
382       is_sm = 1;
383     }
384
385   if (PREDICT_FALSE (is_sm && is_identity_nat))
386     {
387       *sessionp = NULL;
388       return next;
389     }
390
391   s = nat_ed_session_alloc (sm, thread_index, now, proto);
392   ASSERT (s);
393
394   if (!is_sm)
395     {
396       s->in2out.addr = l_addr;
397       s->in2out.port = l_port;
398       s->nat_proto = nat_proto;
399       s->in2out.fib_index = rx_fib_index;
400       s->out2in.fib_index = outside_fib_index;
401
402       // suggest using local port to allocation function
403       outside_port = l_port;
404
405       // hairpinning?
406       int is_hairpinning = nat44_ed_external_sm_lookup (
407         sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
408       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
409
410       // destination addr/port updated with real values in
411       // nat_ed_alloc_addr_and_port
412       nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
413                             s->out2in.fib_index, proto);
414       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
415       if (NAT_PROTOCOL_ICMP == nat_proto)
416         {
417           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
418         }
419       else
420         {
421           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
422         }
423       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
424
425       if (nat_ed_alloc_addr_and_port (
426             sm, rx_fib_index, nat_proto, thread_index, daddr, dport, proto,
427             sm->port_per_thread, tsm->snat_thread_index, s, &outside_addr,
428             &outside_port))
429         {
430           nat_elog_notice (sm, "addresses exhausted");
431           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
432           nat_ed_session_delete (sm, s, thread_index, 1);
433           return NAT_NEXT_DROP;
434         }
435       s->out2in.addr = outside_addr;
436       s->out2in.port = outside_port;
437     }
438   else
439     {
440       // static mapping
441       s->out2in.addr = outside_addr = sm_addr;
442       s->out2in.port = outside_port = sm_port;
443       s->in2out.addr = l_addr;
444       s->in2out.port = l_port;
445       s->nat_proto = nat_proto;
446       s->in2out.fib_index = rx_fib_index;
447       s->out2in.fib_index = outside_fib_index;
448       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
449
450       // hairpinning?
451       int is_hairpinning = nat44_ed_external_sm_lookup (
452         sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
453       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
454
455       if (NAT_PROTOCOL_ICMP == nat_proto)
456         {
457           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
458                                 sm_port, s->out2in.fib_index, proto);
459           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
460         }
461       else
462         {
463           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr,
464                                 sm_port, s->out2in.fib_index, proto);
465           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
466         }
467       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
468       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
469       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
470         {
471           nat_elog_notice (sm, "out2in key add failed");
472           goto error;
473         }
474     }
475
476   if (lb)
477     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
478   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
479   s->ext_host_addr = r_addr;
480   s->ext_host_port = r_port;
481
482   nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port,
483                         rx_fib_index, proto);
484   nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
485   nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);
486   if (NAT_PROTOCOL_ICMP == nat_proto)
487     {
488       nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
489     }
490   else
491     {
492       nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
493       nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
494     }
495   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
496
497   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
498     {
499       nat_elog_notice (sm, "in2out key add failed");
500       goto error;
501     }
502
503   /* log NAT event */
504   nat_ipfix_logging_nat44_ses_create (thread_index,
505                                       s->in2out.addr.as_u32,
506                                       s->out2in.addr.as_u32,
507                                       s->nat_proto,
508                                       s->in2out.port,
509                                       s->out2in.port, s->in2out.fib_index);
510
511   nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
512                          s->in2out.port, &s->ext_host_nat_addr,
513                          s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
514                          &s->ext_host_addr, s->ext_host_port, s->nat_proto, 0);
515
516   per_vrf_sessions_register_session (s, thread_index);
517
518   *sessionp = s;
519   return next;
520 error:
521   if (s)
522     {
523       if (!is_sm)
524         {
525           snat_free_outside_address_and_port (sm->addresses, thread_index,
526                                               &outside_addr, outside_port,
527                                               nat_proto);
528         }
529       nat_ed_session_delete (sm, s, thread_index, 1);
530     }
531   *sessionp = s = NULL;
532   return NAT_NEXT_DROP;
533 }
534
535 static_always_inline int
536 nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t * node,
537                         u32 sw_if_index, ip4_header_t * ip, u32 proto,
538                         u32 rx_fib_index, u32 thread_index)
539 {
540   udp_header_t *udp = ip4_next_header (ip);
541   clib_bihash_kv_16_8_t kv, value;
542
543   init_ed_k (&kv, ip->dst_address, udp->dst_port, ip->src_address,
544              udp->src_port, sm->outside_fib_index, ip->protocol);
545
546   /* NAT packet aimed at external address if has active sessions */
547   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
548     {
549       /* or is static mappings */
550       ip4_address_t placeholder_addr;
551       u16 placeholder_port;
552       u32 placeholder_fib_index;
553       if (!snat_static_mapping_match
554           (sm, ip->dst_address, udp->dst_port, sm->outside_fib_index, proto,
555            &placeholder_addr, &placeholder_port, &placeholder_fib_index, 1, 0,
556            0, 0, 0, 0, 0))
557         return 0;
558     }
559   else
560     return 0;
561
562   if (sm->forwarding_enabled)
563     return 1;
564
565   return snat_not_translate_fast (sm, node, sw_if_index, ip, proto,
566                                   rx_fib_index);
567 }
568
569 static_always_inline int
570 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
571                                       u32 thread_index, f64 now,
572                                       vlib_main_t * vm, vlib_buffer_t * b)
573 {
574   clib_bihash_kv_16_8_t kv, value;
575   snat_session_t *s = 0;
576   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
577
578   if (!sm->forwarding_enabled)
579     return 0;
580
581   if (ip->protocol == IP_PROTOCOL_ICMP)
582     {
583       ip4_address_t lookup_saddr, lookup_daddr;
584       u16 lookup_sport, lookup_dport;
585       u8 lookup_protocol;
586       if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
587                                               &lookup_sport, &lookup_daddr,
588                                               &lookup_dport, &lookup_protocol))
589         return 0;
590       init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport,
591                  0, lookup_protocol);
592     }
593   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
594     {
595       init_ed_k (&kv, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
596                  ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port, 0,
597                  ip->protocol);
598     }
599   else
600     {
601       init_ed_k (&kv, ip->src_address, 0, ip->dst_address, 0, 0,
602                  ip->protocol);
603     }
604
605   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
606     {
607       ASSERT (thread_index == ed_value_get_thread_index (&value));
608       s =
609         pool_elt_at_index (tsm->sessions,
610                            ed_value_get_session_index (&value));
611
612       if (is_fwd_bypass_session (s))
613         {
614           if (ip->protocol == IP_PROTOCOL_TCP)
615             {
616               nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index);
617             }
618           /* Accounting */
619           nat44_session_update_counters (s, now,
620                                          vlib_buffer_length_in_chain (vm, b),
621                                          thread_index);
622           /* Per-user LRU list maintenance */
623           nat44_session_update_lru (sm, s, thread_index);
624           return 1;
625         }
626       else
627         return 0;
628     }
629
630   return 0;
631 }
632
633 static_always_inline int
634 nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip,
635                                        u16 src_port, u16 dst_port,
636                                        u32 thread_index, u32 rx_sw_if_index,
637                                        u32 tx_sw_if_index, f64 now)
638 {
639   clib_bihash_kv_16_8_t kv, value;
640   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
641   snat_interface_t *i;
642   snat_session_t *s;
643   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
644   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
645
646   /* src NAT check */
647   init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
648              tx_fib_index, ip->protocol);
649   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
650     {
651       ASSERT (thread_index == ed_value_get_thread_index (&value));
652       s =
653         pool_elt_at_index (tsm->sessions,
654                            ed_value_get_session_index (&value));
655       if (nat44_is_ses_closed (s)
656           && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp))
657         {
658           nat_free_session_data (sm, s, thread_index, 0);
659           nat_ed_session_delete (sm, s, thread_index, 1);
660         }
661       return 1;
662     }
663
664   /* dst NAT check */
665   init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port,
666              rx_fib_index, ip->protocol);
667   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
668     {
669       ASSERT (thread_index == ed_value_get_thread_index (&value));
670       s =
671         pool_elt_at_index (tsm->sessions,
672                            ed_value_get_session_index (&value));
673
674       if (is_fwd_bypass_session (s))
675         return 0;
676
677       /* hairpinning */
678       pool_foreach (i, sm->output_feature_interfaces)
679        {
680         if ((nat_interface_is_inside (i)) && (rx_sw_if_index == i->sw_if_index))
681            return 0;
682       }
683       return 1;
684     }
685
686   return 0;
687 }
688
689 static inline u32
690 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
691                           icmp46_header_t *icmp, u32 sw_if_index,
692                           u32 rx_fib_index, vlib_node_runtime_t *node,
693                           u32 next, f64 now, u32 thread_index,
694                           nat_protocol_t nat_proto, snat_session_t **s_p)
695 {
696   vlib_main_t *vm = vlib_get_main ();
697   u16 checksum;
698   int err;
699   snat_session_t *s = NULL;
700   u8 lookup_protocol = ip->protocol;
701   u16 lookup_sport, lookup_dport;
702   ip4_address_t lookup_saddr, lookup_daddr;
703
704   err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
705                                             &lookup_sport, &lookup_daddr,
706                                             &lookup_dport, &lookup_protocol);
707   if (err != 0)
708     {
709       b->error = node->errors[err];
710       return NAT_NEXT_DROP;
711     }
712
713   if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
714     {
715       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
716             sm, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
717             vnet_buffer (b)->sw_if_index[VLIB_TX], now)))
718         {
719           return next;
720         }
721     }
722   else
723     {
724       if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, sw_if_index, ip,
725                                                  NAT_PROTOCOL_ICMP,
726                                                  rx_fib_index, thread_index)))
727         {
728           return next;
729         }
730     }
731
732   if (PREDICT_FALSE (icmp_type_is_error_message (
733         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
734     {
735       b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
736       return NAT_NEXT_DROP;
737     }
738
739   next = slow_path_ed (sm, b, ip->src_address, ip->dst_address, lookup_sport,
740                        lookup_dport, ip->protocol, rx_fib_index, &s, node,
741                        next, thread_index, vlib_time_now (vm));
742
743   if (NAT_NEXT_DROP == next)
744     goto out;
745
746   if (PREDICT_TRUE (!ip4_is_fragment (ip)))
747     {
748       ip_csum_t sum = ip_incremental_checksum_buffer (
749         vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b),
750         ntohs (ip->length) - ip4_header_bytes (ip), 0);
751       checksum = ~ip_csum_fold (sum);
752       if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff))
753         {
754           next = NAT_NEXT_DROP;
755           goto out;
756         }
757     }
758
759 out:
760   if (PREDICT_TRUE (next != NAT_NEXT_DROP && s))
761     {
762       /* Accounting */
763       nat44_session_update_counters (
764         s, now, vlib_buffer_length_in_chain (vm, b), thread_index);
765       /* Per-user LRU list maintenance */
766       nat44_session_update_lru (sm, s, thread_index);
767     }
768   *s_p = s;
769   return next;
770 }
771
772 static snat_session_t *
773 nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
774                                         ip4_header_t *ip, u32 rx_fib_index,
775                                         u32 thread_index, f64 now,
776                                         vlib_main_t *vm,
777                                         vlib_node_runtime_t *node)
778 {
779   clib_bihash_kv_8_8_t kv, value;
780   clib_bihash_kv_16_8_t s_kv, s_value;
781   snat_static_mapping_t *m = NULL;
782   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
783   snat_session_t *s = NULL;
784   u32 outside_fib_index = sm->outside_fib_index;
785   int i;
786   ip4_address_t new_src_addr = { 0 };
787   ip4_address_t new_dst_addr = ip->dst_address;
788
789   if (PREDICT_FALSE (
790         nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
791     {
792       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
793       nat_ipfix_logging_max_sessions (thread_index,
794                                       sm->max_translations_per_thread);
795       nat_elog_notice (sm, "maximum sessions exceeded");
796       return 0;
797     }
798
799   switch (vec_len (sm->outside_fibs))
800     {
801     case 0:
802       outside_fib_index = sm->outside_fib_index;
803       break;
804     case 1:
805       outside_fib_index = sm->outside_fibs[0].fib_index;
806       break;
807     default:
808       outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address);
809       break;
810     }
811
812   init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0);
813
814   /* Try to find static mapping first */
815   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
816     {
817       m = pool_elt_at_index (sm->static_mappings, value.value);
818       new_src_addr = m->external_addr;
819     }
820   else
821     {
822       pool_foreach (s, tsm->sessions)
823         {
824           if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
825             {
826               init_ed_k (&s_kv, s->out2in.addr, 0, ip->dst_address, 0,
827                          outside_fib_index, ip->protocol);
828               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
829                 {
830                   new_src_addr = s->out2in.addr;
831                 }
832               break;
833             }
834         }
835
836       if (!new_src_addr.as_u32)
837         {
838           for (i = 0; i < vec_len (sm->addresses); i++)
839             {
840               init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0,
841                          outside_fib_index, ip->protocol);
842               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
843                 {
844                   new_src_addr = sm->addresses[i].addr;
845                 }
846             }
847         }
848     }
849
850   if (!new_src_addr.as_u32)
851     {
852       // could not allocate address for translation ...
853       return 0;
854     }
855
856   s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
857   if (!s)
858     {
859       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
860       nat_elog_warn (sm, "create NAT session failed");
861       return 0;
862     }
863
864   nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
865                         ip->dst_address, 0, rx_fib_index, ip->protocol);
866   nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
867   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
868
869   // hairpinning?
870   int is_hairpinning =
871     nat44_ed_external_sm_lookup (sm, ip->dst_address, 0, NAT_PROTOCOL_OTHER,
872                                  outside_fib_index, &new_dst_addr, NULL);
873   s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
874
875   nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
876   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
877
878   nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
879                         outside_fib_index, ip->protocol);
880   nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
881   nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
882   nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
883
884   s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
885   s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
886   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
887   s->out2in.addr.as_u32 = new_src_addr.as_u32;
888   s->out2in.fib_index = outside_fib_index;
889   s->in2out.addr.as_u32 = ip->src_address.as_u32;
890   s->in2out.fib_index = rx_fib_index;
891   s->in2out.port = s->out2in.port = ip->protocol;
892   if (m)
893     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
894
895   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
896     {
897       nat_elog_notice (sm, "in2out flow hash add failed");
898       nat_ed_session_delete (sm, s, thread_index, 1);
899       return NULL;
900     }
901
902   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1))
903     {
904       nat_elog_notice (sm, "out2in flow hash add failed");
905       nat_ed_session_delete (sm, s, thread_index, 1);
906       return NULL;
907     }
908
909   per_vrf_sessions_register_session (s, thread_index);
910
911   /* Accounting */
912   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
913                                  thread_index);
914   /* Per-user LRU list maintenance */
915   nat44_session_update_lru (sm, s, thread_index);
916
917   return s;
918 }
919
920 static inline uword
921 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm,
922                                           vlib_node_runtime_t * node,
923                                           vlib_frame_t * frame,
924                                           int is_output_feature)
925 {
926   u32 n_left_from, *from;
927   snat_main_t *sm = &snat_main;
928   f64 now = vlib_time_now (vm);
929   u32 thread_index = vm->thread_index;
930   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
931   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
932     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
933
934   from = vlib_frame_vector_args (frame);
935   n_left_from = frame->n_vectors;
936
937   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
938   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
939   vlib_get_buffers (vm, from, b, n_left_from);
940
941   while (n_left_from > 0)
942     {
943       vlib_buffer_t *b0;
944       u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0;
945       nat_protocol_t proto0;
946       ip4_header_t *ip0;
947       snat_session_t *s0 = 0;
948       clib_bihash_kv_16_8_t kv0, value0;
949       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
950       nat_6t_flow_t *f = 0;
951       ip4_address_t lookup_saddr, lookup_daddr;
952       u16 lookup_sport, lookup_dport;
953       u8 lookup_protocol;
954       int lookup_skipped = 0;
955
956       b0 = *b;
957       b++;
958
959       /* Prefetch next iteration. */
960       if (PREDICT_TRUE (n_left_from >= 2))
961         {
962           vlib_buffer_t *p2;
963
964           p2 = *b;
965
966           vlib_prefetch_buffer_header (p2, LOAD);
967
968           CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
969         }
970
971       if (is_output_feature)
972         {
973           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
974         }
975
976       next[0] = vnet_buffer2 (b0)->nat.arc_next;
977
978       ip0 =
979         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
980
981       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
982       rx_fib_index0 =
983         fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
984
985       if (PREDICT_FALSE (ip0->ttl == 1))
986         {
987           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
988           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
989                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
990                                        0);
991           next[0] = NAT_NEXT_ICMP_ERROR;
992           goto trace0;
993         }
994
995       proto0 = ip_proto_to_nat_proto (ip0->protocol);
996
997       if (is_output_feature)
998         {
999           if (PREDICT_FALSE
1000               (nat_not_translate_output_feature_fwd
1001                (sm, ip0, thread_index, now, vm, b0)))
1002             goto trace0;
1003         }
1004
1005       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1006         {
1007           if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1008                 ICMP4_echo_request &&
1009               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1010                 ICMP4_echo_reply &&
1011               !icmp_type_is_error_message (
1012                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
1013             {
1014               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
1015               next[0] = NAT_NEXT_DROP;
1016               goto trace0;
1017             }
1018           int err = nat_get_icmp_session_lookup_values (
1019             b0, ip0, &lookup_saddr, &lookup_sport, &lookup_daddr,
1020             &lookup_dport, &lookup_protocol);
1021           if (err != 0)
1022             {
1023               b0->error = node->errors[err];
1024               next[0] = NAT_NEXT_DROP;
1025               goto trace0;
1026             }
1027         }
1028       else
1029         {
1030           lookup_protocol = ip0->protocol;
1031           lookup_saddr = ip0->src_address;
1032           lookup_daddr = ip0->dst_address;
1033           lookup_sport = vnet_buffer (b0)->ip.reass.l4_src_port;
1034           lookup_dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
1035         }
1036
1037       /* there might be a stashed index in vnet_buffer2 from handoff or
1038        * classify node, see if it can be used */
1039       if (!pool_is_free_index (tsm->sessions,
1040                                vnet_buffer2 (b0)->nat.cached_session_index))
1041         {
1042           s0 = pool_elt_at_index (tsm->sessions,
1043                                   vnet_buffer2 (b0)->nat.cached_session_index);
1044           if (PREDICT_TRUE (
1045                 nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport,
1046                                    lookup_daddr, lookup_dport, lookup_protocol,
1047                                    rx_fib_index0)
1048                 // for some hairpinning cases there are two "i2i" flows instead
1049                 // of i2o and o2i as both hosts are on inside
1050                 || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1051                     nat_6t_flow_match (
1052                       &s0->o2i, b0, lookup_saddr, lookup_sport, lookup_daddr,
1053                       lookup_dport, lookup_protocol, rx_fib_index0))))
1054             {
1055               /* yes, this is the droid we're looking for */
1056               lookup_skipped = 1;
1057               goto skip_lookup;
1058             }
1059           s0 = NULL;
1060         }
1061
1062       init_ed_k (&kv0, ip0->src_address, lookup_sport, ip0->dst_address,
1063                  lookup_dport, rx_fib_index0, lookup_protocol);
1064
1065       // lookup flow
1066       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1067         {
1068           // flow does not exist go slow path
1069           next[0] = def_slow;
1070           goto trace0;
1071         }
1072
1073       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1074       s0 =
1075         pool_elt_at_index (tsm->sessions,
1076                            ed_value_get_session_index (&value0));
1077
1078     skip_lookup:
1079
1080       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1081         {
1082           // session is closed, go slow path
1083           nat_free_session_data (sm, s0, thread_index, 0);
1084           nat_ed_session_delete (sm, s0, thread_index, 1);
1085           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
1086           goto trace0;
1087         }
1088
1089       if (s0->tcp_closed_timestamp)
1090         {
1091           if (now >= s0->tcp_closed_timestamp)
1092             {
1093               // session is closed, go slow path, freed in slow path
1094               next[0] = def_slow;
1095             }
1096           else
1097             {
1098               // session in transitory timeout, drop
1099               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
1100               next[0] = NAT_NEXT_DROP;
1101             }
1102           goto trace0;
1103         }
1104
1105       // drop if session expired
1106       u64 sess_timeout_time;
1107       sess_timeout_time =
1108         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1109       if (now >= sess_timeout_time)
1110         {
1111           nat_free_session_data (sm, s0, thread_index, 0);
1112           nat_ed_session_delete (sm, s0, thread_index, 1);
1113           // session is closed, go slow path
1114           next[0] = def_slow;
1115           goto trace0;
1116         }
1117
1118       b0->flags |= VNET_BUFFER_F_IS_NATED;
1119
1120       if (nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport,
1121                              lookup_daddr, lookup_dport, lookup_protocol,
1122                              rx_fib_index0))
1123         {
1124           f = &s0->i2o;
1125         }
1126       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1127                nat_6t_flow_match (&s0->o2i, b0, lookup_saddr, lookup_sport,
1128                                   lookup_daddr, lookup_dport, lookup_protocol,
1129                                   rx_fib_index0))
1130         {
1131           f = &s0->o2i;
1132         }
1133       else
1134         {
1135           translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
1136           nat_free_session_data (sm, s0, thread_index, 0);
1137           nat_ed_session_delete (sm, s0, thread_index, 1);
1138           next[0] = NAT_NEXT_DROP;
1139           goto trace0;
1140         }
1141
1142       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1143           (translation_error = nat_6t_flow_buf_translate (
1144              sm, b0, ip0, f, proto0, is_output_feature)))
1145         {
1146           nat_free_session_data (sm, s0, thread_index, 0);
1147           nat_ed_session_delete (sm, s0, thread_index, 1);
1148           next[0] = NAT_NEXT_DROP;
1149           goto trace0;
1150         }
1151
1152       switch (proto0)
1153         {
1154         case NAT_PROTOCOL_TCP:
1155           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
1156                                          thread_index, sw_if_index0, 1);
1157           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1158           break;
1159         case NAT_PROTOCOL_UDP:
1160           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
1161                                          thread_index, sw_if_index0, 1);
1162           break;
1163         case NAT_PROTOCOL_ICMP:
1164           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
1165                                          thread_index, sw_if_index0, 1);
1166           break;
1167         case NAT_PROTOCOL_OTHER:
1168           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
1169                                          thread_index, sw_if_index0, 1);
1170           break;
1171         }
1172
1173       /* Accounting */
1174       nat44_session_update_counters (s0, now,
1175                                      vlib_buffer_length_in_chain (vm, b0),
1176                                      thread_index);
1177       /* Per-user LRU list maintenance */
1178       nat44_session_update_lru (sm, s0, thread_index);
1179
1180     trace0:
1181       if (PREDICT_FALSE
1182           ((node->flags & VLIB_NODE_FLAG_TRACE)
1183            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1184         {
1185           nat_in2out_ed_trace_t *t =
1186             vlib_add_trace (vm, node, b0, sizeof (*t));
1187           t->sw_if_index = sw_if_index0;
1188           t->next_index = next[0];
1189           t->is_slow_path = 0;
1190           t->translation_error = translation_error;
1191           t->lookup_skipped = lookup_skipped;
1192           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1193
1194           if (s0)
1195             {
1196               t->session_index = s0 - tsm->sessions;
1197               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1198               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1199               t->translation_via_i2of = (&s0->i2o == f);
1200             }
1201           else
1202             {
1203               t->session_index = ~0;
1204             }
1205         }
1206
1207       if (next[0] == NAT_NEXT_DROP)
1208         {
1209           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
1210                                          thread_index, sw_if_index0, 1);
1211         }
1212
1213       n_left_from--;
1214       next++;
1215     }
1216
1217   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1218                                frame->n_vectors);
1219   return frame->n_vectors;
1220 }
1221
1222 static inline uword
1223 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm,
1224                                           vlib_node_runtime_t * node,
1225                                           vlib_frame_t * frame,
1226                                           int is_output_feature)
1227 {
1228   u32 n_left_from, *from;
1229   snat_main_t *sm = &snat_main;
1230   f64 now = vlib_time_now (vm);
1231   u32 thread_index = vm->thread_index;
1232   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1233
1234   from = vlib_frame_vector_args (frame);
1235   n_left_from = frame->n_vectors;
1236
1237   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1238   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1239   vlib_get_buffers (vm, from, b, n_left_from);
1240
1241   while (n_left_from > 0)
1242     {
1243       vlib_buffer_t *b0;
1244       u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1245       nat_protocol_t proto0;
1246       ip4_header_t *ip0;
1247       udp_header_t *udp0;
1248       icmp46_header_t *icmp0;
1249       snat_session_t *s0 = 0;
1250       clib_bihash_kv_16_8_t kv0, value0;
1251       int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1252
1253       b0 = *b;
1254
1255       if (is_output_feature)
1256         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1257
1258       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1259
1260       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1261                               iph_offset0);
1262
1263       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1264       rx_fib_index0 =
1265         fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
1266
1267       if (PREDICT_FALSE (ip0->ttl == 1))
1268         {
1269           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1270           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1271                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1272                                        0);
1273           next[0] = NAT_NEXT_ICMP_ERROR;
1274           goto trace0;
1275         }
1276
1277       udp0 = ip4_next_header (ip0);
1278       icmp0 = (icmp46_header_t *) udp0;
1279       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1280
1281       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
1282         {
1283           s0 = nat44_ed_in2out_slowpath_unknown_proto (
1284             sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
1285           if (!s0)
1286             next[0] = NAT_NEXT_DROP;
1287
1288           if (NAT_ED_TRNSL_ERR_SUCCESS !=
1289               (translation_error = nat_6t_flow_buf_translate (
1290                  sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1291             {
1292               goto trace0;
1293             }
1294
1295           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
1296                                          thread_index, sw_if_index0, 1);
1297           goto trace0;
1298         }
1299
1300       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1301         {
1302           next[0] = icmp_in2out_ed_slow_path (sm, b0, ip0, icmp0, sw_if_index0,
1303                                               rx_fib_index0, node, next[0],
1304                                               now, thread_index, proto0, &s0);
1305           if (NAT_NEXT_DROP != next[0] && s0 &&
1306               NAT_ED_TRNSL_ERR_SUCCESS !=
1307                 (translation_error = nat_6t_flow_buf_translate (
1308                    sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1309             {
1310               goto trace0;
1311             }
1312
1313           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.icmp,
1314                                          thread_index, sw_if_index0, 1);
1315           goto trace0;
1316         }
1317
1318       init_ed_k (&kv0, ip0->src_address,
1319                  vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
1320                  vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
1321                  ip0->protocol);
1322       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1323         {
1324           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1325           s0 =
1326             pool_elt_at_index (tsm->sessions,
1327                                ed_value_get_session_index (&value0));
1328
1329           if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
1330             {
1331               nat_free_session_data (sm, s0, thread_index, 0);
1332               nat_ed_session_delete (sm, s0, thread_index, 1);
1333               s0 = NULL;
1334             }
1335         }
1336
1337       if (!s0)
1338         {
1339           if (is_output_feature)
1340             {
1341               if (PREDICT_FALSE
1342                   (nat44_ed_not_translate_output_feature
1343                    (sm, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1344                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1345                     sw_if_index0, vnet_buffer (b0)->sw_if_index[VLIB_TX],
1346                     now)))
1347                 goto trace0;
1348
1349               /*
1350                * Send DHCP packets to the ipv4 stack, or we won't
1351                * be able to use dhcp client on the outside interface
1352                */
1353               if (PREDICT_FALSE
1354                   (proto0 == NAT_PROTOCOL_UDP
1355                    && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1356                        clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server))
1357                    && ip0->dst_address.as_u32 == 0xffffffff))
1358                 goto trace0;
1359             }
1360           else
1361             {
1362               if (PREDICT_FALSE
1363                   (nat44_ed_not_translate
1364                    (sm, node, sw_if_index0, ip0, proto0, rx_fib_index0,
1365                     thread_index)))
1366                 goto trace0;
1367             }
1368
1369           next[0] =
1370             slow_path_ed (sm, b0, ip0->src_address, ip0->dst_address,
1371                           vnet_buffer (b0)->ip.reass.l4_src_port,
1372                           vnet_buffer (b0)->ip.reass.l4_dst_port,
1373                           ip0->protocol, rx_fib_index0, &s0, node, next[0],
1374                           thread_index, now);
1375
1376           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1377             goto trace0;
1378
1379           if (PREDICT_FALSE (!s0))
1380             goto trace0;
1381
1382         }
1383
1384       b0->flags |= VNET_BUFFER_F_IS_NATED;
1385
1386       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1387           (translation_error = nat_6t_flow_buf_translate (
1388              sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1389         {
1390           nat_free_session_data (sm, s0, thread_index, 0);
1391           nat_ed_session_delete (sm, s0, thread_index, 1);
1392           s0 = NULL;
1393           goto trace0;
1394         }
1395
1396       if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
1397         {
1398           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
1399                                          thread_index, sw_if_index0, 1);
1400           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1401         }
1402       else
1403         {
1404           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
1405                                          thread_index, sw_if_index0, 1);
1406         }
1407
1408       /* Accounting */
1409       nat44_session_update_counters (s0, now,
1410                                      vlib_buffer_length_in_chain
1411                                      (vm, b0), thread_index);
1412       /* Per-user LRU list maintenance */
1413       nat44_session_update_lru (sm, s0, thread_index);
1414
1415     trace0:
1416       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1417                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1418         {
1419           nat_in2out_ed_trace_t *t =
1420             vlib_add_trace (vm, node, b0, sizeof (*t));
1421           t->sw_if_index = sw_if_index0;
1422           t->next_index = next[0];
1423           t->is_slow_path = 1;
1424           t->translation_error = translation_error;
1425           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1426
1427           if (s0)
1428             {
1429               t->session_index = s0 - tsm->sessions;
1430               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1431               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1432               t->translation_via_i2of = 1;
1433             }
1434
1435           else
1436             {
1437               t->session_index = ~0;
1438             }
1439         }
1440
1441       if (next[0] == NAT_NEXT_DROP)
1442         {
1443           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
1444                                          thread_index, sw_if_index0, 1);
1445         }
1446
1447       n_left_from--;
1448       next++;
1449       b++;
1450     }
1451
1452   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1453                                frame->n_vectors);
1454
1455   return frame->n_vectors;
1456 }
1457
1458 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1459                                      vlib_node_runtime_t * node,
1460                                      vlib_frame_t * frame)
1461 {
1462   return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0);
1463 }
1464
1465 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1466   .name = "nat44-ed-in2out",
1467   .vector_size = sizeof (u32),
1468   .sibling_of = "nat-default",
1469   .format_trace = format_nat_in2out_ed_trace,
1470   .type = VLIB_NODE_TYPE_INTERNAL,
1471   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1472   .error_strings = nat_in2out_ed_error_strings,
1473   .runtime_data_bytes = sizeof (snat_runtime_t),
1474 };
1475
1476 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1477                                             vlib_node_runtime_t * node,
1478                                             vlib_frame_t * frame)
1479 {
1480   return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1);
1481 }
1482
1483 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1484   .name = "nat44-ed-in2out-output",
1485   .vector_size = sizeof (u32),
1486   .sibling_of = "nat-default",
1487   .format_trace = format_nat_in2out_ed_trace,
1488   .type = VLIB_NODE_TYPE_INTERNAL,
1489   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1490   .error_strings = nat_in2out_ed_error_strings,
1491   .runtime_data_bytes = sizeof (snat_runtime_t),
1492 };
1493
1494 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1495                                               vlib_node_runtime_t *
1496                                               node, vlib_frame_t * frame)
1497 {
1498   return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0);
1499 }
1500
1501 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1502   .name = "nat44-ed-in2out-slowpath",
1503   .vector_size = sizeof (u32),
1504   .sibling_of = "nat-default",
1505   .format_trace = format_nat_in2out_ed_trace,
1506   .type = VLIB_NODE_TYPE_INTERNAL,
1507   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1508   .error_strings = nat_in2out_ed_error_strings,
1509   .runtime_data_bytes = sizeof (snat_runtime_t),
1510 };
1511
1512 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1513                                                      vlib_node_runtime_t
1514                                                      * node,
1515                                                      vlib_frame_t * frame)
1516 {
1517   return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1);
1518 }
1519
1520 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1521   .name = "nat44-ed-in2out-output-slowpath",
1522   .vector_size = sizeof (u32),
1523   .sibling_of = "nat-default",
1524   .format_trace = format_nat_in2out_ed_trace,
1525   .type = VLIB_NODE_TYPE_INTERNAL,
1526   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1527   .error_strings = nat_in2out_ed_error_strings,
1528   .runtime_data_bytes = sizeof (snat_runtime_t),
1529 };
1530
1531 static u8 *
1532 format_nat_pre_trace (u8 * s, va_list * args)
1533 {
1534   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1535   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1536   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1537   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1538                  t->arc_next_index);
1539 }
1540
1541 VLIB_NODE_FN (nat_pre_in2out_node)
1542   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1543 {
1544   return nat_pre_node_fn_inline (vm, node, frame,
1545                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1546 }
1547
1548 VLIB_NODE_FN (nat_pre_in2out_output_node)
1549   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1550 {
1551   return nat_pre_node_fn_inline (vm, node, frame,
1552                                  NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
1553 }
1554
1555 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1556   .name = "nat-pre-in2out",
1557   .vector_size = sizeof (u32),
1558   .sibling_of = "nat-default",
1559   .format_trace = format_nat_pre_trace,
1560   .type = VLIB_NODE_TYPE_INTERNAL,
1561   .n_errors = 0,
1562 };
1563
1564 VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
1565   .name = "nat-pre-in2out-output",
1566   .vector_size = sizeof (u32),
1567   .sibling_of = "nat-default",
1568   .format_trace = format_nat_pre_trace,
1569   .type = VLIB_NODE_TYPE_INTERNAL,
1570   .n_errors = 0,
1571 };
1572
1573 /*
1574  * fd.io coding-style-patch-verification: ON
1575  *
1576  * Local Variables:
1577  * eval: (c-set-style "gnu")
1578  * End:
1579  */