nat: static mappings in flow hash
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_in2out.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27
28 #include <nat/lib/nat_inlines.h>
29 #include <nat/lib/ipfix_logging.h>
30
31 #include <nat/nat44-ed/nat44_ed.h>
32 #include <nat/nat44-ed/nat44_ed_inlines.h>
33
34 static char *nat_in2out_ed_error_strings[] = {
35 #define _(sym,string) string,
36   foreach_nat_in2out_ed_error
37 #undef _
38 };
39
40 typedef struct
41 {
42   u32 sw_if_index;
43   u32 next_index;
44   u32 session_index;
45   nat_translation_error_e translation_error;
46   nat_6t_flow_t i2of;
47   nat_6t_flow_t o2if;
48   clib_bihash_kv_16_8_t search_key;
49   u8 is_slow_path;
50   u8 translation_via_i2of;
51   u8 lookup_skipped;
52 } nat_in2out_ed_trace_t;
53
54 static u8 *
55 format_nat_in2out_ed_trace (u8 * s, va_list * args)
56 {
57   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
58   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
59   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
60   char *tag;
61
62   tag =
63     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
64     "NAT44_IN2OUT_ED_FAST_PATH";
65
66   s = format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
67               t->next_index);
68   if (~0 != t->session_index)
69     {
70       s = format (s, ", session %d, translation result '%U' via %s",
71                   t->session_index, format_nat_ed_translation_error,
72                   t->translation_error,
73                   t->translation_via_i2of ? "i2of" : "o2if");
74       s = format (s, "\n  i2of %U", format_nat_6t_flow, &t->i2of);
75       s = format (s, "\n  o2if %U", format_nat_6t_flow, &t->o2if);
76     }
77   if (!t->is_slow_path)
78     {
79       if (t->lookup_skipped)
80         {
81           s = format (s, "\n lookup skipped - cached session index used");
82         }
83       else
84         {
85           s = format (s, "\n  search key %U", format_ed_session_kvp,
86                       &t->search_key);
87         }
88     }
89
90   return s;
91 }
92
93 /**
94  * @brief Check if packet should be translated
95  *
96  * Packets aimed at outside interface and external address with active session
97  * should be translated.
98  *
99  * @param sm            NAT main
100  * @param rt            NAT runtime data
101  * @param sw_if_index0  index of the inside interface
102  * @param ip0           IPv4 header
103  * @param rx_fib_index0 RX FIB index
104  *
105  * @returns 0 if packet should be translated otherwise 1
106  */
107 static inline int
108 snat_not_translate_fast (snat_main_t *sm, vlib_node_runtime_t *node,
109                          u32 sw_if_index0, ip4_header_t *ip0,
110                          u32 rx_fib_index0)
111 {
112   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
113   nat_outside_fib_t *outside_fib;
114   fib_prefix_t pfx = {
115     .fp_proto = FIB_PROTOCOL_IP4,
116     .fp_len = 32,
117     .fp_addr = {
118                 .ip4.as_u32 = ip0->dst_address.as_u32,
119                 }
120     ,
121   };
122
123   /* Don't NAT packet aimed at the intfc address */
124   if (PREDICT_FALSE (
125         is_interface_addr (sm, node, sw_if_index0, ip0->dst_address.as_u32)))
126     return 1;
127
128   fei = fib_table_lookup (rx_fib_index0, &pfx);
129   if (FIB_NODE_INDEX_INVALID != fei)
130     {
131       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
132       if (sw_if_index == ~0)
133         {
134           vec_foreach (outside_fib, sm->outside_fibs)
135             {
136               fei = fib_table_lookup (outside_fib->fib_index, &pfx);
137               if (FIB_NODE_INDEX_INVALID != fei)
138                 {
139                   sw_if_index = fib_entry_get_resolving_interface (fei);
140                   if (sw_if_index != ~0)
141                     break;
142                 }
143             }
144         }
145       if (sw_if_index == ~0)
146         return 1;
147
148       snat_interface_t *i;
149       pool_foreach (i, sm->interfaces)
150         {
151           /* NAT packet aimed at outside interface */
152           if ((nat44_ed_is_interface_outside (i)) &&
153               (sw_if_index == i->sw_if_index))
154             return 0;
155         }
156     }
157
158   return 1;
159 }
160
161 static int
162 nat_ed_alloc_addr_and_port_with_snat_address (
163   snat_main_t *sm, u8 proto, u32 thread_index, snat_address_t *a,
164   u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
165   ip4_address_t *outside_addr, u16 *outside_port)
166 {
167   const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
168
169   s->o2i.match.daddr = a->addr;
170   /* first try port suggested by caller */
171   u16 port = clib_net_to_host_u16 (*outside_port);
172   u16 port_offset = port - port_thread_offset;
173   if (port <= port_thread_offset ||
174       port > port_thread_offset + port_per_thread)
175     {
176       /* need to pick a different port, suggested port doesn't fit in
177        * this thread's port range */
178       port_offset = snat_random_port (0, port_per_thread - 1);
179       port = port_thread_offset + port_offset;
180     }
181   u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
182   do
183     {
184       if (IP_PROTOCOL_ICMP == proto)
185         {
186           s->o2i.match.sport = clib_host_to_net_u16 (port);
187         }
188       s->o2i.match.dport = clib_host_to_net_u16 (port);
189       if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
190         {
191           *outside_addr = a->addr;
192           *outside_port = clib_host_to_net_u16 (port);
193           return 0;
194         }
195       port_offset = snat_random_port (0, port_per_thread - 1);
196       port = port_thread_offset + port_offset;
197       --attempts;
198     }
199   while (attempts > 0);
200   return 1;
201 }
202
203 static int
204 nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto,
205                             u32 thread_index, ip4_address_t s_addr,
206                             u32 snat_thread_index, snat_session_t *s,
207                             ip4_address_t *outside_addr, u16 *outside_port)
208 {
209   int i;
210   snat_address_t *a, *ga = 0;
211
212   if (vec_len (sm->addresses) > 0)
213     {
214       u32 s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses);
215
216       for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
217         {
218           a = sm->addresses + i;
219           if (a->fib_index == rx_fib_index)
220             {
221               return nat_ed_alloc_addr_and_port_with_snat_address (
222                 sm, nat_proto, thread_index, a, sm->port_per_thread,
223                 snat_thread_index, s, outside_addr, outside_port);
224             }
225           else if (a->fib_index == ~0)
226             {
227               ga = a;
228             }
229         }
230
231       for (i = 0; i < s_addr_offset; ++i)
232         {
233           a = sm->addresses + i;
234           if (a->fib_index == rx_fib_index)
235             {
236               return nat_ed_alloc_addr_and_port_with_snat_address (
237                 sm, nat_proto, thread_index, a, sm->port_per_thread,
238                 snat_thread_index, s, outside_addr, outside_port);
239             }
240           else if (a->fib_index == ~0)
241             {
242               ga = a;
243             }
244         }
245
246       if (ga)
247         {
248           return nat_ed_alloc_addr_and_port_with_snat_address (
249             sm, nat_proto, thread_index, a, sm->port_per_thread,
250             snat_thread_index, s, outside_addr, outside_port);
251         }
252     }
253   /* Totally out of translations to use... */
254   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
255   return 1;
256 }
257
258 static_always_inline u32
259 nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr)
260 {
261   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
262   nat_outside_fib_t *outside_fib;
263   fib_prefix_t pfx = {
264     .fp_proto = FIB_PROTOCOL_IP4,
265     .fp_len = 32,
266     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
267     ,
268   };
269   // TODO: multiple vrfs none can resolve addr
270   vec_foreach (outside_fib, sm->outside_fibs)
271     {
272       fei = fib_table_lookup (outside_fib->fib_index, &pfx);
273       if (FIB_NODE_INDEX_INVALID != fei)
274         {
275           if (fib_entry_get_resolving_interface (fei) != ~0)
276             {
277               return outside_fib->fib_index;
278             }
279         }
280     }
281   return ~0;
282 }
283
284 static_always_inline int
285 nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
286                              u16 match_port, ip_protocol_t match_protocol,
287                              ip4_address_t *daddr, u16 *dport)
288 {
289   snat_static_mapping_t *m =
290     nat44_ed_sm_o2i_lookup (sm, match_addr, match_port, 0, match_protocol);
291   if (!m)
292     {
293       /* Try address only mapping */
294       m = nat44_ed_sm_o2i_lookup (sm, match_addr, 0, 0, 0);
295       if (!m)
296         return 0;
297     }
298   *daddr = m->local_addr;
299   if (dport)
300     {
301       /* Address only mapping doesn't change port */
302       *dport = is_sm_addr_only (m->flags) ? match_port : m->local_port;
303     }
304   return 1;
305 }
306
307 static u32
308 slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
309               ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
310               u16 r_port, u8 proto, u32 rx_fib_index,
311               snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
312               u32 thread_index, f64 now)
313 {
314   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
315   ip4_address_t outside_addr;
316   u16 outside_port;
317   u32 outside_fib_index;
318   u8 is_identity_nat = 0;
319
320   snat_session_t *s = NULL;
321   lb_nat_type_t lb = 0;
322   ip4_address_t daddr = r_addr;
323   u16 dport = r_port;
324
325   if (PREDICT_FALSE
326       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
327     {
328       if (!nat_lru_free_one (sm, thread_index, now))
329         {
330           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
331           nat_ipfix_logging_max_sessions (thread_index,
332                                           sm->max_translations_per_thread);
333           nat_elog_notice (sm, "maximum sessions exceeded");
334           return NAT_NEXT_DROP;
335         }
336     }
337
338   outside_fib_index = sm->outside_fib_index;
339
340   switch (vec_len (sm->outside_fibs))
341     {
342     case 0:
343       outside_fib_index = sm->outside_fib_index;
344       break;
345     case 1:
346       outside_fib_index = sm->outside_fibs[0].fib_index;
347       break;
348     default:
349       outside_fib_index = nat_outside_fib_index_lookup (sm, r_addr);
350       break;
351     }
352
353   ip4_address_t sm_addr;
354   u16 sm_port;
355   u32 sm_fib_index;
356   /* First try to match static mapping by local address and port */
357   int is_sm;
358   if (snat_static_mapping_match (vm, sm, l_addr, l_port, rx_fib_index, proto,
359                                  &sm_addr, &sm_port, &sm_fib_index, 0, 0, 0,
360                                  &lb, 0, &is_identity_nat, 0))
361     {
362       is_sm = 0;
363     }
364   else
365     {
366       if (PREDICT_FALSE (is_identity_nat))
367         {
368           *sessionp = NULL;
369           return next;
370         }
371       is_sm = 1;
372     }
373
374   if (PREDICT_TRUE (proto == IP_PROTOCOL_TCP))
375     {
376       if (PREDICT_FALSE (!tcp_flags_is_init (
377             vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
378         {
379           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
380           return NAT_NEXT_DROP;
381         }
382     }
383
384   s = nat_ed_session_alloc (sm, thread_index, now, proto);
385   ASSERT (s);
386
387   if (!is_sm)
388     {
389       s->in2out.addr = l_addr;
390       s->in2out.port = l_port;
391       s->proto = proto;
392       s->in2out.fib_index = rx_fib_index;
393       s->out2in.fib_index = outside_fib_index;
394
395       // suggest using local port to allocation function
396       outside_port = l_port;
397
398       // hairpinning?
399       int is_hairpinning = nat44_ed_external_sm_lookup (sm, r_addr, r_port,
400                                                         proto, &daddr, &dport);
401       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
402
403       // destination addr/port updated with real values in
404       // nat_ed_alloc_addr_and_port
405       nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
406                             s->out2in.fib_index, proto);
407       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
408       if (IP_PROTOCOL_ICMP == proto)
409         {
410           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
411         }
412       else
413         {
414           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
415         }
416       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
417
418       if (nat_ed_alloc_addr_and_port (sm, rx_fib_index, proto, thread_index,
419                                       l_addr, tsm->snat_thread_index, s,
420                                       &outside_addr, &outside_port))
421         {
422           nat_elog_notice (sm, "addresses exhausted");
423           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
424           nat_ed_session_delete (sm, s, thread_index, 1);
425           return NAT_NEXT_DROP;
426         }
427       s->out2in.addr = outside_addr;
428       s->out2in.port = outside_port;
429     }
430   else
431     {
432       // static mapping
433       s->out2in.addr = outside_addr = sm_addr;
434       s->out2in.port = outside_port = sm_port;
435       s->in2out.addr = l_addr;
436       s->in2out.port = l_port;
437       s->proto = proto;
438       s->in2out.fib_index = rx_fib_index;
439       s->out2in.fib_index = outside_fib_index;
440       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
441
442       // hairpinning?
443       int is_hairpinning = nat44_ed_external_sm_lookup (sm, r_addr, r_port,
444                                                         proto, &daddr, &dport);
445       s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
446
447       if (IP_PROTOCOL_ICMP == proto)
448         {
449           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
450                                 sm_port, s->out2in.fib_index, proto);
451           nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
452         }
453       else
454         {
455           nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr,
456                                 sm_port, s->out2in.fib_index, proto);
457           nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
458         }
459       nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
460       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
461       if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
462         {
463           nat_elog_notice (sm, "out2in key add failed");
464           goto error;
465         }
466     }
467
468   if (lb)
469     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
470   s->ext_host_addr = r_addr;
471   s->ext_host_port = r_port;
472
473   nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port,
474                         rx_fib_index, proto);
475   nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
476   nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);
477
478   if (IP_PROTOCOL_ICMP == proto)
479     {
480       nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
481     }
482   else
483     {
484       nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
485       nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
486     }
487   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
488
489   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
490     {
491       nat_elog_notice (sm, "in2out key add failed");
492       goto error;
493     }
494
495   /* log NAT event */
496   nat_ipfix_logging_nat44_ses_create (
497     thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
498     s->in2out.port, s->out2in.port, s->in2out.fib_index);
499
500   nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
501                          s->in2out.port, &s->ext_host_nat_addr,
502                          s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
503                          &s->ext_host_addr, s->ext_host_port, s->proto, 0);
504
505   per_vrf_sessions_register_session (s, thread_index);
506
507   *sessionp = s;
508   return next;
509 error:
510   if (s)
511     {
512       nat_ed_session_delete (sm, s, thread_index, 1);
513     }
514   *sessionp = s = NULL;
515   return NAT_NEXT_DROP;
516 }
517
518 static_always_inline int
519 nat44_ed_not_translate (vlib_main_t *vm, snat_main_t *sm,
520                         vlib_node_runtime_t *node, u32 sw_if_index,
521                         vlib_buffer_t *b, ip4_header_t *ip, u32 proto,
522                         u32 rx_fib_index)
523 {
524   clib_bihash_kv_16_8_t kv, value;
525
526   init_ed_k (&kv, ip->dst_address.as_u32,
527              vnet_buffer (b)->ip.reass.l4_dst_port, ip->src_address.as_u32,
528              vnet_buffer (b)->ip.reass.l4_src_port, sm->outside_fib_index,
529              ip->protocol);
530
531   /* NAT packet aimed at external address if has active sessions */
532   if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
533     {
534       /* or is static mappings */
535       ip4_address_t placeholder_addr;
536       u16 placeholder_port;
537       u32 placeholder_fib_index;
538       if (!snat_static_mapping_match (
539             vm, sm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
540             sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
541             &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
542         return 0;
543     }
544   else
545     return 0;
546
547   if (sm->forwarding_enabled)
548     return 1;
549
550   return snat_not_translate_fast (sm, node, sw_if_index, ip, rx_fib_index);
551 }
552
553 static_always_inline int
554 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
555                                       u32 thread_index, f64 now,
556                                       vlib_main_t * vm, vlib_buffer_t * b)
557 {
558   clib_bihash_kv_16_8_t kv, value;
559   snat_session_t *s = 0;
560   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
561
562   if (!sm->forwarding_enabled)
563     return 0;
564
565   if (ip->protocol == IP_PROTOCOL_ICMP)
566     {
567       ip4_address_t lookup_saddr, lookup_daddr;
568       u16 lookup_sport, lookup_dport;
569       u8 lookup_protocol;
570       if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
571                                               &lookup_sport, &lookup_daddr,
572                                               &lookup_dport, &lookup_protocol))
573         return 0;
574       init_ed_k (&kv, lookup_saddr.as_u32, lookup_sport, lookup_daddr.as_u32,
575                  lookup_dport, 0, lookup_protocol);
576     }
577   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
578     {
579       init_ed_k (&kv, ip->src_address.as_u32,
580                  vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
581                  vnet_buffer (b)->ip.reass.l4_dst_port, 0, ip->protocol);
582     }
583   else
584     {
585       init_ed_k (&kv, ip->src_address.as_u32, 0, ip->dst_address.as_u32, 0, 0,
586                  ip->protocol);
587     }
588
589   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
590     {
591       ASSERT (thread_index == ed_value_get_thread_index (&value));
592       s =
593         pool_elt_at_index (tsm->sessions,
594                            ed_value_get_session_index (&value));
595
596       if (na44_ed_is_fwd_bypass_session (s))
597         {
598           if (ip->protocol == IP_PROTOCOL_TCP)
599             {
600               nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index);
601             }
602           /* Accounting */
603           nat44_session_update_counters (s, now,
604                                          vlib_buffer_length_in_chain (vm, b),
605                                          thread_index);
606           /* Per-user LRU list maintenance */
607           nat44_session_update_lru (sm, s, thread_index);
608           return 1;
609         }
610       else
611         return 0;
612     }
613
614   return 0;
615 }
616
617 static_always_inline int
618 nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
619                                        ip4_header_t *ip, u16 src_port,
620                                        u16 dst_port, u32 thread_index,
621                                        u32 rx_sw_if_index, u32 tx_sw_if_index,
622                                        f64 now, int is_multi_worker)
623 {
624   clib_bihash_kv_16_8_t kv, value;
625   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
626   snat_interface_t *i;
627   snat_session_t *s;
628   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
629   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
630
631   /* src NAT check */
632   init_ed_k (&kv, ip->src_address.as_u32, src_port, ip->dst_address.as_u32,
633              dst_port, tx_fib_index, ip->protocol);
634   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
635     {
636       ASSERT (thread_index == ed_value_get_thread_index (&value));
637       s =
638         pool_elt_at_index (tsm->sessions,
639                            ed_value_get_session_index (&value));
640       if (nat44_is_ses_closed (s)
641           && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp))
642         {
643           nat44_ed_free_session_data (sm, s, thread_index, 0);
644           nat_ed_session_delete (sm, s, thread_index, 1);
645         }
646       return 1;
647     }
648
649   /* dst NAT check */
650   if (is_multi_worker &&
651       PREDICT_TRUE (!pool_is_free_index (
652         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index)))
653     {
654       nat_6t_t lookup;
655       lookup.fib_index = rx_fib_index;
656       lookup.proto = ip->protocol;
657       lookup.daddr.as_u32 = ip->src_address.as_u32;
658       lookup.dport = src_port;
659       lookup.saddr.as_u32 = ip->dst_address.as_u32;
660       lookup.sport = dst_port;
661       s = pool_elt_at_index (
662         tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index);
663       if (PREDICT_TRUE (nat_6t_t_eq (&s->i2o.match, &lookup)))
664         {
665           goto skip_dst_nat_lookup;
666         }
667       s = NULL;
668     }
669
670   init_ed_k (&kv, ip->dst_address.as_u32, dst_port, ip->src_address.as_u32,
671              src_port, rx_fib_index, ip->protocol);
672   if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
673     {
674       ASSERT (thread_index == ed_value_get_thread_index (&value));
675       s =
676         pool_elt_at_index (tsm->sessions,
677                            ed_value_get_session_index (&value));
678
679     skip_dst_nat_lookup:
680       if (na44_ed_is_fwd_bypass_session (s))
681         return 0;
682
683       /* hairpinning */
684       pool_foreach (i, sm->output_feature_interfaces)
685        {
686          if ((nat44_ed_is_interface_inside (i)) &&
687              (rx_sw_if_index == i->sw_if_index))
688            return 0;
689       }
690       return 1;
691     }
692
693   return 0;
694 }
695
696 static inline u32
697 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
698                           icmp46_header_t *icmp, u32 sw_if_index,
699                           u32 rx_fib_index, vlib_node_runtime_t *node,
700                           u32 next, f64 now, u32 thread_index,
701                           snat_session_t **s_p, int is_multi_worker)
702 {
703   vlib_main_t *vm = vlib_get_main ();
704   u16 checksum;
705   int err;
706   snat_session_t *s = NULL;
707   u8 lookup_protocol = ip->protocol;
708   u16 lookup_sport, lookup_dport;
709   ip4_address_t lookup_saddr, lookup_daddr;
710
711   err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr,
712                                             &lookup_sport, &lookup_daddr,
713                                             &lookup_dport, &lookup_protocol);
714   if (err != 0)
715     {
716       b->error = node->errors[err];
717       return NAT_NEXT_DROP;
718     }
719
720   if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
721     {
722       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
723             sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
724             vnet_buffer (b)->sw_if_index[VLIB_TX], now, is_multi_worker)))
725         {
726           return next;
727         }
728     }
729   else
730     {
731       if (PREDICT_FALSE (nat44_ed_not_translate (
732             vm, sm, node, sw_if_index, b, ip, IP_PROTOCOL_ICMP, rx_fib_index)))
733         {
734           return next;
735         }
736     }
737
738   if (PREDICT_FALSE (icmp_type_is_error_message (
739         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
740     {
741       b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
742       return NAT_NEXT_DROP;
743     }
744
745   next = slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address,
746                        lookup_sport, lookup_dport, ip->protocol, rx_fib_index,
747                        &s, node, next, thread_index, vlib_time_now (vm));
748
749   if (NAT_NEXT_DROP == next)
750     goto out;
751
752   if (PREDICT_TRUE (!ip4_is_fragment (ip)))
753     {
754       ip_csum_t sum = ip_incremental_checksum_buffer (
755         vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b),
756         ntohs (ip->length) - ip4_header_bytes (ip), 0);
757       checksum = ~ip_csum_fold (sum);
758       if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff))
759         {
760           next = NAT_NEXT_DROP;
761           goto out;
762         }
763     }
764
765 out:
766   if (PREDICT_TRUE (next != NAT_NEXT_DROP && s))
767     {
768       /* Accounting */
769       nat44_session_update_counters (
770         s, now, vlib_buffer_length_in_chain (vm, b), thread_index);
771       /* Per-user LRU list maintenance */
772       nat44_session_update_lru (sm, s, thread_index);
773     }
774   *s_p = s;
775   return next;
776 }
777
778 static snat_session_t *
779 nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
780                                         ip4_header_t *ip, u32 rx_fib_index,
781                                         u32 thread_index, f64 now,
782                                         vlib_main_t *vm,
783                                         vlib_node_runtime_t *node)
784 {
785   clib_bihash_kv_16_8_t s_kv, s_value;
786   snat_static_mapping_t *m = NULL;
787   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
788   snat_session_t *s = NULL;
789   u32 outside_fib_index = sm->outside_fib_index;
790   int i;
791   ip4_address_t new_src_addr = { 0 };
792   ip4_address_t new_dst_addr = ip->dst_address;
793
794   if (PREDICT_FALSE (
795         nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
796     {
797       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
798       nat_ipfix_logging_max_sessions (thread_index,
799                                       sm->max_translations_per_thread);
800       nat_elog_notice (sm, "maximum sessions exceeded");
801       return 0;
802     }
803
804   switch (vec_len (sm->outside_fibs))
805     {
806     case 0:
807       outside_fib_index = sm->outside_fib_index;
808       break;
809     case 1:
810       outside_fib_index = sm->outside_fibs[0].fib_index;
811       break;
812     default:
813       outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address);
814       break;
815     }
816
817   /* Try to find static mapping first */
818   m = nat44_ed_sm_i2o_lookup (sm, ip->src_address, 0, rx_fib_index,
819                               ip->protocol);
820   if (m)
821     {
822       new_src_addr = m->external_addr;
823     }
824   else
825     {
826       pool_foreach (s, tsm->sessions)
827         {
828           if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
829             {
830               init_ed_k (&s_kv, s->out2in.addr.as_u32, 0,
831                          ip->dst_address.as_u32, 0, outside_fib_index,
832                          ip->protocol);
833               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
834                 {
835                   new_src_addr = s->out2in.addr;
836                 }
837               break;
838             }
839         }
840
841       if (!new_src_addr.as_u32)
842         {
843           for (i = 0; i < vec_len (sm->addresses); i++)
844             {
845               init_ed_k (&s_kv, sm->addresses[i].addr.as_u32, 0,
846                          ip->dst_address.as_u32, 0, outside_fib_index,
847                          ip->protocol);
848               if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
849                 {
850                   new_src_addr = sm->addresses[i].addr;
851                 }
852             }
853         }
854     }
855
856   if (!new_src_addr.as_u32)
857     {
858       // could not allocate address for translation ...
859       return 0;
860     }
861
862   s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
863   if (!s)
864     {
865       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
866       nat_elog_warn (sm, "create NAT session failed");
867       return 0;
868     }
869
870   nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
871                         ip->dst_address, 0, rx_fib_index, ip->protocol);
872   nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
873   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
874
875   // hairpinning?
876   int is_hairpinning = nat44_ed_external_sm_lookup (
877     sm, ip->dst_address, 0, ip->protocol, &new_dst_addr, NULL);
878   s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
879
880   nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
881   nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
882
883   nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
884                         outside_fib_index, ip->protocol);
885   nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
886   nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
887   nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
888
889   s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
890   s->out2in.addr.as_u32 = new_src_addr.as_u32;
891   s->out2in.fib_index = outside_fib_index;
892   s->in2out.addr.as_u32 = ip->src_address.as_u32;
893   s->in2out.fib_index = rx_fib_index;
894   s->in2out.port = s->out2in.port = ip->protocol;
895   if (m)
896     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
897
898   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
899     {
900       nat_elog_notice (sm, "in2out flow hash add failed");
901       nat_ed_session_delete (sm, s, thread_index, 1);
902       return NULL;
903     }
904
905   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1))
906     {
907       nat_elog_notice (sm, "out2in flow hash add failed");
908       nat_ed_session_delete (sm, s, thread_index, 1);
909       return NULL;
910     }
911
912   per_vrf_sessions_register_session (s, thread_index);
913
914   /* Accounting */
915   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
916                                  thread_index);
917   /* Per-user LRU list maintenance */
918   nat44_session_update_lru (sm, s, thread_index);
919
920   return s;
921 }
922
923 static inline uword
924 nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
925                                           vlib_node_runtime_t *node,
926                                           vlib_frame_t *frame,
927                                           int is_output_feature,
928                                           int is_multi_worker)
929 {
930   u32 n_left_from, *from;
931   snat_main_t *sm = &snat_main;
932   f64 now = vlib_time_now (vm);
933   u32 thread_index = vm->thread_index;
934   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
935   u32 def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH
936     : NAT_NEXT_IN2OUT_ED_SLOW_PATH;
937
938   from = vlib_frame_vector_args (frame);
939   n_left_from = frame->n_vectors;
940
941   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
942   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
943   vlib_get_buffers (vm, from, b, n_left_from);
944
945   while (n_left_from > 0)
946     {
947       vlib_buffer_t *b0;
948       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
949       u32 tx_sw_if_index0;
950       u32 cntr_sw_if_index0;
951       ip_protocol_t proto0;
952       ip4_header_t *ip0;
953       snat_session_t *s0 = 0;
954       clib_bihash_kv_16_8_t kv0, value0;
955       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
956       nat_6t_flow_t *f = 0;
957       nat_6t_t lookup;
958       int lookup_skipped = 0;
959
960       b0 = *b;
961       b++;
962
963       /* Prefetch next iteration. */
964       if (PREDICT_TRUE (n_left_from >= 2))
965         {
966           vlib_buffer_t *p2;
967
968           p2 = *b;
969
970           vlib_prefetch_buffer_header (p2, LOAD);
971
972           clib_prefetch_load (p2->data);
973         }
974
975       if (is_output_feature)
976         {
977           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
978         }
979
980       next[0] = vnet_buffer2 (b0)->nat.arc_next;
981
982       ip0 =
983         (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
984
985       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
986       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
987       cntr_sw_if_index0 =
988         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
989       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
990                                                            rx_sw_if_index0);
991       lookup.fib_index = rx_fib_index0;
992
993       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
994         {
995           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
996           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
997                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
998                                        0);
999           next[0] = NAT_NEXT_ICMP_ERROR;
1000           goto trace0;
1001         }
1002
1003       proto0 = ip0->protocol;
1004
1005       if (is_output_feature)
1006         {
1007           if (PREDICT_FALSE
1008               (nat_not_translate_output_feature_fwd
1009                (sm, ip0, thread_index, now, vm, b0)))
1010             goto trace0;
1011         }
1012
1013       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
1014         {
1015           if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1016                 ICMP4_echo_request &&
1017               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
1018                 ICMP4_echo_reply &&
1019               !icmp_type_is_error_message (
1020                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
1021             {
1022               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
1023               next[0] = NAT_NEXT_DROP;
1024               goto trace0;
1025             }
1026           int err = nat_get_icmp_session_lookup_values (
1027             b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
1028             &lookup.dport, &lookup.proto);
1029           if (err != 0)
1030             {
1031               b0->error = node->errors[err];
1032               next[0] = NAT_NEXT_DROP;
1033               goto trace0;
1034             }
1035         }
1036       else
1037         {
1038           lookup.proto = ip0->protocol;
1039           lookup.saddr.as_u32 = ip0->src_address.as_u32;
1040           lookup.daddr.as_u32 = ip0->dst_address.as_u32;
1041           lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
1042           lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
1043         }
1044
1045       /* there might be a stashed index in vnet_buffer2 from handoff or
1046        * classify node, see if it can be used */
1047       if (is_multi_worker &&
1048           !pool_is_free_index (tsm->sessions,
1049                                vnet_buffer2 (b0)->nat.cached_session_index))
1050         {
1051           s0 = pool_elt_at_index (tsm->sessions,
1052                                   vnet_buffer2 (b0)->nat.cached_session_index);
1053           if (PREDICT_TRUE (
1054                 nat_6t_t_eq (&s0->i2o.match, &lookup)
1055                 // for some hairpinning cases there are two "i2i" flows instead
1056                 // of i2o and o2i as both hosts are on inside
1057                 || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1058                     nat_6t_t_eq (&s0->o2i.match, &lookup))))
1059             {
1060               /* yes, this is the droid we're looking for */
1061               lookup_skipped = 1;
1062               goto skip_lookup;
1063             }
1064           s0 = NULL;
1065         }
1066
1067       init_ed_k (&kv0, lookup.saddr.as_u32, lookup.sport, lookup.daddr.as_u32,
1068                  lookup.dport, lookup.fib_index, lookup.proto);
1069
1070       // lookup flow
1071       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1072         {
1073           // flow does not exist go slow path
1074           next[0] = def_slow;
1075           goto trace0;
1076         }
1077
1078       ASSERT (thread_index == ed_value_get_thread_index (&value0));
1079       s0 =
1080         pool_elt_at_index (tsm->sessions,
1081                            ed_value_get_session_index (&value0));
1082
1083     skip_lookup:
1084
1085       ASSERT (thread_index == s0->thread_index);
1086
1087       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
1088         {
1089           // session is closed, go slow path
1090           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1091           nat_ed_session_delete (sm, s0, thread_index, 1);
1092           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
1093           goto trace0;
1094         }
1095
1096       if (s0->tcp_closed_timestamp)
1097         {
1098           if (now >= s0->tcp_closed_timestamp)
1099             {
1100               // session is closed, go slow path, freed in slow path
1101               next[0] = def_slow;
1102             }
1103           else
1104             {
1105               // session in transitory timeout, drop
1106               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
1107               next[0] = NAT_NEXT_DROP;
1108             }
1109           goto trace0;
1110         }
1111
1112       // drop if session expired
1113       u64 sess_timeout_time;
1114       sess_timeout_time =
1115         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1116       if (now >= sess_timeout_time)
1117         {
1118           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1119           nat_ed_session_delete (sm, s0, thread_index, 1);
1120           // session is closed, go slow path
1121           next[0] = def_slow;
1122           goto trace0;
1123         }
1124
1125       b0->flags |= VNET_BUFFER_F_IS_NATED;
1126
1127       if (nat_6t_t_eq (&s0->i2o.match, &lookup))
1128         {
1129           f = &s0->i2o;
1130         }
1131       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
1132                nat_6t_t_eq (&s0->o2i.match, &lookup))
1133         {
1134           f = &s0->o2i;
1135         }
1136       else
1137         {
1138           translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
1139           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1140           nat_ed_session_delete (sm, s0, thread_index, 1);
1141           next[0] = NAT_NEXT_DROP;
1142           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1143           goto trace0;
1144         }
1145
1146       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1147           (translation_error = nat_6t_flow_buf_translate_i2o (
1148              vm, sm, b0, ip0, f, proto0, is_output_feature)))
1149         {
1150           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1151           nat_ed_session_delete (sm, s0, thread_index, 1);
1152           next[0] = NAT_NEXT_DROP;
1153           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1154           goto trace0;
1155         }
1156
1157       switch (proto0)
1158         {
1159         case IP_PROTOCOL_TCP:
1160           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
1161                                          thread_index, cntr_sw_if_index0, 1);
1162           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1163           break;
1164         case IP_PROTOCOL_UDP:
1165           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
1166                                          thread_index, cntr_sw_if_index0, 1);
1167           break;
1168         case IP_PROTOCOL_ICMP:
1169           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
1170                                          thread_index, cntr_sw_if_index0, 1);
1171           break;
1172         default:
1173           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
1174                                          thread_index, cntr_sw_if_index0, 1);
1175           break;
1176         }
1177
1178       /* Accounting */
1179       nat44_session_update_counters (s0, now,
1180                                      vlib_buffer_length_in_chain (vm, b0),
1181                                      thread_index);
1182       /* Per-user LRU list maintenance */
1183       nat44_session_update_lru (sm, s0, thread_index);
1184
1185     trace0:
1186       if (PREDICT_FALSE
1187           ((node->flags & VLIB_NODE_FLAG_TRACE)
1188            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1189         {
1190           nat_in2out_ed_trace_t *t =
1191             vlib_add_trace (vm, node, b0, sizeof (*t));
1192           t->sw_if_index = rx_sw_if_index0;
1193           t->next_index = next[0];
1194           t->is_slow_path = 0;
1195           t->translation_error = translation_error;
1196           t->lookup_skipped = lookup_skipped;
1197           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1198
1199           if (s0)
1200             {
1201               t->session_index = s0 - tsm->sessions;
1202               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1203               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1204               t->translation_via_i2of = (&s0->i2o == f);
1205             }
1206           else
1207             {
1208               t->session_index = ~0;
1209             }
1210         }
1211
1212       if (next[0] == NAT_NEXT_DROP)
1213         {
1214           vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
1215                                          thread_index, cntr_sw_if_index0, 1);
1216         }
1217
1218       n_left_from--;
1219       next++;
1220     }
1221
1222   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1223                                frame->n_vectors);
1224   return frame->n_vectors;
1225 }
1226
1227 static inline uword
1228 nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
1229                                           vlib_node_runtime_t *node,
1230                                           vlib_frame_t *frame,
1231                                           int is_output_feature,
1232                                           int is_multi_worker)
1233 {
1234   u32 n_left_from, *from;
1235   snat_main_t *sm = &snat_main;
1236   f64 now = vlib_time_now (vm);
1237   u32 thread_index = vm->thread_index;
1238   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1239
1240   from = vlib_frame_vector_args (frame);
1241   n_left_from = frame->n_vectors;
1242
1243   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1244   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1245   vlib_get_buffers (vm, from, b, n_left_from);
1246
1247   while (n_left_from > 0)
1248     {
1249       vlib_buffer_t *b0;
1250       u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
1251       u32 tx_sw_if_index0;
1252       u32 cntr_sw_if_index0;
1253       ip_protocol_t proto0;
1254       ip4_header_t *ip0;
1255       udp_header_t *udp0;
1256       icmp46_header_t *icmp0;
1257       snat_session_t *s0 = 0;
1258       clib_bihash_kv_16_8_t kv0, value0;
1259       int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
1260
1261       b0 = *b;
1262
1263       if (is_output_feature)
1264         iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
1265
1266       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1267
1268       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1269                               iph_offset0);
1270
1271       rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1272       tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1273       cntr_sw_if_index0 =
1274         is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
1275       rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1276                                                            rx_sw_if_index0);
1277
1278       if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
1279         {
1280           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1281           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1282                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1283                                        0);
1284           next[0] = NAT_NEXT_ICMP_ERROR;
1285           goto trace0;
1286         }
1287
1288       udp0 = ip4_next_header (ip0);
1289       icmp0 = (icmp46_header_t *) udp0;
1290       proto0 = ip0->protocol;
1291
1292       if (PREDICT_FALSE (nat44_ed_is_unk_proto (proto0)))
1293         {
1294           s0 = nat44_ed_in2out_slowpath_unknown_proto (
1295             sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
1296           if (!s0)
1297             next[0] = NAT_NEXT_DROP;
1298
1299           if (NAT_NEXT_DROP != next[0] && s0 &&
1300               NAT_ED_TRNSL_ERR_SUCCESS !=
1301                 (translation_error = nat_6t_flow_buf_translate_i2o (
1302                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1303             {
1304               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1305               nat_ed_session_delete (sm, s0, thread_index, 1);
1306               next[0] = NAT_NEXT_DROP;
1307               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1308               goto trace0;
1309             }
1310
1311           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
1312                                          thread_index, cntr_sw_if_index0, 1);
1313           goto trace0;
1314         }
1315
1316       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
1317         {
1318           next[0] = icmp_in2out_ed_slow_path (
1319             sm, b0, ip0, icmp0, rx_sw_if_index0, rx_fib_index0, node, next[0],
1320             now, thread_index, &s0, is_multi_worker);
1321           if (NAT_NEXT_DROP != next[0] && s0 &&
1322               NAT_ED_TRNSL_ERR_SUCCESS !=
1323                 (translation_error = nat_6t_flow_buf_translate_i2o (
1324                    vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1325             {
1326               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1327               nat_ed_session_delete (sm, s0, thread_index, 1);
1328               next[0] = NAT_NEXT_DROP;
1329               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1330               goto trace0;
1331             }
1332
1333           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.icmp,
1334                                          thread_index, cntr_sw_if_index0, 1);
1335           goto trace0;
1336         }
1337
1338       init_ed_k (
1339         &kv0, ip0->src_address.as_u32, vnet_buffer (b0)->ip.reass.l4_src_port,
1340         ip0->dst_address.as_u32, vnet_buffer (b0)->ip.reass.l4_dst_port,
1341         rx_fib_index0, ip0->protocol);
1342       if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
1343         {
1344           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1345           s0 =
1346             pool_elt_at_index (tsm->sessions,
1347                                ed_value_get_session_index (&value0));
1348
1349           if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
1350             {
1351               nat44_ed_free_session_data (sm, s0, thread_index, 0);
1352               nat_ed_session_delete (sm, s0, thread_index, 1);
1353               s0 = NULL;
1354             }
1355         }
1356
1357       if (!s0)
1358         {
1359           if (is_output_feature)
1360             {
1361               if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
1362                     sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1363                     vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
1364                     rx_sw_if_index0, tx_sw_if_index0, now, is_multi_worker)))
1365                 goto trace0;
1366
1367               /*
1368                * Send DHCP packets to the ipv4 stack, or we won't
1369                * be able to use dhcp client on the outside interface
1370                */
1371               if (PREDICT_FALSE (
1372                     proto0 == IP_PROTOCOL_UDP &&
1373                     (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1374                      clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)) &&
1375                     ip0->dst_address.as_u32 == 0xffffffff))
1376                 goto trace0;
1377             }
1378           else
1379             {
1380               if (PREDICT_FALSE (
1381                     nat44_ed_not_translate (vm, sm, node, rx_sw_if_index0, b0,
1382                                             ip0, proto0, rx_fib_index0)))
1383                 goto trace0;
1384             }
1385
1386           next[0] = slow_path_ed (
1387             vm, sm, b0, ip0->src_address, ip0->dst_address,
1388             vnet_buffer (b0)->ip.reass.l4_src_port,
1389             vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->protocol,
1390             rx_fib_index0, &s0, node, next[0], thread_index, now);
1391
1392           if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
1393             goto trace0;
1394
1395           if (PREDICT_FALSE (!s0))
1396             goto trace0;
1397
1398         }
1399
1400       b0->flags |= VNET_BUFFER_F_IS_NATED;
1401
1402       if (NAT_ED_TRNSL_ERR_SUCCESS !=
1403           (translation_error = nat_6t_flow_buf_translate_i2o (
1404              vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
1405         {
1406           nat44_ed_free_session_data (sm, s0, thread_index, 0);
1407           nat_ed_session_delete (sm, s0, thread_index, 1);
1408           next[0] = NAT_NEXT_DROP;
1409           b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
1410           goto trace0;
1411         }
1412
1413       if (PREDICT_TRUE (proto0 == IP_PROTOCOL_TCP))
1414         {
1415           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
1416                                          thread_index, cntr_sw_if_index0, 1);
1417           nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
1418         }
1419       else
1420         {
1421           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
1422                                          thread_index, cntr_sw_if_index0, 1);
1423         }
1424
1425       /* Accounting */
1426       nat44_session_update_counters (s0, now,
1427                                      vlib_buffer_length_in_chain
1428                                      (vm, b0), thread_index);
1429       /* Per-user LRU list maintenance */
1430       nat44_session_update_lru (sm, s0, thread_index);
1431
1432     trace0:
1433       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1434                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1435         {
1436           nat_in2out_ed_trace_t *t =
1437             vlib_add_trace (vm, node, b0, sizeof (*t));
1438           t->sw_if_index = rx_sw_if_index0;
1439           t->next_index = next[0];
1440           t->is_slow_path = 1;
1441           t->translation_error = translation_error;
1442           clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key));
1443
1444           if (s0)
1445             {
1446               t->session_index = s0 - tsm->sessions;
1447               clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
1448               clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
1449               t->translation_via_i2of = 1;
1450             }
1451
1452           else
1453             {
1454               t->session_index = ~0;
1455             }
1456         }
1457
1458       if (next[0] == NAT_NEXT_DROP)
1459         {
1460           vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
1461                                          thread_index, cntr_sw_if_index0, 1);
1462         }
1463
1464       n_left_from--;
1465       next++;
1466       b++;
1467     }
1468
1469   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1470                                frame->n_vectors);
1471
1472   return frame->n_vectors;
1473 }
1474
1475 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1476                                      vlib_node_runtime_t * node,
1477                                      vlib_frame_t * frame)
1478 {
1479   if (snat_main.num_workers > 1)
1480     {
1481       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1);
1482     }
1483   else
1484     {
1485       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0);
1486     }
1487 }
1488
1489 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1490   .name = "nat44-ed-in2out",
1491   .vector_size = sizeof (u32),
1492   .sibling_of = "nat-default",
1493   .format_trace = format_nat_in2out_ed_trace,
1494   .type = VLIB_NODE_TYPE_INTERNAL,
1495   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1496   .error_strings = nat_in2out_ed_error_strings,
1497   .runtime_data_bytes = sizeof (snat_runtime_t),
1498 };
1499
1500 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1501                                             vlib_node_runtime_t * node,
1502                                             vlib_frame_t * frame)
1503 {
1504   if (snat_main.num_workers > 1)
1505     {
1506       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1);
1507     }
1508   else
1509     {
1510       return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0);
1511     }
1512 }
1513
1514 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1515   .name = "nat44-ed-in2out-output",
1516   .vector_size = sizeof (u32),
1517   .sibling_of = "nat-default",
1518   .format_trace = format_nat_in2out_ed_trace,
1519   .type = VLIB_NODE_TYPE_INTERNAL,
1520   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1521   .error_strings = nat_in2out_ed_error_strings,
1522   .runtime_data_bytes = sizeof (snat_runtime_t),
1523 };
1524
1525 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1526                                               vlib_node_runtime_t *
1527                                               node, vlib_frame_t * frame)
1528 {
1529   if (snat_main.num_workers > 1)
1530     {
1531       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 1);
1532     }
1533   else
1534     {
1535       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 0);
1536     }
1537 }
1538
1539 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1540   .name = "nat44-ed-in2out-slowpath",
1541   .vector_size = sizeof (u32),
1542   .sibling_of = "nat-default",
1543   .format_trace = format_nat_in2out_ed_trace,
1544   .type = VLIB_NODE_TYPE_INTERNAL,
1545   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1546   .error_strings = nat_in2out_ed_error_strings,
1547   .runtime_data_bytes = sizeof (snat_runtime_t),
1548 };
1549
1550 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1551                                                      vlib_node_runtime_t
1552                                                      * node,
1553                                                      vlib_frame_t * frame)
1554 {
1555   if (snat_main.num_workers > 1)
1556     {
1557       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 1);
1558     }
1559   else
1560     {
1561       return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 0);
1562     }
1563 }
1564
1565 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1566   .name = "nat44-ed-in2out-output-slowpath",
1567   .vector_size = sizeof (u32),
1568   .sibling_of = "nat-default",
1569   .format_trace = format_nat_in2out_ed_trace,
1570   .type = VLIB_NODE_TYPE_INTERNAL,
1571   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1572   .error_strings = nat_in2out_ed_error_strings,
1573   .runtime_data_bytes = sizeof (snat_runtime_t),
1574 };
1575
1576 static u8 *
1577 format_nat_pre_trace (u8 * s, va_list * args)
1578 {
1579   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1580   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1581   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1582   return format (s, "in2out next_index %d arc_next_index %d", t->next_index,
1583                  t->arc_next_index);
1584 }
1585
1586 VLIB_NODE_FN (nat_pre_in2out_node)
1587   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1588 {
1589   return nat_pre_node_fn_inline (vm, node, frame,
1590                                  NAT_NEXT_IN2OUT_ED_FAST_PATH);
1591 }
1592
1593 VLIB_NODE_FN (nat_pre_in2out_output_node)
1594   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1595 {
1596   return nat_pre_node_fn_inline (vm, node, frame,
1597                                  NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH);
1598 }
1599
1600 VLIB_REGISTER_NODE (nat_pre_in2out_node) = {
1601   .name = "nat-pre-in2out",
1602   .vector_size = sizeof (u32),
1603   .sibling_of = "nat-default",
1604   .format_trace = format_nat_pre_trace,
1605   .type = VLIB_NODE_TYPE_INTERNAL,
1606   .n_errors = 0,
1607 };
1608
1609 VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = {
1610   .name = "nat-pre-in2out-output",
1611   .vector_size = sizeof (u32),
1612   .sibling_of = "nat-default",
1613   .format_trace = format_nat_pre_trace,
1614   .type = VLIB_NODE_TYPE_INTERNAL,
1615   .n_errors = 0,
1616 };
1617
1618 /*
1619  * fd.io coding-style-patch-verification: ON
1620  *
1621  * Local Variables:
1622  * eval: (c-set-style "gnu")
1623  * End:
1624  */