95e2ea972af15451b1e626ba23b0766688c0564f
[vpp.git] / src / plugins / nat / out2in_ed.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent outside to inside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27 #include <nat/nat.h>
28 #include <nat/lib/ipfix_logging.h>
29 #include <nat/nat_inlines.h>
30 #include <nat/nat44/inlines.h>
31 #include <nat/lib/nat_syslog.h>
32 #include <nat/nat_ha.h>
33 #include <nat/nat44/ed_inlines.h>
34
/*
 * Table of human-readable strings, one per entry of the
 * foreach_nat_out2in_ed_error X-macro (defined outside this file).
 * The "_" helper keeps only the string half of each (sym, string) pair.
 * Presumably indexed by the NAT_OUT2IN_ED_ERROR_* codes used with
 * node->errors[] below; the registration itself is not in this excerpt.
 */
35 static char *nat_out2in_ed_error_strings[] = {
36 #define _(sym,string) string,
37   foreach_nat_out2in_ed_error
38 #undef _
39 };
40
/*
 * Per-packet trace record rendered by format_nat44_ed_out2in_trace ().
 */
41 typedef struct
42 {
      /* printed as "sw_if_index" by the trace formatter */
43   u32 sw_if_index;
      /* printed as "next index" by the trace formatter */
44   u32 next_index;
      /* printed as "session" by the trace formatter */
45   u32 session_index;
      /* non-zero selects the SLOW_PATH tag, zero the FAST_PATH tag */
46   u32 is_slow_path;
47 } nat44_ed_out2in_trace_t;
48
/*
 * Trace record holding a single "thread_next" value.
 * NOTE(review): presumably filled by the out2in handoff node with the
 * destination worker index; the producer is not in this excerpt - confirm.
 */
49 typedef struct
50 {
51   u16 thread_next;
52 } nat44_ed_out2in_handoff_trace_t;
53
54 static u8 *
55 format_nat44_ed_out2in_trace (u8 * s, va_list * args)
56 {
57   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
58   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
59   nat44_ed_out2in_trace_t *t = va_arg (*args, nat44_ed_out2in_trace_t *);
60   char *tag;
61
62   tag =
63     t->is_slow_path ? "NAT44_OUT2IN_ED_SLOW_PATH" :
64     "NAT44_OUT2IN_ED_FAST_PATH";
65
66   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
67               t->sw_if_index, t->next_index, t->session_index);
68
69   return s;
70 }
71
72 static inline u32
73 icmp_out2in_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0,
74                           ip4_header_t * ip0, icmp46_header_t * icmp0,
75                           u32 sw_if_index0, u32 rx_fib_index0,
76                           vlib_node_runtime_t * node, u32 next0, f64 now,
77                           u32 thread_index, snat_session_t ** p_s0)
78 {
79   vlib_main_t *vm = vlib_get_main ();
80
81   next0 = icmp_out2in (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
82                        next0, thread_index, p_s0, 0);
83   snat_session_t *s0 = *p_s0;
84   if (PREDICT_TRUE (next0 != NAT_NEXT_DROP && s0))
85     {
86       /* Accounting */
87       nat44_session_update_counters (s0, now,
88                                      vlib_buffer_length_in_chain
89                                      (vm, b0), thread_index);
90       /* Per-user LRU list maintenance */
91       nat44_session_update_lru (sm, s0, thread_index);
92     }
93   return next0;
94 }
95
96 #ifndef CLIB_MARCH_VARIANT
/**
 * @brief Stale-entry callback for out2in endpoint-dependent hash inserts.
 *
 * In this file it is handed to clib_bihash_add_or_overwrite_stale_16_8 ()
 * when an out2in key is added. For the entry under inspection it looks up
 * the session, and if the session has timed out it removes the matching
 * in2out key, emits the delete notifications (IPFIX, syslog, HA), releases
 * the outside port of non-static sessions and deletes the session.
 *
 * @param kv  hash entry being inspected; kv->value is used as the index
 *            into the per-thread session pool
 * @param arg nat44_is_idle_session_ctx_t providing "now" and "thread_index"
 * @return 1 when the session was expired and has been deleted, 0 otherwise
 */
97 int
98 nat44_o2i_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg)
99 {
100   snat_main_t *sm = &snat_main;
101   nat44_is_idle_session_ctx_t *ctx = arg;
102   snat_session_t *s;
103   u64 sess_timeout_time;
104   u8 proto;
105   u16 r_port, l_port;
106   ip4_address_t *l_addr, *r_addr;
107   u32 fib_index;
108   clib_bihash_kv_16_8_t ed_kv;
109   int i;
110   //snat_address_t *a;
111   snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
112                                                        ctx->thread_index);
113
      /* NOTE(review): kv->value is used raw as the pool index here, while
       * lookups elsewhere in this file decode the value with
       * ed_value_get_session_index () - confirm both agree when the value
       * also encodes a non-zero thread index. */
114   s = pool_elt_at_index (tsm->sessions, kv->value);
      /* Expiry instant = last activity + session timeout, kept in a u64. */
115   sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s);
116   if (ctx->now >= sess_timeout_time)
117     {
          /* Rebuild the in2out key of this session so it can be removed. */
118       l_addr = &s->in2out.addr;
119       r_addr = &s->ext_host_addr;
120       fib_index = s->in2out.fib_index;
121       if (snat_is_unk_proto_session (s))
122         {
              /* For unknown-protocol sessions the port field carries the
               * protocol value and the key ports are zero. */
123           proto = s->in2out.port;
124           r_port = 0;
125           l_port = 0;
126         }
127       else
128         {
129           proto = nat_proto_to_ip_proto (s->nat_proto);
130           l_port = s->in2out.port;
131           r_port = s->ext_host_port;
132         }
133       if (is_twice_nat_session (s))
134         {
              /* Twice-NAT: the remote side of the in2out key is the
               * translated external host address/port. */
135           r_addr = &s->ext_host_nat_addr;
136           r_port = s->ext_host_nat_port;
137         }
138       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
139       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
140         nat_elog_warn ("in2out_ed key del failed");
141
          /* Unknown-protocol sessions skip logging and port release. */
142       if (snat_is_unk_proto_session (s))
143         goto delete;
144
145       nat_ipfix_logging_nat44_ses_delete (ctx->thread_index,
146                                           s->in2out.addr.as_u32,
147                                           s->out2in.addr.as_u32,
148                                           s->nat_proto,
149                                           s->in2out.port,
150                                           s->out2in.port,
151                                           s->in2out.fib_index);
152
153       nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
154                              &s->in2out.addr, s->in2out.port,
155                              &s->ext_host_nat_addr, s->ext_host_nat_port,
156                              &s->out2in.addr, s->out2in.port,
157                              &s->ext_host_addr, s->ext_host_port,
158                              s->nat_proto, is_twice_nat_session (s));
159
160       nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
161                    s->ext_host_port, s->nat_proto, s->out2in.fib_index,
162                    ctx->thread_index);
163
          /* NOTE(review): the loop below has no live statements (its body
           * is commented out), so the twice-NAT address/port of an expired
           * session is not released on this path. */
164       if (is_twice_nat_session (s))
165         {
166           for (i = 0; i < vec_len (sm->twice_nat_addresses); i++)
167             {
168               // FIXME TODO this is obviously wrong code ... needs fix!
169               //       key.protocol = s->nat_proto;
170               //       key.port = s->ext_host_nat_port;
171               //       a = sm->twice_nat_addresses + i;
172               //       if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32)
173               //      {
174               //        snat_free_outside_address_and_port (sm->twice_nat_addresses,
175               //                                            ctx->thread_index,
176               //                                            &key);
177               //        break;
178               //      }
179             }
180         }
181
          /* Static sessions skip the outside address/port release. */
182       if (snat_is_session_static (s))
183         goto delete;
184
185       snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
186                                           &s->out2in.addr, s->out2in.port,
187                                           s->nat_proto);
188     delete:
189       nat_ed_session_delete (sm, s, ctx->thread_index, 1);
190       return 1;
191     }
192
193   return 0;
194 }
195 #endif
196
197 // allocate exact address based on preference
198 static_always_inline int
199 nat_alloc_addr_and_port_exact (snat_address_t * a,
200                                u32 thread_index,
201                                nat_protocol_t proto,
202                                ip4_address_t * addr,
203                                u16 * port,
204                                u16 port_per_thread, u32 snat_thread_index)
205 {
206   u32 portnum;
207
208   switch (proto)
209     {
210 #define _(N, j, n, s) \
211     case NAT_PROTOCOL_##N: \
212       if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \
213         { \
214           while (1) \
215             { \
216               portnum = (port_per_thread * \
217                 snat_thread_index) + \
218                 snat_random_port(0, port_per_thread - 1) + 1024; \
219               if (a->busy_##n##_port_refcounts[portnum]) \
220                 continue; \
221               --a->busy_##n##_port_refcounts[portnum]; \
222               a->busy_##n##_ports_per_thread[thread_index]++; \
223               a->busy_##n##_ports++; \
224               *addr = a->addr; \
225               *port = clib_host_to_net_u16(portnum); \
226               return 0; \
227             } \
228         } \
229       break;
230       foreach_nat_protocol
231 #undef _
232     default:
233       nat_elog_info ("unknown protocol");
234       return 1;
235     }
236
237   /* Totally out of translations to use... */
238   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
239   return 1;
240 }
241
242
/**
 * @brief Create an endpoint-dependent session for a packet that arrived
 *        from outside and matched a static mapping.
 *
 * Allocates a session, fills both directions from the arguments, inserts the
 * out2in key into sm->out2in_ed and the in2out key into the per-thread
 * in2out_ed table, and emits the session-create notifications (IPFIX,
 * syslog, HA). With twice-NAT an additional address/port for the external
 * host is allocated from sm->twice_nat_addresses and becomes the remote side
 * of the in2out key.
 *
 * @param b             packet buffer; the IP header at the current data
 *                      offset supplies the external host address and port,
 *                      and b->error is set on failure
 * @param i2o_addr      address stored in s->in2out
 * @param i2o_port      port stored in s->in2out
 * @param i2o_fib_index FIB index stored in s->in2out
 * @param o2i_addr      address stored in s->out2in
 * @param o2i_port      port stored in s->out2in
 * @param o2i_fib_index FIB index stored in s->out2in
 * @param nat_proto     NAT protocol of the session
 * @param node          node runtime providing the error counters
 * @param rx_fib_index  FIB index used for the session-limit check
 * @param twice_nat     twice-NAT mode requested by the mapping
 * @param lb_nat        load-balancing mode requested by the mapping
 * @param now           current time
 * @param mapping       static mapping, consulted for an exact pool address
 * @return the new session, or 0 when the session limit is exceeded, the
 *         session cannot be allocated, or no twice-NAT port is available
 */
243 static snat_session_t *
244 create_session_for_static_mapping_ed (snat_main_t * sm,
245                                       vlib_buffer_t * b,
246                                       ip4_address_t i2o_addr,
247                                       u16 i2o_port,
248                                       u32 i2o_fib_index,
249                                       ip4_address_t o2i_addr,
250                                       u16 o2i_port,
251                                       u32 o2i_fib_index,
252                                       nat_protocol_t nat_proto,
253                                       vlib_node_runtime_t * node,
254                                       u32 rx_fib_index,
255                                       u32 thread_index,
256                                       twice_nat_type_t twice_nat,
257                                       lb_nat_type_t lb_nat, f64 now,
258                                       snat_static_mapping_t * mapping)
259 {
260   snat_session_t *s;
261   ip4_header_t *ip;
262   udp_header_t *udp;
263   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
264   clib_bihash_kv_16_8_t kv;
265   nat44_is_idle_session_ctx_t ctx;
266
267   if (PREDICT_FALSE
268       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
269     {
270       b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED];
271       nat_elog_notice ("maximum sessions exceeded");
272       return 0;
273     }
274
275   s = nat_ed_session_alloc (sm, thread_index, now, nat_proto);
276   if (!s)
277     {
278       b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_USER_SESS_EXCEEDED];
279       nat_elog_warn ("create NAT session failed");
280       return 0;
281     }
282
283   ip = vlib_buffer_get_current (b);
284   udp = ip4_next_header (ip);
285
      /* The external host is the packet source; ICMP sessions store port 0,
       * every other protocol reads the source port through the UDP view. */
286   s->ext_host_addr.as_u32 = ip->src_address.as_u32;
287   s->ext_host_port = nat_proto == NAT_PROTOCOL_ICMP ? 0 : udp->src_port;
288   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
289   if (lb_nat)
290     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
291   if (lb_nat == AFFINITY_LB_NAT)
292     s->flags |= SNAT_SESSION_FLAG_AFFINITY;
293   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
294   s->out2in.addr = o2i_addr;
295   s->out2in.port = o2i_port;
296   s->out2in.fib_index = o2i_fib_index;
297   s->in2out.addr = i2o_addr;
298   s->in2out.port = i2o_port;
299   s->in2out.fib_index = i2o_fib_index;
300   s->nat_proto = nat_proto;
301
302   /* Add to lookup tables */
303   init_ed_kv (&kv, o2i_addr, o2i_port, s->ext_host_addr, s->ext_host_port,
304               o2i_fib_index, ip->protocol, thread_index, s - tsm->sessions);
305   ctx.now = now;
306   ctx.thread_index = thread_index;
      /* A failed insert is only logged; session setup continues. */
307   if (clib_bihash_add_or_overwrite_stale_16_8 (&sm->out2in_ed, &kv,
308                                                nat44_o2i_ed_is_idle_session_cb,
309                                                &ctx))
310     nat_elog_notice ("out2in-ed key add failed");
311
      /* Twice-NAT is applied unconditionally for TWICE_NAT, and for
       * TWICE_NAT_SELF only when the packet source equals i2o_addr. */
312   if (twice_nat == TWICE_NAT || (twice_nat == TWICE_NAT_SELF &&
313                                  ip->src_address.as_u32 == i2o_addr.as_u32))
314     {
315       int rc = 0;
316       snat_address_t *filter = 0;
317
318       // if exact address is specified use this address
319       if (is_exact_address (mapping))
320         {
321           snat_address_t *ap;
322           vec_foreach (ap, sm->twice_nat_addresses)
323           {
324             if (mapping->pool_addr.as_u32 == ap->addr.as_u32)
325               {
326                 filter = ap;
327                 break;
328               }
329           }
330         }
331
          /* filter stays 0 when no pool address equals mapping->pool_addr;
           * allocation then falls back to the whole twice-NAT pool. */
332       if (filter)
333         {
334           rc = nat_alloc_addr_and_port_exact (filter,
335                                               thread_index,
336                                               nat_proto,
337                                               &s->ext_host_nat_addr,
338                                               &s->ext_host_nat_port,
339                                               sm->port_per_thread,
340                                               tsm->snat_thread_index);
341           s->flags |= SNAT_SESSION_FLAG_EXACT_ADDRESS;
342         }
343       else
344         {
345           rc =
346             snat_alloc_outside_address_and_port (sm->twice_nat_addresses, 0,
347                                                  thread_index, nat_proto,
348                                                  &s->ext_host_nat_addr,
349                                                  &s->ext_host_nat_port,
350                                                  sm->port_per_thread,
351                                                  tsm->snat_thread_index);
352         }
353
354       if (rc)
355         {
              /* kv still holds the out2in key built above, so the delete
               * below removes the entry that was just inserted. */
356           b->error = node->errors[NAT_OUT2IN_ED_ERROR_OUT_OF_PORTS];
357           nat_ed_session_delete (sm, s, thread_index, 1);
358           if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &kv, 0))
359             nat_elog_notice ("out2in-ed key del failed");
360           return 0;
361         }
362
          /* in2out key: remote side is the translated external host. */
363       s->flags |= SNAT_SESSION_FLAG_TWICE_NAT;
364       init_ed_kv (&kv, i2o_addr, i2o_port, s->ext_host_nat_addr,
365                   s->ext_host_nat_port, i2o_fib_index, ip->protocol,
366                   thread_index, s - tsm->sessions);
367     }
368   else
369     {
          /* in2out key: remote side is the real external host. */
370       init_ed_kv (&kv, i2o_addr, i2o_port, s->ext_host_addr,
371                   s->ext_host_port, i2o_fib_index, ip->protocol,
372                   thread_index, s - tsm->sessions);
373     }
374   if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, &kv,
375                                                nat44_i2o_ed_is_idle_session_cb,
376                                                &ctx))
377     nat_elog_notice ("in2out-ed key add failed");
378
379   nat_ipfix_logging_nat44_ses_create (thread_index,
380                                       s->in2out.addr.as_u32,
381                                       s->out2in.addr.as_u32,
382                                       s->nat_proto,
383                                       s->in2out.port,
384                                       s->out2in.port, s->in2out.fib_index);
385
386   nat_syslog_nat44_sadd (s->user_index, s->in2out.fib_index,
387                          &s->in2out.addr, s->in2out.port,
388                          &s->ext_host_nat_addr, s->ext_host_nat_port,
389                          &s->out2in.addr, s->out2in.port,
390                          &s->ext_host_addr, s->ext_host_port,
391                          s->nat_proto, is_twice_nat_session (s));
392
393   nat_ha_sadd (&s->in2out.addr, s->in2out.port, &s->out2in.addr,
394                s->out2in.port, &s->ext_host_addr, s->ext_host_port,
395                &s->ext_host_nat_addr, s->ext_host_nat_port,
396                s->nat_proto, s->in2out.fib_index, s->flags, thread_index, 0);
397
398   per_vrf_sessions_register_session (s, thread_index);
399
400   return s;
401 }
402
403 static int
404 next_src_nat (snat_main_t * sm, ip4_header_t * ip, u16 src_port,
405               u16 dst_port, u32 thread_index, u32 rx_fib_index)
406 {
407   clib_bihash_kv_16_8_t kv, value;
408   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
409
410   init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
411              rx_fib_index, ip->protocol);
412   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
413     return 1;
414
415   return 0;
416 }
417
/**
 * @brief Find or create a forwarding-bypass session for an outside packet
 *        that matched no translation.
 *
 * The session is keyed in the per-thread in2out_ed table by the packet's
 * destination (local side) and source (remote side). A new session gets
 * SNAT_SESSION_FLAG_FWD_BYPASS and identical in2out and out2in tuples.
 * For TCP the session state is updated from the header; counters and LRU
 * position are refreshed in every case.
 *
 * The function returns silently, without a session, when the ICMP key
 * cannot be built, when an ICMP error message has no matching session, when
 * the session limit is exceeded, or when allocation fails.
 *
 * @param b            packet buffer
 * @param ip           IPv4 header of the packet
 * @param rx_fib_index FIB index of the RX interface
 * @param thread_index index of the thread whose tables are used
 */
418 static void
419 create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip,
420                        u32 rx_fib_index, u32 thread_index)
421 {
422   clib_bihash_kv_16_8_t kv, value;
423   udp_header_t *udp;
424   snat_session_t *s = 0;
425   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
426   vlib_main_t *vm = vlib_get_main ();
427   f64 now = vlib_time_now (vm);
428   u16 l_port, r_port;
429
      /* Build the lookup key: l_* is the packet destination, r_* its source. */
430   if (ip->protocol == IP_PROTOCOL_ICMP)
431     {
432       if (get_icmp_o2i_ed_key
433           (b, ip, rx_fib_index, ~0, ~0, 0, &l_port, &r_port, &kv))
434         return;
435     }
436   else
437     {
438       if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
439         {
440           udp = ip4_next_header (ip);
441           l_port = udp->dst_port;
442           r_port = udp->src_port;
443         }
444       else
445         {
              /* Other protocols are keyed on addresses only. */
446           l_port = 0;
447           r_port = 0;
448         }
449       init_ed_k (&kv, ip->dst_address, l_port, ip->src_address, r_port,
450                  rx_fib_index, ip->protocol);
451     }
452
453   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
454     {
          /* Existing session: reuse it. */
455       ASSERT (thread_index == ed_value_get_thread_index (&value));
456       s =
457         pool_elt_at_index (tsm->sessions,
458                            ed_value_get_session_index (&value));
459     }
460   else if (ip->protocol == IP_PROTOCOL_ICMP &&
461            icmp_type_is_error_message
462            (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
463     {
          /* ICMP error messages never create a session. */
464       return;
465     }
466   else
467     {
468       u32 proto;
469
470       if (PREDICT_FALSE
471           (nat44_ed_maximum_sessions_exceeded
472            (sm, rx_fib_index, thread_index)))
473         return;
474
          /* NOTE(review): ip->protocol (an IP protocol number) is passed
           * here, whereas create_session_for_static_mapping_ed () passes a
           * nat_protocol_t to the same function - confirm which one
           * nat_ed_session_alloc () expects. */
475       s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
476       if (!s)
477         {
478           nat_elog_warn ("create NAT session failed");
479           return;
480         }
481
482       proto = ip_proto_to_nat_proto (ip->protocol);
483
484       s->ext_host_addr = ip->src_address;
485       s->ext_host_port = r_port;
486       s->flags |= SNAT_SESSION_FLAG_FWD_BYPASS;
487       s->out2in.addr = ip->dst_address;
488       s->out2in.port = l_port;
489       s->nat_proto = proto;
490       if (proto == NAT_PROTOCOL_OTHER)
491         {
              /* Unknown protocols store the IP protocol in the port field. */
492           s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
493           s->out2in.port = ip->protocol;
494         }
495       s->out2in.fib_index = rx_fib_index;
          /* Bypass: the in2out tuple is a copy of the out2in tuple. */
496       s->in2out.addr = s->out2in.addr;
497       s->in2out.port = s->out2in.port;
498       s->in2out.fib_index = s->out2in.fib_index;
499
          /* NOTE(review): the value stored is the bare pool index, but the
           * hit path above decodes it with ed_value_get_thread_index () /
           * ed_value_get_session_index (); other inserts in this file use
           * init_ed_kv () with thread_index - confirm this is consistent
           * for thread_index != 0. */
500       kv.value = s - tsm->sessions;
501       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &kv, 1))
502         nat_elog_notice ("in2out_ed key add failed");
503
504       per_vrf_sessions_register_session (s, thread_index);
505     }
506
507   if (ip->protocol == IP_PROTOCOL_TCP)
508     {
509       tcp_header_t *tcp = ip4_next_header (ip);
510       nat44_set_tcp_session_state_o2i (sm, now, s, tcp->flags,
511                                        tcp->ack_number, tcp->seq_number,
512                                        thread_index);
513     }
514
      /* The byte count passed to the counter update is 0 on this path. */
515   /* Accounting */
516   nat44_session_update_counters (s, now, 0, thread_index);
517   /* Per-user LRU list maintenance */
518   nat44_session_update_lru (sm, s, thread_index);
519 }
520
521 static_always_inline int
522 create_bypass_for_fwd_worker (snat_main_t * sm,
523                               vlib_buffer_t * b, ip4_header_t * ip,
524                               u32 rx_fib_index, u32 thread_index)
525 {
526   ip4_header_t tmp = {
527     .src_address = ip->dst_address,
528   };
529   u32 index = sm->worker_in2out_cb (&tmp, rx_fib_index, 0);
530
531   if (index != thread_index)
532     {
533       vnet_buffer2 (b)->nat.thread_next = index;
534       return 1;
535     }
536
537   create_bypass_for_fwd (sm, b, ip, rx_fib_index, thread_index);
538   return 0;
539 }
540
541 #ifndef CLIB_MARCH_VARIANT
/**
 * @brief Match an ICMP packet arriving from outside against the
 *        endpoint-dependent session table and the static mappings.
 *
 * On a session hit the existing session is used. On a miss a static mapping
 * is looked up; when one matches (and is not an identity mapping) a new
 * session is created. Without a mapping the packet is dropped, left
 * untranslated, sent to the in2out fast path, handed off to another worker
 * or given a forwarding-bypass session, depending on
 * sm->forwarding_enabled and the checks below.
 *
 * @param sm             NAT main structure
 * @param node           node runtime providing the error counters
 * @param thread_index   index of the current thread
 * @param b              packet buffer; b->error is set on drop
 * @param ip             IPv4 header of the packet
 * @param addr           out: in2out address of the session, if any
 * @param port           out: in2out port of the session, if any
 * @param fib_index      out: in2out FIB index of the session, if any
 * @param proto          passed to get_icmp_o2i_ed_key () and dereferenced
 *                       when a session is created
 * @param d              if non-null, receives the session pointer (or 0)
 * @param e              not used by this function
 * @param dont_translate out: set to 1 when the packet must not be translated
 * @return a NAT_NEXT_* value when the next node is overridden, otherwise ~0
 *         (the initial value of "next")
 */
542 u32
543 icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node,
544                       u32 thread_index, vlib_buffer_t * b,
545                       ip4_header_t * ip, ip4_address_t * addr,
546                       u16 * port, u32 * fib_index, nat_protocol_t * proto,
547                       void *d, void *e, u8 * dont_translate)
548 {
549   u32 next = ~0, sw_if_index, rx_fib_index;
550   clib_bihash_kv_16_8_t kv, value;
551   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
552   snat_session_t *s = 0;
553   u8 is_addr_only, identity_nat;
554   u16 l_port, r_port;
555   vlib_main_t *vm = vlib_get_main ();
556   ip4_address_t sm_addr;
557   u16 sm_port;
558   u32 sm_fib_index;
559   *dont_translate = 0;
560   snat_static_mapping_t *m;
561
562   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
563   rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
564
565   if (get_icmp_o2i_ed_key
566       (b, ip, rx_fib_index, ~0, ~0, proto, &l_port, &r_port, &kv))
567     {
568       b->error = node->errors[NAT_OUT2IN_ED_ERROR_UNSUPPORTED_PROTOCOL];
569       next = NAT_NEXT_DROP;
570       goto out;
571     }
572
      /* Non-zero search result: no session exists for this key. */
573   if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
574     {
575       if (snat_static_mapping_match
576           (sm, ip->dst_address, l_port, rx_fib_index,
577            ip_proto_to_nat_proto (ip->protocol), &sm_addr, &sm_port,
578            &sm_fib_index, 1, &is_addr_only, 0, 0, 0, &identity_nat, &m))
579         {
580           // static mapping not matched
581           if (!sm->forwarding_enabled)
582             {
583               /* Don't NAT packet aimed at the intfc address */
584               if (PREDICT_FALSE (is_interface_addr (sm, node, sw_if_index,
585                                                     ip->dst_address.as_u32)))
586                 {
587                   *dont_translate = 1;
588                 }
589               else
590                 {
591                   b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION];
592                   next = NAT_NEXT_DROP;
593                 }
594             }
595           else
596             {
                  /* Forwarding enabled: never translate on this path. */
597               *dont_translate = 1;
                  /* NOTE(review): l_port is passed as next_src_nat ()'s
                   * src_port and r_port as its dst_port; in
                   * create_bypass_for_fwd () l_port is the destination
                   * port and r_port the source port - confirm the argument
                   * order is intended. */
598               if (next_src_nat (sm, ip, l_port, r_port,
599                                 thread_index, rx_fib_index))
600                 {
601                   next = NAT_NEXT_IN2OUT_ED_FAST_PATH;
602                 }
603               else
604                 {
605                   if (sm->num_workers > 1)
606                     {
607                       if (create_bypass_for_fwd_worker (sm, b, ip,
608                                                         rx_fib_index,
609                                                         thread_index))
610                         {
611                           next = NAT_NEXT_OUT2IN_ED_HANDOFF;
612                         }
613                     }
614                   else
615                     {
616                       create_bypass_for_fwd (sm, b, ip, rx_fib_index,
617                                              thread_index);
618                     }
619                 }
620             }
621           goto out;
622         }
623
          /* A static mapping matched. Only an echo reply, or an echo
           * request to an address-only mapping, may create a session. */
624       if (PREDICT_FALSE
625           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
626            ICMP4_echo_reply
627            && (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
628                ICMP4_echo_request || !is_addr_only)))
629         {
630           b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE];
631           next = NAT_NEXT_DROP;
632           goto out;
633         }
634
635       if (PREDICT_FALSE (identity_nat))
636         {
637           *dont_translate = 1;
638           goto out;
639         }
640
641       /* Create session initiated by host from external network */
642       s =
643         create_session_for_static_mapping_ed (sm, b, sm_addr, sm_port,
644                                               sm_fib_index, ip->dst_address,
645                                               l_port, rx_fib_index, *proto,
646                                               node, rx_fib_index,
647                                               thread_index, 0, 0,
648                                               vlib_time_now (vm), m);
649       if (!s)
650         next = NAT_NEXT_DROP;
651     }
652   else
653     {
          /* Session hit: accept echo request/reply and ICMP error messages
           * only. */
654       if (PREDICT_FALSE
655           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
656            ICMP4_echo_reply
657            && vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
658            ICMP4_echo_request
659            && !icmp_type_is_error_message (vnet_buffer (b)->ip.
660                                            reass.icmp_type_or_tcp_flags)))
661         {
662           b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE];
663           next = NAT_NEXT_DROP;
664           goto out;
665         }
666
667       ASSERT (thread_index == ed_value_get_thread_index (&value));
668       s =
669         pool_elt_at_index (tsm->sessions,
670                            ed_value_get_session_index (&value));
671     }
672 out:
      /* Outputs are written only when a session was found or created;
       * *addr, *port and *fib_index are left untouched otherwise. */
673   if (s)
674     {
675       *addr = s->in2out.addr;
676       *port = s->in2out.port;
677       *fib_index = s->in2out.fib_index;
678     }
679   if (d)
680     *(snat_session_t **) d = s;
681   return next;
682 }
683 #endif
684
/**
 * @brief Translate an outside packet whose protocol has no port-based NAT
 *        handling (address-only translation).
 *
 * Looks up an existing session keyed by (dst address, src address, RX FIB,
 * IP protocol) with zero ports. On a miss, an address-only static mapping
 * for the destination is required; a new session is then created and added
 * to both lookup tables. In both cases the destination address and the IP
 * header checksum are rewritten, the TX FIB is set and the session counters
 * and LRU position are updated.
 *
 * @param sm           NAT main structure
 * @param b            packet buffer; b->error is set on failure
 * @param ip           IPv4 header, modified in place
 * @param rx_fib_index FIB index of the RX interface
 * @param thread_index index of the current thread
 * @param now          current time
 * @param vm           vlib main, used for the buffer length
 * @param node         node runtime providing the error counters
 * @return the session used, or 0 when the session limit is exceeded, no
 *         static mapping exists, or session allocation fails
 */
685 static snat_session_t *
686 nat44_ed_out2in_unknown_proto (snat_main_t * sm,
687                                vlib_buffer_t * b,
688                                ip4_header_t * ip,
689                                u32 rx_fib_index,
690                                u32 thread_index,
691                                f64 now,
692                                vlib_main_t * vm, vlib_node_runtime_t * node)
693 {
694   clib_bihash_kv_8_8_t kv, value;
695   clib_bihash_kv_16_8_t s_kv, s_value;
696   snat_static_mapping_t *m;
697   u32 old_addr, new_addr;
698   ip_csum_t sum;
699   snat_session_t *s;
700   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
701
      /* Remember the original destination for the checksum update. */
702   old_addr = ip->dst_address.as_u32;
703
704   init_ed_k (&s_kv, ip->dst_address, 0, ip->src_address, 0, rx_fib_index,
705              ip->protocol);
706
707   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
708     {
          /* Existing session: rewrite the destination to its local address. */
709       ASSERT (thread_index == ed_value_get_thread_index (&s_value));
710       s =
711         pool_elt_at_index (tsm->sessions,
712                            ed_value_get_session_index (&s_value));
713       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
714     }
715   else
716     {
717       if (PREDICT_FALSE
718           (nat44_ed_maximum_sessions_exceeded
719            (sm, rx_fib_index, thread_index)))
720         {
721           b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED];
722           nat_elog_notice ("maximum sessions exceeded");
723           return 0;
724         }
725
          /* Static mapping lookup keyed on the destination address only. */
726       init_nat_k (&kv, ip->dst_address, 0, 0, 0);
727       if (clib_bihash_search_8_8
728           (&sm->static_mapping_by_external, &kv, &value))
729         {
730           b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION];
731           return 0;
732         }
733
734       m = pool_elt_at_index (sm->static_mappings, value.value);
735
          /* NOTE(review): the header is rewritten here, before session
           * allocation can still fail; on that failure the function returns
           * 0 with the destination changed and the checksum not updated. */
736       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
737
738       /* Create a new session */
739       s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
740       if (!s)
741         {
742           b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_USER_SESS_EXCEEDED];
743           nat_elog_warn ("create NAT session failed");
744           return 0;
745         }
746
747       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
748       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
749       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
750       s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
751       s->out2in.addr.as_u32 = old_addr;
752       s->out2in.fib_index = rx_fib_index;
753       s->in2out.addr.as_u32 = new_addr;
754       s->in2out.fib_index = m->fib_index;
          /* The port fields carry the IP protocol number for these sessions. */
755       s->in2out.port = s->out2in.port = ip->protocol;
756
757       /* Add to lookup tables */
          /* s_kv still holds the out2in key built from the original
           * destination address.
           * NOTE(review): only the bare pool index is stored as the value,
           * while the hit path above decodes it with
           * ed_value_get_thread_index () / ed_value_get_session_index ();
           * the in2out insert below uses init_ed_kv () with thread_index -
           * confirm this is consistent for thread_index != 0. */
758       s_kv.value = s - tsm->sessions;
759       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
760         nat_elog_notice ("out2in key add failed");
761
          /* ip->dst_address now holds the local (translated) address. */
762       init_ed_kv (&s_kv, ip->dst_address, 0, ip->src_address, 0, m->fib_index,
763                   ip->protocol, thread_index, s - tsm->sessions);
764       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1))
765         nat_elog_notice ("in2out key add failed");
766
767       per_vrf_sessions_register_session (s, thread_index);
768     }
769
770   /* Update IP checksum */
771   sum = ip->checksum;
772   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
773   ip->checksum = ip_csum_fold (sum);
774
      /* The TX slot is loaded with the session's in2out FIB index. */
775   vnet_buffer (b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
776
777   /* Accounting */
778   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
779                                  thread_index);
780   /* Per-user LRU list maintenance */
781   nat44_session_update_lru (sm, s, thread_index);
782
783   return s;
784 }
785
786 static inline uword
787 nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
788                                           vlib_node_runtime_t * node,
789                                           vlib_frame_t * frame,
790                                           int is_multi_worker)
791 {
792   u32 n_left_from, *from;
793   snat_main_t *sm = &snat_main;
794   f64 now = vlib_time_now (vm);
795   u32 thread_index = vm->thread_index;
796   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
797
798   from = vlib_frame_vector_args (frame);
799   n_left_from = frame->n_vectors;
800
801   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
802   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
803   vlib_get_buffers (vm, from, b, n_left_from);
804
805   while (n_left_from > 0)
806     {
807       vlib_buffer_t *b0;
808       u32 sw_if_index0, rx_fib_index0, proto0, old_addr0, new_addr0;
809       u16 old_port0, new_port0;
810       ip4_header_t *ip0;
811       udp_header_t *udp0;
812       tcp_header_t *tcp0;
813       snat_session_t *s0 = 0;
814       clib_bihash_kv_16_8_t kv0, value0;
815       ip_csum_t sum0;
816
817       b0 = *b;
818       b++;
819
820       /* Prefetch next iteration. */
821       if (PREDICT_TRUE (n_left_from >= 2))
822         {
823           vlib_buffer_t *p2;
824
825           p2 = *b;
826
827           vlib_prefetch_buffer_header (p2, LOAD);
828
829           CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
830         }
831
832       next[0] = vnet_buffer2 (b0)->nat.arc_next;
833
834       vnet_buffer (b0)->snat.flags = 0;
835       ip0 = vlib_buffer_get_current (b0);
836
837       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
838       rx_fib_index0 =
839         fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
840
841       if (PREDICT_FALSE (ip0->ttl == 1))
842         {
843           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
844           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
845                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
846                                        0);
847           next[0] = NAT_NEXT_ICMP_ERROR;
848           goto trace0;
849         }
850
851       udp0 = ip4_next_header (ip0);
852       tcp0 = (tcp_header_t *) udp0;
853       proto0 = ip_proto_to_nat_proto (ip0->protocol);
854
855       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
856         {
857           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
858           goto trace0;
859         }
860
861       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
862         {
863           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
864           goto trace0;
865         }
866
867       init_ed_k (&kv0, ip0->dst_address,
868                  vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->src_address,
869                  vnet_buffer (b0)->ip.reass.l4_src_port, rx_fib_index0,
870                  ip0->protocol);
871
872       /* there is a stashed index in vnet_buffer2 from handoff node,
873        * see if we can use it */
874       if (is_multi_worker
875           &&
876           PREDICT_TRUE (!pool_is_free_index
877                         (tsm->sessions,
878                          vnet_buffer2 (b0)->nat.ed_out2in_nat_session_index)))
879         {
880           s0 = pool_elt_at_index (tsm->sessions,
881                                   vnet_buffer2 (b0)->
882                                   nat.ed_out2in_nat_session_index);
883           if (PREDICT_TRUE
884               (s0->out2in.addr.as_u32 == ip0->dst_address.as_u32
885                && s0->out2in.port == vnet_buffer (b0)->ip.reass.l4_dst_port
886                && s0->nat_proto == ip_proto_to_nat_proto (ip0->protocol)
887                && s0->out2in.fib_index == rx_fib_index0
888                && s0->ext_host_addr.as_u32 == ip0->src_address.as_u32
889                && s0->ext_host_port ==
890                vnet_buffer (b0)->ip.reass.l4_src_port))
891             {
892               /* yes, this is the droid we're looking for */
893               goto skip_lookup;
894             }
895         }
896
897       // lookup for session
898       if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv0, &value0))
899         {
900           // session does not exist go slow path
901           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
902           goto trace0;
903         }
904       ASSERT (thread_index == ed_value_get_thread_index (&value0));
905       s0 =
906         pool_elt_at_index (tsm->sessions,
907                            ed_value_get_session_index (&value0));
908
909     skip_lookup:
910
911       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
912         {
913           // session is closed, go slow path
914           nat_free_session_data (sm, s0, thread_index, 0);
915           nat_ed_session_delete (sm, s0, thread_index, 1);
916           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
917           goto trace0;
918         }
919
920       if (s0->tcp_closed_timestamp)
921         {
922           if (now >= s0->tcp_closed_timestamp)
923             {
924               // session is closed, go slow path, freed in slow path
925               next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
926             }
927           else
928             {
929               // session in transitory timeout, drop
930               b0->error = node->errors[NAT_OUT2IN_ED_ERROR_TCP_CLOSED];
931               next[0] = NAT_NEXT_DROP;
932             }
933           goto trace0;
934         }
935
936       // drop if session expired
937       u64 sess_timeout_time;
938       sess_timeout_time =
939         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
940       if (now >= sess_timeout_time)
941         {
942           // session is closed, go slow path
943           nat_free_session_data (sm, s0, thread_index, 0);
944           nat_ed_session_delete (sm, s0, thread_index, 1);
945           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
946           goto trace0;
947         }
948
949       old_addr0 = ip0->dst_address.as_u32;
950       new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32;
951       vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
952
953       sum0 = ip0->checksum;
954       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
955                              dst_address);
956       if (PREDICT_FALSE (is_twice_nat_session (s0)))
957         sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
958                                s0->ext_host_nat_addr.as_u32, ip4_header_t,
959                                src_address);
960       ip0->checksum = ip_csum_fold (sum0);
961
962       old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port;
963
964       if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
965         {
966           if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
967             {
968               new_port0 = udp0->dst_port = s0->in2out.port;
969               sum0 = tcp0->checksum;
970               sum0 =
971                 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
972                                 dst_address);
973               sum0 =
974                 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
975                                 length);
976               if (is_twice_nat_session (s0))
977                 {
978                   sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
979                                          s0->ext_host_nat_addr.as_u32,
980                                          ip4_header_t, dst_address);
981                   sum0 =
982                     ip_csum_update (sum0,
983                                     vnet_buffer (b0)->ip.reass.l4_src_port,
984                                     s0->ext_host_nat_port, ip4_header_t,
985                                     length);
986                   tcp0->src_port = s0->ext_host_nat_port;
987                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
988                 }
989               tcp0->checksum = ip_csum_fold (sum0);
990             }
991           vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.tcp,
992                                          thread_index, sw_if_index0, 1);
993           nat44_set_tcp_session_state_o2i (sm, now, s0,
994                                            vnet_buffer (b0)->ip.
995                                            reass.icmp_type_or_tcp_flags,
996                                            vnet_buffer (b0)->ip.
997                                            reass.tcp_ack_number,
998                                            vnet_buffer (b0)->ip.
999                                            reass.tcp_seq_number,
1000                                            thread_index);
1001         }
1002       else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
1003                && udp0->checksum)
1004         {
1005           new_port0 = udp0->dst_port = s0->in2out.port;
1006           sum0 = udp0->checksum;
1007           sum0 =
1008             ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1009                             dst_address);
1010           sum0 =
1011             ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, length);
1012           if (PREDICT_FALSE (is_twice_nat_session (s0)))
1013             {
1014               sum0 =
1015                 ip_csum_update (sum0, ip0->src_address.as_u32,
1016                                 s0->ext_host_nat_addr.as_u32, ip4_header_t,
1017                                 dst_address);
1018               sum0 =
1019                 ip_csum_update (sum0, vnet_buffer (b0)->ip.reass.l4_src_port,
1020                                 s0->ext_host_nat_port, ip4_header_t, length);
1021               udp0->src_port = s0->ext_host_nat_port;
1022               ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
1023             }
1024           udp0->checksum = ip_csum_fold (sum0);
1025           vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.udp,
1026                                          thread_index, sw_if_index0, 1);
1027         }
1028       else
1029         {
1030           if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1031             {
1032               new_port0 = udp0->dst_port = s0->in2out.port;
1033               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1034                 {
1035                   udp0->src_port = s0->ext_host_nat_port;
1036                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
1037                 }
1038             }
1039           vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.udp,
1040                                          thread_index, sw_if_index0, 1);
1041         }
1042
1043       /* Accounting */
1044       nat44_session_update_counters (s0, now,
1045                                      vlib_buffer_length_in_chain (vm, b0),
1046                                      thread_index);
1047       /* Per-user LRU list maintenance */
1048       nat44_session_update_lru (sm, s0, thread_index);
1049
1050     trace0:
1051       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1052                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1053         {
1054           nat44_ed_out2in_trace_t *t =
1055             vlib_add_trace (vm, node, b0, sizeof (*t));
1056           t->sw_if_index = sw_if_index0;
1057           t->next_index = next[0];
1058           t->is_slow_path = 0;
1059
1060           if (s0)
1061             t->session_index = s0 - tsm->sessions;
1062           else
1063             t->session_index = ~0;
1064         }
1065
1066       if (next[0] == NAT_NEXT_DROP)
1067         {
1068           vlib_increment_simple_counter (&sm->counters.fastpath.
1069                                          out2in_ed.drops, thread_index,
1070                                          sw_if_index0, 1);
1071         }
1072
1073       n_left_from--;
1074       next++;
1075     }
1076
1077   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1078                                frame->n_vectors);
1079   return frame->n_vectors;
1080 }
1081
1082 static inline uword
1083 nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
1084                                           vlib_node_runtime_t * node,
1085                                           vlib_frame_t * frame)
1086 {
1087   u32 n_left_from, *from;
1088   snat_main_t *sm = &snat_main;
1089   f64 now = vlib_time_now (vm);
1090   u32 thread_index = vm->thread_index;
1091   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1092   snat_static_mapping_t *m;
1093
1094   from = vlib_frame_vector_args (frame);
1095   n_left_from = frame->n_vectors;
1096
1097   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1098   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1099   vlib_get_buffers (vm, from, b, n_left_from);
1100
1101   while (n_left_from > 0)
1102     {
1103       vlib_buffer_t *b0;
1104       u32 sw_if_index0, rx_fib_index0, proto0, old_addr0, new_addr0;
1105       u16 old_port0, new_port0;
1106       ip4_header_t *ip0;
1107       udp_header_t *udp0;
1108       tcp_header_t *tcp0;
1109       icmp46_header_t *icmp0;
1110       snat_session_t *s0 = 0;
1111       clib_bihash_kv_16_8_t kv0, value0;
1112       ip_csum_t sum0;
1113       lb_nat_type_t lb_nat0;
1114       twice_nat_type_t twice_nat0;
1115       u8 identity_nat0;
1116       ip4_address_t sm_addr;
1117       u16 sm_port;
1118       u32 sm_fib_index;
1119
1120       b0 = *b;
1121       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1122
1123       vnet_buffer (b0)->snat.flags = 0;
1124       ip0 = vlib_buffer_get_current (b0);
1125
1126       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1127       rx_fib_index0 =
1128         fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
1129
1130       if (PREDICT_FALSE (ip0->ttl == 1))
1131         {
1132           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1133           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1134                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1135                                        0);
1136           next[0] = NAT_NEXT_ICMP_ERROR;
1137           goto trace0;
1138         }
1139
1140       udp0 = ip4_next_header (ip0);
1141       tcp0 = (tcp_header_t *) udp0;
1142       icmp0 = (icmp46_header_t *) udp0;
1143       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1144
1145       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
1146         {
1147           s0 =
1148             nat44_ed_out2in_unknown_proto (sm, b0, ip0, rx_fib_index0,
1149                                            thread_index, now, vm, node);
1150           if (!sm->forwarding_enabled)
1151             {
1152               if (!s0)
1153                 next[0] = NAT_NEXT_DROP;
1154             }
1155           vlib_increment_simple_counter (&sm->counters.slowpath.
1156                                          out2in_ed.other, thread_index,
1157                                          sw_if_index0, 1);
1158           goto trace0;
1159         }
1160
1161       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1162         {
1163           next[0] = icmp_out2in_ed_slow_path
1164             (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1165              next[0], now, thread_index, &s0);
1166           vlib_increment_simple_counter (&sm->counters.slowpath.
1167                                          out2in_ed.icmp, thread_index,
1168                                          sw_if_index0, 1);
1169           goto trace0;
1170         }
1171
1172       init_ed_k (&kv0, ip0->dst_address,
1173                  vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->src_address,
1174                  vnet_buffer (b0)->ip.reass.l4_src_port, rx_fib_index0,
1175                  ip0->protocol);
1176
1177       s0 = NULL;
1178       if (!clib_bihash_search_16_8 (&sm->out2in_ed, &kv0, &value0))
1179         {
1180           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1181           s0 =
1182             pool_elt_at_index (tsm->sessions,
1183                                ed_value_get_session_index (&value0));
1184
1185           if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
1186             {
1187               nat_free_session_data (sm, s0, thread_index, 0);
1188               nat_ed_session_delete (sm, s0, thread_index, 1);
1189               s0 = NULL;
1190             }
1191         }
1192
1193       if (!s0)
1194         {
1195           /* Try to match static mapping by external address and port,
1196              destination address and port in packet */
1197
1198           if (snat_static_mapping_match
1199               (sm, ip0->dst_address,
1200                vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
1201                proto0, &sm_addr, &sm_port, &sm_fib_index, 1, 0,
1202                &twice_nat0, &lb_nat0, &ip0->src_address, &identity_nat0, &m))
1203             {
1204               /*
1205                * Send DHCP packets to the ipv4 stack, or we won't
1206                * be able to use dhcp client on the outside interface
1207                */
1208               if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_UDP
1209                                  && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1210                                      clib_host_to_net_u16
1211                                      (UDP_DST_PORT_dhcp_to_client))))
1212                 {
1213                   goto trace0;
1214                 }
1215
1216               if (!sm->forwarding_enabled)
1217                 {
1218                   b0->error =
1219                     node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION];
1220                   next[0] = NAT_NEXT_DROP;
1221                 }
1222               else
1223                 {
1224                   if (next_src_nat
1225                       (sm, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1226                        vnet_buffer (b0)->ip.reass.l4_dst_port,
1227                        thread_index, rx_fib_index0))
1228                     {
1229                       next[0] = NAT_NEXT_IN2OUT_ED_FAST_PATH;
1230                     }
1231                   else
1232                     {
1233                       if ((sm->num_workers > 1)
1234                           && create_bypass_for_fwd_worker (sm, b0, ip0,
1235                                                            rx_fib_index0,
1236                                                            thread_index))
1237                         {
1238                           next[0] = NAT_NEXT_OUT2IN_ED_HANDOFF;
1239                         }
1240                       else
1241                         {
1242                           create_bypass_for_fwd (sm, b0, ip0, rx_fib_index0,
1243                                                  thread_index);
1244                         }
1245                     }
1246                 }
1247               goto trace0;
1248             }
1249
1250           if (PREDICT_FALSE (identity_nat0))
1251             goto trace0;
1252
1253           if ((proto0 == NAT_PROTOCOL_TCP)
1254               && !tcp_flags_is_init (vnet_buffer (b0)->ip.
1255                                      reass.icmp_type_or_tcp_flags))
1256             {
1257               b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN];
1258               next[0] = NAT_NEXT_DROP;
1259               goto trace0;
1260             }
1261
1262           /* Create session initiated by host from external network */
1263           s0 = create_session_for_static_mapping_ed (sm, b0,
1264                                                      sm_addr, sm_port,
1265                                                      sm_fib_index,
1266                                                      ip0->dst_address,
1267                                                      vnet_buffer (b0)->
1268                                                      ip.reass.l4_dst_port,
1269                                                      rx_fib_index0, proto0,
1270                                                      node, rx_fib_index0,
1271                                                      thread_index, twice_nat0,
1272                                                      lb_nat0, now, m);
1273           if (!s0)
1274             {
1275               next[0] = NAT_NEXT_DROP;
1276               goto trace0;
1277             }
1278         }
1279
1280       old_addr0 = ip0->dst_address.as_u32;
1281       new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32;
1282       vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1283
1284       sum0 = ip0->checksum;
1285       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1286                              dst_address);
1287       if (PREDICT_FALSE (is_twice_nat_session (s0)))
1288         sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
1289                                s0->ext_host_nat_addr.as_u32, ip4_header_t,
1290                                src_address);
1291       ip0->checksum = ip_csum_fold (sum0);
1292
1293       old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port;
1294
1295       if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
1296         {
1297           if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1298             {
1299               new_port0 = udp0->dst_port = s0->in2out.port;
1300               sum0 = tcp0->checksum;
1301               sum0 =
1302                 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1303                                 dst_address);
1304               sum0 =
1305                 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
1306                                 length);
1307               if (is_twice_nat_session (s0))
1308                 {
1309                   sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
1310                                          s0->ext_host_nat_addr.as_u32,
1311                                          ip4_header_t, dst_address);
1312                   sum0 =
1313                     ip_csum_update (sum0,
1314                                     vnet_buffer (b0)->ip.reass.l4_src_port,
1315                                     s0->ext_host_nat_port, ip4_header_t,
1316                                     length);
1317                   tcp0->src_port = s0->ext_host_nat_port;
1318                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
1319                 }
1320               tcp0->checksum = ip_csum_fold (sum0);
1321             }
1322           vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.tcp,
1323                                          thread_index, sw_if_index0, 1);
1324           nat44_set_tcp_session_state_o2i (sm, now, s0,
1325                                            vnet_buffer (b0)->ip.
1326                                            reass.icmp_type_or_tcp_flags,
1327                                            vnet_buffer (b0)->ip.
1328                                            reass.tcp_ack_number,
1329                                            vnet_buffer (b0)->ip.
1330                                            reass.tcp_seq_number,
1331                                            thread_index);
1332         }
1333       else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
1334                && udp0->checksum)
1335         {
1336           new_port0 = udp0->dst_port = s0->in2out.port;
1337           sum0 = udp0->checksum;
1338           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1339                                  dst_address);
1340           sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
1341                                  length);
1342           if (PREDICT_FALSE (is_twice_nat_session (s0)))
1343             {
1344               sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
1345                                      s0->ext_host_nat_addr.as_u32,
1346                                      ip4_header_t, dst_address);
1347               sum0 =
1348                 ip_csum_update (sum0,
1349                                 vnet_buffer (b0)->ip.reass.l4_src_port,
1350                                 s0->ext_host_nat_port, ip4_header_t, length);
1351               udp0->src_port = s0->ext_host_nat_port;
1352               ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
1353             }
1354           udp0->checksum = ip_csum_fold (sum0);
1355           vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.udp,
1356                                          thread_index, sw_if_index0, 1);
1357         }
1358       else
1359         {
1360           if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1361             {
1362               new_port0 = udp0->dst_port = s0->in2out.port;
1363               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1364                 {
1365                   udp0->src_port = s0->ext_host_nat_port;
1366                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
1367                 }
1368             }
1369           vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.udp,
1370                                          thread_index, sw_if_index0, 1);
1371         }
1372
1373       /* Accounting */
1374       nat44_session_update_counters (s0, now,
1375                                      vlib_buffer_length_in_chain (vm, b0),
1376                                      thread_index);
1377       /* Per-user LRU list maintenance */
1378       nat44_session_update_lru (sm, s0, thread_index);
1379
1380     trace0:
1381       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1382                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1383         {
1384           nat44_ed_out2in_trace_t *t =
1385             vlib_add_trace (vm, node, b0, sizeof (*t));
1386           t->sw_if_index = sw_if_index0;
1387           t->next_index = next[0];
1388           t->is_slow_path = 1;
1389
1390           if (s0)
1391             t->session_index = s0 - tsm->sessions;
1392           else
1393             t->session_index = ~0;
1394         }
1395
1396       if (next[0] == NAT_NEXT_DROP)
1397         {
1398           vlib_increment_simple_counter (&sm->counters.slowpath.
1399                                          out2in_ed.drops, thread_index,
1400                                          sw_if_index0, 1);
1401         }
1402
1403       n_left_from--;
1404       next++;
1405       b++;
1406     }
1407
1408   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1409                                frame->n_vectors);
1410
1411   return frame->n_vectors;
1412 }
1413
1414 static inline uword
1415 nat_handoff_node_fn_inline (vlib_main_t * vm,
1416                             vlib_node_runtime_t * node,
1417                             vlib_frame_t * frame, u32 fq_index)
1418 {
1419   u32 n_enq, n_left_from, *from;
1420
1421   u16 thread_indices[VLIB_FRAME_SIZE], *ti = thread_indices;
1422   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1423
1424   from = vlib_frame_vector_args (frame);
1425   n_left_from = frame->n_vectors;
1426
1427   vlib_get_buffers (vm, from, b, n_left_from);
1428
1429   while (n_left_from >= 4)
1430     {
1431       if (PREDICT_TRUE (n_left_from >= 8))
1432         {
1433           vlib_prefetch_buffer_header (b[4], LOAD);
1434           vlib_prefetch_buffer_header (b[5], LOAD);
1435           vlib_prefetch_buffer_header (b[6], LOAD);
1436           vlib_prefetch_buffer_header (b[7], LOAD);
1437           CLIB_PREFETCH (&b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1438           CLIB_PREFETCH (&b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1439           CLIB_PREFETCH (&b[6]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1440           CLIB_PREFETCH (&b[7]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1441         }
1442
1443       ti[0] = vnet_buffer2 (b[0])->nat.thread_next;
1444       ti[1] = vnet_buffer2 (b[1])->nat.thread_next;
1445       ti[2] = vnet_buffer2 (b[2])->nat.thread_next;
1446       ti[3] = vnet_buffer2 (b[3])->nat.thread_next;
1447
1448       b += 4;
1449       ti += 4;
1450       n_left_from -= 4;
1451     }
1452
1453   while (n_left_from > 0)
1454     {
1455       ti[0] = vnet_buffer2 (b[0])->nat.thread_next;
1456
1457       b += 1;
1458       ti += 1;
1459       n_left_from -= 1;
1460     }
1461
1462   if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
1463     {
1464       u32 i;
1465       b = bufs;
1466       ti = thread_indices;
1467
1468       for (i = 0; i < frame->n_vectors; i++)
1469         {
1470           if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
1471             {
1472               nat44_ed_out2in_handoff_trace_t *t =
1473                 vlib_add_trace (vm, node, b[0], sizeof (*t));
1474               t->thread_next = ti[0];
1475               b += 1;
1476               ti += 1;
1477             }
1478           else
1479             break;
1480         }
1481     }
1482
1483   n_enq = vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices,
1484                                          frame->n_vectors, 1);
1485
1486   if (n_enq < frame->n_vectors)
1487     {
1488       vlib_node_increment_counter (vm, node->node_index,
1489                                    NAT44_HANDOFF_ERROR_CONGESTION_DROP,
1490                                    frame->n_vectors - n_enq);
1491     }
1492
1493   return frame->n_vectors;
1494 }
1495
1496 VLIB_NODE_FN (nat44_ed_out2in_node) (vlib_main_t * vm,
1497                                      vlib_node_runtime_t * node,
1498                                      vlib_frame_t * frame)
1499 {
1500   if (snat_main.num_workers > 1)
1501     {
1502       return nat44_ed_out2in_fast_path_node_fn_inline (vm, node, frame, 1);
1503     }
1504   else
1505     {
1506       return nat44_ed_out2in_fast_path_node_fn_inline (vm, node, frame, 0);
1507     }
1508 }
1509
1510 /* *INDENT-OFF* */
1511 VLIB_REGISTER_NODE (nat44_ed_out2in_node) = {
1512   .name = "nat44-ed-out2in",
1513   .vector_size = sizeof (u32),
1514   .sibling_of = "nat-default",
1515   .format_trace = format_nat44_ed_out2in_trace,
1516   .type = VLIB_NODE_TYPE_INTERNAL,
1517   .n_errors = ARRAY_LEN(nat_out2in_ed_error_strings),
1518   .error_strings = nat_out2in_ed_error_strings,
1519   .runtime_data_bytes = sizeof (snat_runtime_t),
1520 };
1521 /* *INDENT-ON* */
1522
1523 VLIB_NODE_FN (nat44_ed_out2in_slowpath_node) (vlib_main_t * vm,
1524                                               vlib_node_runtime_t * node,
1525                                               vlib_frame_t * frame)
1526 {
1527   return nat44_ed_out2in_slow_path_node_fn_inline (vm, node, frame);
1528 }
1529
1530 /* *INDENT-OFF* */
1531 VLIB_REGISTER_NODE (nat44_ed_out2in_slowpath_node) = {
1532   .name = "nat44-ed-out2in-slowpath",
1533   .vector_size = sizeof (u32),
1534   .sibling_of = "nat-default",
1535   .format_trace = format_nat44_ed_out2in_trace,
1536   .type = VLIB_NODE_TYPE_INTERNAL,
1537   .n_errors = ARRAY_LEN(nat_out2in_ed_error_strings),
1538   .error_strings = nat_out2in_ed_error_strings,
1539   .runtime_data_bytes = sizeof (snat_runtime_t),
1540 };
1541 /* *INDENT-ON* */
1542
1543 static u8 *
1544 format_nat44_ed_out2in_handoff_trace (u8 * s, va_list * args)
1545 {
1546   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1547   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1548   nat44_ed_out2in_handoff_trace_t *t =
1549     va_arg (*args, nat44_ed_out2in_handoff_trace_t *);
1550   return format (s, "out2in ed handoff thread_next index %d", t->thread_next);
1551 }
1552
1553 VLIB_NODE_FN (nat44_ed_out2in_handoff_node) (vlib_main_t * vm,
1554                                              vlib_node_runtime_t * node,
1555                                              vlib_frame_t * frame)
1556 {
1557   return nat_handoff_node_fn_inline (vm, node, frame,
1558                                      snat_main.ed_out2in_node_index);
1559 }
1560
1561 /* *INDENT-OFF* */
1562 VLIB_REGISTER_NODE (nat44_ed_out2in_handoff_node) = {
1563   .name = "nat44-ed-out2in-handoff",
1564   .vector_size = sizeof (u32),
1565   .sibling_of = "nat-default",
1566   .format_trace = format_nat44_ed_out2in_handoff_trace,
1567   .type = VLIB_NODE_TYPE_INTERNAL,
1568   .n_errors = 0,
1569 };
1570 /* *INDENT-ON* */
1571
1572 static u8 *
1573 format_nat_pre_trace (u8 * s, va_list * args)
1574 {
1575   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1576   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1577   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1578   return format (s, "out2in next_index %d arc_next_index %d", t->next_index,
1579                  t->arc_next_index);
1580 }
1581
1582 VLIB_NODE_FN (nat_pre_out2in_node) (vlib_main_t * vm,
1583                                     vlib_node_runtime_t * node,
1584                                     vlib_frame_t * frame)
1585 {
1586   return nat_pre_node_fn_inline (vm, node, frame,
1587                                  NAT_NEXT_OUT2IN_ED_FAST_PATH);
1588 }
1589
1590 /* *INDENT-OFF* */
1591 VLIB_REGISTER_NODE (nat_pre_out2in_node) = {
1592   .name = "nat-pre-out2in",
1593   .vector_size = sizeof (u32),
1594   .sibling_of = "nat-default",
1595   .format_trace = format_nat_pre_trace,
1596   .type = VLIB_NODE_TYPE_INTERNAL,
1597   .n_errors = 0,
1598  };
1599 /* *INDENT-ON* */
1600
1601 /*
1602  * fd.io coding-style-patch-verification: ON
1603  *
1604  * Local Variables:
1605  * eval: (c-set-style "gnu")
1606  * End:
1607  */