beb259eee33f63ecc6dbc7e3f7ca49bfce1d2be5
[vpp.git] / src / plugins / nat / out2in_ed.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent outside to inside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp_local.h>
26 #include <vppinfra/error.h>
27 #include <nat/nat.h>
28 #include <nat/lib/ipfix_logging.h>
29 #include <nat/nat_inlines.h>
30 #include <nat/nat44/inlines.h>
31 #include <nat/lib/nat_syslog.h>
32 #include <nat/nat44/ed_inlines.h>
33
34 static char *nat_out2in_ed_error_strings[] = {
35 #define _(sym,string) string,
36   foreach_nat_out2in_ed_error
37 #undef _
38 };
39
40 typedef struct
41 {
42   u32 sw_if_index;
43   u32 next_index;
44   u32 session_index;
45   u32 is_slow_path;
46 } nat44_ed_out2in_trace_t;
47
48 typedef struct
49 {
50   u16 thread_next;
51 } nat44_ed_out2in_handoff_trace_t;
52
53 static u8 *
54 format_nat44_ed_out2in_trace (u8 * s, va_list * args)
55 {
56   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
57   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
58   nat44_ed_out2in_trace_t *t = va_arg (*args, nat44_ed_out2in_trace_t *);
59   char *tag;
60
61   tag =
62     t->is_slow_path ? "NAT44_OUT2IN_ED_SLOW_PATH" :
63     "NAT44_OUT2IN_ED_FAST_PATH";
64
65   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
66               t->sw_if_index, t->next_index, t->session_index);
67
68   return s;
69 }
70
71 static inline u32
72 icmp_out2in_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0,
73                           ip4_header_t * ip0, icmp46_header_t * icmp0,
74                           u32 sw_if_index0, u32 rx_fib_index0,
75                           vlib_node_runtime_t * node, u32 next0, f64 now,
76                           u32 thread_index, snat_session_t ** p_s0)
77 {
78   vlib_main_t *vm = vlib_get_main ();
79
80   next0 = icmp_out2in (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
81                        next0, thread_index, p_s0, 0);
82   snat_session_t *s0 = *p_s0;
83   if (PREDICT_TRUE (next0 != NAT_NEXT_DROP && s0))
84     {
85       /* Accounting */
86       nat44_session_update_counters (s0, now,
87                                      vlib_buffer_length_in_chain
88                                      (vm, b0), thread_index);
89       /* Per-user LRU list maintenance */
90       nat44_session_update_lru (sm, s0, thread_index);
91     }
92   return next0;
93 }
94
95 #ifndef CLIB_MARCH_VARIANT
96 int
97 nat44_o2i_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg)
98 {
99   snat_main_t *sm = &snat_main;
100   nat44_is_idle_session_ctx_t *ctx = arg;
101   snat_session_t *s;
102   u64 sess_timeout_time;
103   u8 proto;
104   u16 r_port, l_port;
105   ip4_address_t *l_addr, *r_addr;
106   u32 fib_index;
107   clib_bihash_kv_16_8_t ed_kv;
108   int i;
109   //snat_address_t *a;
110   snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
111                                                        ctx->thread_index);
112
113   s = pool_elt_at_index (tsm->sessions, kv->value);
114   sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s);
115   if (ctx->now >= sess_timeout_time)
116     {
117       l_addr = &s->in2out.addr;
118       r_addr = &s->ext_host_addr;
119       fib_index = s->in2out.fib_index;
120       if (snat_is_unk_proto_session (s))
121         {
122           proto = s->in2out.port;
123           r_port = 0;
124           l_port = 0;
125         }
126       else
127         {
128           proto = nat_proto_to_ip_proto (s->nat_proto);
129           l_port = s->in2out.port;
130           r_port = s->ext_host_port;
131         }
132       if (is_twice_nat_session (s))
133         {
134           r_addr = &s->ext_host_nat_addr;
135           r_port = s->ext_host_nat_port;
136         }
137       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
138       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
139         nat_elog_warn ("in2out_ed key del failed");
140
141       if (snat_is_unk_proto_session (s))
142         goto delete;
143
144       nat_ipfix_logging_nat44_ses_delete (ctx->thread_index,
145                                           s->in2out.addr.as_u32,
146                                           s->out2in.addr.as_u32,
147                                           s->nat_proto,
148                                           s->in2out.port,
149                                           s->out2in.port,
150                                           s->in2out.fib_index);
151
152       nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
153                              &s->in2out.addr, s->in2out.port,
154                              &s->ext_host_nat_addr, s->ext_host_nat_port,
155                              &s->out2in.addr, s->out2in.port,
156                              &s->ext_host_addr, s->ext_host_port,
157                              s->nat_proto, is_twice_nat_session (s));
158
159       if (is_twice_nat_session (s))
160         {
161           for (i = 0; i < vec_len (sm->twice_nat_addresses); i++)
162             {
163               // FIXME TODO this is obviously wrong code ... needs fix!
164               //       key.protocol = s->nat_proto;
165               //       key.port = s->ext_host_nat_port;
166               //       a = sm->twice_nat_addresses + i;
167               //       if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32)
168               //      {
169               //        snat_free_outside_address_and_port (sm->twice_nat_addresses,
170               //                                            ctx->thread_index,
171               //                                            &key);
172               //        break;
173               //      }
174             }
175         }
176
177       if (snat_is_session_static (s))
178         goto delete;
179
180       snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
181                                           &s->out2in.addr, s->out2in.port,
182                                           s->nat_proto);
183     delete:
184       nat_ed_session_delete (sm, s, ctx->thread_index, 1);
185       return 1;
186     }
187
188   return 0;
189 }
190 #endif
191
192 // allocate exact address based on preference
193 static_always_inline int
194 nat_alloc_addr_and_port_exact (snat_address_t * a,
195                                u32 thread_index,
196                                nat_protocol_t proto,
197                                ip4_address_t * addr,
198                                u16 * port,
199                                u16 port_per_thread, u32 snat_thread_index)
200 {
201   u32 portnum;
202
203   switch (proto)
204     {
205 #define _(N, j, n, s) \
206     case NAT_PROTOCOL_##N: \
207       if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \
208         { \
209           while (1) \
210             { \
211               portnum = (port_per_thread * \
212                 snat_thread_index) + \
213                 snat_random_port(0, port_per_thread - 1) + 1024; \
214               if (a->busy_##n##_port_refcounts[portnum]) \
215                 continue; \
216               --a->busy_##n##_port_refcounts[portnum]; \
217               a->busy_##n##_ports_per_thread[thread_index]++; \
218               a->busy_##n##_ports++; \
219               *addr = a->addr; \
220               *port = clib_host_to_net_u16(portnum); \
221               return 0; \
222             } \
223         } \
224       break;
225       foreach_nat_protocol
226 #undef _
227     default:
228       nat_elog_info ("unknown protocol");
229       return 1;
230     }
231
232   /* Totally out of translations to use... */
233   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
234   return 1;
235 }
236
237 static_always_inline int
238 nat44_ed_alloc_outside_addr_and_port (snat_address_t *addresses, u32 fib_index,
239                                       u32 thread_index, nat_protocol_t proto,
240                                       ip4_address_t *addr, u16 *port,
241                                       u16 port_per_thread,
242                                       u32 snat_thread_index)
243 {
244   int i;
245   snat_address_t *a, *ga = 0;
246   u32 portnum;
247
248   for (i = 0; i < vec_len (addresses); i++)
249     {
250       a = addresses + i;
251       switch (proto)
252         {
253 #define _(N, j, n, s)                                                         \
254   case NAT_PROTOCOL_##N:                                                      \
255     if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread)       \
256       {                                                                       \
257         if (a->fib_index == fib_index)                                        \
258           {                                                                   \
259             while (1)                                                         \
260               {                                                               \
261                 portnum = (port_per_thread * snat_thread_index) +             \
262                           snat_random_port (0, port_per_thread - 1) + 1024;   \
263                 if (a->busy_##n##_port_refcounts[portnum])                    \
264                   continue;                                                   \
265                 --a->busy_##n##_port_refcounts[portnum];                      \
266                 a->busy_##n##_ports_per_thread[thread_index]++;               \
267                 a->busy_##n##_ports++;                                        \
268                 *addr = a->addr;                                              \
269                 *port = clib_host_to_net_u16 (portnum);                       \
270                 return 0;                                                     \
271               }                                                               \
272           }                                                                   \
273         else if (a->fib_index == ~0)                                          \
274           {                                                                   \
275             ga = a;                                                           \
276           }                                                                   \
277       }                                                                       \
278     break;
279           foreach_nat_protocol
280 #undef _
281             default : nat_elog_info ("unknown protocol");
282           return 1;
283         }
284     }
285
286   if (ga)
287     {
288       a = ga;
289       switch (proto)
290         {
291 #define _(N, j, n, s)                                                         \
292   case NAT_PROTOCOL_##N:                                                      \
293     while (1)                                                                 \
294       {                                                                       \
295         portnum = (port_per_thread * snat_thread_index) +                     \
296                   snat_random_port (0, port_per_thread - 1) + 1024;           \
297         if (a->busy_##n##_port_refcounts[portnum])                            \
298           continue;                                                           \
299         ++a->busy_##n##_port_refcounts[portnum];                              \
300         a->busy_##n##_ports_per_thread[thread_index]++;                       \
301         a->busy_##n##_ports++;                                                \
302         *addr = a->addr;                                                      \
303         *port = clib_host_to_net_u16 (portnum);                               \
304         return 0;                                                             \
305       }
306           break;
307           foreach_nat_protocol
308 #undef _
309             default : nat_elog_info ("unknown protocol");
310           return 1;
311         }
312     }
313
314   /* Totally out of translations to use... */
315   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
316   return 1;
317 }
318
319 static snat_session_t *
320 create_session_for_static_mapping_ed (snat_main_t * sm,
321                                       vlib_buffer_t * b,
322                                       ip4_address_t i2o_addr,
323                                       u16 i2o_port,
324                                       u32 i2o_fib_index,
325                                       ip4_address_t o2i_addr,
326                                       u16 o2i_port,
327                                       u32 o2i_fib_index,
328                                       nat_protocol_t nat_proto,
329                                       vlib_node_runtime_t * node,
330                                       u32 rx_fib_index,
331                                       u32 thread_index,
332                                       twice_nat_type_t twice_nat,
333                                       lb_nat_type_t lb_nat, f64 now,
334                                       snat_static_mapping_t * mapping)
335 {
336   snat_session_t *s;
337   ip4_header_t *ip;
338   udp_header_t *udp;
339   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
340   clib_bihash_kv_16_8_t kv;
341   nat44_is_idle_session_ctx_t ctx;
342
343   if (PREDICT_FALSE
344       (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
345     {
346       b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED];
347       nat_elog_notice ("maximum sessions exceeded");
348       return 0;
349     }
350
351   s = nat_ed_session_alloc (sm, thread_index, now, nat_proto);
352   if (!s)
353     {
354       b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_USER_SESS_EXCEEDED];
355       nat_elog_warn ("create NAT session failed");
356       return 0;
357     }
358
359   ip = vlib_buffer_get_current (b);
360   udp = ip4_next_header (ip);
361
362   s->ext_host_addr.as_u32 = ip->src_address.as_u32;
363   s->ext_host_port = nat_proto == NAT_PROTOCOL_ICMP ? 0 : udp->src_port;
364   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
365   if (lb_nat)
366     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
367   if (lb_nat == AFFINITY_LB_NAT)
368     s->flags |= SNAT_SESSION_FLAG_AFFINITY;
369   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
370   s->out2in.addr = o2i_addr;
371   s->out2in.port = o2i_port;
372   s->out2in.fib_index = o2i_fib_index;
373   s->in2out.addr = i2o_addr;
374   s->in2out.port = i2o_port;
375   s->in2out.fib_index = i2o_fib_index;
376   s->nat_proto = nat_proto;
377
378   /* Add to lookup tables */
379   init_ed_kv (&kv, o2i_addr, o2i_port, s->ext_host_addr, s->ext_host_port,
380               o2i_fib_index, ip->protocol, thread_index, s - tsm->sessions);
381   ctx.now = now;
382   ctx.thread_index = thread_index;
383   if (clib_bihash_add_or_overwrite_stale_16_8 (&sm->out2in_ed, &kv,
384                                                nat44_o2i_ed_is_idle_session_cb,
385                                                &ctx))
386     nat_elog_notice ("out2in-ed key add failed");
387
388   if (twice_nat == TWICE_NAT || (twice_nat == TWICE_NAT_SELF &&
389                                  ip->src_address.as_u32 == i2o_addr.as_u32))
390     {
391       int rc = 0;
392       snat_address_t *filter = 0;
393
394       // if exact address is specified use this address
395       if (is_exact_address (mapping))
396         {
397           snat_address_t *ap;
398           vec_foreach (ap, sm->twice_nat_addresses)
399           {
400             if (mapping->pool_addr.as_u32 == ap->addr.as_u32)
401               {
402                 filter = ap;
403                 break;
404               }
405           }
406         }
407
408       if (filter)
409         {
410           rc = nat_alloc_addr_and_port_exact (filter,
411                                               thread_index,
412                                               nat_proto,
413                                               &s->ext_host_nat_addr,
414                                               &s->ext_host_nat_port,
415                                               sm->port_per_thread,
416                                               tsm->snat_thread_index);
417           s->flags |= SNAT_SESSION_FLAG_EXACT_ADDRESS;
418         }
419       else
420         {
421           rc = nat44_ed_alloc_outside_addr_and_port (
422             sm->twice_nat_addresses, 0, thread_index, nat_proto,
423             &s->ext_host_nat_addr, &s->ext_host_nat_port, sm->port_per_thread,
424             tsm->snat_thread_index);
425         }
426
427       if (rc)
428         {
429           b->error = node->errors[NAT_OUT2IN_ED_ERROR_OUT_OF_PORTS];
430           nat_ed_session_delete (sm, s, thread_index, 1);
431           if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &kv, 0))
432             nat_elog_notice ("out2in-ed key del failed");
433           return 0;
434         }
435
436       s->flags |= SNAT_SESSION_FLAG_TWICE_NAT;
437       init_ed_kv (&kv, i2o_addr, i2o_port, s->ext_host_nat_addr,
438                   s->ext_host_nat_port, i2o_fib_index, ip->protocol,
439                   thread_index, s - tsm->sessions);
440     }
441   else
442     {
443       init_ed_kv (&kv, i2o_addr, i2o_port, s->ext_host_addr,
444                   s->ext_host_port, i2o_fib_index, ip->protocol,
445                   thread_index, s - tsm->sessions);
446     }
447   if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, &kv,
448                                                nat44_i2o_ed_is_idle_session_cb,
449                                                &ctx))
450     nat_elog_notice ("in2out-ed key add failed");
451
452   nat_ipfix_logging_nat44_ses_create (thread_index,
453                                       s->in2out.addr.as_u32,
454                                       s->out2in.addr.as_u32,
455                                       s->nat_proto,
456                                       s->in2out.port,
457                                       s->out2in.port, s->in2out.fib_index);
458
459   nat_syslog_nat44_sadd (s->user_index, s->in2out.fib_index,
460                          &s->in2out.addr, s->in2out.port,
461                          &s->ext_host_nat_addr, s->ext_host_nat_port,
462                          &s->out2in.addr, s->out2in.port,
463                          &s->ext_host_addr, s->ext_host_port,
464                          s->nat_proto, is_twice_nat_session (s));
465
466   per_vrf_sessions_register_session (s, thread_index);
467
468   return s;
469 }
470
471 static int
472 next_src_nat (snat_main_t * sm, ip4_header_t * ip, u16 src_port,
473               u16 dst_port, u32 thread_index, u32 rx_fib_index)
474 {
475   clib_bihash_kv_16_8_t kv, value;
476   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
477
478   init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
479              rx_fib_index, ip->protocol);
480   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
481     return 1;
482
483   return 0;
484 }
485
486 static void
487 create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip,
488                        u32 rx_fib_index, u32 thread_index)
489 {
490   clib_bihash_kv_16_8_t kv, value;
491   udp_header_t *udp;
492   snat_session_t *s = 0;
493   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
494   vlib_main_t *vm = vlib_get_main ();
495   f64 now = vlib_time_now (vm);
496   u16 l_port, r_port;
497
498   if (ip->protocol == IP_PROTOCOL_ICMP)
499     {
500       if (get_icmp_o2i_ed_key
501           (b, ip, rx_fib_index, ~0, ~0, 0, &l_port, &r_port, &kv))
502         return;
503     }
504   else
505     {
506       if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
507         {
508           udp = ip4_next_header (ip);
509           l_port = udp->dst_port;
510           r_port = udp->src_port;
511         }
512       else
513         {
514           l_port = 0;
515           r_port = 0;
516         }
517       init_ed_k (&kv, ip->dst_address, l_port, ip->src_address, r_port,
518                  rx_fib_index, ip->protocol);
519     }
520
521   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
522     {
523       ASSERT (thread_index == ed_value_get_thread_index (&value));
524       s =
525         pool_elt_at_index (tsm->sessions,
526                            ed_value_get_session_index (&value));
527     }
528   else if (ip->protocol == IP_PROTOCOL_ICMP &&
529            icmp_type_is_error_message
530            (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
531     {
532       return;
533     }
534   else
535     {
536       u32 proto;
537
538       if (PREDICT_FALSE
539           (nat44_ed_maximum_sessions_exceeded
540            (sm, rx_fib_index, thread_index)))
541         return;
542
543       s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
544       if (!s)
545         {
546           nat_elog_warn ("create NAT session failed");
547           return;
548         }
549
550       proto = ip_proto_to_nat_proto (ip->protocol);
551
552       s->ext_host_addr = ip->src_address;
553       s->ext_host_port = r_port;
554       s->flags |= SNAT_SESSION_FLAG_FWD_BYPASS;
555       s->out2in.addr = ip->dst_address;
556       s->out2in.port = l_port;
557       s->nat_proto = proto;
558       if (proto == NAT_PROTOCOL_OTHER)
559         {
560           s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
561           s->out2in.port = ip->protocol;
562         }
563       s->out2in.fib_index = rx_fib_index;
564       s->in2out.addr = s->out2in.addr;
565       s->in2out.port = s->out2in.port;
566       s->in2out.fib_index = s->out2in.fib_index;
567
568       kv.value = s - tsm->sessions;
569       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &kv, 1))
570         nat_elog_notice ("in2out_ed key add failed");
571
572       per_vrf_sessions_register_session (s, thread_index);
573     }
574
575   if (ip->protocol == IP_PROTOCOL_TCP)
576     {
577       tcp_header_t *tcp = ip4_next_header (ip);
578       nat44_set_tcp_session_state_o2i (sm, now, s, tcp->flags,
579                                        tcp->ack_number, tcp->seq_number,
580                                        thread_index);
581     }
582
583   /* Accounting */
584   nat44_session_update_counters (s, now, 0, thread_index);
585   /* Per-user LRU list maintenance */
586   nat44_session_update_lru (sm, s, thread_index);
587 }
588
589 static_always_inline int
590 create_bypass_for_fwd_worker (snat_main_t * sm,
591                               vlib_buffer_t * b, ip4_header_t * ip,
592                               u32 rx_fib_index, u32 thread_index)
593 {
594   ip4_header_t tmp = {
595     .src_address = ip->dst_address,
596   };
597   u32 index = sm->worker_in2out_cb (&tmp, rx_fib_index, 0);
598
599   if (index != thread_index)
600     {
601       vnet_buffer2 (b)->nat.thread_next = index;
602       return 1;
603     }
604
605   create_bypass_for_fwd (sm, b, ip, rx_fib_index, thread_index);
606   return 0;
607 }
608
609 #ifndef CLIB_MARCH_VARIANT
610 u32
611 icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node,
612                       u32 thread_index, vlib_buffer_t * b,
613                       ip4_header_t * ip, ip4_address_t * addr,
614                       u16 * port, u32 * fib_index, nat_protocol_t * proto,
615                       void *d, void *e, u8 * dont_translate)
616 {
617   u32 next = ~0, sw_if_index, rx_fib_index;
618   clib_bihash_kv_16_8_t kv, value;
619   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
620   snat_session_t *s = 0;
621   u8 is_addr_only, identity_nat;
622   u16 l_port, r_port;
623   vlib_main_t *vm = vlib_get_main ();
624   ip4_address_t sm_addr;
625   u16 sm_port;
626   u32 sm_fib_index;
627   *dont_translate = 0;
628   snat_static_mapping_t *m;
629
630   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
631   rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
632
633   if (get_icmp_o2i_ed_key
634       (b, ip, rx_fib_index, ~0, ~0, proto, &l_port, &r_port, &kv))
635     {
636       b->error = node->errors[NAT_OUT2IN_ED_ERROR_UNSUPPORTED_PROTOCOL];
637       next = NAT_NEXT_DROP;
638       goto out;
639     }
640
641   if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
642     {
643       if (snat_static_mapping_match
644           (sm, ip->dst_address, l_port, rx_fib_index,
645            ip_proto_to_nat_proto (ip->protocol), &sm_addr, &sm_port,
646            &sm_fib_index, 1, &is_addr_only, 0, 0, 0, &identity_nat, &m))
647         {
648           // static mapping not matched
649           if (!sm->forwarding_enabled)
650             {
651               /* Don't NAT packet aimed at the intfc address */
652               if (PREDICT_FALSE (is_interface_addr (sm, node, sw_if_index,
653                                                     ip->dst_address.as_u32)))
654                 {
655                   *dont_translate = 1;
656                 }
657               else
658                 {
659                   b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION];
660                   next = NAT_NEXT_DROP;
661                 }
662             }
663           else
664             {
665               *dont_translate = 1;
666               if (next_src_nat (sm, ip, l_port, r_port,
667                                 thread_index, rx_fib_index))
668                 {
669                   next = NAT_NEXT_IN2OUT_ED_FAST_PATH;
670                 }
671               else
672                 {
673                   if (sm->num_workers > 1)
674                     {
675                       if (create_bypass_for_fwd_worker (sm, b, ip,
676                                                         rx_fib_index,
677                                                         thread_index))
678                         {
679                           next = NAT_NEXT_OUT2IN_ED_HANDOFF;
680                         }
681                     }
682                   else
683                     {
684                       create_bypass_for_fwd (sm, b, ip, rx_fib_index,
685                                              thread_index);
686                     }
687                 }
688             }
689           goto out;
690         }
691
692       if (PREDICT_FALSE
693           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
694            ICMP4_echo_reply
695            && (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
696                ICMP4_echo_request || !is_addr_only)))
697         {
698           b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE];
699           next = NAT_NEXT_DROP;
700           goto out;
701         }
702
703       if (PREDICT_FALSE (identity_nat))
704         {
705           *dont_translate = 1;
706           goto out;
707         }
708
709       /* Create session initiated by host from external network */
710       s =
711         create_session_for_static_mapping_ed (sm, b, sm_addr, sm_port,
712                                               sm_fib_index, ip->dst_address,
713                                               l_port, rx_fib_index, *proto,
714                                               node, rx_fib_index,
715                                               thread_index, 0, 0,
716                                               vlib_time_now (vm), m);
717       if (!s)
718         next = NAT_NEXT_DROP;
719     }
720   else
721     {
722       if (PREDICT_FALSE
723           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
724            ICMP4_echo_reply
725            && vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
726            ICMP4_echo_request
727            && !icmp_type_is_error_message (vnet_buffer (b)->ip.
728                                            reass.icmp_type_or_tcp_flags)))
729         {
730           b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE];
731           next = NAT_NEXT_DROP;
732           goto out;
733         }
734
735       ASSERT (thread_index == ed_value_get_thread_index (&value));
736       s =
737         pool_elt_at_index (tsm->sessions,
738                            ed_value_get_session_index (&value));
739     }
740 out:
741   if (s)
742     {
743       *addr = s->in2out.addr;
744       *port = s->in2out.port;
745       *fib_index = s->in2out.fib_index;
746     }
747   if (d)
748     *(snat_session_t **) d = s;
749   return next;
750 }
751 #endif
752
753 static snat_session_t *
754 nat44_ed_out2in_unknown_proto (snat_main_t * sm,
755                                vlib_buffer_t * b,
756                                ip4_header_t * ip,
757                                u32 rx_fib_index,
758                                u32 thread_index,
759                                f64 now,
760                                vlib_main_t * vm, vlib_node_runtime_t * node)
761 {
762   clib_bihash_kv_8_8_t kv, value;
763   clib_bihash_kv_16_8_t s_kv, s_value;
764   snat_static_mapping_t *m;
765   u32 old_addr, new_addr;
766   ip_csum_t sum;
767   snat_session_t *s;
768   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
769
770   old_addr = ip->dst_address.as_u32;
771
772   init_ed_k (&s_kv, ip->dst_address, 0, ip->src_address, 0, rx_fib_index,
773              ip->protocol);
774
775   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
776     {
777       ASSERT (thread_index == ed_value_get_thread_index (&s_value));
778       s =
779         pool_elt_at_index (tsm->sessions,
780                            ed_value_get_session_index (&s_value));
781       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
782     }
783   else
784     {
785       if (PREDICT_FALSE
786           (nat44_ed_maximum_sessions_exceeded
787            (sm, rx_fib_index, thread_index)))
788         {
789           b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED];
790           nat_elog_notice ("maximum sessions exceeded");
791           return 0;
792         }
793
794       init_nat_k (&kv, ip->dst_address, 0, 0, 0);
795       if (clib_bihash_search_8_8
796           (&sm->static_mapping_by_external, &kv, &value))
797         {
798           b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION];
799           return 0;
800         }
801
802       m = pool_elt_at_index (sm->static_mappings, value.value);
803
804       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
805
806       /* Create a new session */
807       s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
808       if (!s)
809         {
810           b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_USER_SESS_EXCEEDED];
811           nat_elog_warn ("create NAT session failed");
812           return 0;
813         }
814
815       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
816       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
817       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
818       s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
819       s->out2in.addr.as_u32 = old_addr;
820       s->out2in.fib_index = rx_fib_index;
821       s->in2out.addr.as_u32 = new_addr;
822       s->in2out.fib_index = m->fib_index;
823       s->in2out.port = s->out2in.port = ip->protocol;
824
825       /* Add to lookup tables */
826       s_kv.value = s - tsm->sessions;
827       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
828         nat_elog_notice ("out2in key add failed");
829
830       init_ed_kv (&s_kv, ip->dst_address, 0, ip->src_address, 0, m->fib_index,
831                   ip->protocol, thread_index, s - tsm->sessions);
832       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1))
833         nat_elog_notice ("in2out key add failed");
834
835       per_vrf_sessions_register_session (s, thread_index);
836     }
837
838   /* Update IP checksum */
839   sum = ip->checksum;
840   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
841   ip->checksum = ip_csum_fold (sum);
842
843   vnet_buffer (b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
844
845   /* Accounting */
846   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
847                                  thread_index);
848   /* Per-user LRU list maintenance */
849   nat44_session_update_lru (sm, s, thread_index);
850
851   return s;
852 }
853
854 static inline uword
855 nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
856                                           vlib_node_runtime_t * node,
857                                           vlib_frame_t * frame,
858                                           int is_multi_worker)
859 {
860   u32 n_left_from, *from;
861   snat_main_t *sm = &snat_main;
862   f64 now = vlib_time_now (vm);
863   u32 thread_index = vm->thread_index;
864   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
865
866   from = vlib_frame_vector_args (frame);
867   n_left_from = frame->n_vectors;
868
869   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
870   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
871   vlib_get_buffers (vm, from, b, n_left_from);
872
873   while (n_left_from > 0)
874     {
875       vlib_buffer_t *b0;
876       u32 sw_if_index0, rx_fib_index0, proto0, old_addr0, new_addr0;
877       u16 old_port0, new_port0;
878       ip4_header_t *ip0;
879       udp_header_t *udp0;
880       tcp_header_t *tcp0;
881       snat_session_t *s0 = 0;
882       clib_bihash_kv_16_8_t kv0, value0;
883       ip_csum_t sum0;
884
885       b0 = *b;
886       b++;
887
888       /* Prefetch next iteration. */
889       if (PREDICT_TRUE (n_left_from >= 2))
890         {
891           vlib_buffer_t *p2;
892
893           p2 = *b;
894
895           vlib_prefetch_buffer_header (p2, LOAD);
896
897           CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
898         }
899
900       next[0] = vnet_buffer2 (b0)->nat.arc_next;
901
902       vnet_buffer (b0)->snat.flags = 0;
903       ip0 = vlib_buffer_get_current (b0);
904
905       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
906       rx_fib_index0 =
907         fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
908
909       if (PREDICT_FALSE (ip0->ttl == 1))
910         {
911           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
912           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
913                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
914                                        0);
915           next[0] = NAT_NEXT_ICMP_ERROR;
916           goto trace0;
917         }
918
919       udp0 = ip4_next_header (ip0);
920       tcp0 = (tcp_header_t *) udp0;
921       proto0 = ip_proto_to_nat_proto (ip0->protocol);
922
923       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
924         {
925           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
926           goto trace0;
927         }
928
929       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
930         {
931           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
932           goto trace0;
933         }
934
935       init_ed_k (&kv0, ip0->dst_address,
936                  vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->src_address,
937                  vnet_buffer (b0)->ip.reass.l4_src_port, rx_fib_index0,
938                  ip0->protocol);
939
940       /* there is a stashed index in vnet_buffer2 from handoff node,
941        * see if we can use it */
942       if (is_multi_worker
943           &&
944           PREDICT_TRUE (!pool_is_free_index
945                         (tsm->sessions,
946                          vnet_buffer2 (b0)->nat.ed_out2in_nat_session_index)))
947         {
948           s0 = pool_elt_at_index (tsm->sessions,
949                                   vnet_buffer2 (b0)->
950                                   nat.ed_out2in_nat_session_index);
951           if (PREDICT_TRUE
952               (s0->out2in.addr.as_u32 == ip0->dst_address.as_u32
953                && s0->out2in.port == vnet_buffer (b0)->ip.reass.l4_dst_port
954                && s0->nat_proto == ip_proto_to_nat_proto (ip0->protocol)
955                && s0->out2in.fib_index == rx_fib_index0
956                && s0->ext_host_addr.as_u32 == ip0->src_address.as_u32
957                && s0->ext_host_port ==
958                vnet_buffer (b0)->ip.reass.l4_src_port))
959             {
960               /* yes, this is the droid we're looking for */
961               goto skip_lookup;
962             }
963         }
964
965       // lookup for session
966       if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv0, &value0))
967         {
968           // session does not exist go slow path
969           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
970           goto trace0;
971         }
972       ASSERT (thread_index == ed_value_get_thread_index (&value0));
973       s0 =
974         pool_elt_at_index (tsm->sessions,
975                            ed_value_get_session_index (&value0));
976
977     skip_lookup:
978
979       if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
980         {
981           // session is closed, go slow path
982           nat_free_session_data (sm, s0, thread_index, 0);
983           nat_ed_session_delete (sm, s0, thread_index, 1);
984           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
985           goto trace0;
986         }
987
988       if (s0->tcp_closed_timestamp)
989         {
990           if (now >= s0->tcp_closed_timestamp)
991             {
992               // session is closed, go slow path, freed in slow path
993               next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
994             }
995           else
996             {
997               // session in transitory timeout, drop
998               b0->error = node->errors[NAT_OUT2IN_ED_ERROR_TCP_CLOSED];
999               next[0] = NAT_NEXT_DROP;
1000             }
1001           goto trace0;
1002         }
1003
1004       // drop if session expired
1005       u64 sess_timeout_time;
1006       sess_timeout_time =
1007         s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
1008       if (now >= sess_timeout_time)
1009         {
1010           // session is closed, go slow path
1011           nat_free_session_data (sm, s0, thread_index, 0);
1012           nat_ed_session_delete (sm, s0, thread_index, 1);
1013           next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
1014           goto trace0;
1015         }
1016
1017       old_addr0 = ip0->dst_address.as_u32;
1018       new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32;
1019       vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1020
1021       sum0 = ip0->checksum;
1022       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1023                              dst_address);
1024       if (PREDICT_FALSE (is_twice_nat_session (s0)))
1025         sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
1026                                s0->ext_host_nat_addr.as_u32, ip4_header_t,
1027                                src_address);
1028       ip0->checksum = ip_csum_fold (sum0);
1029
1030       old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port;
1031
1032       if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
1033         {
1034           if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1035             {
1036               new_port0 = udp0->dst_port = s0->in2out.port;
1037               sum0 = tcp0->checksum;
1038               sum0 =
1039                 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1040                                 dst_address);
1041               sum0 =
1042                 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
1043                                 length);
1044               if (is_twice_nat_session (s0))
1045                 {
1046                   sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
1047                                          s0->ext_host_nat_addr.as_u32,
1048                                          ip4_header_t, dst_address);
1049                   sum0 =
1050                     ip_csum_update (sum0,
1051                                     vnet_buffer (b0)->ip.reass.l4_src_port,
1052                                     s0->ext_host_nat_port, ip4_header_t,
1053                                     length);
1054                   tcp0->src_port = s0->ext_host_nat_port;
1055                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
1056                 }
1057               tcp0->checksum = ip_csum_fold (sum0);
1058             }
1059           vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.tcp,
1060                                          thread_index, sw_if_index0, 1);
1061           nat44_set_tcp_session_state_o2i (sm, now, s0,
1062                                            vnet_buffer (b0)->ip.
1063                                            reass.icmp_type_or_tcp_flags,
1064                                            vnet_buffer (b0)->ip.
1065                                            reass.tcp_ack_number,
1066                                            vnet_buffer (b0)->ip.
1067                                            reass.tcp_seq_number,
1068                                            thread_index);
1069         }
1070       else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
1071                && udp0->checksum)
1072         {
1073           new_port0 = udp0->dst_port = s0->in2out.port;
1074           sum0 = udp0->checksum;
1075           sum0 =
1076             ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1077                             dst_address);
1078           sum0 =
1079             ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, length);
1080           if (PREDICT_FALSE (is_twice_nat_session (s0)))
1081             {
1082               sum0 =
1083                 ip_csum_update (sum0, ip0->src_address.as_u32,
1084                                 s0->ext_host_nat_addr.as_u32, ip4_header_t,
1085                                 dst_address);
1086               sum0 =
1087                 ip_csum_update (sum0, vnet_buffer (b0)->ip.reass.l4_src_port,
1088                                 s0->ext_host_nat_port, ip4_header_t, length);
1089               udp0->src_port = s0->ext_host_nat_port;
1090               ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
1091             }
1092           udp0->checksum = ip_csum_fold (sum0);
1093           vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.udp,
1094                                          thread_index, sw_if_index0, 1);
1095         }
1096       else
1097         {
1098           if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1099             {
1100               new_port0 = udp0->dst_port = s0->in2out.port;
1101               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1102                 {
1103                   udp0->src_port = s0->ext_host_nat_port;
1104                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
1105                 }
1106             }
1107           vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.udp,
1108                                          thread_index, sw_if_index0, 1);
1109         }
1110
1111       /* Accounting */
1112       nat44_session_update_counters (s0, now,
1113                                      vlib_buffer_length_in_chain (vm, b0),
1114                                      thread_index);
1115       /* Per-user LRU list maintenance */
1116       nat44_session_update_lru (sm, s0, thread_index);
1117
1118     trace0:
1119       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1120                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1121         {
1122           nat44_ed_out2in_trace_t *t =
1123             vlib_add_trace (vm, node, b0, sizeof (*t));
1124           t->sw_if_index = sw_if_index0;
1125           t->next_index = next[0];
1126           t->is_slow_path = 0;
1127
1128           if (s0)
1129             t->session_index = s0 - tsm->sessions;
1130           else
1131             t->session_index = ~0;
1132         }
1133
1134       if (next[0] == NAT_NEXT_DROP)
1135         {
1136           vlib_increment_simple_counter (&sm->counters.fastpath.
1137                                          out2in_ed.drops, thread_index,
1138                                          sw_if_index0, 1);
1139         }
1140
1141       n_left_from--;
1142       next++;
1143     }
1144
1145   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1146                                frame->n_vectors);
1147   return frame->n_vectors;
1148 }
1149
1150 static inline uword
1151 nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
1152                                           vlib_node_runtime_t * node,
1153                                           vlib_frame_t * frame)
1154 {
1155   u32 n_left_from, *from;
1156   snat_main_t *sm = &snat_main;
1157   f64 now = vlib_time_now (vm);
1158   u32 thread_index = vm->thread_index;
1159   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1160   snat_static_mapping_t *m;
1161
1162   from = vlib_frame_vector_args (frame);
1163   n_left_from = frame->n_vectors;
1164
1165   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1166   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1167   vlib_get_buffers (vm, from, b, n_left_from);
1168
1169   while (n_left_from > 0)
1170     {
1171       vlib_buffer_t *b0;
1172       u32 sw_if_index0, rx_fib_index0, proto0, old_addr0, new_addr0;
1173       u16 old_port0, new_port0;
1174       ip4_header_t *ip0;
1175       udp_header_t *udp0;
1176       tcp_header_t *tcp0;
1177       icmp46_header_t *icmp0;
1178       snat_session_t *s0 = 0;
1179       clib_bihash_kv_16_8_t kv0, value0;
1180       ip_csum_t sum0;
1181       lb_nat_type_t lb_nat0;
1182       twice_nat_type_t twice_nat0;
1183       u8 identity_nat0;
1184       ip4_address_t sm_addr;
1185       u16 sm_port;
1186       u32 sm_fib_index;
1187
1188       b0 = *b;
1189       next[0] = vnet_buffer2 (b0)->nat.arc_next;
1190
1191       vnet_buffer (b0)->snat.flags = 0;
1192       ip0 = vlib_buffer_get_current (b0);
1193
1194       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1195       rx_fib_index0 =
1196         fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
1197
1198       if (PREDICT_FALSE (ip0->ttl == 1))
1199         {
1200           vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1201           icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1202                                        ICMP4_time_exceeded_ttl_exceeded_in_transit,
1203                                        0);
1204           next[0] = NAT_NEXT_ICMP_ERROR;
1205           goto trace0;
1206         }
1207
1208       udp0 = ip4_next_header (ip0);
1209       tcp0 = (tcp_header_t *) udp0;
1210       icmp0 = (icmp46_header_t *) udp0;
1211       proto0 = ip_proto_to_nat_proto (ip0->protocol);
1212
1213       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
1214         {
1215           s0 =
1216             nat44_ed_out2in_unknown_proto (sm, b0, ip0, rx_fib_index0,
1217                                            thread_index, now, vm, node);
1218           if (!sm->forwarding_enabled)
1219             {
1220               if (!s0)
1221                 next[0] = NAT_NEXT_DROP;
1222             }
1223           vlib_increment_simple_counter (&sm->counters.slowpath.
1224                                          out2in_ed.other, thread_index,
1225                                          sw_if_index0, 1);
1226           goto trace0;
1227         }
1228
1229       if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1230         {
1231           next[0] = icmp_out2in_ed_slow_path
1232             (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1233              next[0], now, thread_index, &s0);
1234           vlib_increment_simple_counter (&sm->counters.slowpath.
1235                                          out2in_ed.icmp, thread_index,
1236                                          sw_if_index0, 1);
1237           goto trace0;
1238         }
1239
1240       init_ed_k (&kv0, ip0->dst_address,
1241                  vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->src_address,
1242                  vnet_buffer (b0)->ip.reass.l4_src_port, rx_fib_index0,
1243                  ip0->protocol);
1244
1245       s0 = NULL;
1246       if (!clib_bihash_search_16_8 (&sm->out2in_ed, &kv0, &value0))
1247         {
1248           ASSERT (thread_index == ed_value_get_thread_index (&value0));
1249           s0 =
1250             pool_elt_at_index (tsm->sessions,
1251                                ed_value_get_session_index (&value0));
1252
1253           if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
1254             {
1255               nat_free_session_data (sm, s0, thread_index, 0);
1256               nat_ed_session_delete (sm, s0, thread_index, 1);
1257               s0 = NULL;
1258             }
1259         }
1260
1261       if (!s0)
1262         {
1263           /* Try to match static mapping by external address and port,
1264              destination address and port in packet */
1265
1266           if (snat_static_mapping_match
1267               (sm, ip0->dst_address,
1268                vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
1269                proto0, &sm_addr, &sm_port, &sm_fib_index, 1, 0,
1270                &twice_nat0, &lb_nat0, &ip0->src_address, &identity_nat0, &m))
1271             {
1272               /*
1273                * Send DHCP packets to the ipv4 stack, or we won't
1274                * be able to use dhcp client on the outside interface
1275                */
1276               if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_UDP
1277                                  && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1278                                      clib_host_to_net_u16
1279                                      (UDP_DST_PORT_dhcp_to_client))))
1280                 {
1281                   goto trace0;
1282                 }
1283
1284               if (!sm->forwarding_enabled)
1285                 {
1286                   b0->error =
1287                     node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION];
1288                   next[0] = NAT_NEXT_DROP;
1289                 }
1290               else
1291                 {
1292                   if (next_src_nat
1293                       (sm, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1294                        vnet_buffer (b0)->ip.reass.l4_dst_port,
1295                        thread_index, rx_fib_index0))
1296                     {
1297                       next[0] = NAT_NEXT_IN2OUT_ED_FAST_PATH;
1298                     }
1299                   else
1300                     {
1301                       if ((sm->num_workers > 1)
1302                           && create_bypass_for_fwd_worker (sm, b0, ip0,
1303                                                            rx_fib_index0,
1304                                                            thread_index))
1305                         {
1306                           next[0] = NAT_NEXT_OUT2IN_ED_HANDOFF;
1307                         }
1308                       else
1309                         {
1310                           create_bypass_for_fwd (sm, b0, ip0, rx_fib_index0,
1311                                                  thread_index);
1312                         }
1313                     }
1314                 }
1315               goto trace0;
1316             }
1317
1318           if (PREDICT_FALSE (identity_nat0))
1319             goto trace0;
1320
1321           if ((proto0 == NAT_PROTOCOL_TCP)
1322               && !tcp_flags_is_init (vnet_buffer (b0)->ip.
1323                                      reass.icmp_type_or_tcp_flags))
1324             {
1325               b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN];
1326               next[0] = NAT_NEXT_DROP;
1327               goto trace0;
1328             }
1329
1330           /* Create session initiated by host from external network */
1331           s0 = create_session_for_static_mapping_ed (sm, b0,
1332                                                      sm_addr, sm_port,
1333                                                      sm_fib_index,
1334                                                      ip0->dst_address,
1335                                                      vnet_buffer (b0)->
1336                                                      ip.reass.l4_dst_port,
1337                                                      rx_fib_index0, proto0,
1338                                                      node, rx_fib_index0,
1339                                                      thread_index, twice_nat0,
1340                                                      lb_nat0, now, m);
1341           if (!s0)
1342             {
1343               next[0] = NAT_NEXT_DROP;
1344               goto trace0;
1345             }
1346         }
1347
1348       old_addr0 = ip0->dst_address.as_u32;
1349       new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32;
1350       vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1351
1352       sum0 = ip0->checksum;
1353       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1354                              dst_address);
1355       if (PREDICT_FALSE (is_twice_nat_session (s0)))
1356         sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
1357                                s0->ext_host_nat_addr.as_u32, ip4_header_t,
1358                                src_address);
1359       ip0->checksum = ip_csum_fold (sum0);
1360
1361       old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port;
1362
1363       if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
1364         {
1365           if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1366             {
1367               new_port0 = udp0->dst_port = s0->in2out.port;
1368               sum0 = tcp0->checksum;
1369               sum0 =
1370                 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1371                                 dst_address);
1372               sum0 =
1373                 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
1374                                 length);
1375               if (is_twice_nat_session (s0))
1376                 {
1377                   sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
1378                                          s0->ext_host_nat_addr.as_u32,
1379                                          ip4_header_t, dst_address);
1380                   sum0 =
1381                     ip_csum_update (sum0,
1382                                     vnet_buffer (b0)->ip.reass.l4_src_port,
1383                                     s0->ext_host_nat_port, ip4_header_t,
1384                                     length);
1385                   tcp0->src_port = s0->ext_host_nat_port;
1386                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
1387                 }
1388               tcp0->checksum = ip_csum_fold (sum0);
1389             }
1390           vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.tcp,
1391                                          thread_index, sw_if_index0, 1);
1392           nat44_set_tcp_session_state_o2i (sm, now, s0,
1393                                            vnet_buffer (b0)->ip.
1394                                            reass.icmp_type_or_tcp_flags,
1395                                            vnet_buffer (b0)->ip.
1396                                            reass.tcp_ack_number,
1397                                            vnet_buffer (b0)->ip.
1398                                            reass.tcp_seq_number,
1399                                            thread_index);
1400         }
1401       else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
1402                && udp0->checksum)
1403         {
1404           new_port0 = udp0->dst_port = s0->in2out.port;
1405           sum0 = udp0->checksum;
1406           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1407                                  dst_address);
1408           sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
1409                                  length);
1410           if (PREDICT_FALSE (is_twice_nat_session (s0)))
1411             {
1412               sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
1413                                      s0->ext_host_nat_addr.as_u32,
1414                                      ip4_header_t, dst_address);
1415               sum0 =
1416                 ip_csum_update (sum0,
1417                                 vnet_buffer (b0)->ip.reass.l4_src_port,
1418                                 s0->ext_host_nat_port, ip4_header_t, length);
1419               udp0->src_port = s0->ext_host_nat_port;
1420               ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
1421             }
1422           udp0->checksum = ip_csum_fold (sum0);
1423           vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.udp,
1424                                          thread_index, sw_if_index0, 1);
1425         }
1426       else
1427         {
1428           if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1429             {
1430               new_port0 = udp0->dst_port = s0->in2out.port;
1431               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1432                 {
1433                   udp0->src_port = s0->ext_host_nat_port;
1434                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
1435                 }
1436             }
1437           vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.udp,
1438                                          thread_index, sw_if_index0, 1);
1439         }
1440
1441       /* Accounting */
1442       nat44_session_update_counters (s0, now,
1443                                      vlib_buffer_length_in_chain (vm, b0),
1444                                      thread_index);
1445       /* Per-user LRU list maintenance */
1446       nat44_session_update_lru (sm, s0, thread_index);
1447
1448     trace0:
1449       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1450                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1451         {
1452           nat44_ed_out2in_trace_t *t =
1453             vlib_add_trace (vm, node, b0, sizeof (*t));
1454           t->sw_if_index = sw_if_index0;
1455           t->next_index = next[0];
1456           t->is_slow_path = 1;
1457
1458           if (s0)
1459             t->session_index = s0 - tsm->sessions;
1460           else
1461             t->session_index = ~0;
1462         }
1463
1464       if (next[0] == NAT_NEXT_DROP)
1465         {
1466           vlib_increment_simple_counter (&sm->counters.slowpath.
1467                                          out2in_ed.drops, thread_index,
1468                                          sw_if_index0, 1);
1469         }
1470
1471       n_left_from--;
1472       next++;
1473       b++;
1474     }
1475
1476   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1477                                frame->n_vectors);
1478
1479   return frame->n_vectors;
1480 }
1481
1482 static inline uword
1483 nat_handoff_node_fn_inline (vlib_main_t * vm,
1484                             vlib_node_runtime_t * node,
1485                             vlib_frame_t * frame, u32 fq_index)
1486 {
1487   u32 n_enq, n_left_from, *from;
1488
1489   u16 thread_indices[VLIB_FRAME_SIZE], *ti = thread_indices;
1490   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1491
1492   from = vlib_frame_vector_args (frame);
1493   n_left_from = frame->n_vectors;
1494
1495   vlib_get_buffers (vm, from, b, n_left_from);
1496
1497   while (n_left_from >= 4)
1498     {
1499       if (PREDICT_TRUE (n_left_from >= 8))
1500         {
1501           vlib_prefetch_buffer_header (b[4], LOAD);
1502           vlib_prefetch_buffer_header (b[5], LOAD);
1503           vlib_prefetch_buffer_header (b[6], LOAD);
1504           vlib_prefetch_buffer_header (b[7], LOAD);
1505           CLIB_PREFETCH (&b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1506           CLIB_PREFETCH (&b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1507           CLIB_PREFETCH (&b[6]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1508           CLIB_PREFETCH (&b[7]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1509         }
1510
1511       ti[0] = vnet_buffer2 (b[0])->nat.thread_next;
1512       ti[1] = vnet_buffer2 (b[1])->nat.thread_next;
1513       ti[2] = vnet_buffer2 (b[2])->nat.thread_next;
1514       ti[3] = vnet_buffer2 (b[3])->nat.thread_next;
1515
1516       b += 4;
1517       ti += 4;
1518       n_left_from -= 4;
1519     }
1520
1521   while (n_left_from > 0)
1522     {
1523       ti[0] = vnet_buffer2 (b[0])->nat.thread_next;
1524
1525       b += 1;
1526       ti += 1;
1527       n_left_from -= 1;
1528     }
1529
1530   if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
1531     {
1532       u32 i;
1533       b = bufs;
1534       ti = thread_indices;
1535
1536       for (i = 0; i < frame->n_vectors; i++)
1537         {
1538           if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
1539             {
1540               nat44_ed_out2in_handoff_trace_t *t =
1541                 vlib_add_trace (vm, node, b[0], sizeof (*t));
1542               t->thread_next = ti[0];
1543               b += 1;
1544               ti += 1;
1545             }
1546           else
1547             break;
1548         }
1549     }
1550
1551   n_enq = vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices,
1552                                          frame->n_vectors, 1);
1553
1554   if (n_enq < frame->n_vectors)
1555     {
1556       vlib_node_increment_counter (vm, node->node_index,
1557                                    NAT44_HANDOFF_ERROR_CONGESTION_DROP,
1558                                    frame->n_vectors - n_enq);
1559     }
1560
1561   return frame->n_vectors;
1562 }
1563
1564 VLIB_NODE_FN (nat44_ed_out2in_node) (vlib_main_t * vm,
1565                                      vlib_node_runtime_t * node,
1566                                      vlib_frame_t * frame)
1567 {
1568   if (snat_main.num_workers > 1)
1569     {
1570       return nat44_ed_out2in_fast_path_node_fn_inline (vm, node, frame, 1);
1571     }
1572   else
1573     {
1574       return nat44_ed_out2in_fast_path_node_fn_inline (vm, node, frame, 0);
1575     }
1576 }
1577
1578 /* *INDENT-OFF* */
1579 VLIB_REGISTER_NODE (nat44_ed_out2in_node) = {
1580   .name = "nat44-ed-out2in",
1581   .vector_size = sizeof (u32),
1582   .sibling_of = "nat-default",
1583   .format_trace = format_nat44_ed_out2in_trace,
1584   .type = VLIB_NODE_TYPE_INTERNAL,
1585   .n_errors = ARRAY_LEN(nat_out2in_ed_error_strings),
1586   .error_strings = nat_out2in_ed_error_strings,
1587   .runtime_data_bytes = sizeof (snat_runtime_t),
1588 };
1589 /* *INDENT-ON* */
1590
1591 VLIB_NODE_FN (nat44_ed_out2in_slowpath_node) (vlib_main_t * vm,
1592                                               vlib_node_runtime_t * node,
1593                                               vlib_frame_t * frame)
1594 {
1595   return nat44_ed_out2in_slow_path_node_fn_inline (vm, node, frame);
1596 }
1597
1598 /* *INDENT-OFF* */
1599 VLIB_REGISTER_NODE (nat44_ed_out2in_slowpath_node) = {
1600   .name = "nat44-ed-out2in-slowpath",
1601   .vector_size = sizeof (u32),
1602   .sibling_of = "nat-default",
1603   .format_trace = format_nat44_ed_out2in_trace,
1604   .type = VLIB_NODE_TYPE_INTERNAL,
1605   .n_errors = ARRAY_LEN(nat_out2in_ed_error_strings),
1606   .error_strings = nat_out2in_ed_error_strings,
1607   .runtime_data_bytes = sizeof (snat_runtime_t),
1608 };
1609 /* *INDENT-ON* */
1610
1611 static u8 *
1612 format_nat44_ed_out2in_handoff_trace (u8 * s, va_list * args)
1613 {
1614   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1615   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1616   nat44_ed_out2in_handoff_trace_t *t =
1617     va_arg (*args, nat44_ed_out2in_handoff_trace_t *);
1618   return format (s, "out2in ed handoff thread_next index %d", t->thread_next);
1619 }
1620
1621 VLIB_NODE_FN (nat44_ed_out2in_handoff_node) (vlib_main_t * vm,
1622                                              vlib_node_runtime_t * node,
1623                                              vlib_frame_t * frame)
1624 {
1625   return nat_handoff_node_fn_inline (vm, node, frame,
1626                                      snat_main.ed_out2in_node_index);
1627 }
1628
1629 /* *INDENT-OFF* */
1630 VLIB_REGISTER_NODE (nat44_ed_out2in_handoff_node) = {
1631   .name = "nat44-ed-out2in-handoff",
1632   .vector_size = sizeof (u32),
1633   .sibling_of = "nat-default",
1634   .format_trace = format_nat44_ed_out2in_handoff_trace,
1635   .type = VLIB_NODE_TYPE_INTERNAL,
1636   .n_errors = 0,
1637 };
1638 /* *INDENT-ON* */
1639
1640 static u8 *
1641 format_nat_pre_trace (u8 * s, va_list * args)
1642 {
1643   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1644   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1645   nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1646   return format (s, "out2in next_index %d arc_next_index %d", t->next_index,
1647                  t->arc_next_index);
1648 }
1649
1650 VLIB_NODE_FN (nat_pre_out2in_node) (vlib_main_t * vm,
1651                                     vlib_node_runtime_t * node,
1652                                     vlib_frame_t * frame)
1653 {
1654   return nat_pre_node_fn_inline (vm, node, frame,
1655                                  NAT_NEXT_OUT2IN_ED_FAST_PATH);
1656 }
1657
1658 /* *INDENT-OFF* */
1659 VLIB_REGISTER_NODE (nat_pre_out2in_node) = {
1660   .name = "nat-pre-out2in",
1661   .vector_size = sizeof (u32),
1662   .sibling_of = "nat-default",
1663   .format_trace = format_nat_pre_trace,
1664   .type = VLIB_NODE_TYPE_INTERNAL,
1665   .n_errors = 0,
1666  };
1667 /* *INDENT-ON* */
1668
1669 /*
1670  * fd.io coding-style-patch-verification: ON
1671  *
1672  * Local Variables:
1673  * eval: (c-set-style "gnu")
1674  * End:
1675  */