nat: elog rewrite for multi-worker support
[vpp.git] / src / plugins / nat / in2out_ed.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/pg/pg.h>
23 #include <vnet/ip/ip.h>
24 #include <vnet/ethernet/ethernet.h>
25 #include <vnet/fib/ip4_fib.h>
26 #include <vppinfra/error.h>
27 #include <nat/nat.h>
28 #include <nat/nat_ipfix_logging.h>
29 #include <nat/nat_reass.h>
30 #include <nat/nat_inlines.h>
31 #include <nat/nat_syslog.h>
32 #include <nat/nat_ha.h>
33
/*
 * Error counters of the NAT44 endpoint-dependent in2out nodes.
 *
 * The list is expanded twice below: once into nat_in2out_ed_error_t and once
 * into nat_in2out_ed_error_strings[], so the enumerators and their strings
 * always stay in the same order.
 *
 * IN2OUT_PACKETS must remain the first entry (value 0).  icmp_get_ed_key()
 * returns NAT_IN2OUT_ED_ERROR_UNSUPPORTED_PROTOCOL as its failure status and
 * its callers treat 0 as success, so UNSUPPORTED_PROTOCOL must never be
 * enumerator 0 or the failure is indistinguishable from success.
 */
#define foreach_nat_in2out_ed_error                     \
_(IN2OUT_PACKETS, "good in2out packets processed")      \
_(UNSUPPORTED_PROTOCOL, "unsupported protocol")         \
_(OUT_OF_PORTS, "out of ports")                         \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded")   \
_(DROP_FRAGMENT, "drop fragment")                       \
_(MAX_REASS, "maximum reassemblies exceeded")           \
_(MAX_FRAG, "maximum fragments per reassembly exceeded")\
_(NON_SYN, "non-SYN packet try to create session")      \
_(TCP_PACKETS, "TCP packets")                           \
_(UDP_PACKETS, "UDP packets")                           \
_(ICMP_PACKETS, "ICMP packets")                         \
_(OTHER_PACKETS, "other protocol packets")              \
_(FRAGMENTS, "fragments")                               \
_(CACHED_FRAGMENTS, "cached fragments")                 \
_(PROCESSED_FRAGMENTS, "processed fragments")


/* Error/counter indices, one NAT_IN2OUT_ED_ERROR_<sym> per list entry. */
typedef enum
{
#define _(sym,str) NAT_IN2OUT_ED_ERROR_##sym,
  foreach_nat_in2out_ed_error
#undef _
    NAT_IN2OUT_ED_N_ERROR,
} nat_in2out_ed_error_t;

/* Human readable strings, indexed by nat_in2out_ed_error_t. */
static char *nat_in2out_ed_error_strings[] = {
#define _(sym,string) string,
  foreach_nat_in2out_ed_error
#undef _
};
66
/*
 * Next-node slots used by the in2out-ed nodes.  The binding of each slot to
 * a concrete graph node happens at node registration, which is outside this
 * part of the file.  NAT_IN2OUT_ED_N_NEXT is the number of slots.
 */
typedef enum
{
  NAT_IN2OUT_ED_NEXT_LOOKUP = 0,
  NAT_IN2OUT_ED_NEXT_DROP,
  NAT_IN2OUT_ED_NEXT_ICMP_ERROR,
  NAT_IN2OUT_ED_NEXT_SLOW_PATH,
  NAT_IN2OUT_ED_NEXT_REASS,
  NAT_IN2OUT_ED_N_NEXT,
} nat_in2out_ed_next_t;
76
77 typedef struct
78 {
79   u32 sw_if_index;
80   u32 next_index;
81   u32 session_index;
82   u32 is_slow_path;
83 } nat_in2out_ed_trace_t;
84
85 static u8 *
86 format_nat_in2out_ed_trace (u8 * s, va_list * args)
87 {
88   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
89   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
90   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
91   char *tag;
92
93   tag =
94     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
95     "NAT44_IN2OUT_ED_FAST_PATH";
96
97   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
98               t->sw_if_index, t->next_index, t->session_index);
99
100   return s;
101 }
102
103 static_always_inline int
104 icmp_get_ed_key (ip4_header_t * ip0, nat_ed_ses_key_t * p_key0)
105 {
106   icmp46_header_t *icmp0;
107   nat_ed_ses_key_t key0;
108   icmp_echo_header_t *echo0, *inner_echo0 = 0;
109   ip4_header_t *inner_ip0 = 0;
110   void *l4_header = 0;
111   icmp46_header_t *inner_icmp0;
112
113   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
114   echo0 = (icmp_echo_header_t *) (icmp0 + 1);
115
116   if (!icmp_is_error_message (icmp0))
117     {
118       key0.proto = IP_PROTOCOL_ICMP;
119       key0.l_addr = ip0->src_address;
120       key0.r_addr = ip0->dst_address;
121       key0.l_port = echo0->identifier;
122       key0.r_port = 0;
123     }
124   else
125     {
126       inner_ip0 = (ip4_header_t *) (echo0 + 1);
127       l4_header = ip4_next_header (inner_ip0);
128       key0.proto = inner_ip0->protocol;
129       key0.r_addr = inner_ip0->src_address;
130       key0.l_addr = inner_ip0->dst_address;
131       switch (ip_proto_to_snat_proto (inner_ip0->protocol))
132         {
133         case SNAT_PROTOCOL_ICMP:
134           inner_icmp0 = (icmp46_header_t *) l4_header;
135           inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1);
136           key0.r_port = 0;
137           key0.l_port = inner_echo0->identifier;
138           break;
139         case SNAT_PROTOCOL_UDP:
140         case SNAT_PROTOCOL_TCP:
141           key0.l_port = ((tcp_udp_header_t *) l4_header)->dst_port;
142           key0.r_port = ((tcp_udp_header_t *) l4_header)->src_port;
143           break;
144         default:
145           return NAT_IN2OUT_ED_ERROR_UNSUPPORTED_PROTOCOL;
146         }
147     }
148   *p_key0 = key0;
149   return 0;
150 }
151
152 #ifndef CLIB_MARCH_VARIANT
153 int
154 nat44_i2o_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg)
155 {
156   snat_main_t *sm = &snat_main;
157   nat44_is_idle_session_ctx_t *ctx = arg;
158   snat_session_t *s;
159   u64 sess_timeout_time;
160   nat_ed_ses_key_t ed_key;
161   clib_bihash_kv_16_8_t ed_kv;
162   int i;
163   snat_address_t *a;
164   snat_session_key_t key;
165   snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
166                                                        ctx->thread_index);
167
168   s = pool_elt_at_index (tsm->sessions, kv->value);
169   sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s);
170   if (ctx->now >= sess_timeout_time)
171     {
172       if (is_fwd_bypass_session (s))
173         goto delete;
174
175       ed_key.l_addr = s->out2in.addr;
176       ed_key.r_addr = s->ext_host_addr;
177       ed_key.fib_index = s->out2in.fib_index;
178       if (snat_is_unk_proto_session (s))
179         {
180           ed_key.proto = s->in2out.port;
181           ed_key.r_port = 0;
182           ed_key.l_port = 0;
183         }
184       else
185         {
186           ed_key.proto = snat_proto_to_ip_proto (s->in2out.protocol);
187           ed_key.l_port = s->out2in.port;
188           ed_key.r_port = s->ext_host_port;
189         }
190       ed_kv.key[0] = ed_key.as_u64[0];
191       ed_kv.key[1] = ed_key.as_u64[1];
192       if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &ed_kv, 0))
193         nat_elog_warn ("out2in_ed key del failed");
194
195       if (snat_is_unk_proto_session (s))
196         goto delete;
197
198       snat_ipfix_logging_nat44_ses_delete (ctx->thread_index,
199                                            s->in2out.addr.as_u32,
200                                            s->out2in.addr.as_u32,
201                                            s->in2out.protocol,
202                                            s->in2out.port,
203                                            s->out2in.port,
204                                            s->in2out.fib_index);
205
206       nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
207                              &s->in2out.addr, s->in2out.port,
208                              &s->ext_host_nat_addr, s->ext_host_nat_port,
209                              &s->out2in.addr, s->out2in.port,
210                              &s->ext_host_addr, s->ext_host_port,
211                              s->in2out.protocol, is_twice_nat_session (s));
212
213       nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
214                    s->ext_host_port, s->out2in.protocol, s->out2in.fib_index,
215                    ctx->thread_index);
216
217       if (is_twice_nat_session (s))
218         {
219           for (i = 0; i < vec_len (sm->twice_nat_addresses); i++)
220             {
221               key.protocol = s->in2out.protocol;
222               key.port = s->ext_host_nat_port;
223               a = sm->twice_nat_addresses + i;
224               if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32)
225                 {
226                   snat_free_outside_address_and_port (sm->twice_nat_addresses,
227                                                       ctx->thread_index,
228                                                       &key);
229                   break;
230                 }
231             }
232         }
233
234       if (snat_is_session_static (s))
235         goto delete;
236
237       snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
238                                           &s->out2in);
239     delete:
240       nat44_delete_session (sm, s, ctx->thread_index);
241       return 1;
242     }
243
244   return 0;
245 }
246 #endif
247
248 static inline u32
249 icmp_in2out_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0,
250                           ip4_header_t * ip0, icmp46_header_t * icmp0,
251                           u32 sw_if_index0, u32 rx_fib_index0,
252                           vlib_node_runtime_t * node, u32 next0, f64 now,
253                           u32 thread_index, snat_session_t ** p_s0)
254 {
255   next0 = icmp_in2out (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
256                        next0, thread_index, p_s0, 0);
257   snat_session_t *s0 = *p_s0;
258   if (PREDICT_TRUE (next0 != NAT_IN2OUT_ED_NEXT_DROP && s0))
259     {
260       /* Accounting */
261       nat44_session_update_counters (s0, now,
262                                      vlib_buffer_length_in_chain
263                                      (sm->vlib_main, b0), thread_index);
264       /* Per-user LRU list maintenance */
265       nat44_session_update_lru (sm, s0, thread_index);
266     }
267   return next0;
268 }
269
270 static u32
271 slow_path_ed (snat_main_t * sm,
272               vlib_buffer_t * b,
273               u32 rx_fib_index,
274               clib_bihash_kv_16_8_t * kv,
275               snat_session_t ** sessionp,
276               vlib_node_runtime_t * node, u32 next, u32 thread_index, f64 now,
277               tcp_header_t * tcp)
278 {
279   snat_session_t *s = 0;
280   snat_user_t *u;
281   snat_session_key_t key0, key1;
282   lb_nat_type_t lb = 0, is_sm = 0;
283   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
284   nat_ed_ses_key_t *key = (nat_ed_ses_key_t *) kv->key;
285   u32 proto = ip_proto_to_snat_proto (key->proto);
286   nat_outside_fib_t *outside_fib;
287   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
288   u8 identity_nat;
289   fib_prefix_t pfx = {
290     .fp_proto = FIB_PROTOCOL_IP4,
291     .fp_len = 32,
292     .fp_addr = {
293                 .ip4.as_u32 = key->r_addr.as_u32,
294                 },
295   };
296   nat44_is_idle_session_ctx_t ctx;
297
298   if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index)))
299     {
300       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
301       nat_ipfix_logging_max_sessions (thread_index, sm->max_translations);
302       nat_elog_notice ("maximum sessions exceeded");
303       return NAT_IN2OUT_ED_NEXT_DROP;
304     }
305
306   key0.addr = key->l_addr;
307   key0.port = key->l_port;
308   key1.protocol = key0.protocol = proto;
309   key0.fib_index = rx_fib_index;
310   key1.fib_index = sm->outside_fib_index;
311   /* First try to match static mapping by local address and port */
312   if (snat_static_mapping_match
313       (sm, key0, &key1, 0, 0, 0, &lb, 0, &identity_nat))
314     {
315       /* Try to create dynamic translation */
316       if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index,
317                                                thread_index, &key1,
318                                                sm->port_per_thread,
319                                                tsm->snat_thread_index))
320         {
321           nat_elog_notice ("addresses exhausted");
322           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
323           return NAT_IN2OUT_ED_NEXT_DROP;
324         }
325     }
326   else
327     {
328       if (PREDICT_FALSE (identity_nat))
329         {
330           *sessionp = s;
331           return next;
332         }
333
334       is_sm = 1;
335     }
336
337   if (proto == SNAT_PROTOCOL_TCP)
338     {
339       if (!tcp_is_init (tcp))
340         {
341           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
342           return NAT_IN2OUT_ED_NEXT_DROP;
343         }
344     }
345
346   u = nat_user_get_or_create (sm, &key->l_addr, rx_fib_index, thread_index);
347   if (!u)
348     {
349       nat_elog_warn ("create NAT user failed");
350       if (!is_sm)
351         snat_free_outside_address_and_port (sm->addresses,
352                                             thread_index, &key1);
353       return NAT_IN2OUT_ED_NEXT_DROP;
354     }
355
356   s = nat_ed_session_alloc (sm, u, thread_index, now);
357   if (!s)
358     {
359       nat44_delete_user_with_no_session (sm, u, thread_index);
360       nat_elog_warn ("create NAT session failed");
361       if (!is_sm)
362         snat_free_outside_address_and_port (sm->addresses,
363                                             thread_index, &key1);
364       return NAT_IN2OUT_ED_NEXT_DROP;
365     }
366
367   user_session_increment (sm, u, is_sm);
368   if (is_sm)
369     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
370   if (lb)
371     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
372   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
373   s->ext_host_addr = key->r_addr;
374   s->ext_host_port = key->r_port;
375   s->in2out = key0;
376   s->out2in = key1;
377   s->out2in.protocol = key0.protocol;
378
379   switch (vec_len (sm->outside_fibs))
380     {
381     case 0:
382       s->out2in.fib_index = sm->outside_fib_index;
383       break;
384     case 1:
385       s->out2in.fib_index = sm->outside_fibs[0].fib_index;
386       break;
387     default:
388       /* *INDENT-OFF* */
389       vec_foreach (outside_fib, sm->outside_fibs)
390        {
391           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
392           if (FIB_NODE_INDEX_INVALID != fei)
393             {
394               if (fib_entry_get_resolving_interface (fei) != ~0)
395                 {
396                   s->out2in.fib_index = outside_fib->fib_index;
397                   break;
398                 }
399             }
400         }
401       /* *INDENT-ON* */
402       break;
403     }
404
405   /* Add to lookup tables */
406   kv->value = s - tsm->sessions;
407   ctx.now = now;
408   ctx.thread_index = thread_index;
409   if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, kv,
410                                                nat44_i2o_ed_is_idle_session_cb,
411                                                &ctx))
412     nat_elog_notice ("in2out-ed key add failed");
413
414   make_ed_kv (kv, &key1.addr, &key->r_addr, key->proto, s->out2in.fib_index,
415               key1.port, key->r_port);
416   kv->value = s - tsm->sessions;
417   if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->out2in_ed, kv,
418                                                nat44_o2i_ed_is_idle_session_cb,
419                                                &ctx))
420     nat_elog_notice ("out2in-ed key add failed");
421
422   *sessionp = s;
423
424   /* log NAT event */
425   snat_ipfix_logging_nat44_ses_create (thread_index,
426                                        s->in2out.addr.as_u32,
427                                        s->out2in.addr.as_u32,
428                                        s->in2out.protocol,
429                                        s->in2out.port,
430                                        s->out2in.port, s->in2out.fib_index);
431
432   nat_syslog_nat44_sadd (s->user_index, s->in2out.fib_index,
433                          &s->in2out.addr, s->in2out.port,
434                          &s->ext_host_nat_addr, s->ext_host_nat_port,
435                          &s->out2in.addr, s->out2in.port,
436                          &s->ext_host_addr, s->ext_host_port,
437                          s->in2out.protocol, 0);
438
439   nat_ha_sadd (&s->in2out.addr, s->in2out.port, &s->out2in.addr,
440                s->out2in.port, &s->ext_host_addr, s->ext_host_port,
441                &s->ext_host_nat_addr, s->ext_host_nat_port,
442                s->in2out.protocol, s->in2out.fib_index, s->flags,
443                thread_index, 0);
444
445   return next;
446 }
447
448 static_always_inline int
449 nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t * node,
450                         u32 sw_if_index, ip4_header_t * ip, u32 proto,
451                         u32 rx_fib_index, u32 thread_index)
452 {
453   udp_header_t *udp = ip4_next_header (ip);
454   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
455   clib_bihash_kv_16_8_t kv, value;
456   snat_session_key_t key0, key1;
457
458   make_ed_kv (&kv, &ip->dst_address, &ip->src_address, ip->protocol,
459               sm->outside_fib_index, udp->dst_port, udp->src_port);
460
461   /* NAT packet aimed at external address if */
462   /* has active sessions */
463   if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
464     {
465       key0.addr = ip->dst_address;
466       key0.port = udp->dst_port;
467       key0.protocol = proto;
468       key0.fib_index = sm->outside_fib_index;
469       /* or is static mappings */
470       if (!snat_static_mapping_match (sm, key0, &key1, 1, 0, 0, 0, 0, 0))
471         return 0;
472     }
473   else
474     return 0;
475
476   if (sm->forwarding_enabled)
477     return 1;
478
479   return snat_not_translate_fast (sm, node, sw_if_index, ip, proto,
480                                   rx_fib_index);
481 }
482
483 static_always_inline int
484 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
485                                       u32 thread_index, f64 now,
486                                       vlib_main_t * vm, vlib_buffer_t * b)
487 {
488   nat_ed_ses_key_t key;
489   clib_bihash_kv_16_8_t kv, value;
490   udp_header_t *udp;
491   snat_session_t *s = 0;
492   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
493
494   if (!sm->forwarding_enabled)
495     return 0;
496
497   if (ip->protocol == IP_PROTOCOL_ICMP)
498     {
499       key.as_u64[0] = key.as_u64[1] = 0;
500       if (icmp_get_ed_key (ip, &key))
501         return 0;
502       key.fib_index = 0;
503       kv.key[0] = key.as_u64[0];
504       kv.key[1] = key.as_u64[1];
505     }
506   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
507     {
508       udp = ip4_next_header (ip);
509       make_ed_kv (&kv, &ip->src_address, &ip->dst_address, ip->protocol, 0,
510                   udp->src_port, udp->dst_port);
511     }
512   else
513     {
514       make_ed_kv (&kv, &ip->src_address, &ip->dst_address, ip->protocol, 0, 0,
515                   0);
516     }
517
518   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
519     {
520       s = pool_elt_at_index (tsm->sessions, value.value);
521       if (is_fwd_bypass_session (s))
522         {
523           if (ip->protocol == IP_PROTOCOL_TCP)
524             {
525               tcp_header_t *tcp = ip4_next_header (ip);
526               if (nat44_set_tcp_session_state_i2o (sm, s, tcp, thread_index))
527                 return 1;
528             }
529           /* Accounting */
530           nat44_session_update_counters (s, now,
531                                          vlib_buffer_length_in_chain (vm, b),
532                                          thread_index);
533           /* Per-user LRU list maintenance */
534           nat44_session_update_lru (sm, s, thread_index);
535           return 1;
536         }
537       else
538         return 0;
539     }
540
541   return 0;
542 }
543
544 static_always_inline int
545 nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip,
546                                        u8 proto, u16 src_port, u16 dst_port,
547                                        u32 thread_index, u32 rx_sw_if_index,
548                                        u32 tx_sw_if_index)
549 {
550   clib_bihash_kv_16_8_t kv, value;
551   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
552   snat_interface_t *i;
553   snat_session_t *s;
554   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
555   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
556
557   /* src NAT check */
558   make_ed_kv (&kv, &ip->src_address, &ip->dst_address, proto, tx_fib_index,
559               src_port, dst_port);
560   if (!clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
561     {
562       s = pool_elt_at_index (tsm->sessions, value.value);
563       if (nat44_is_ses_closed (s))
564         {
565           nat_free_session_data (sm, s, thread_index, 0);
566           nat44_delete_session (sm, s, thread_index);
567         }
568       else
569         s->flags |= SNAT_SESSION_FLAG_OUTPUT_FEATURE;
570       return 1;
571     }
572
573   /* dst NAT check */
574   make_ed_kv (&kv, &ip->dst_address, &ip->src_address, proto, rx_fib_index,
575               dst_port, src_port);
576   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
577     {
578       s = pool_elt_at_index (tsm->sessions, value.value);
579       if (is_fwd_bypass_session (s))
580         return 0;
581
582       /* hairpinning */
583       /* *INDENT-OFF* */
584       pool_foreach (i, sm->output_feature_interfaces,
585       ({
586         if ((nat_interface_is_inside (i)) && (rx_sw_if_index == i->sw_if_index))
587            return 0;
588       }));
589       /* *INDENT-ON* */
590       return 1;
591     }
592
593   return 0;
594 }
595
596 #ifndef CLIB_MARCH_VARIANT
597 u32
598 icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node,
599                       u32 thread_index, vlib_buffer_t * b, ip4_header_t * ip,
600                       u8 * p_proto, snat_session_key_t * p_value,
601                       u8 * p_dont_translate, void *d, void *e)
602 {
603   icmp46_header_t *icmp;
604   u32 sw_if_index;
605   u32 rx_fib_index;
606   nat_ed_ses_key_t key;
607   snat_session_t *s = 0;
608   u8 dont_translate = 0;
609   clib_bihash_kv_16_8_t kv, value;
610   u32 next = ~0;
611   int err;
612   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
613
614   icmp = (icmp46_header_t *) ip4_next_header (ip);
615   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
616   rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
617
618   key.as_u64[0] = key.as_u64[1] = 0;
619   err = icmp_get_ed_key (ip, &key);
620   if (err != 0)
621     {
622       b->error = node->errors[err];
623       next = NAT_IN2OUT_ED_NEXT_DROP;
624       goto out;
625     }
626   key.fib_index = rx_fib_index;
627
628   kv.key[0] = key.as_u64[0];
629   kv.key[1] = key.as_u64[1];
630
631   if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
632     {
633       if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
634         {
635           if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (sm, ip,
636                                                                     key.proto,
637                                                                     key.
638                                                                     l_port,
639                                                                     key.
640                                                                     r_port,
641                                                                     thread_index,
642                                                                     sw_if_index,
643                                                                     vnet_buffer
644                                                                     (b)->
645                                                                     sw_if_index
646                                                                     [VLIB_TX])))
647             {
648               dont_translate = 1;
649               goto out;
650             }
651         }
652       else
653         {
654           if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, sw_if_index,
655                                                      ip, SNAT_PROTOCOL_ICMP,
656                                                      rx_fib_index,
657                                                      thread_index)))
658             {
659               dont_translate = 1;
660               goto out;
661             }
662         }
663
664       if (PREDICT_FALSE (icmp_is_error_message (icmp)))
665         {
666           b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
667           next = NAT_IN2OUT_ED_NEXT_DROP;
668           goto out;
669         }
670
671       next = slow_path_ed (sm, b, rx_fib_index, &kv, &s, node, next,
672                            thread_index, vlib_time_now (sm->vlib_main), 0);
673
674       if (PREDICT_FALSE (next == NAT_IN2OUT_ED_NEXT_DROP))
675         goto out;
676
677       if (!s)
678         {
679           dont_translate = 1;
680           goto out;
681         }
682     }
683   else
684     {
685       if (PREDICT_FALSE (icmp->type != ICMP4_echo_request &&
686                          icmp->type != ICMP4_echo_reply &&
687                          !icmp_is_error_message (icmp)))
688         {
689           b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
690           next = NAT_IN2OUT_ED_NEXT_DROP;
691           goto out;
692         }
693
694       s = pool_elt_at_index (tsm->sessions, value.value);
695     }
696
697   *p_proto = ip_proto_to_snat_proto (key.proto);
698 out:
699   if (s)
700     *p_value = s->out2in;
701   *p_dont_translate = dont_translate;
702   if (d)
703     *(snat_session_t **) d = s;
704   return next;
705 }
706 #endif
707
708 static snat_session_t *
709 nat44_ed_in2out_unknown_proto (snat_main_t * sm,
710                                vlib_buffer_t * b,
711                                ip4_header_t * ip,
712                                u32 rx_fib_index,
713                                u32 thread_index,
714                                f64 now,
715                                vlib_main_t * vm, vlib_node_runtime_t * node)
716 {
717   clib_bihash_kv_8_8_t kv, value;
718   clib_bihash_kv_16_8_t s_kv, s_value;
719   snat_static_mapping_t *m;
720   u32 old_addr, new_addr = 0;
721   ip_csum_t sum;
722   snat_user_t *u;
723   dlist_elt_t *head, *elt;
724   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
725   u32 elt_index, head_index, ses_index;
726   snat_session_t *s;
727   u32 outside_fib_index = sm->outside_fib_index;
728   int i;
729   u8 is_sm = 0;
730   nat_outside_fib_t *outside_fib;
731   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
732   fib_prefix_t pfx = {
733     .fp_proto = FIB_PROTOCOL_IP4,
734     .fp_len = 32,
735     .fp_addr = {
736                 .ip4.as_u32 = ip->dst_address.as_u32,
737                 },
738   };
739
740   switch (vec_len (sm->outside_fibs))
741     {
742     case 0:
743       outside_fib_index = sm->outside_fib_index;
744       break;
745     case 1:
746       outside_fib_index = sm->outside_fibs[0].fib_index;
747       break;
748     default:
749       /* *INDENT-OFF* */
750       vec_foreach (outside_fib, sm->outside_fibs)
751         {
752           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
753           if (FIB_NODE_INDEX_INVALID != fei)
754             {
755               if (fib_entry_get_resolving_interface (fei) != ~0)
756                 {
757                   outside_fib_index = outside_fib->fib_index;
758                   break;
759                 }
760             }
761         }
762       /* *INDENT-ON* */
763       break;
764     }
765   old_addr = ip->src_address.as_u32;
766
767   make_ed_kv (&s_kv, &ip->src_address, &ip->dst_address, ip->protocol,
768               rx_fib_index, 0, 0);
769
770   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &s_kv, &s_value))
771     {
772       s = pool_elt_at_index (tsm->sessions, s_value.value);
773       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
774     }
775   else
776     {
777       if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index)))
778         {
779           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
780           nat_ipfix_logging_max_sessions (thread_index, sm->max_translations);
781           nat_elog_notice ("maximum sessions exceeded");
782           return 0;
783         }
784
785       u = nat_user_get_or_create (sm, &ip->src_address, rx_fib_index,
786                                   thread_index);
787       if (!u)
788         {
789           nat_elog_warn ("create NAT user failed");
790           return 0;
791         }
792
793       make_sm_kv (&kv, &ip->src_address, 0, rx_fib_index, 0);
794
795       /* Try to find static mapping first */
796       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
797         {
798           m = pool_elt_at_index (sm->static_mappings, value.value);
799           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
800           is_sm = 1;
801           goto create_ses;
802         }
803       /* Fallback to 3-tuple key */
804       else
805         {
806           /* Choose same out address as for TCP/UDP session to same destination */
807           head_index = u->sessions_per_user_list_head_index;
808           head = pool_elt_at_index (tsm->list_pool, head_index);
809           elt_index = head->next;
810           if (PREDICT_FALSE (elt_index == ~0))
811             ses_index = ~0;
812           else
813             {
814               elt = pool_elt_at_index (tsm->list_pool, elt_index);
815               ses_index = elt->value;
816             }
817
818           while (ses_index != ~0)
819             {
820               s = pool_elt_at_index (tsm->sessions, ses_index);
821               elt_index = elt->next;
822               elt = pool_elt_at_index (tsm->list_pool, elt_index);
823               ses_index = elt->value;
824
825               if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
826                 {
827                   new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
828
829                   make_ed_kv (&s_kv, &s->out2in.addr, &ip->dst_address,
830                               ip->protocol, outside_fib_index, 0, 0);
831                   if (clib_bihash_search_16_8
832                       (&tsm->out2in_ed, &s_kv, &s_value))
833                     goto create_ses;
834
835                   break;
836                 }
837             }
838
839           for (i = 0; i < vec_len (sm->addresses); i++)
840             {
841               make_ed_kv (&s_kv, &sm->addresses[i].addr, &ip->dst_address,
842                           ip->protocol, outside_fib_index, 0, 0);
843               if (clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
844                 {
845                   new_addr = ip->src_address.as_u32 =
846                     sm->addresses[i].addr.as_u32;
847                   goto create_ses;
848                 }
849             }
850           return 0;
851         }
852
853     create_ses:
854       s = nat_ed_session_alloc (sm, u, thread_index, now);
855       if (!s)
856         {
857           nat44_delete_user_with_no_session (sm, u, thread_index);
858           nat_elog_warn ("create NAT session failed");
859           return 0;
860         }
861
862       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
863       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
864       s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
865       s->out2in.addr.as_u32 = new_addr;
866       s->out2in.fib_index = outside_fib_index;
867       s->in2out.addr.as_u32 = old_addr;
868       s->in2out.fib_index = rx_fib_index;
869       s->in2out.port = s->out2in.port = ip->protocol;
870       if (is_sm)
871         s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
872       user_session_increment (sm, u, is_sm);
873
874       /* Add to lookup tables */
875       make_ed_kv (&s_kv, &s->in2out.addr, &ip->dst_address, ip->protocol,
876                   rx_fib_index, 0, 0);
877       s_kv.value = s - tsm->sessions;
878       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1))
879         nat_elog_notice ("in2out key add failed");
880
881       make_ed_kv (&s_kv, &s->out2in.addr, &ip->dst_address, ip->protocol,
882                   outside_fib_index, 0, 0);
883       s_kv.value = s - tsm->sessions;
884       if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &s_kv, 1))
885         nat_elog_notice ("out2in key add failed");
886     }
887
888   /* Update IP checksum */
889   sum = ip->checksum;
890   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
891   ip->checksum = ip_csum_fold (sum);
892
893   /* Accounting */
894   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
895                                  thread_index);
896   /* Per-user LRU list maintenance */
897   nat44_session_update_lru (sm, s, thread_index);
898
899   /* Hairpinning */
900   if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
901     nat44_ed_hairpinning_unknown_proto (sm, b, ip);
902
903   if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
904     vnet_buffer (b)->sw_if_index[VLIB_TX] = outside_fib_index;
905
906   return s;
907 }
908
909 static inline uword
910 nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
911                                 vlib_node_runtime_t * node,
912                                 vlib_frame_t * frame, int is_slow_path,
913                                 int is_output_feature)
914 {
915   u32 n_left_from, *from, *to_next, pkts_processed = 0, stats_node_index;
916   nat_in2out_ed_next_t next_index;
917   snat_main_t *sm = &snat_main;
918   f64 now = vlib_time_now (vm);
919   u32 thread_index = vm->thread_index;
920   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
921   u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets =
922     0, fragments = 0;
923
924   stats_node_index = is_slow_path ? sm->ed_in2out_slowpath_node_index :
925     sm->ed_in2out_node_index;
926
927   from = vlib_frame_vector_args (frame);
928   n_left_from = frame->n_vectors;
929   next_index = node->cached_next_index;
930
931   while (n_left_from > 0)
932     {
933       u32 n_left_to_next;
934
935       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
936
937       while (n_left_from >= 4 && n_left_to_next >= 2)
938         {
939           u32 bi0, bi1;
940           vlib_buffer_t *b0, *b1;
941           u32 next0, sw_if_index0, rx_fib_index0, iph_offset0 = 0, proto0,
942             new_addr0, old_addr0;
943           u32 next1, sw_if_index1, rx_fib_index1, iph_offset1 = 0, proto1,
944             new_addr1, old_addr1;
945           u16 old_port0, new_port0, old_port1, new_port1;
946           ip4_header_t *ip0, *ip1;
947           udp_header_t *udp0, *udp1;
948           tcp_header_t *tcp0, *tcp1;
949           icmp46_header_t *icmp0, *icmp1;
950           snat_session_t *s0 = 0, *s1 = 0;
951           clib_bihash_kv_16_8_t kv0, value0, kv1, value1;
952           ip_csum_t sum0, sum1;
953
954           /* Prefetch next iteration. */
955           {
956             vlib_buffer_t *p2, *p3;
957
958             p2 = vlib_get_buffer (vm, from[2]);
959             p3 = vlib_get_buffer (vm, from[3]);
960
961             vlib_prefetch_buffer_header (p2, LOAD);
962             vlib_prefetch_buffer_header (p3, LOAD);
963
964             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
965             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
966           }
967
968           /* speculatively enqueue b0 and b1 to the current next frame */
969           to_next[0] = bi0 = from[0];
970           to_next[1] = bi1 = from[1];
971           from += 2;
972           to_next += 2;
973           n_left_from -= 2;
974           n_left_to_next -= 2;
975
976           b0 = vlib_get_buffer (vm, bi0);
977           b1 = vlib_get_buffer (vm, bi1);
978
979           next0 = NAT_IN2OUT_ED_NEXT_LOOKUP;
980
981           if (is_output_feature)
982             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
983
984           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
985                                   iph_offset0);
986
987           sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
988           rx_fib_index0 =
989             fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
990                                                  sw_if_index0);
991
992           if (PREDICT_FALSE (ip0->ttl == 1))
993             {
994               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
995               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
996                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
997                                            0);
998               next0 = NAT_IN2OUT_ED_NEXT_ICMP_ERROR;
999               goto trace00;
1000             }
1001
1002           udp0 = ip4_next_header (ip0);
1003           tcp0 = (tcp_header_t *) udp0;
1004           icmp0 = (icmp46_header_t *) udp0;
1005           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1006
1007           if (is_slow_path)
1008             {
1009               if (PREDICT_FALSE (proto0 == ~0))
1010                 {
1011                   s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0,
1012                                                       rx_fib_index0,
1013                                                       thread_index, now, vm,
1014                                                       node);
1015                   if (!s0)
1016                     next0 = NAT_IN2OUT_ED_NEXT_DROP;
1017                   other_packets++;
1018                   goto trace00;
1019                 }
1020
1021               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1022                 {
1023                   next0 = icmp_in2out_ed_slow_path
1024                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1025                      next0, now, thread_index, &s0);
1026                   icmp_packets++;
1027                   goto trace00;
1028                 }
1029             }
1030           else
1031             {
1032               if (PREDICT_FALSE (proto0 == ~0))
1033                 {
1034                   next0 = NAT_IN2OUT_ED_NEXT_SLOW_PATH;
1035                   goto trace00;
1036                 }
1037
1038               if (ip4_is_fragment (ip0))
1039                 {
1040                   next0 = NAT_IN2OUT_ED_NEXT_REASS;
1041                   fragments++;
1042                   goto trace00;
1043                 }
1044
1045               if (is_output_feature)
1046                 {
1047                   if (PREDICT_FALSE
1048                       (nat_not_translate_output_feature_fwd
1049                        (sm, ip0, thread_index, now, vm, b0)))
1050                     goto trace00;
1051                 }
1052
1053               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1054                 {
1055                   next0 = NAT_IN2OUT_ED_NEXT_SLOW_PATH;
1056                   goto trace00;
1057                 }
1058             }
1059
1060           make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address,
1061                       ip0->protocol, rx_fib_index0, udp0->src_port,
1062                       udp0->dst_port);
1063
1064           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
1065             {
1066               if (is_slow_path)
1067                 {
1068                   if (is_output_feature)
1069                     {
1070                       if (PREDICT_FALSE
1071                           (nat44_ed_not_translate_output_feature
1072                            (sm, ip0, ip0->protocol, udp0->src_port,
1073                             udp0->dst_port, thread_index, sw_if_index0,
1074                             vnet_buffer (b0)->sw_if_index[VLIB_TX])))
1075                         goto trace00;
1076                     }
1077                   else
1078                     {
1079                       if (PREDICT_FALSE (nat44_ed_not_translate (sm, node,
1080                                                                  sw_if_index0,
1081                                                                  ip0, proto0,
1082                                                                  rx_fib_index0,
1083                                                                  thread_index)))
1084                         goto trace00;
1085                     }
1086
1087                   next0 =
1088                     slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node,
1089                                   next0, thread_index, now, tcp0);
1090
1091                   if (PREDICT_FALSE (next0 == NAT_IN2OUT_ED_NEXT_DROP))
1092                     goto trace00;
1093
1094                   if (PREDICT_FALSE (!s0))
1095                     goto trace00;
1096                 }
1097               else
1098                 {
1099                   next0 = NAT_IN2OUT_ED_NEXT_SLOW_PATH;
1100                   goto trace00;
1101                 }
1102             }
1103           else
1104             {
1105               s0 = pool_elt_at_index (tsm->sessions, value0.value);
1106             }
1107
1108           b0->flags |= VNET_BUFFER_F_IS_NATED;
1109
1110           if (!is_output_feature)
1111             vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1112
1113           old_addr0 = ip0->src_address.as_u32;
1114           new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
1115           sum0 = ip0->checksum;
1116           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1117                                  src_address);
1118           if (PREDICT_FALSE (is_twice_nat_session (s0)))
1119             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1120                                    s0->ext_host_addr.as_u32, ip4_header_t,
1121                                    dst_address);
1122           ip0->checksum = ip_csum_fold (sum0);
1123
1124           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
1125             {
1126               old_port0 = tcp0->src_port;
1127               new_port0 = tcp0->src_port = s0->out2in.port;
1128
1129               sum0 = tcp0->checksum;
1130               sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1131                                      dst_address);
1132               sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
1133                                      length);
1134               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1135                 {
1136                   sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1137                                          s0->ext_host_addr.as_u32,
1138                                          ip4_header_t, dst_address);
1139                   sum0 = ip_csum_update (sum0, tcp0->dst_port,
1140                                          s0->ext_host_port, ip4_header_t,
1141                                          length);
1142                   tcp0->dst_port = s0->ext_host_port;
1143                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1144                 }
1145               mss_clamping (sm, tcp0, &sum0);
1146               tcp0->checksum = ip_csum_fold (sum0);
1147               tcp_packets++;
1148               if (nat44_set_tcp_session_state_i2o
1149                   (sm, s0, tcp0, thread_index))
1150                 goto trace00;
1151             }
1152           else
1153             {
1154               udp0->src_port = s0->out2in.port;
1155               udp0->checksum = 0;
1156               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1157                 {
1158                   udp0->dst_port = s0->ext_host_port;
1159                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1160                 }
1161               udp_packets++;
1162             }
1163
1164           /* Accounting */
1165           nat44_session_update_counters (s0, now,
1166                                          vlib_buffer_length_in_chain (vm,
1167                                                                       b0),
1168                                          thread_index);
1169           /* Per-user LRU list maintenance */
1170           nat44_session_update_lru (sm, s0, thread_index);
1171
1172         trace00:
1173           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1174                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1175             {
1176               nat_in2out_ed_trace_t *t =
1177                 vlib_add_trace (vm, node, b0, sizeof (*t));
1178               t->is_slow_path = is_slow_path;
1179               t->sw_if_index = sw_if_index0;
1180               t->next_index = next0;
1181               t->session_index = ~0;
1182               if (s0)
1183                 t->session_index = s0 - tsm->sessions;
1184             }
1185
1186           pkts_processed += next0 == NAT_IN2OUT_ED_NEXT_LOOKUP;
1187
1188
1189           next1 = NAT_IN2OUT_ED_NEXT_LOOKUP;
1190
1191           if (is_output_feature)
1192             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1193
1194           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1195                                   iph_offset1);
1196
1197           sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
1198           rx_fib_index1 =
1199             fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1200                                                  sw_if_index1);
1201
1202           if (PREDICT_FALSE (ip1->ttl == 1))
1203             {
1204               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1205               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1206                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1207                                            0);
1208               next1 = NAT_IN2OUT_ED_NEXT_ICMP_ERROR;
1209               goto trace01;
1210             }
1211
1212           udp1 = ip4_next_header (ip1);
1213           tcp1 = (tcp_header_t *) udp1;
1214           icmp1 = (icmp46_header_t *) udp1;
1215           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1216
1217           if (is_slow_path)
1218             {
1219               if (PREDICT_FALSE (proto1 == ~0))
1220                 {
1221                   s1 = nat44_ed_in2out_unknown_proto (sm, b1, ip1,
1222                                                       rx_fib_index1,
1223                                                       thread_index, now, vm,
1224                                                       node);
1225                   if (!s1)
1226                     next1 = NAT_IN2OUT_ED_NEXT_DROP;
1227                   other_packets++;
1228                   goto trace01;
1229                 }
1230
1231               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1232                 {
1233                   next1 = icmp_in2out_ed_slow_path
1234                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1235                      next1, now, thread_index, &s1);
1236                   icmp_packets++;
1237                   goto trace01;
1238                 }
1239             }
1240           else
1241             {
1242               if (PREDICT_FALSE (proto1 == ~0))
1243                 {
1244                   next1 = NAT_IN2OUT_ED_NEXT_SLOW_PATH;
1245                   goto trace01;
1246                 }
1247
1248               if (ip4_is_fragment (ip1))
1249                 {
1250                   next1 = NAT_IN2OUT_ED_NEXT_REASS;
1251                   fragments++;
1252                   goto trace01;
1253                 }
1254
1255               if (is_output_feature)
1256                 {
1257                   if (PREDICT_FALSE
1258                       (nat_not_translate_output_feature_fwd
1259                        (sm, ip1, thread_index, now, vm, b1)))
1260                     goto trace01;
1261                 }
1262
1263               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1264                 {
1265                   next1 = NAT_IN2OUT_ED_NEXT_SLOW_PATH;
1266                   goto trace01;
1267                 }
1268             }
1269
1270           make_ed_kv (&kv1, &ip1->src_address, &ip1->dst_address,
1271                       ip1->protocol, rx_fib_index1, udp1->src_port,
1272                       udp1->dst_port);
1273
1274           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv1, &value1))
1275             {
1276               if (is_slow_path)
1277                 {
1278                   if (is_output_feature)
1279                     {
1280                       if (PREDICT_FALSE
1281                           (nat44_ed_not_translate_output_feature
1282                            (sm, ip1, ip1->protocol, udp1->src_port,
1283                             udp1->dst_port, thread_index, sw_if_index1,
1284                             vnet_buffer (b1)->sw_if_index[VLIB_TX])))
1285                         goto trace01;
1286                     }
1287                   else
1288                     {
1289                       if (PREDICT_FALSE (nat44_ed_not_translate (sm, node,
1290                                                                  sw_if_index1,
1291                                                                  ip1, proto1,
1292                                                                  rx_fib_index1,
1293                                                                  thread_index)))
1294                         goto trace01;
1295                     }
1296
1297                   next1 =
1298                     slow_path_ed (sm, b1, rx_fib_index1, &kv1, &s1, node,
1299                                   next1, thread_index, now, tcp1);
1300
1301                   if (PREDICT_FALSE (next1 == NAT_IN2OUT_ED_NEXT_DROP))
1302                     goto trace01;
1303
1304                   if (PREDICT_FALSE (!s1))
1305                     goto trace01;
1306                 }
1307               else
1308                 {
1309                   next1 = NAT_IN2OUT_ED_NEXT_SLOW_PATH;
1310                   goto trace01;
1311                 }
1312             }
1313           else
1314             {
1315               s1 = pool_elt_at_index (tsm->sessions, value1.value);
1316             }
1317
1318           b1->flags |= VNET_BUFFER_F_IS_NATED;
1319
1320           if (!is_output_feature)
1321             vnet_buffer (b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1322
1323           old_addr1 = ip1->src_address.as_u32;
1324           new_addr1 = ip1->src_address.as_u32 = s1->out2in.addr.as_u32;
1325           sum1 = ip1->checksum;
1326           sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
1327                                  src_address);
1328           if (PREDICT_FALSE (is_twice_nat_session (s1)))
1329             sum1 = ip_csum_update (sum1, ip1->dst_address.as_u32,
1330                                    s1->ext_host_addr.as_u32, ip4_header_t,
1331                                    dst_address);
1332           ip1->checksum = ip_csum_fold (sum1);
1333
1334           if (PREDICT_TRUE (proto1 == SNAT_PROTOCOL_TCP))
1335             {
1336               old_port1 = tcp1->src_port;
1337               new_port1 = tcp1->src_port = s1->out2in.port;
1338
1339               sum1 = tcp1->checksum;
1340               sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
1341                                      dst_address);
1342               sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t,
1343                                      length);
1344               if (PREDICT_FALSE (is_twice_nat_session (s1)))
1345                 {
1346                   sum1 = ip_csum_update (sum1, ip1->dst_address.as_u32,
1347                                          s1->ext_host_addr.as_u32,
1348                                          ip4_header_t, dst_address);
1349                   sum1 = ip_csum_update (sum1, tcp1->dst_port,
1350                                          s1->ext_host_port, ip4_header_t,
1351                                          length);
1352                   tcp1->dst_port = s1->ext_host_port;
1353                   ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32;
1354                 }
1355               tcp1->checksum = ip_csum_fold (sum1);
1356               mss_clamping (sm, tcp1, &sum1);
1357               tcp_packets++;
1358               if (nat44_set_tcp_session_state_i2o
1359                   (sm, s1, tcp1, thread_index))
1360                 goto trace01;
1361             }
1362           else
1363             {
1364               udp1->src_port = s1->out2in.port;
1365               udp1->checksum = 0;
1366               if (PREDICT_FALSE (is_twice_nat_session (s1)))
1367                 {
1368                   udp1->dst_port = s1->ext_host_port;
1369                   ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32;
1370                 }
1371               udp_packets++;
1372             }
1373
1374           /* Accounting */
1375           nat44_session_update_counters (s1, now,
1376                                          vlib_buffer_length_in_chain (vm, b1),
1377                                          thread_index);
1378           /* Per-user LRU list maintenance */
1379           nat44_session_update_lru (sm, s1, thread_index);
1380
1381         trace01:
1382           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1383                              && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1384             {
1385               nat_in2out_ed_trace_t *t =
1386                 vlib_add_trace (vm, node, b1, sizeof (*t));
1387               t->is_slow_path = is_slow_path;
1388               t->sw_if_index = sw_if_index1;
1389               t->next_index = next1;
1390               t->session_index = ~0;
1391               if (s1)
1392                 t->session_index = s1 - tsm->sessions;
1393             }
1394
1395           pkts_processed += next1 == NAT_IN2OUT_ED_NEXT_LOOKUP;
1396
1397           /* verify speculative enqueues, maybe switch current next frame */
1398           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1399                                            to_next, n_left_to_next,
1400                                            bi0, bi1, next0, next1);
1401         }
1402
1403       while (n_left_from > 0 && n_left_to_next > 0)
1404         {
1405           u32 bi0;
1406           vlib_buffer_t *b0;
1407           u32 next0, sw_if_index0, rx_fib_index0, iph_offset0 = 0, proto0,
1408             new_addr0, old_addr0;
1409           u16 old_port0, new_port0;
1410           ip4_header_t *ip0;
1411           udp_header_t *udp0;
1412           tcp_header_t *tcp0;
1413           icmp46_header_t *icmp0;
1414           snat_session_t *s0 = 0;
1415           clib_bihash_kv_16_8_t kv0, value0;
1416           ip_csum_t sum0;
1417
1418           /* speculatively enqueue b0 to the current next frame */
1419           bi0 = from[0];
1420           to_next[0] = bi0;
1421           from += 1;
1422           to_next += 1;
1423           n_left_from -= 1;
1424           n_left_to_next -= 1;
1425
1426           b0 = vlib_get_buffer (vm, bi0);
1427           next0 = NAT_IN2OUT_ED_NEXT_LOOKUP;
1428
1429           if (is_output_feature)
1430             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1431
1432           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1433                                   iph_offset0);
1434
1435           sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1436           rx_fib_index0 =
1437             fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1438                                                  sw_if_index0);
1439
1440           if (PREDICT_FALSE (ip0->ttl == 1))
1441             {
1442               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1443               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1444                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1445                                            0);
1446               next0 = NAT_IN2OUT_ED_NEXT_ICMP_ERROR;
1447               goto trace0;
1448             }
1449
1450           udp0 = ip4_next_header (ip0);
1451           tcp0 = (tcp_header_t *) udp0;
1452           icmp0 = (icmp46_header_t *) udp0;
1453           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1454
1455           if (is_slow_path)
1456             {
1457               if (PREDICT_FALSE (proto0 == ~0))
1458                 {
1459                   s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0,
1460                                                       rx_fib_index0,
1461                                                       thread_index, now, vm,
1462                                                       node);
1463                   if (!s0)
1464                     next0 = NAT_IN2OUT_ED_NEXT_DROP;
1465                   other_packets++;
1466                   goto trace0;
1467                 }
1468
1469               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1470                 {
1471                   next0 = icmp_in2out_ed_slow_path
1472                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1473                      next0, now, thread_index, &s0);
1474                   icmp_packets++;
1475                   goto trace0;
1476                 }
1477             }
1478           else
1479             {
1480               if (PREDICT_FALSE (proto0 == ~0))
1481                 {
1482                   next0 = NAT_IN2OUT_ED_NEXT_SLOW_PATH;
1483                   goto trace0;
1484                 }
1485
1486               if (ip4_is_fragment (ip0))
1487                 {
1488                   next0 = NAT_IN2OUT_ED_NEXT_REASS;
1489                   fragments++;
1490                   goto trace0;
1491                 }
1492
1493               if (is_output_feature)
1494                 {
1495                   if (PREDICT_FALSE
1496                       (nat_not_translate_output_feature_fwd
1497                        (sm, ip0, thread_index, now, vm, b0)))
1498                     goto trace0;
1499                 }
1500
1501               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1502                 {
1503                   next0 = NAT_IN2OUT_ED_NEXT_SLOW_PATH;
1504                   goto trace0;
1505                 }
1506             }
1507
1508           make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address,
1509                       ip0->protocol, rx_fib_index0, udp0->src_port,
1510                       udp0->dst_port);
1511
1512           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
1513             {
1514               if (is_slow_path)
1515                 {
1516                   if (is_output_feature)
1517                     {
1518                       if (PREDICT_FALSE
1519                           (nat44_ed_not_translate_output_feature
1520                            (sm, ip0, ip0->protocol, udp0->src_port,
1521                             udp0->dst_port, thread_index, sw_if_index0,
1522                             vnet_buffer (b0)->sw_if_index[VLIB_TX])))
1523                         goto trace0;
1524                     }
1525                   else
1526                     {
1527                       if (PREDICT_FALSE (nat44_ed_not_translate (sm, node,
1528                                                                  sw_if_index0,
1529                                                                  ip0, proto0,
1530                                                                  rx_fib_index0,
1531                                                                  thread_index)))
1532                         goto trace0;
1533                     }
1534
1535                   next0 =
1536                     slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node,
1537                                   next0, thread_index, now, tcp0);
1538
1539                   if (PREDICT_FALSE (next0 == NAT_IN2OUT_ED_NEXT_DROP))
1540                     goto trace0;
1541
1542                   if (PREDICT_FALSE (!s0))
1543                     goto trace0;
1544                 }
1545               else
1546                 {
1547                   next0 = NAT_IN2OUT_ED_NEXT_SLOW_PATH;
1548                   goto trace0;
1549                 }
1550             }
1551           else
1552             {
1553               s0 = pool_elt_at_index (tsm->sessions, value0.value);
1554             }
1555
1556           b0->flags |= VNET_BUFFER_F_IS_NATED;
1557
1558           if (!is_output_feature)
1559             vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1560
1561           old_addr0 = ip0->src_address.as_u32;
1562           new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
1563           sum0 = ip0->checksum;
1564           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1565                                  src_address);
1566           if (PREDICT_FALSE (is_twice_nat_session (s0)))
1567             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1568                                    s0->ext_host_addr.as_u32, ip4_header_t,
1569                                    dst_address);
1570           ip0->checksum = ip_csum_fold (sum0);
1571
1572           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
1573             {
1574               old_port0 = tcp0->src_port;
1575               new_port0 = tcp0->src_port = s0->out2in.port;
1576
1577               sum0 = tcp0->checksum;
1578               sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1579                                      dst_address);
1580               sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
1581                                      length);
1582               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1583                 {
1584                   sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1585                                          s0->ext_host_addr.as_u32,
1586                                          ip4_header_t, dst_address);
1587                   sum0 = ip_csum_update (sum0, tcp0->dst_port,
1588                                          s0->ext_host_port, ip4_header_t,
1589                                          length);
1590                   tcp0->dst_port = s0->ext_host_port;
1591                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1592                 }
1593               mss_clamping (sm, tcp0, &sum0);
1594               tcp0->checksum = ip_csum_fold (sum0);
1595               tcp_packets++;
1596               if (nat44_set_tcp_session_state_i2o
1597                   (sm, s0, tcp0, thread_index))
1598                 goto trace0;
1599             }
1600           else
1601             {
1602               udp0->src_port = s0->out2in.port;
1603               udp0->checksum = 0;
1604               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1605                 {
1606                   udp0->dst_port = s0->ext_host_port;
1607                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1608                 }
1609               udp_packets++;
1610             }
1611
1612           /* Accounting */
1613           nat44_session_update_counters (s0, now,
1614                                          vlib_buffer_length_in_chain (vm, b0),
1615                                          thread_index);
1616           /* Per-user LRU list maintenance */
1617           nat44_session_update_lru (sm, s0, thread_index);
1618
1619         trace0:
1620           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1621                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1622             {
1623               nat_in2out_ed_trace_t *t =
1624                 vlib_add_trace (vm, node, b0, sizeof (*t));
1625               t->is_slow_path = is_slow_path;
1626               t->sw_if_index = sw_if_index0;
1627               t->next_index = next0;
1628               t->session_index = ~0;
1629               if (s0)
1630                 t->session_index = s0 - tsm->sessions;
1631             }
1632
1633           pkts_processed += next0 == NAT_IN2OUT_ED_NEXT_LOOKUP;
1634
1635           /* verify speculative enqueue, maybe switch current next frame */
1636           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1637                                            to_next, n_left_to_next,
1638                                            bi0, next0);
1639         }
1640
1641       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1642     }
1643
1644   vlib_node_increment_counter (vm, stats_node_index,
1645                                NAT_IN2OUT_ED_ERROR_IN2OUT_PACKETS,
1646                                pkts_processed);
1647   vlib_node_increment_counter (vm, stats_node_index,
1648                                NAT_IN2OUT_ED_ERROR_TCP_PACKETS, tcp_packets);
1649   vlib_node_increment_counter (vm, stats_node_index,
1650                                NAT_IN2OUT_ED_ERROR_UDP_PACKETS, tcp_packets);
1651   vlib_node_increment_counter (vm, stats_node_index,
1652                                NAT_IN2OUT_ED_ERROR_ICMP_PACKETS,
1653                                icmp_packets);
1654   vlib_node_increment_counter (vm, stats_node_index,
1655                                NAT_IN2OUT_ED_ERROR_OTHER_PACKETS,
1656                                other_packets);
1657   vlib_node_increment_counter (vm, stats_node_index,
1658                                NAT_IN2OUT_ED_ERROR_FRAGMENTS, fragments);
1659
1660   return frame->n_vectors;
1661 }
1662
1663 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1664                                      vlib_node_runtime_t * node,
1665                                      vlib_frame_t * frame)
1666 {
1667   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 0, 0);
1668 }
1669
1670 /* *INDENT-OFF* */
1671 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1672   .name = "nat44-ed-in2out",
1673   .vector_size = sizeof (u32),
1674   .format_trace = format_nat_in2out_ed_trace,
1675   .type = VLIB_NODE_TYPE_INTERNAL,
1676   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1677   .error_strings = nat_in2out_ed_error_strings,
1678   .runtime_data_bytes = sizeof (snat_runtime_t),
1679   .n_next_nodes = NAT_IN2OUT_ED_N_NEXT,
1680   .next_nodes = {
1681     [NAT_IN2OUT_ED_NEXT_DROP] = "error-drop",
1682     [NAT_IN2OUT_ED_NEXT_LOOKUP] = "ip4-lookup",
1683     [NAT_IN2OUT_ED_NEXT_SLOW_PATH] = "nat44-ed-in2out-slowpath",
1684     [NAT_IN2OUT_ED_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1685     [NAT_IN2OUT_ED_NEXT_REASS] = "nat44-ed-in2out-reass",
1686   },
1687 };
1688 /* *INDENT-ON* */
1689
1690 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1691                                             vlib_node_runtime_t * node,
1692                                             vlib_frame_t * frame)
1693 {
1694   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 0, 1);
1695 }
1696
1697 /* *INDENT-OFF* */
1698 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1699   .name = "nat44-ed-in2out-output",
1700   .vector_size = sizeof (u32),
1701   .format_trace = format_nat_in2out_ed_trace,
1702   .type = VLIB_NODE_TYPE_INTERNAL,
1703   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1704   .error_strings = nat_in2out_ed_error_strings,
1705   .runtime_data_bytes = sizeof (snat_runtime_t),
1706   .n_next_nodes = NAT_IN2OUT_ED_N_NEXT,
1707   .next_nodes = {
1708     [NAT_IN2OUT_ED_NEXT_DROP] = "error-drop",
1709     [NAT_IN2OUT_ED_NEXT_LOOKUP] = "interface-output",
1710     [NAT_IN2OUT_ED_NEXT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
1711     [NAT_IN2OUT_ED_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1712     [NAT_IN2OUT_ED_NEXT_REASS] = "nat44-ed-in2out-reass-output",
1713   },
1714 };
1715 /* *INDENT-ON* */
1716
1717 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1718                                               vlib_node_runtime_t * node,
1719                                               vlib_frame_t * frame)
1720 {
1721   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 1, 0);
1722 }
1723
1724 /* *INDENT-OFF* */
1725 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1726   .name = "nat44-ed-in2out-slowpath",
1727   .vector_size = sizeof (u32),
1728   .format_trace = format_nat_in2out_ed_trace,
1729   .type = VLIB_NODE_TYPE_INTERNAL,
1730   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1731   .error_strings = nat_in2out_ed_error_strings,
1732   .runtime_data_bytes = sizeof (snat_runtime_t),
1733   .n_next_nodes = NAT_IN2OUT_ED_N_NEXT,
1734   .next_nodes = {
1735     [NAT_IN2OUT_ED_NEXT_DROP] = "error-drop",
1736     [NAT_IN2OUT_ED_NEXT_LOOKUP] = "ip4-lookup",
1737     [NAT_IN2OUT_ED_NEXT_SLOW_PATH] = "nat44-ed-in2out-slowpath",
1738     [NAT_IN2OUT_ED_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1739     [NAT_IN2OUT_ED_NEXT_REASS] = "nat44-ed-in2out-reass",
1740   },
1741 };
1742 /* *INDENT-ON* */
1743
1744 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1745                                                      vlib_node_runtime_t *
1746                                                      node,
1747                                                      vlib_frame_t * frame)
1748 {
1749   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 1, 1);
1750 }
1751
1752 /* *INDENT-OFF* */
1753 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1754   .name = "nat44-ed-in2out-output-slowpath",
1755   .vector_size = sizeof (u32),
1756   .format_trace = format_nat_in2out_ed_trace,
1757   .type = VLIB_NODE_TYPE_INTERNAL,
1758   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1759   .error_strings = nat_in2out_ed_error_strings,
1760   .runtime_data_bytes = sizeof (snat_runtime_t),
1761   .n_next_nodes = NAT_IN2OUT_ED_N_NEXT,
1762   .next_nodes = {
1763     [NAT_IN2OUT_ED_NEXT_DROP] = "error-drop",
1764     [NAT_IN2OUT_ED_NEXT_LOOKUP] = "interface-output",
1765     [NAT_IN2OUT_ED_NEXT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
1766     [NAT_IN2OUT_ED_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1767     [NAT_IN2OUT_ED_NEXT_REASS] = "nat44-ed-in2out-reass",
1768   },
1769 };
1770 /* *INDENT-ON* */
1771
1772 static inline uword
1773 nat44_ed_in2out_reass_node_fn_inline (vlib_main_t * vm,
1774                                       vlib_node_runtime_t * node,
1775                                       vlib_frame_t * frame,
1776                                       int is_output_feature)
1777 {
1778   u32 n_left_from, *from, *to_next;
1779   nat_in2out_ed_next_t next_index;
1780   u32 pkts_processed = 0, cached_fragments = 0;
1781   snat_main_t *sm = &snat_main;
1782   f64 now = vlib_time_now (vm);
1783   u32 thread_index = vm->thread_index;
1784   snat_main_per_thread_data_t *per_thread_data =
1785     &sm->per_thread_data[thread_index];
1786   u32 *fragments_to_drop = 0;
1787   u32 *fragments_to_loopback = 0;
1788
1789   from = vlib_frame_vector_args (frame);
1790   n_left_from = frame->n_vectors;
1791   next_index = node->cached_next_index;
1792
1793   while (n_left_from > 0)
1794     {
1795       u32 n_left_to_next;
1796
1797       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1798
1799       while (n_left_from > 0 && n_left_to_next > 0)
1800         {
1801           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
1802           u32 iph_offset0 = 0;
1803           vlib_buffer_t *b0;
1804           u32 next0;
1805           u8 cached0 = 0;
1806           ip4_header_t *ip0 = 0;
1807           nat_reass_ip4_t *reass0;
1808           udp_header_t *udp0;
1809           tcp_header_t *tcp0;
1810           icmp46_header_t *icmp0;
1811           clib_bihash_kv_16_8_t kv0, value0;
1812           snat_session_t *s0 = 0;
1813           u16 old_port0, new_port0;
1814           ip_csum_t sum0;
1815
1816           /* speculatively enqueue b0 to the current next frame */
1817           bi0 = from[0];
1818           to_next[0] = bi0;
1819           from += 1;
1820           to_next += 1;
1821           n_left_from -= 1;
1822           n_left_to_next -= 1;
1823
1824           b0 = vlib_get_buffer (vm, bi0);
1825
1826           next0 = NAT_IN2OUT_ED_NEXT_LOOKUP;
1827
1828           sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1829           rx_fib_index0 =
1830             fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1831                                                  sw_if_index0);
1832
1833           if (PREDICT_FALSE (nat_reass_is_drop_frag (0)))
1834             {
1835               next0 = NAT_IN2OUT_ED_NEXT_DROP;
1836               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_DROP_FRAGMENT];
1837               goto trace0;
1838             }
1839
1840           if (is_output_feature)
1841             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1842
1843           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1844                                   iph_offset0);
1845
1846           udp0 = ip4_next_header (ip0);
1847           tcp0 = (tcp_header_t *) udp0;
1848           icmp0 = (icmp46_header_t *) udp0;
1849           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1850
1851           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
1852                                                  ip0->dst_address,
1853                                                  ip0->fragment_id,
1854                                                  ip0->protocol,
1855                                                  1, &fragments_to_drop);
1856
1857           if (PREDICT_FALSE (!reass0))
1858             {
1859               next0 = NAT_IN2OUT_ED_NEXT_DROP;
1860               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_REASS];
1861               nat_elog_notice ("maximum reassemblies exceeded");
1862               goto trace0;
1863             }
1864
1865           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1866             {
1867               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1868                 {
1869                   if (is_output_feature)
1870                     {
1871                       if (PREDICT_FALSE
1872                           (nat_not_translate_output_feature_fwd
1873                            (sm, ip0, thread_index, now, vm, b0)))
1874                         reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
1875                       goto trace0;
1876                     }
1877
1878                   next0 = icmp_in2out_ed_slow_path
1879                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1880                      next0, now, thread_index, &s0);
1881
1882                   if (PREDICT_TRUE (next0 != NAT_IN2OUT_ED_NEXT_DROP))
1883                     {
1884                       if (s0)
1885                         reass0->sess_index = s0 - per_thread_data->sessions;
1886                       else
1887                         reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
1888                       nat_ip4_reass_get_frags (reass0,
1889                                                &fragments_to_loopback);
1890                     }
1891
1892                   goto trace0;
1893                 }
1894
1895               make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address,
1896                           ip0->protocol, rx_fib_index0, udp0->src_port,
1897                           udp0->dst_port);
1898
1899               if (clib_bihash_search_16_8
1900                   (&per_thread_data->in2out_ed, &kv0, &value0))
1901                 {
1902                   if (is_output_feature)
1903                     {
1904                       if (PREDICT_FALSE
1905                           (nat44_ed_not_translate_output_feature
1906                            (sm, ip0, ip0->protocol, udp0->src_port,
1907                             udp0->dst_port, thread_index, sw_if_index0,
1908                             vnet_buffer (b0)->sw_if_index[VLIB_TX])))
1909                         {
1910                           reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
1911                           nat_ip4_reass_get_frags (reass0,
1912                                                    &fragments_to_loopback);
1913                           goto trace0;
1914                         }
1915                     }
1916                   else
1917                     {
1918                       if (PREDICT_FALSE (nat44_ed_not_translate (sm, node,
1919                                                                  sw_if_index0,
1920                                                                  ip0, proto0,
1921                                                                  rx_fib_index0,
1922                                                                  thread_index)))
1923                         {
1924                           reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
1925                           nat_ip4_reass_get_frags (reass0,
1926                                                    &fragments_to_loopback);
1927                           goto trace0;
1928                         }
1929                     }
1930
1931                   next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0,
1932                                         &s0, node, next0, thread_index, now,
1933                                         tcp0);
1934
1935                   if (PREDICT_FALSE (next0 == NAT_IN2OUT_ED_NEXT_DROP))
1936                     goto trace0;
1937
1938                   if (PREDICT_FALSE (!s0))
1939                     {
1940                       reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
1941                       goto trace0;
1942                     }
1943
1944                   reass0->sess_index = s0 - per_thread_data->sessions;
1945                 }
1946               else
1947                 {
1948                   s0 = pool_elt_at_index (per_thread_data->sessions,
1949                                           value0.value);
1950                   reass0->sess_index = value0.value;
1951                 }
1952               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
1953             }
1954           else
1955             {
1956               if (reass0->flags & NAT_REASS_FLAG_ED_DONT_TRANSLATE)
1957                 goto trace0;
1958               if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
1959                 {
1960                   if (nat_ip4_reass_add_fragment
1961                       (thread_index, reass0, bi0, &fragments_to_drop))
1962                     {
1963                       b0->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_FRAG];
1964                       nat_elog_notice
1965                         ("maximum fragments per reassembly exceeded");
1966                       next0 = NAT_IN2OUT_ED_NEXT_DROP;
1967                       goto trace0;
1968                     }
1969                   cached0 = 1;
1970                   goto trace0;
1971                 }
1972               s0 = pool_elt_at_index (per_thread_data->sessions,
1973                                       reass0->sess_index);
1974             }
1975
1976           old_addr0 = ip0->src_address.as_u32;
1977           ip0->src_address = s0->out2in.addr;
1978           new_addr0 = ip0->src_address.as_u32;
1979           if (!is_output_feature)
1980             vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1981
1982           sum0 = ip0->checksum;
1983           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1984                                  ip4_header_t,
1985                                  src_address /* changed member */ );
1986           if (PREDICT_FALSE (is_twice_nat_session (s0)))
1987             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1988                                    s0->ext_host_addr.as_u32, ip4_header_t,
1989                                    dst_address);
1990           ip0->checksum = ip_csum_fold (sum0);
1991
1992           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1993             {
1994               if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
1995                 {
1996                   old_port0 = tcp0->src_port;
1997                   tcp0->src_port = s0->out2in.port;
1998                   new_port0 = tcp0->src_port;
1999
2000                   sum0 = tcp0->checksum;
2001                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2002                                          ip4_header_t,
2003                                          dst_address /* changed member */ );
2004                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2005                                          ip4_header_t /* cheat */ ,
2006                                          length /* changed member */ );
2007                   if (PREDICT_FALSE (is_twice_nat_session (s0)))
2008                     {
2009                       sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
2010                                              s0->ext_host_addr.as_u32,
2011                                              ip4_header_t, dst_address);
2012                       sum0 = ip_csum_update (sum0, tcp0->dst_port,
2013                                              s0->ext_host_port, ip4_header_t,
2014                                              length);
2015                       tcp0->dst_port = s0->ext_host_port;
2016                       ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
2017                     }
2018                   tcp0->checksum = ip_csum_fold (sum0);
2019                 }
2020               else
2021                 {
2022                   old_port0 = udp0->src_port;
2023                   udp0->src_port = s0->out2in.port;
2024                   udp0->checksum = 0;
2025                   if (PREDICT_FALSE (is_twice_nat_session (s0)))
2026                     {
2027                       udp0->dst_port = s0->ext_host_port;
2028                       ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
2029                     }
2030                 }
2031             }
2032
2033           /* Hairpinning */
2034           nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port,
2035                                    s0->ext_host_port, proto0, 1);
2036
2037           /* Accounting */
2038           nat44_session_update_counters (s0, now,
2039                                          vlib_buffer_length_in_chain (vm, b0),
2040                                          thread_index);
2041           /* Per-user LRU list maintenance */
2042           nat44_session_update_lru (sm, s0, thread_index);
2043
2044         trace0:
2045           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2046                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2047             {
2048               nat44_reass_trace_t *t =
2049                 vlib_add_trace (vm, node, b0, sizeof (*t));
2050               t->cached = cached0;
2051               t->sw_if_index = sw_if_index0;
2052               t->next_index = next0;
2053             }
2054
2055           if (cached0)
2056             {
2057               n_left_to_next++;
2058               to_next--;
2059               cached_fragments++;
2060             }
2061           else
2062             {
2063               pkts_processed += next0 != NAT_IN2OUT_ED_NEXT_DROP;
2064
2065               /* verify speculative enqueue, maybe switch current next frame */
2066               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2067                                                to_next, n_left_to_next,
2068                                                bi0, next0);
2069             }
2070
2071           if (n_left_from == 0 && vec_len (fragments_to_loopback))
2072             {
2073               from = vlib_frame_vector_args (frame);
2074               u32 len = vec_len (fragments_to_loopback);
2075               if (len <= VLIB_FRAME_SIZE)
2076                 {
2077                   clib_memcpy_fast (from, fragments_to_loopback,
2078                                     sizeof (u32) * len);
2079                   n_left_from = len;
2080                   vec_reset_length (fragments_to_loopback);
2081                 }
2082               else
2083                 {
2084                   clib_memcpy_fast (from, fragments_to_loopback +
2085                                     (len - VLIB_FRAME_SIZE),
2086                                     sizeof (u32) * VLIB_FRAME_SIZE);
2087                   n_left_from = VLIB_FRAME_SIZE;
2088                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
2089                 }
2090             }
2091         }
2092
2093       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2094     }
2095
2096   vlib_node_increment_counter (vm, sm->ed_in2out_reass_node_index,
2097                                NAT_IN2OUT_ED_ERROR_PROCESSED_FRAGMENTS,
2098                                pkts_processed);
2099   vlib_node_increment_counter (vm, sm->ed_in2out_reass_node_index,
2100                                NAT_IN2OUT_ED_ERROR_CACHED_FRAGMENTS,
2101                                cached_fragments);
2102
2103   nat_send_all_to_node (vm, fragments_to_drop, node,
2104                         &node->errors[NAT_IN2OUT_ED_ERROR_DROP_FRAGMENT],
2105                         NAT_IN2OUT_ED_NEXT_DROP);
2106
2107   vec_free (fragments_to_drop);
2108   vec_free (fragments_to_loopback);
2109   return frame->n_vectors;
2110 }
2111
2112 VLIB_NODE_FN (nat44_ed_in2out_reass_node) (vlib_main_t * vm,
2113                                            vlib_node_runtime_t * node,
2114                                            vlib_frame_t * frame)
2115 {
2116   return nat44_ed_in2out_reass_node_fn_inline (vm, node, frame, 0);
2117 }
2118
2119 /* *INDENT-OFF* */
2120 VLIB_REGISTER_NODE (nat44_ed_in2out_reass_node) = {
2121   .name = "nat44-ed-in2out-reass",
2122   .vector_size = sizeof (u32),
2123   .format_trace = format_nat44_reass_trace,
2124   .type = VLIB_NODE_TYPE_INTERNAL,
2125   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
2126   .error_strings = nat_in2out_ed_error_strings,
2127   .n_next_nodes = NAT_IN2OUT_ED_N_NEXT,
2128   .next_nodes = {
2129     [NAT_IN2OUT_ED_NEXT_DROP] = "error-drop",
2130     [NAT_IN2OUT_ED_NEXT_LOOKUP] = "ip4-lookup",
2131     [NAT_IN2OUT_ED_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2132     [NAT_IN2OUT_ED_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2133     [NAT_IN2OUT_ED_NEXT_REASS] = "nat44-ed-in2out-reass",
2134   },
2135 };
2136 /* *INDENT-ON* */
2137
2138 VLIB_NODE_FN (nat44_ed_in2out_reass_output_node) (vlib_main_t * vm,
2139                                                   vlib_node_runtime_t * node,
2140                                                   vlib_frame_t * frame)
2141 {
2142   return nat44_ed_in2out_reass_node_fn_inline (vm, node, frame, 1);
2143 }
2144
2145 /* *INDENT-OFF* */
2146 VLIB_REGISTER_NODE (nat44_ed_in2out_reass_output_node) = {
2147   .name = "nat44-ed-in2out-reass-output",
2148   .vector_size = sizeof (u32),
2149   .format_trace = format_nat44_reass_trace,
2150   .type = VLIB_NODE_TYPE_INTERNAL,
2151   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
2152   .error_strings = nat_in2out_ed_error_strings,
2153   .n_next_nodes = NAT_IN2OUT_ED_N_NEXT,
2154   .next_nodes = {
2155     [NAT_IN2OUT_ED_NEXT_DROP] = "error-drop",
2156     [NAT_IN2OUT_ED_NEXT_LOOKUP] = "interface-output",
2157     [NAT_IN2OUT_ED_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2158     [NAT_IN2OUT_ED_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2159     [NAT_IN2OUT_ED_NEXT_REASS] = "nat44-ed-in2out-reass",
2160   },
2161 };
2162 /* *INDENT-ON* */
2163
2164 /*
2165  * fd.io coding-style-patch-verification: ON
2166  *
2167  * Local Variables:
2168  * eval: (c-set-style "gnu")
2169  * End:
2170  */