NAT44: active-passive HA (VPP-1571)
[vpp.git] / src / plugins / nat / in2out_ed.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT44 endpoint-dependent inside to outside network translation
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/pg/pg.h>
23 #include <vnet/ip/ip.h>
24 #include <vnet/ethernet/ethernet.h>
25 #include <vnet/fib/ip4_fib.h>
26 #include <vppinfra/error.h>
27 #include <nat/nat.h>
28 #include <nat/nat_ipfix_logging.h>
29 #include <nat/nat_reass.h>
30 #include <nat/nat_inlines.h>
31 #include <nat/nat_syslog.h>
32 #include <nat/nat_ha.h>
33
/*
 * Error / statistics counters of the NAT44 ED in2out nodes.
 *
 * The list is expanded twice below: once into the nat_in2out_ed_error_t
 * enumeration and once into the matching table of counter descriptions, so
 * the two always stay in the same order.
 *
 * IN2OUT_PACKETS is deliberately the first entry (value 0).  Helpers in this
 * file such as icmp_get_ed_key() return 0 for success and a
 * NAT_IN2OUT_ED_ERROR_* code for failure, and their callers test the result
 * against 0.  A code that is returned as a failure (UNSUPPORTED_PROTOCOL in
 * particular) must therefore never have the value 0, otherwise the failure
 * is indistinguishable from success.
 */
#define foreach_nat_in2out_ed_error                     \
_(IN2OUT_PACKETS, "good in2out packets processed")      \
_(UNSUPPORTED_PROTOCOL, "unsupported protocol")         \
_(OUT_OF_PORTS, "out of ports")                         \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded")   \
_(DROP_FRAGMENT, "drop fragment")                       \
_(MAX_REASS, "maximum reassemblies exceeded")           \
_(MAX_FRAG, "maximum fragments per reassembly exceeded")\
_(NON_SYN, "non-SYN packet try to create session")      \
_(TCP_PACKETS, "TCP packets")                           \
_(UDP_PACKETS, "UDP packets")                           \
_(ICMP_PACKETS, "ICMP packets")                         \
_(OTHER_PACKETS, "other protocol packets")              \
_(FRAGMENTS, "fragments")                               \
_(CACHED_FRAGMENTS, "cached fragments")                 \
_(PROCESSED_FRAGMENTS, "processed fragments")


/* One NAT_IN2OUT_ED_ERROR_<sym> enumerator per list entry, plus a count. */
typedef enum
{
#define _(sym,str) NAT_IN2OUT_ED_ERROR_##sym,
  foreach_nat_in2out_ed_error
#undef _
    NAT_IN2OUT_ED_N_ERROR,
} nat_in2out_ed_error_t;

/* Counter descriptions, indexed by nat_in2out_ed_error_t. */
static char *nat_in2out_ed_error_strings[] = {
#define _(sym,string) string,
  foreach_nat_in2out_ed_error
#undef _
};
66
/* Next-node slots used by the NAT44 ED in2out nodes; N_NEXT is the count. */
typedef enum
{
  NAT_IN2OUT_ED_NEXT_LOOKUP = 0,
  NAT_IN2OUT_ED_NEXT_DROP = 1,
  NAT_IN2OUT_ED_NEXT_ICMP_ERROR = 2,
  NAT_IN2OUT_ED_NEXT_SLOW_PATH = 3,
  NAT_IN2OUT_ED_NEXT_REASS = 4,
  NAT_IN2OUT_ED_N_NEXT = 5,
} nat_in2out_ed_next_t;
76
/* Per-packet trace record rendered by format_nat_in2out_ed_trace(). */
typedef struct
{
  u32 sw_if_index;		/* printed as "sw_if_index" */
  u32 next_index;		/* printed as "next index" */
  u32 session_index;		/* printed as "session" */
  u32 is_slow_path;		/* non-zero selects the SLOW_PATH trace tag */
} nat_in2out_ed_trace_t;
84
85 static u8 *
86 format_nat_in2out_ed_trace (u8 * s, va_list * args)
87 {
88   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
89   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
90   nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *);
91   char *tag;
92
93   tag =
94     t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" :
95     "NAT44_IN2OUT_ED_FAST_PATH";
96
97   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
98               t->sw_if_index, t->next_index, t->session_index);
99
100   return s;
101 }
102
103 static_always_inline int
104 icmp_get_ed_key (ip4_header_t * ip0, nat_ed_ses_key_t * p_key0)
105 {
106   icmp46_header_t *icmp0;
107   nat_ed_ses_key_t key0;
108   icmp_echo_header_t *echo0, *inner_echo0 = 0;
109   ip4_header_t *inner_ip0 = 0;
110   void *l4_header = 0;
111   icmp46_header_t *inner_icmp0;
112
113   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
114   echo0 = (icmp_echo_header_t *) (icmp0 + 1);
115
116   if (!icmp_is_error_message (icmp0))
117     {
118       key0.proto = IP_PROTOCOL_ICMP;
119       key0.l_addr = ip0->src_address;
120       key0.r_addr = ip0->dst_address;
121       key0.l_port = echo0->identifier;
122       key0.r_port = 0;
123     }
124   else
125     {
126       inner_ip0 = (ip4_header_t *) (echo0 + 1);
127       l4_header = ip4_next_header (inner_ip0);
128       key0.proto = inner_ip0->protocol;
129       key0.r_addr = inner_ip0->src_address;
130       key0.l_addr = inner_ip0->dst_address;
131       switch (ip_proto_to_snat_proto (inner_ip0->protocol))
132         {
133         case SNAT_PROTOCOL_ICMP:
134           inner_icmp0 = (icmp46_header_t *) l4_header;
135           inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1);
136           key0.r_port = 0;
137           key0.l_port = inner_echo0->identifier;
138           break;
139         case SNAT_PROTOCOL_UDP:
140         case SNAT_PROTOCOL_TCP:
141           key0.l_port = ((tcp_udp_header_t *) l4_header)->dst_port;
142           key0.r_port = ((tcp_udp_header_t *) l4_header)->src_port;
143           break;
144         default:
145           return NAT_IN2OUT_ED_ERROR_UNSUPPORTED_PROTOCOL;
146         }
147     }
148   *p_key0 = key0;
149   return 0;
150 }
151
152 #ifndef CLIB_MARCH_VARIANT
153 int
154 nat44_i2o_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg)
155 {
156   snat_main_t *sm = &snat_main;
157   nat44_is_idle_session_ctx_t *ctx = arg;
158   snat_session_t *s;
159   u64 sess_timeout_time;
160   nat_ed_ses_key_t ed_key;
161   clib_bihash_kv_16_8_t ed_kv;
162   int i;
163   snat_address_t *a;
164   snat_session_key_t key;
165   snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
166                                                        ctx->thread_index);
167
168   s = pool_elt_at_index (tsm->sessions, kv->value);
169   sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s);
170   if (ctx->now >= sess_timeout_time)
171     {
172       if (is_fwd_bypass_session (s))
173         goto delete;
174
175       ed_key.l_addr = s->out2in.addr;
176       ed_key.r_addr = s->ext_host_addr;
177       ed_key.fib_index = s->out2in.fib_index;
178       if (snat_is_unk_proto_session (s))
179         {
180           ed_key.proto = s->in2out.port;
181           ed_key.r_port = 0;
182           ed_key.l_port = 0;
183         }
184       else
185         {
186           ed_key.proto = snat_proto_to_ip_proto (s->in2out.protocol);
187           ed_key.l_port = s->out2in.port;
188           ed_key.r_port = s->ext_host_port;
189         }
190       ed_kv.key[0] = ed_key.as_u64[0];
191       ed_kv.key[1] = ed_key.as_u64[1];
192       if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &ed_kv, 0))
193         nat_log_warn ("out2in_ed key del failed");
194
195       if (snat_is_unk_proto_session (s))
196         goto delete;
197
198       snat_ipfix_logging_nat44_ses_delete (ctx->thread_index,
199                                            s->in2out.addr.as_u32,
200                                            s->out2in.addr.as_u32,
201                                            s->in2out.protocol,
202                                            s->in2out.port,
203                                            s->out2in.port,
204                                            s->in2out.fib_index);
205
206       nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
207                              &s->in2out.addr, s->in2out.port,
208                              &s->ext_host_nat_addr, s->ext_host_nat_port,
209                              &s->out2in.addr, s->out2in.port,
210                              &s->ext_host_addr, s->ext_host_port,
211                              s->in2out.protocol, is_twice_nat_session (s));
212
213       nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
214                    s->ext_host_port, s->out2in.protocol, s->out2in.fib_index,
215                    ctx->thread_index);
216
217       if (is_twice_nat_session (s))
218         {
219           for (i = 0; i < vec_len (sm->twice_nat_addresses); i++)
220             {
221               key.protocol = s->in2out.protocol;
222               key.port = s->ext_host_nat_port;
223               a = sm->twice_nat_addresses + i;
224               if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32)
225                 {
226                   snat_free_outside_address_and_port (sm->twice_nat_addresses,
227                                                       ctx->thread_index,
228                                                       &key);
229                   break;
230                 }
231             }
232         }
233
234       if (snat_is_session_static (s))
235         goto delete;
236
237       snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
238                                           &s->out2in);
239     delete:
240       nat44_delete_session (sm, s, ctx->thread_index);
241       return 1;
242     }
243
244   return 0;
245 }
246 #endif
247
248 static inline u32
249 icmp_in2out_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0,
250                           ip4_header_t * ip0, icmp46_header_t * icmp0,
251                           u32 sw_if_index0, u32 rx_fib_index0,
252                           vlib_node_runtime_t * node, u32 next0, f64 now,
253                           u32 thread_index, snat_session_t ** p_s0)
254 {
255   next0 = icmp_in2out (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
256                        next0, thread_index, p_s0, 0);
257   snat_session_t *s0 = *p_s0;
258   if (PREDICT_TRUE (next0 != NAT_IN2OUT_ED_NEXT_DROP && s0))
259     {
260       /* Accounting */
261       nat44_session_update_counters (s0, now,
262                                      vlib_buffer_length_in_chain
263                                      (sm->vlib_main, b0), thread_index);
264       /* Per-user LRU list maintenance */
265       nat44_session_update_lru (sm, s0, thread_index);
266     }
267   return next0;
268 }
269
270 static u32
271 slow_path_ed (snat_main_t * sm,
272               vlib_buffer_t * b,
273               u32 rx_fib_index,
274               clib_bihash_kv_16_8_t * kv,
275               snat_session_t ** sessionp,
276               vlib_node_runtime_t * node, u32 next, u32 thread_index, f64 now,
277               tcp_header_t * tcp)
278 {
279   snat_session_t *s = 0;
280   snat_user_t *u;
281   snat_session_key_t key0, key1;
282   lb_nat_type_t lb = 0, is_sm = 0;
283   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
284   nat_ed_ses_key_t *key = (nat_ed_ses_key_t *) kv->key;
285   u32 proto = ip_proto_to_snat_proto (key->proto);
286   nat_outside_fib_t *outside_fib;
287   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
288   u8 identity_nat;
289   fib_prefix_t pfx = {
290     .fp_proto = FIB_PROTOCOL_IP4,
291     .fp_len = 32,
292     .fp_addr = {
293                 .ip4.as_u32 = key->r_addr.as_u32,
294                 },
295   };
296   nat44_is_idle_session_ctx_t ctx;
297
298   if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index)))
299     {
300       b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
301       nat_ipfix_logging_max_sessions (thread_index, sm->max_translations);
302       nat_log_notice ("maximum sessions exceeded");
303       return NAT_IN2OUT_ED_NEXT_DROP;
304     }
305
306   key0.addr = key->l_addr;
307   key0.port = key->l_port;
308   key1.protocol = key0.protocol = proto;
309   key0.fib_index = rx_fib_index;
310   key1.fib_index = sm->outside_fib_index;
311   /* First try to match static mapping by local address and port */
312   if (snat_static_mapping_match
313       (sm, key0, &key1, 0, 0, 0, &lb, 0, &identity_nat))
314     {
315       /* Try to create dynamic translation */
316       if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index,
317                                                thread_index, &key1,
318                                                sm->port_per_thread,
319                                                tsm->snat_thread_index))
320         {
321           nat_log_notice ("addresses exhausted");
322           b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
323           return NAT_IN2OUT_ED_NEXT_DROP;
324         }
325     }
326   else
327     {
328       if (PREDICT_FALSE (identity_nat))
329         {
330           *sessionp = s;
331           return next;
332         }
333
334       is_sm = 1;
335     }
336
337   if (proto == SNAT_PROTOCOL_TCP)
338     {
339       if (!tcp_is_init (tcp))
340         {
341           b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
342           return NAT_IN2OUT_ED_NEXT_DROP;
343         }
344     }
345
346   u = nat_user_get_or_create (sm, &key->l_addr, rx_fib_index, thread_index);
347   if (!u)
348     {
349       nat_log_warn ("create NAT user failed");
350       if (!is_sm)
351         snat_free_outside_address_and_port (sm->addresses,
352                                             thread_index, &key1);
353       return NAT_IN2OUT_ED_NEXT_DROP;
354     }
355
356   s = nat_ed_session_alloc (sm, u, thread_index, now);
357   if (!s)
358     {
359       nat44_delete_user_with_no_session (sm, u, thread_index);
360       nat_log_warn ("create NAT session failed");
361       if (!is_sm)
362         snat_free_outside_address_and_port (sm->addresses,
363                                             thread_index, &key1);
364       return NAT_IN2OUT_ED_NEXT_DROP;
365     }
366
367   user_session_increment (sm, u, is_sm);
368   if (is_sm)
369     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
370   if (lb)
371     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
372   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
373   s->ext_host_addr = key->r_addr;
374   s->ext_host_port = key->r_port;
375   s->in2out = key0;
376   s->out2in = key1;
377   s->out2in.protocol = key0.protocol;
378
379   switch (vec_len (sm->outside_fibs))
380     {
381     case 0:
382       s->out2in.fib_index = sm->outside_fib_index;
383       break;
384     case 1:
385       s->out2in.fib_index = sm->outside_fibs[0].fib_index;
386       break;
387     default:
388       /* *INDENT-OFF* */
389       vec_foreach (outside_fib, sm->outside_fibs)
390        {
391           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
392           if (FIB_NODE_INDEX_INVALID != fei)
393             {
394               if (fib_entry_get_resolving_interface (fei) != ~0)
395                 {
396                   s->out2in.fib_index = outside_fib->fib_index;
397                   break;
398                 }
399             }
400         }
401       /* *INDENT-ON* */
402       break;
403     }
404
405   /* Add to lookup tables */
406   kv->value = s - tsm->sessions;
407   ctx.now = now;
408   ctx.thread_index = thread_index;
409   if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, kv,
410                                                nat44_i2o_ed_is_idle_session_cb,
411                                                &ctx))
412     nat_log_notice ("in2out-ed key add failed");
413
414   make_ed_kv (kv, &key1.addr, &key->r_addr, key->proto, s->out2in.fib_index,
415               key1.port, key->r_port);
416   kv->value = s - tsm->sessions;
417   if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->out2in_ed, kv,
418                                                nat44_o2i_ed_is_idle_session_cb,
419                                                &ctx))
420     nat_log_notice ("out2in-ed key add failed");
421
422   *sessionp = s;
423
424   /* log NAT event */
425   snat_ipfix_logging_nat44_ses_create (thread_index,
426                                        s->in2out.addr.as_u32,
427                                        s->out2in.addr.as_u32,
428                                        s->in2out.protocol,
429                                        s->in2out.port,
430                                        s->out2in.port, s->in2out.fib_index);
431
432   nat_syslog_nat44_sadd (s->user_index, s->in2out.fib_index,
433                          &s->in2out.addr, s->in2out.port,
434                          &s->ext_host_nat_addr, s->ext_host_nat_port,
435                          &s->out2in.addr, s->out2in.port,
436                          &s->ext_host_addr, s->ext_host_port,
437                          s->in2out.protocol, 0);
438
439   nat_ha_sadd (&s->in2out.addr, s->in2out.port, &s->out2in.addr,
440                s->out2in.port, &s->ext_host_addr, s->ext_host_port,
441                &s->ext_host_nat_addr, s->ext_host_nat_port,
442                s->in2out.protocol, s->in2out.fib_index, s->flags,
443                thread_index, 0);
444
445   return next;
446 }
447
448 static_always_inline int
449 nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t * node,
450                         u32 sw_if_index, ip4_header_t * ip, u32 proto,
451                         u32 rx_fib_index, u32 thread_index)
452 {
453   udp_header_t *udp = ip4_next_header (ip);
454   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
455   clib_bihash_kv_16_8_t kv, value;
456   snat_session_key_t key0, key1;
457
458   make_ed_kv (&kv, &ip->dst_address, &ip->src_address, ip->protocol,
459               sm->outside_fib_index, udp->dst_port, udp->src_port);
460
461   /* NAT packet aimed at external address if */
462   /* has active sessions */
463   if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
464     {
465       key0.addr = ip->dst_address;
466       key0.port = udp->dst_port;
467       key0.protocol = proto;
468       key0.fib_index = sm->outside_fib_index;
469       /* or is static mappings */
470       if (!snat_static_mapping_match (sm, key0, &key1, 1, 0, 0, 0, 0, 0))
471         return 0;
472     }
473   else
474     return 0;
475
476   if (sm->forwarding_enabled)
477     return 1;
478
479   return snat_not_translate_fast (sm, node, sw_if_index, ip, proto,
480                                   rx_fib_index);
481 }
482
483 static_always_inline int
484 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
485                                       u32 thread_index, f64 now,
486                                       vlib_main_t * vm, vlib_buffer_t * b)
487 {
488   nat_ed_ses_key_t key;
489   clib_bihash_kv_16_8_t kv, value;
490   udp_header_t *udp;
491   snat_session_t *s = 0;
492   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
493
494   if (!sm->forwarding_enabled)
495     return 0;
496
497   if (ip->protocol == IP_PROTOCOL_ICMP)
498     {
499       key.as_u64[0] = key.as_u64[1] = 0;
500       if (icmp_get_ed_key (ip, &key))
501         return 0;
502       key.fib_index = 0;
503       kv.key[0] = key.as_u64[0];
504       kv.key[1] = key.as_u64[1];
505     }
506   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
507     {
508       udp = ip4_next_header (ip);
509       make_ed_kv (&kv, &ip->src_address, &ip->dst_address, ip->protocol, 0,
510                   udp->src_port, udp->dst_port);
511     }
512   else
513     {
514       make_ed_kv (&kv, &ip->src_address, &ip->dst_address, ip->protocol, 0, 0,
515                   0);
516     }
517
518   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
519     {
520       s = pool_elt_at_index (tsm->sessions, value.value);
521       if (is_fwd_bypass_session (s))
522         {
523           if (ip->protocol == IP_PROTOCOL_TCP)
524             {
525               tcp_header_t *tcp = ip4_next_header (ip);
526               if (nat44_set_tcp_session_state_i2o (sm, s, tcp, thread_index))
527                 return 1;
528             }
529           /* Accounting */
530           nat44_session_update_counters (s, now,
531                                          vlib_buffer_length_in_chain (vm, b),
532                                          thread_index);
533           /* Per-user LRU list maintenance */
534           nat44_session_update_lru (sm, s, thread_index);
535           return 1;
536         }
537       else
538         return 0;
539     }
540
541   return 0;
542 }
543
544 static_always_inline int
545 nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip,
546                                        u8 proto, u16 src_port, u16 dst_port,
547                                        u32 thread_index, u32 rx_sw_if_index,
548                                        u32 tx_sw_if_index)
549 {
550   clib_bihash_kv_16_8_t kv, value;
551   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
552   snat_interface_t *i;
553   snat_session_t *s;
554   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
555   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
556
557   /* src NAT check */
558   make_ed_kv (&kv, &ip->src_address, &ip->dst_address, proto, tx_fib_index,
559               src_port, dst_port);
560   if (!clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
561     {
562       s = pool_elt_at_index (tsm->sessions, value.value);
563       if (nat44_is_ses_closed (s))
564         {
565           nat_log_debug ("TCP close connection %U", format_snat_session,
566                          &sm->per_thread_data[thread_index], s);
567           nat_free_session_data (sm, s, thread_index, 0);
568           nat44_delete_session (sm, s, thread_index);
569         }
570       else
571         s->flags |= SNAT_SESSION_FLAG_OUTPUT_FEATURE;
572       return 1;
573     }
574
575   /* dst NAT check */
576   make_ed_kv (&kv, &ip->dst_address, &ip->src_address, proto, rx_fib_index,
577               dst_port, src_port);
578   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
579     {
580       s = pool_elt_at_index (tsm->sessions, value.value);
581       if (is_fwd_bypass_session (s))
582         return 0;
583
584       /* hairpinning */
585       /* *INDENT-OFF* */
586       pool_foreach (i, sm->output_feature_interfaces,
587       ({
588         if ((nat_interface_is_inside (i)) && (rx_sw_if_index == i->sw_if_index))
589            return 0;
590       }));
591       /* *INDENT-ON* */
592       return 1;
593     }
594
595   return 0;
596 }
597
598 #ifndef CLIB_MARCH_VARIANT
599 u32
600 icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node,
601                       u32 thread_index, vlib_buffer_t * b, ip4_header_t * ip,
602                       u8 * p_proto, snat_session_key_t * p_value,
603                       u8 * p_dont_translate, void *d, void *e)
604 {
605   icmp46_header_t *icmp;
606   u32 sw_if_index;
607   u32 rx_fib_index;
608   nat_ed_ses_key_t key;
609   snat_session_t *s = 0;
610   u8 dont_translate = 0;
611   clib_bihash_kv_16_8_t kv, value;
612   u32 next = ~0;
613   int err;
614   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
615
616   icmp = (icmp46_header_t *) ip4_next_header (ip);
617   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
618   rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
619
620   key.as_u64[0] = key.as_u64[1] = 0;
621   err = icmp_get_ed_key (ip, &key);
622   if (err != 0)
623     {
624       b->error = node->errors[err];
625       next = NAT_IN2OUT_ED_NEXT_DROP;
626       goto out;
627     }
628   key.fib_index = rx_fib_index;
629
630   kv.key[0] = key.as_u64[0];
631   kv.key[1] = key.as_u64[1];
632
633   if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
634     {
635       if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
636         {
637           if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (sm, ip,
638                                                                     key.proto,
639                                                                     key.
640                                                                     l_port,
641                                                                     key.
642                                                                     r_port,
643                                                                     thread_index,
644                                                                     sw_if_index,
645                                                                     vnet_buffer
646                                                                     (b)->
647                                                                     sw_if_index
648                                                                     [VLIB_TX])))
649             {
650               dont_translate = 1;
651               goto out;
652             }
653         }
654       else
655         {
656           if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, sw_if_index,
657                                                      ip, SNAT_PROTOCOL_ICMP,
658                                                      rx_fib_index,
659                                                      thread_index)))
660             {
661               dont_translate = 1;
662               goto out;
663             }
664         }
665
666       if (PREDICT_FALSE (icmp_is_error_message (icmp)))
667         {
668           b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
669           next = NAT_IN2OUT_ED_NEXT_DROP;
670           goto out;
671         }
672
673       next = slow_path_ed (sm, b, rx_fib_index, &kv, &s, node, next,
674                            thread_index, vlib_time_now (sm->vlib_main), 0);
675
676       if (PREDICT_FALSE (next == NAT_IN2OUT_ED_NEXT_DROP))
677         goto out;
678
679       if (!s)
680         {
681           dont_translate = 1;
682           goto out;
683         }
684     }
685   else
686     {
687       if (PREDICT_FALSE (icmp->type != ICMP4_echo_request &&
688                          icmp->type != ICMP4_echo_reply &&
689                          !icmp_is_error_message (icmp)))
690         {
691           b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
692           next = NAT_IN2OUT_ED_NEXT_DROP;
693           goto out;
694         }
695
696       s = pool_elt_at_index (tsm->sessions, value.value);
697     }
698
699   *p_proto = ip_proto_to_snat_proto (key.proto);
700 out:
701   if (s)
702     *p_value = s->out2in;
703   *p_dont_translate = dont_translate;
704   if (d)
705     *(snat_session_t **) d = s;
706   return next;
707 }
708 #endif
709
710 static snat_session_t *
711 nat44_ed_in2out_unknown_proto (snat_main_t * sm,
712                                vlib_buffer_t * b,
713                                ip4_header_t * ip,
714                                u32 rx_fib_index,
715                                u32 thread_index,
716                                f64 now,
717                                vlib_main_t * vm, vlib_node_runtime_t * node)
718 {
719   clib_bihash_kv_8_8_t kv, value;
720   clib_bihash_kv_16_8_t s_kv, s_value;
721   snat_static_mapping_t *m;
722   u32 old_addr, new_addr = 0;
723   ip_csum_t sum;
724   snat_user_t *u;
725   dlist_elt_t *head, *elt;
726   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
727   u32 elt_index, head_index, ses_index;
728   snat_session_t *s;
729   u32 outside_fib_index = sm->outside_fib_index;
730   int i;
731   u8 is_sm = 0;
732   nat_outside_fib_t *outside_fib;
733   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
734   fib_prefix_t pfx = {
735     .fp_proto = FIB_PROTOCOL_IP4,
736     .fp_len = 32,
737     .fp_addr = {
738                 .ip4.as_u32 = ip->dst_address.as_u32,
739                 },
740   };
741
742   switch (vec_len (sm->outside_fibs))
743     {
744     case 0:
745       outside_fib_index = sm->outside_fib_index;
746       break;
747     case 1:
748       outside_fib_index = sm->outside_fibs[0].fib_index;
749       break;
750     default:
751       /* *INDENT-OFF* */
752       vec_foreach (outside_fib, sm->outside_fibs)
753         {
754           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
755           if (FIB_NODE_INDEX_INVALID != fei)
756             {
757               if (fib_entry_get_resolving_interface (fei) != ~0)
758                 {
759                   outside_fib_index = outside_fib->fib_index;
760                   break;
761                 }
762             }
763         }
764       /* *INDENT-ON* */
765       break;
766     }
767   old_addr = ip->src_address.as_u32;
768
769   make_ed_kv (&s_kv, &ip->src_address, &ip->dst_address, ip->protocol,
770               rx_fib_index, 0, 0);
771
772   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &s_kv, &s_value))
773     {
774       s = pool_elt_at_index (tsm->sessions, s_value.value);
775       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
776     }
777   else
778     {
779       if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index)))
780         {
781           b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED];
782           nat_ipfix_logging_max_sessions (thread_index, sm->max_translations);
783           nat_log_notice ("maximum sessions exceeded");
784           return 0;
785         }
786
787       u = nat_user_get_or_create (sm, &ip->src_address, rx_fib_index,
788                                   thread_index);
789       if (!u)
790         {
791           nat_log_warn ("create NAT user failed");
792           return 0;
793         }
794
795       make_sm_kv (&kv, &ip->src_address, 0, rx_fib_index, 0);
796
797       /* Try to find static mapping first */
798       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
799         {
800           m = pool_elt_at_index (sm->static_mappings, value.value);
801           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
802           is_sm = 1;
803           goto create_ses;
804         }
805       /* Fallback to 3-tuple key */
806       else
807         {
808           /* Choose same out address as for TCP/UDP session to same destination */
809           head_index = u->sessions_per_user_list_head_index;
810           head = pool_elt_at_index (tsm->list_pool, head_index);
811           elt_index = head->next;
812           if (PREDICT_FALSE (elt_index == ~0))
813             ses_index = ~0;
814           else
815             {
816               elt = pool_elt_at_index (tsm->list_pool, elt_index);
817               ses_index = elt->value;
818             }
819
820           while (ses_index != ~0)
821             {
822               s = pool_elt_at_index (tsm->sessions, ses_index);
823               elt_index = elt->next;
824               elt = pool_elt_at_index (tsm->list_pool, elt_index);
825               ses_index = elt->value;
826
827               if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
828                 {
829                   new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
830
831                   make_ed_kv (&s_kv, &s->out2in.addr, &ip->dst_address,
832                               ip->protocol, outside_fib_index, 0, 0);
833                   if (clib_bihash_search_16_8
834                       (&tsm->out2in_ed, &s_kv, &s_value))
835                     goto create_ses;
836
837                   break;
838                 }
839             }
840
841           for (i = 0; i < vec_len (sm->addresses); i++)
842             {
843               make_ed_kv (&s_kv, &sm->addresses[i].addr, &ip->dst_address,
844                           ip->protocol, outside_fib_index, 0, 0);
845               if (clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
846                 {
847                   new_addr = ip->src_address.as_u32 =
848                     sm->addresses[i].addr.as_u32;
849                   goto create_ses;
850                 }
851             }
852           return 0;
853         }
854
855     create_ses:
856       s = nat_ed_session_alloc (sm, u, thread_index, now);
857       if (!s)
858         {
859           nat44_delete_user_with_no_session (sm, u, thread_index);
860           nat_log_warn ("create NAT session failed");
861           return 0;
862         }
863
864       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
865       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
866       s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
867       s->out2in.addr.as_u32 = new_addr;
868       s->out2in.fib_index = outside_fib_index;
869       s->in2out.addr.as_u32 = old_addr;
870       s->in2out.fib_index = rx_fib_index;
871       s->in2out.port = s->out2in.port = ip->protocol;
872       if (is_sm)
873         s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
874       user_session_increment (sm, u, is_sm);
875
876       /* Add to lookup tables */
877       make_ed_kv (&s_kv, &s->in2out.addr, &ip->dst_address, ip->protocol,
878                   rx_fib_index, 0, 0);
879       s_kv.value = s - tsm->sessions;
880       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1))
881         nat_log_notice ("in2out key add failed");
882
883       make_ed_kv (&s_kv, &s->out2in.addr, &ip->dst_address, ip->protocol,
884                   outside_fib_index, 0, 0);
885       s_kv.value = s - tsm->sessions;
886       if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &s_kv, 1))
887         nat_log_notice ("out2in key add failed");
888     }
889
890   /* Update IP checksum */
891   sum = ip->checksum;
892   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
893   ip->checksum = ip_csum_fold (sum);
894
895   /* Accounting */
896   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
897                                  thread_index);
898   /* Per-user LRU list maintenance */
899   nat44_session_update_lru (sm, s, thread_index);
900
901   /* Hairpinning */
902   if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
903     nat44_ed_hairpinning_unknown_proto (sm, b, ip);
904
905   if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
906     vnet_buffer (b)->sw_if_index[VLIB_TX] = outside_fib_index;
907
908   return s;
909 }
910
911 static inline uword
912 nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
913                                 vlib_node_runtime_t * node,
914                                 vlib_frame_t * frame, int is_slow_path,
915                                 int is_output_feature)
916 {
917   u32 n_left_from, *from, *to_next, pkts_processed = 0, stats_node_index;
918   nat_in2out_ed_next_t next_index;
919   snat_main_t *sm = &snat_main;
920   f64 now = vlib_time_now (vm);
921   u32 thread_index = vm->thread_index;
922   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
923   u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets =
924     0, fragments = 0;
925
926   stats_node_index = is_slow_path ? sm->ed_in2out_slowpath_node_index :
927     sm->ed_in2out_node_index;
928
929   from = vlib_frame_vector_args (frame);
930   n_left_from = frame->n_vectors;
931   next_index = node->cached_next_index;
932
933   while (n_left_from > 0)
934     {
935       u32 n_left_to_next;
936
937       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
938
939       while (n_left_from >= 4 && n_left_to_next >= 2)
940         {
941           u32 bi0, bi1;
942           vlib_buffer_t *b0, *b1;
943           u32 next0, sw_if_index0, rx_fib_index0, iph_offset0 = 0, proto0,
944             new_addr0, old_addr0;
945           u32 next1, sw_if_index1, rx_fib_index1, iph_offset1 = 0, proto1,
946             new_addr1, old_addr1;
947           u16 old_port0, new_port0, old_port1, new_port1;
948           ip4_header_t *ip0, *ip1;
949           udp_header_t *udp0, *udp1;
950           tcp_header_t *tcp0, *tcp1;
951           icmp46_header_t *icmp0, *icmp1;
952           snat_session_t *s0 = 0, *s1 = 0;
953           clib_bihash_kv_16_8_t kv0, value0, kv1, value1;
954           ip_csum_t sum0, sum1;
955
956           /* Prefetch next iteration. */
957           {
958             vlib_buffer_t *p2, *p3;
959
960             p2 = vlib_get_buffer (vm, from[2]);
961             p3 = vlib_get_buffer (vm, from[3]);
962
963             vlib_prefetch_buffer_header (p2, LOAD);
964             vlib_prefetch_buffer_header (p3, LOAD);
965
966             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
967             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
968           }
969
970           /* speculatively enqueue b0 and b1 to the current next frame */
971           to_next[0] = bi0 = from[0];
972           to_next[1] = bi1 = from[1];
973           from += 2;
974           to_next += 2;
975           n_left_from -= 2;
976           n_left_to_next -= 2;
977
978           b0 = vlib_get_buffer (vm, bi0);
979           b1 = vlib_get_buffer (vm, bi1);
980
981           next0 = NAT_IN2OUT_ED_NEXT_LOOKUP;
982
983           if (is_output_feature)
984             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
985
986           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
987                                   iph_offset0);
988
989           sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
990           rx_fib_index0 =
991             fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
992                                                  sw_if_index0);
993
994           if (PREDICT_FALSE (ip0->ttl == 1))
995             {
996               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
997               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
998                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
999                                            0);
1000               next0 = NAT_IN2OUT_ED_NEXT_ICMP_ERROR;
1001               goto trace00;
1002             }
1003
1004           udp0 = ip4_next_header (ip0);
1005           tcp0 = (tcp_header_t *) udp0;
1006           icmp0 = (icmp46_header_t *) udp0;
1007           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1008
1009           if (is_slow_path)
1010             {
1011               if (PREDICT_FALSE (proto0 == ~0))
1012                 {
1013                   s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0,
1014                                                       rx_fib_index0,
1015                                                       thread_index, now, vm,
1016                                                       node);
1017                   if (!s0)
1018                     next0 = NAT_IN2OUT_ED_NEXT_DROP;
1019                   other_packets++;
1020                   goto trace00;
1021                 }
1022
1023               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1024                 {
1025                   next0 = icmp_in2out_ed_slow_path
1026                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1027                      next0, now, thread_index, &s0);
1028                   icmp_packets++;
1029                   goto trace00;
1030                 }
1031             }
1032           else
1033             {
1034               if (PREDICT_FALSE (proto0 == ~0))
1035                 {
1036                   next0 = NAT_IN2OUT_ED_NEXT_SLOW_PATH;
1037                   goto trace00;
1038                 }
1039
1040               if (ip4_is_fragment (ip0))
1041                 {
1042                   next0 = NAT_IN2OUT_ED_NEXT_REASS;
1043                   fragments++;
1044                   goto trace00;
1045                 }
1046
1047               if (is_output_feature)
1048                 {
1049                   if (PREDICT_FALSE
1050                       (nat_not_translate_output_feature_fwd
1051                        (sm, ip0, thread_index, now, vm, b0)))
1052                     goto trace00;
1053                 }
1054
1055               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1056                 {
1057                   next0 = NAT_IN2OUT_ED_NEXT_SLOW_PATH;
1058                   goto trace00;
1059                 }
1060             }
1061
1062           make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address,
1063                       ip0->protocol, rx_fib_index0, udp0->src_port,
1064                       udp0->dst_port);
1065
1066           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
1067             {
1068               if (is_slow_path)
1069                 {
1070                   if (is_output_feature)
1071                     {
1072                       if (PREDICT_FALSE
1073                           (nat44_ed_not_translate_output_feature
1074                            (sm, ip0, ip0->protocol, udp0->src_port,
1075                             udp0->dst_port, thread_index, sw_if_index0,
1076                             vnet_buffer (b0)->sw_if_index[VLIB_TX])))
1077                         goto trace00;
1078                     }
1079                   else
1080                     {
1081                       if (PREDICT_FALSE (nat44_ed_not_translate (sm, node,
1082                                                                  sw_if_index0,
1083                                                                  ip0, proto0,
1084                                                                  rx_fib_index0,
1085                                                                  thread_index)))
1086                         goto trace00;
1087                     }
1088
1089                   next0 =
1090                     slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node,
1091                                   next0, thread_index, now, tcp0);
1092
1093                   if (PREDICT_FALSE (next0 == NAT_IN2OUT_ED_NEXT_DROP))
1094                     goto trace00;
1095
1096                   if (PREDICT_FALSE (!s0))
1097                     goto trace00;
1098                 }
1099               else
1100                 {
1101                   next0 = NAT_IN2OUT_ED_NEXT_SLOW_PATH;
1102                   goto trace00;
1103                 }
1104             }
1105           else
1106             {
1107               s0 = pool_elt_at_index (tsm->sessions, value0.value);
1108             }
1109
1110           b0->flags |= VNET_BUFFER_F_IS_NATED;
1111
1112           if (!is_output_feature)
1113             vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1114
1115           old_addr0 = ip0->src_address.as_u32;
1116           new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
1117           sum0 = ip0->checksum;
1118           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1119                                  src_address);
1120           if (PREDICT_FALSE (is_twice_nat_session (s0)))
1121             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1122                                    s0->ext_host_addr.as_u32, ip4_header_t,
1123                                    dst_address);
1124           ip0->checksum = ip_csum_fold (sum0);
1125
1126           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
1127             {
1128               old_port0 = tcp0->src_port;
1129               new_port0 = tcp0->src_port = s0->out2in.port;
1130
1131               sum0 = tcp0->checksum;
1132               sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1133                                      dst_address);
1134               sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
1135                                      length);
1136               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1137                 {
1138                   sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1139                                          s0->ext_host_addr.as_u32,
1140                                          ip4_header_t, dst_address);
1141                   sum0 = ip_csum_update (sum0, tcp0->dst_port,
1142                                          s0->ext_host_port, ip4_header_t,
1143                                          length);
1144                   tcp0->dst_port = s0->ext_host_port;
1145                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1146                 }
1147               mss_clamping (sm, tcp0, &sum0);
1148               tcp0->checksum = ip_csum_fold (sum0);
1149               tcp_packets++;
1150               if (nat44_set_tcp_session_state_i2o
1151                   (sm, s0, tcp0, thread_index))
1152                 goto trace00;
1153             }
1154           else
1155             {
1156               udp0->src_port = s0->out2in.port;
1157               udp0->checksum = 0;
1158               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1159                 {
1160                   udp0->dst_port = s0->ext_host_port;
1161                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1162                 }
1163               udp_packets++;
1164             }
1165
1166           /* Accounting */
1167           nat44_session_update_counters (s0, now,
1168                                          vlib_buffer_length_in_chain (vm,
1169                                                                       b0),
1170                                          thread_index);
1171           /* Per-user LRU list maintenance */
1172           nat44_session_update_lru (sm, s0, thread_index);
1173
1174         trace00:
1175           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1176                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1177             {
1178               nat_in2out_ed_trace_t *t =
1179                 vlib_add_trace (vm, node, b0, sizeof (*t));
1180               t->is_slow_path = is_slow_path;
1181               t->sw_if_index = sw_if_index0;
1182               t->next_index = next0;
1183               t->session_index = ~0;
1184               if (s0)
1185                 t->session_index = s0 - tsm->sessions;
1186             }
1187
1188           pkts_processed += next0 == NAT_IN2OUT_ED_NEXT_LOOKUP;
1189
1190
1191           next1 = NAT_IN2OUT_ED_NEXT_LOOKUP;
1192
1193           if (is_output_feature)
1194             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1195
1196           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1197                                   iph_offset1);
1198
1199           sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
1200           rx_fib_index1 =
1201             fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1202                                                  sw_if_index1);
1203
1204           if (PREDICT_FALSE (ip1->ttl == 1))
1205             {
1206               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1207               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1208                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1209                                            0);
1210               next1 = NAT_IN2OUT_ED_NEXT_ICMP_ERROR;
1211               goto trace01;
1212             }
1213
1214           udp1 = ip4_next_header (ip1);
1215           tcp1 = (tcp_header_t *) udp1;
1216           icmp1 = (icmp46_header_t *) udp1;
1217           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1218
1219           if (is_slow_path)
1220             {
1221               if (PREDICT_FALSE (proto1 == ~0))
1222                 {
1223                   s1 = nat44_ed_in2out_unknown_proto (sm, b1, ip1,
1224                                                       rx_fib_index1,
1225                                                       thread_index, now, vm,
1226                                                       node);
1227                   if (!s1)
1228                     next1 = NAT_IN2OUT_ED_NEXT_DROP;
1229                   other_packets++;
1230                   goto trace01;
1231                 }
1232
1233               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1234                 {
1235                   next1 = icmp_in2out_ed_slow_path
1236                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1237                      next1, now, thread_index, &s1);
1238                   icmp_packets++;
1239                   goto trace01;
1240                 }
1241             }
1242           else
1243             {
1244               if (PREDICT_FALSE (proto1 == ~0))
1245                 {
1246                   next1 = NAT_IN2OUT_ED_NEXT_SLOW_PATH;
1247                   goto trace01;
1248                 }
1249
1250               if (ip4_is_fragment (ip1))
1251                 {
1252                   next1 = NAT_IN2OUT_ED_NEXT_REASS;
1253                   fragments++;
1254                   goto trace01;
1255                 }
1256
1257               if (is_output_feature)
1258                 {
1259                   if (PREDICT_FALSE
1260                       (nat_not_translate_output_feature_fwd
1261                        (sm, ip1, thread_index, now, vm, b1)))
1262                     goto trace01;
1263                 }
1264
1265               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1266                 {
1267                   next1 = NAT_IN2OUT_ED_NEXT_SLOW_PATH;
1268                   goto trace01;
1269                 }
1270             }
1271
1272           make_ed_kv (&kv1, &ip1->src_address, &ip1->dst_address,
1273                       ip1->protocol, rx_fib_index1, udp1->src_port,
1274                       udp1->dst_port);
1275
1276           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv1, &value1))
1277             {
1278               if (is_slow_path)
1279                 {
1280                   if (is_output_feature)
1281                     {
1282                       if (PREDICT_FALSE
1283                           (nat44_ed_not_translate_output_feature
1284                            (sm, ip1, ip1->protocol, udp1->src_port,
1285                             udp1->dst_port, thread_index, sw_if_index1,
1286                             vnet_buffer (b1)->sw_if_index[VLIB_TX])))
1287                         goto trace01;
1288                     }
1289                   else
1290                     {
1291                       if (PREDICT_FALSE (nat44_ed_not_translate (sm, node,
1292                                                                  sw_if_index1,
1293                                                                  ip1, proto1,
1294                                                                  rx_fib_index1,
1295                                                                  thread_index)))
1296                         goto trace01;
1297                     }
1298
1299                   next1 =
1300                     slow_path_ed (sm, b1, rx_fib_index1, &kv1, &s1, node,
1301                                   next1, thread_index, now, tcp1);
1302
1303                   if (PREDICT_FALSE (next1 == NAT_IN2OUT_ED_NEXT_DROP))
1304                     goto trace01;
1305
1306                   if (PREDICT_FALSE (!s1))
1307                     goto trace01;
1308                 }
1309               else
1310                 {
1311                   next1 = NAT_IN2OUT_ED_NEXT_SLOW_PATH;
1312                   goto trace01;
1313                 }
1314             }
1315           else
1316             {
1317               s1 = pool_elt_at_index (tsm->sessions, value1.value);
1318             }
1319
1320           b1->flags |= VNET_BUFFER_F_IS_NATED;
1321
1322           if (!is_output_feature)
1323             vnet_buffer (b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1324
1325           old_addr1 = ip1->src_address.as_u32;
1326           new_addr1 = ip1->src_address.as_u32 = s1->out2in.addr.as_u32;
1327           sum1 = ip1->checksum;
1328           sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
1329                                  src_address);
1330           if (PREDICT_FALSE (is_twice_nat_session (s1)))
1331             sum1 = ip_csum_update (sum1, ip1->dst_address.as_u32,
1332                                    s1->ext_host_addr.as_u32, ip4_header_t,
1333                                    dst_address);
1334           ip1->checksum = ip_csum_fold (sum1);
1335
1336           if (PREDICT_TRUE (proto1 == SNAT_PROTOCOL_TCP))
1337             {
1338               old_port1 = tcp1->src_port;
1339               new_port1 = tcp1->src_port = s1->out2in.port;
1340
1341               sum1 = tcp1->checksum;
1342               sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
1343                                      dst_address);
1344               sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t,
1345                                      length);
1346               if (PREDICT_FALSE (is_twice_nat_session (s1)))
1347                 {
1348                   sum1 = ip_csum_update (sum1, ip1->dst_address.as_u32,
1349                                          s1->ext_host_addr.as_u32,
1350                                          ip4_header_t, dst_address);
1351                   sum1 = ip_csum_update (sum1, tcp1->dst_port,
1352                                          s1->ext_host_port, ip4_header_t,
1353                                          length);
1354                   tcp1->dst_port = s1->ext_host_port;
1355                   ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32;
1356                 }
1357               tcp1->checksum = ip_csum_fold (sum1);
1358               mss_clamping (sm, tcp1, &sum1);
1359               tcp_packets++;
1360               if (nat44_set_tcp_session_state_i2o
1361                   (sm, s1, tcp1, thread_index))
1362                 goto trace01;
1363             }
1364           else
1365             {
1366               udp1->src_port = s1->out2in.port;
1367               udp1->checksum = 0;
1368               if (PREDICT_FALSE (is_twice_nat_session (s1)))
1369                 {
1370                   udp1->dst_port = s1->ext_host_port;
1371                   ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32;
1372                 }
1373               udp_packets++;
1374             }
1375
1376           /* Accounting */
1377           nat44_session_update_counters (s1, now,
1378                                          vlib_buffer_length_in_chain (vm, b1),
1379                                          thread_index);
1380           /* Per-user LRU list maintenance */
1381           nat44_session_update_lru (sm, s1, thread_index);
1382
1383         trace01:
1384           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1385                              && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1386             {
1387               nat_in2out_ed_trace_t *t =
1388                 vlib_add_trace (vm, node, b1, sizeof (*t));
1389               t->is_slow_path = is_slow_path;
1390               t->sw_if_index = sw_if_index1;
1391               t->next_index = next1;
1392               t->session_index = ~0;
1393               if (s1)
1394                 t->session_index = s1 - tsm->sessions;
1395             }
1396
1397           pkts_processed += next1 == NAT_IN2OUT_ED_NEXT_LOOKUP;
1398
1399           /* verify speculative enqueues, maybe switch current next frame */
1400           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1401                                            to_next, n_left_to_next,
1402                                            bi0, bi1, next0, next1);
1403         }
1404
1405       while (n_left_from > 0 && n_left_to_next > 0)
1406         {
1407           u32 bi0;
1408           vlib_buffer_t *b0;
1409           u32 next0, sw_if_index0, rx_fib_index0, iph_offset0 = 0, proto0,
1410             new_addr0, old_addr0;
1411           u16 old_port0, new_port0;
1412           ip4_header_t *ip0;
1413           udp_header_t *udp0;
1414           tcp_header_t *tcp0;
1415           icmp46_header_t *icmp0;
1416           snat_session_t *s0 = 0;
1417           clib_bihash_kv_16_8_t kv0, value0;
1418           ip_csum_t sum0;
1419
1420           /* speculatively enqueue b0 to the current next frame */
1421           bi0 = from[0];
1422           to_next[0] = bi0;
1423           from += 1;
1424           to_next += 1;
1425           n_left_from -= 1;
1426           n_left_to_next -= 1;
1427
1428           b0 = vlib_get_buffer (vm, bi0);
1429           next0 = NAT_IN2OUT_ED_NEXT_LOOKUP;
1430
1431           if (is_output_feature)
1432             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1433
1434           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1435                                   iph_offset0);
1436
1437           sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1438           rx_fib_index0 =
1439             fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1440                                                  sw_if_index0);
1441
1442           if (PREDICT_FALSE (ip0->ttl == 1))
1443             {
1444               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1445               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1446                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1447                                            0);
1448               next0 = NAT_IN2OUT_ED_NEXT_ICMP_ERROR;
1449               goto trace0;
1450             }
1451
1452           udp0 = ip4_next_header (ip0);
1453           tcp0 = (tcp_header_t *) udp0;
1454           icmp0 = (icmp46_header_t *) udp0;
1455           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1456
1457           if (is_slow_path)
1458             {
1459               if (PREDICT_FALSE (proto0 == ~0))
1460                 {
1461                   s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0,
1462                                                       rx_fib_index0,
1463                                                       thread_index, now, vm,
1464                                                       node);
1465                   if (!s0)
1466                     next0 = NAT_IN2OUT_ED_NEXT_DROP;
1467                   other_packets++;
1468                   goto trace0;
1469                 }
1470
1471               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1472                 {
1473                   next0 = icmp_in2out_ed_slow_path
1474                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1475                      next0, now, thread_index, &s0);
1476                   icmp_packets++;
1477                   goto trace0;
1478                 }
1479             }
1480           else
1481             {
1482               if (PREDICT_FALSE (proto0 == ~0))
1483                 {
1484                   next0 = NAT_IN2OUT_ED_NEXT_SLOW_PATH;
1485                   goto trace0;
1486                 }
1487
1488               if (ip4_is_fragment (ip0))
1489                 {
1490                   next0 = NAT_IN2OUT_ED_NEXT_REASS;
1491                   fragments++;
1492                   goto trace0;
1493                 }
1494
1495               if (is_output_feature)
1496                 {
1497                   if (PREDICT_FALSE
1498                       (nat_not_translate_output_feature_fwd
1499                        (sm, ip0, thread_index, now, vm, b0)))
1500                     goto trace0;
1501                 }
1502
1503               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1504                 {
1505                   next0 = NAT_IN2OUT_ED_NEXT_SLOW_PATH;
1506                   goto trace0;
1507                 }
1508             }
1509
1510           make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address,
1511                       ip0->protocol, rx_fib_index0, udp0->src_port,
1512                       udp0->dst_port);
1513
1514           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
1515             {
1516               if (is_slow_path)
1517                 {
1518                   if (is_output_feature)
1519                     {
1520                       if (PREDICT_FALSE
1521                           (nat44_ed_not_translate_output_feature
1522                            (sm, ip0, ip0->protocol, udp0->src_port,
1523                             udp0->dst_port, thread_index, sw_if_index0,
1524                             vnet_buffer (b0)->sw_if_index[VLIB_TX])))
1525                         goto trace0;
1526                     }
1527                   else
1528                     {
1529                       if (PREDICT_FALSE (nat44_ed_not_translate (sm, node,
1530                                                                  sw_if_index0,
1531                                                                  ip0, proto0,
1532                                                                  rx_fib_index0,
1533                                                                  thread_index)))
1534                         goto trace0;
1535                     }
1536
1537                   next0 =
1538                     slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node,
1539                                   next0, thread_index, now, tcp0);
1540
1541                   if (PREDICT_FALSE (next0 == NAT_IN2OUT_ED_NEXT_DROP))
1542                     goto trace0;
1543
1544                   if (PREDICT_FALSE (!s0))
1545                     goto trace0;
1546                 }
1547               else
1548                 {
1549                   next0 = NAT_IN2OUT_ED_NEXT_SLOW_PATH;
1550                   goto trace0;
1551                 }
1552             }
1553           else
1554             {
1555               s0 = pool_elt_at_index (tsm->sessions, value0.value);
1556             }
1557
1558           b0->flags |= VNET_BUFFER_F_IS_NATED;
1559
1560           if (!is_output_feature)
1561             vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1562
1563           old_addr0 = ip0->src_address.as_u32;
1564           new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
1565           sum0 = ip0->checksum;
1566           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1567                                  src_address);
1568           if (PREDICT_FALSE (is_twice_nat_session (s0)))
1569             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1570                                    s0->ext_host_addr.as_u32, ip4_header_t,
1571                                    dst_address);
1572           ip0->checksum = ip_csum_fold (sum0);
1573
1574           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
1575             {
1576               old_port0 = tcp0->src_port;
1577               new_port0 = tcp0->src_port = s0->out2in.port;
1578
1579               sum0 = tcp0->checksum;
1580               sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1581                                      dst_address);
1582               sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
1583                                      length);
1584               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1585                 {
1586                   sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1587                                          s0->ext_host_addr.as_u32,
1588                                          ip4_header_t, dst_address);
1589                   sum0 = ip_csum_update (sum0, tcp0->dst_port,
1590                                          s0->ext_host_port, ip4_header_t,
1591                                          length);
1592                   tcp0->dst_port = s0->ext_host_port;
1593                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1594                 }
1595               mss_clamping (sm, tcp0, &sum0);
1596               tcp0->checksum = ip_csum_fold (sum0);
1597               tcp_packets++;
1598               if (nat44_set_tcp_session_state_i2o
1599                   (sm, s0, tcp0, thread_index))
1600                 goto trace0;
1601             }
1602           else
1603             {
1604               udp0->src_port = s0->out2in.port;
1605               udp0->checksum = 0;
1606               if (PREDICT_FALSE (is_twice_nat_session (s0)))
1607                 {
1608                   udp0->dst_port = s0->ext_host_port;
1609                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
1610                 }
1611               udp_packets++;
1612             }
1613
1614           /* Accounting */
1615           nat44_session_update_counters (s0, now,
1616                                          vlib_buffer_length_in_chain (vm, b0),
1617                                          thread_index);
1618           /* Per-user LRU list maintenance */
1619           nat44_session_update_lru (sm, s0, thread_index);
1620
1621         trace0:
1622           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1623                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1624             {
1625               nat_in2out_ed_trace_t *t =
1626                 vlib_add_trace (vm, node, b0, sizeof (*t));
1627               t->is_slow_path = is_slow_path;
1628               t->sw_if_index = sw_if_index0;
1629               t->next_index = next0;
1630               t->session_index = ~0;
1631               if (s0)
1632                 t->session_index = s0 - tsm->sessions;
1633             }
1634
1635           pkts_processed += next0 == NAT_IN2OUT_ED_NEXT_LOOKUP;
1636
1637           /* verify speculative enqueue, maybe switch current next frame */
1638           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1639                                            to_next, n_left_to_next,
1640                                            bi0, next0);
1641         }
1642
1643       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1644     }
1645
1646   vlib_node_increment_counter (vm, stats_node_index,
1647                                NAT_IN2OUT_ED_ERROR_IN2OUT_PACKETS,
1648                                pkts_processed);
1649   vlib_node_increment_counter (vm, stats_node_index,
1650                                NAT_IN2OUT_ED_ERROR_TCP_PACKETS, tcp_packets);
1651   vlib_node_increment_counter (vm, stats_node_index,
1652                                NAT_IN2OUT_ED_ERROR_UDP_PACKETS, tcp_packets);
1653   vlib_node_increment_counter (vm, stats_node_index,
1654                                NAT_IN2OUT_ED_ERROR_ICMP_PACKETS,
1655                                icmp_packets);
1656   vlib_node_increment_counter (vm, stats_node_index,
1657                                NAT_IN2OUT_ED_ERROR_OTHER_PACKETS,
1658                                other_packets);
1659   vlib_node_increment_counter (vm, stats_node_index,
1660                                NAT_IN2OUT_ED_ERROR_FRAGMENTS, fragments);
1661
1662   return frame->n_vectors;
1663 }
1664
1665 VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
1666                                      vlib_node_runtime_t * node,
1667                                      vlib_frame_t * frame)
1668 {
1669   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 0, 0);
1670 }
1671
1672 /* *INDENT-OFF* */
1673 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
1674   .name = "nat44-ed-in2out",
1675   .vector_size = sizeof (u32),
1676   .format_trace = format_nat_in2out_ed_trace,
1677   .type = VLIB_NODE_TYPE_INTERNAL,
1678   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1679   .error_strings = nat_in2out_ed_error_strings,
1680   .runtime_data_bytes = sizeof (snat_runtime_t),
1681   .n_next_nodes = NAT_IN2OUT_ED_N_NEXT,
1682   .next_nodes = {
1683     [NAT_IN2OUT_ED_NEXT_DROP] = "error-drop",
1684     [NAT_IN2OUT_ED_NEXT_LOOKUP] = "ip4-lookup",
1685     [NAT_IN2OUT_ED_NEXT_SLOW_PATH] = "nat44-ed-in2out-slowpath",
1686     [NAT_IN2OUT_ED_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1687     [NAT_IN2OUT_ED_NEXT_REASS] = "nat44-ed-in2out-reass",
1688   },
1689 };
1690 /* *INDENT-ON* */
1691
1692 VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
1693                                             vlib_node_runtime_t * node,
1694                                             vlib_frame_t * frame)
1695 {
1696   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 0, 1);
1697 }
1698
1699 /* *INDENT-OFF* */
1700 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
1701   .name = "nat44-ed-in2out-output",
1702   .vector_size = sizeof (u32),
1703   .format_trace = format_nat_in2out_ed_trace,
1704   .type = VLIB_NODE_TYPE_INTERNAL,
1705   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1706   .error_strings = nat_in2out_ed_error_strings,
1707   .runtime_data_bytes = sizeof (snat_runtime_t),
1708   .n_next_nodes = NAT_IN2OUT_ED_N_NEXT,
1709   .next_nodes = {
1710     [NAT_IN2OUT_ED_NEXT_DROP] = "error-drop",
1711     [NAT_IN2OUT_ED_NEXT_LOOKUP] = "interface-output",
1712     [NAT_IN2OUT_ED_NEXT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
1713     [NAT_IN2OUT_ED_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1714     [NAT_IN2OUT_ED_NEXT_REASS] = "nat44-ed-in2out-reass-output",
1715   },
1716 };
1717 /* *INDENT-ON* */
1718
1719 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
1720                                               vlib_node_runtime_t * node,
1721                                               vlib_frame_t * frame)
1722 {
1723   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 1, 0);
1724 }
1725
1726 /* *INDENT-OFF* */
1727 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
1728   .name = "nat44-ed-in2out-slowpath",
1729   .vector_size = sizeof (u32),
1730   .format_trace = format_nat_in2out_ed_trace,
1731   .type = VLIB_NODE_TYPE_INTERNAL,
1732   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1733   .error_strings = nat_in2out_ed_error_strings,
1734   .runtime_data_bytes = sizeof (snat_runtime_t),
1735   .n_next_nodes = NAT_IN2OUT_ED_N_NEXT,
1736   .next_nodes = {
1737     [NAT_IN2OUT_ED_NEXT_DROP] = "error-drop",
1738     [NAT_IN2OUT_ED_NEXT_LOOKUP] = "ip4-lookup",
1739     [NAT_IN2OUT_ED_NEXT_SLOW_PATH] = "nat44-ed-in2out-slowpath",
1740     [NAT_IN2OUT_ED_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1741     [NAT_IN2OUT_ED_NEXT_REASS] = "nat44-ed-in2out-reass",
1742   },
1743 };
1744 /* *INDENT-ON* */
1745
1746 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
1747                                                      vlib_node_runtime_t *
1748                                                      node,
1749                                                      vlib_frame_t * frame)
1750 {
1751   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 1, 1);
1752 }
1753
1754 /* *INDENT-OFF* */
1755 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
1756   .name = "nat44-ed-in2out-output-slowpath",
1757   .vector_size = sizeof (u32),
1758   .format_trace = format_nat_in2out_ed_trace,
1759   .type = VLIB_NODE_TYPE_INTERNAL,
1760   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
1761   .error_strings = nat_in2out_ed_error_strings,
1762   .runtime_data_bytes = sizeof (snat_runtime_t),
1763   .n_next_nodes = NAT_IN2OUT_ED_N_NEXT,
1764   .next_nodes = {
1765     [NAT_IN2OUT_ED_NEXT_DROP] = "error-drop",
1766     [NAT_IN2OUT_ED_NEXT_LOOKUP] = "interface-output",
1767     [NAT_IN2OUT_ED_NEXT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
1768     [NAT_IN2OUT_ED_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1769     [NAT_IN2OUT_ED_NEXT_REASS] = "nat44-ed-in2out-reass",
1770   },
1771 };
1772 /* *INDENT-ON* */
1773
1774 static inline uword
1775 nat44_ed_in2out_reass_node_fn_inline (vlib_main_t * vm,
1776                                       vlib_node_runtime_t * node,
1777                                       vlib_frame_t * frame,
1778                                       int is_output_feature)
1779 {
1780   u32 n_left_from, *from, *to_next;
1781   nat_in2out_ed_next_t next_index;
1782   u32 pkts_processed = 0, cached_fragments = 0;
1783   snat_main_t *sm = &snat_main;
1784   f64 now = vlib_time_now (vm);
1785   u32 thread_index = vm->thread_index;
1786   snat_main_per_thread_data_t *per_thread_data =
1787     &sm->per_thread_data[thread_index];
1788   u32 *fragments_to_drop = 0;
1789   u32 *fragments_to_loopback = 0;
1790
1791   from = vlib_frame_vector_args (frame);
1792   n_left_from = frame->n_vectors;
1793   next_index = node->cached_next_index;
1794
1795   while (n_left_from > 0)
1796     {
1797       u32 n_left_to_next;
1798
1799       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1800
1801       while (n_left_from > 0 && n_left_to_next > 0)
1802         {
1803           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
1804           u32 iph_offset0 = 0;
1805           vlib_buffer_t *b0;
1806           u32 next0;
1807           u8 cached0 = 0;
1808           ip4_header_t *ip0 = 0;
1809           nat_reass_ip4_t *reass0;
1810           udp_header_t *udp0;
1811           tcp_header_t *tcp0;
1812           icmp46_header_t *icmp0;
1813           clib_bihash_kv_16_8_t kv0, value0;
1814           snat_session_t *s0 = 0;
1815           u16 old_port0, new_port0;
1816           ip_csum_t sum0;
1817
1818           /* speculatively enqueue b0 to the current next frame */
1819           bi0 = from[0];
1820           to_next[0] = bi0;
1821           from += 1;
1822           to_next += 1;
1823           n_left_from -= 1;
1824           n_left_to_next -= 1;
1825
1826           b0 = vlib_get_buffer (vm, bi0);
1827
1828           next0 = NAT_IN2OUT_ED_NEXT_LOOKUP;
1829
1830           sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1831           rx_fib_index0 =
1832             fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1833                                                  sw_if_index0);
1834
1835           if (PREDICT_FALSE (nat_reass_is_drop_frag (0)))
1836             {
1837               next0 = NAT_IN2OUT_ED_NEXT_DROP;
1838               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_DROP_FRAGMENT];
1839               goto trace0;
1840             }
1841
1842           if (is_output_feature)
1843             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1844
1845           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1846                                   iph_offset0);
1847
1848           udp0 = ip4_next_header (ip0);
1849           tcp0 = (tcp_header_t *) udp0;
1850           icmp0 = (icmp46_header_t *) udp0;
1851           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1852
1853           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
1854                                                  ip0->dst_address,
1855                                                  ip0->fragment_id,
1856                                                  ip0->protocol,
1857                                                  1, &fragments_to_drop);
1858
1859           if (PREDICT_FALSE (!reass0))
1860             {
1861               next0 = NAT_IN2OUT_ED_NEXT_DROP;
1862               b0->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_REASS];
1863               nat_log_notice ("maximum reassemblies exceeded");
1864               goto trace0;
1865             }
1866
1867           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1868             {
1869               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1870                 {
1871                   if (is_output_feature)
1872                     {
1873                       if (PREDICT_FALSE
1874                           (nat_not_translate_output_feature_fwd
1875                            (sm, ip0, thread_index, now, vm, b0)))
1876                         reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
1877                       goto trace0;
1878                     }
1879
1880                   next0 = icmp_in2out_ed_slow_path
1881                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1882                      next0, now, thread_index, &s0);
1883
1884                   if (PREDICT_TRUE (next0 != NAT_IN2OUT_ED_NEXT_DROP))
1885                     {
1886                       if (s0)
1887                         reass0->sess_index = s0 - per_thread_data->sessions;
1888                       else
1889                         reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
1890                       nat_ip4_reass_get_frags (reass0,
1891                                                &fragments_to_loopback);
1892                     }
1893
1894                   goto trace0;
1895                 }
1896
1897               make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address,
1898                           ip0->protocol, rx_fib_index0, udp0->src_port,
1899                           udp0->dst_port);
1900
1901               if (clib_bihash_search_16_8
1902                   (&per_thread_data->in2out_ed, &kv0, &value0))
1903                 {
1904                   if (is_output_feature)
1905                     {
1906                       if (PREDICT_FALSE
1907                           (nat44_ed_not_translate_output_feature
1908                            (sm, ip0, ip0->protocol, udp0->src_port,
1909                             udp0->dst_port, thread_index, sw_if_index0,
1910                             vnet_buffer (b0)->sw_if_index[VLIB_TX])))
1911                         {
1912                           reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
1913                           nat_ip4_reass_get_frags (reass0,
1914                                                    &fragments_to_loopback);
1915                           goto trace0;
1916                         }
1917                     }
1918                   else
1919                     {
1920                       if (PREDICT_FALSE (nat44_ed_not_translate (sm, node,
1921                                                                  sw_if_index0,
1922                                                                  ip0, proto0,
1923                                                                  rx_fib_index0,
1924                                                                  thread_index)))
1925                         {
1926                           reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
1927                           nat_ip4_reass_get_frags (reass0,
1928                                                    &fragments_to_loopback);
1929                           goto trace0;
1930                         }
1931                     }
1932
1933                   next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0,
1934                                         &s0, node, next0, thread_index, now,
1935                                         tcp0);
1936
1937                   if (PREDICT_FALSE (next0 == NAT_IN2OUT_ED_NEXT_DROP))
1938                     goto trace0;
1939
1940                   if (PREDICT_FALSE (!s0))
1941                     {
1942                       reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
1943                       goto trace0;
1944                     }
1945
1946                   reass0->sess_index = s0 - per_thread_data->sessions;
1947                 }
1948               else
1949                 {
1950                   s0 = pool_elt_at_index (per_thread_data->sessions,
1951                                           value0.value);
1952                   reass0->sess_index = value0.value;
1953                 }
1954               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
1955             }
1956           else
1957             {
1958               if (reass0->flags & NAT_REASS_FLAG_ED_DONT_TRANSLATE)
1959                 goto trace0;
1960               if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
1961                 {
1962                   if (nat_ip4_reass_add_fragment
1963                       (thread_index, reass0, bi0, &fragments_to_drop))
1964                     {
1965                       b0->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_FRAG];
1966                       nat_log_notice
1967                         ("maximum fragments per reassembly exceeded");
1968                       next0 = NAT_IN2OUT_ED_NEXT_DROP;
1969                       goto trace0;
1970                     }
1971                   cached0 = 1;
1972                   goto trace0;
1973                 }
1974               s0 = pool_elt_at_index (per_thread_data->sessions,
1975                                       reass0->sess_index);
1976             }
1977
1978           old_addr0 = ip0->src_address.as_u32;
1979           ip0->src_address = s0->out2in.addr;
1980           new_addr0 = ip0->src_address.as_u32;
1981           if (!is_output_feature)
1982             vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1983
1984           sum0 = ip0->checksum;
1985           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1986                                  ip4_header_t,
1987                                  src_address /* changed member */ );
1988           if (PREDICT_FALSE (is_twice_nat_session (s0)))
1989             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
1990                                    s0->ext_host_addr.as_u32, ip4_header_t,
1991                                    dst_address);
1992           ip0->checksum = ip_csum_fold (sum0);
1993
1994           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1995             {
1996               if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
1997                 {
1998                   old_port0 = tcp0->src_port;
1999                   tcp0->src_port = s0->out2in.port;
2000                   new_port0 = tcp0->src_port;
2001
2002                   sum0 = tcp0->checksum;
2003                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2004                                          ip4_header_t,
2005                                          dst_address /* changed member */ );
2006                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2007                                          ip4_header_t /* cheat */ ,
2008                                          length /* changed member */ );
2009                   if (PREDICT_FALSE (is_twice_nat_session (s0)))
2010                     {
2011                       sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
2012                                              s0->ext_host_addr.as_u32,
2013                                              ip4_header_t, dst_address);
2014                       sum0 = ip_csum_update (sum0, tcp0->dst_port,
2015                                              s0->ext_host_port, ip4_header_t,
2016                                              length);
2017                       tcp0->dst_port = s0->ext_host_port;
2018                       ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
2019                     }
2020                   tcp0->checksum = ip_csum_fold (sum0);
2021                 }
2022               else
2023                 {
2024                   old_port0 = udp0->src_port;
2025                   udp0->src_port = s0->out2in.port;
2026                   udp0->checksum = 0;
2027                   if (PREDICT_FALSE (is_twice_nat_session (s0)))
2028                     {
2029                       udp0->dst_port = s0->ext_host_port;
2030                       ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
2031                     }
2032                 }
2033             }
2034
2035           /* Hairpinning */
2036           nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port,
2037                                    s0->ext_host_port, proto0, 1);
2038
2039           /* Accounting */
2040           nat44_session_update_counters (s0, now,
2041                                          vlib_buffer_length_in_chain (vm, b0),
2042                                          thread_index);
2043           /* Per-user LRU list maintenance */
2044           nat44_session_update_lru (sm, s0, thread_index);
2045
2046         trace0:
2047           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2048                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2049             {
2050               nat44_reass_trace_t *t =
2051                 vlib_add_trace (vm, node, b0, sizeof (*t));
2052               t->cached = cached0;
2053               t->sw_if_index = sw_if_index0;
2054               t->next_index = next0;
2055             }
2056
2057           if (cached0)
2058             {
2059               n_left_to_next++;
2060               to_next--;
2061               cached_fragments++;
2062             }
2063           else
2064             {
2065               pkts_processed += next0 != NAT_IN2OUT_ED_NEXT_DROP;
2066
2067               /* verify speculative enqueue, maybe switch current next frame */
2068               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2069                                                to_next, n_left_to_next,
2070                                                bi0, next0);
2071             }
2072
2073           if (n_left_from == 0 && vec_len (fragments_to_loopback))
2074             {
2075               from = vlib_frame_vector_args (frame);
2076               u32 len = vec_len (fragments_to_loopback);
2077               if (len <= VLIB_FRAME_SIZE)
2078                 {
2079                   clib_memcpy_fast (from, fragments_to_loopback,
2080                                     sizeof (u32) * len);
2081                   n_left_from = len;
2082                   vec_reset_length (fragments_to_loopback);
2083                 }
2084               else
2085                 {
2086                   clib_memcpy_fast (from, fragments_to_loopback +
2087                                     (len - VLIB_FRAME_SIZE),
2088                                     sizeof (u32) * VLIB_FRAME_SIZE);
2089                   n_left_from = VLIB_FRAME_SIZE;
2090                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
2091                 }
2092             }
2093         }
2094
2095       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2096     }
2097
2098   vlib_node_increment_counter (vm, sm->ed_in2out_reass_node_index,
2099                                NAT_IN2OUT_ED_ERROR_PROCESSED_FRAGMENTS,
2100                                pkts_processed);
2101   vlib_node_increment_counter (vm, sm->ed_in2out_reass_node_index,
2102                                NAT_IN2OUT_ED_ERROR_CACHED_FRAGMENTS,
2103                                cached_fragments);
2104
2105   nat_send_all_to_node (vm, fragments_to_drop, node,
2106                         &node->errors[NAT_IN2OUT_ED_ERROR_DROP_FRAGMENT],
2107                         NAT_IN2OUT_ED_NEXT_DROP);
2108
2109   vec_free (fragments_to_drop);
2110   vec_free (fragments_to_loopback);
2111   return frame->n_vectors;
2112 }
2113
2114 VLIB_NODE_FN (nat44_ed_in2out_reass_node) (vlib_main_t * vm,
2115                                            vlib_node_runtime_t * node,
2116                                            vlib_frame_t * frame)
2117 {
2118   return nat44_ed_in2out_reass_node_fn_inline (vm, node, frame, 0);
2119 }
2120
2121 /* *INDENT-OFF* */
2122 VLIB_REGISTER_NODE (nat44_ed_in2out_reass_node) = {
2123   .name = "nat44-ed-in2out-reass",
2124   .vector_size = sizeof (u32),
2125   .format_trace = format_nat44_reass_trace,
2126   .type = VLIB_NODE_TYPE_INTERNAL,
2127   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
2128   .error_strings = nat_in2out_ed_error_strings,
2129   .n_next_nodes = NAT_IN2OUT_ED_N_NEXT,
2130   .next_nodes = {
2131     [NAT_IN2OUT_ED_NEXT_DROP] = "error-drop",
2132     [NAT_IN2OUT_ED_NEXT_LOOKUP] = "ip4-lookup",
2133     [NAT_IN2OUT_ED_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2134     [NAT_IN2OUT_ED_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2135     [NAT_IN2OUT_ED_NEXT_REASS] = "nat44-ed-in2out-reass",
2136   },
2137 };
2138 /* *INDENT-ON* */
2139
2140 VLIB_NODE_FN (nat44_ed_in2out_reass_output_node) (vlib_main_t * vm,
2141                                                   vlib_node_runtime_t * node,
2142                                                   vlib_frame_t * frame)
2143 {
2144   return nat44_ed_in2out_reass_node_fn_inline (vm, node, frame, 1);
2145 }
2146
2147 /* *INDENT-OFF* */
2148 VLIB_REGISTER_NODE (nat44_ed_in2out_reass_output_node) = {
2149   .name = "nat44-ed-in2out-reass-output",
2150   .vector_size = sizeof (u32),
2151   .format_trace = format_nat44_reass_trace,
2152   .type = VLIB_NODE_TYPE_INTERNAL,
2153   .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
2154   .error_strings = nat_in2out_ed_error_strings,
2155   .n_next_nodes = NAT_IN2OUT_ED_N_NEXT,
2156   .next_nodes = {
2157     [NAT_IN2OUT_ED_NEXT_DROP] = "error-drop",
2158     [NAT_IN2OUT_ED_NEXT_LOOKUP] = "interface-output",
2159     [NAT_IN2OUT_ED_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2160     [NAT_IN2OUT_ED_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2161     [NAT_IN2OUT_ED_NEXT_REASS] = "nat44-ed-in2out-reass",
2162   },
2163 };
2164 /* *INDENT-ON* */
2165
2166 /*
2167  * fd.io coding-style-patch-verification: ON
2168  *
2169  * Local Variables:
2170  * eval: (c-set-style "gnu")
2171  * End:
2172  */