NAT44: client-IP based session affinity for load-balancing (VPP-1297)
[vpp.git] / src / plugins / nat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <nat/nat.h>
25 #include <nat/nat_ipfix_logging.h>
26 #include <nat/nat_det.h>
27 #include <nat/nat_reass.h>
28 #include <nat/nat_inlines.h>
29
30 #include <vppinfra/hash.h>
31 #include <vppinfra/error.h>
32 #include <vppinfra/elog.h>
33
/* Per-packet trace record consumed by format_snat_in2out_trace and
 * format_snat_in2out_fast_trace. */
typedef struct {
  u32 sw_if_index;      /* printed as "sw_if_index" */
  u32 next_index;       /* printed as "next index" */
  u32 session_index;    /* printed as "session" (not printed by the fast trace) */
  u32 is_slow_path;     /* non-zero selects the SLOW_PATH tag, zero the FAST_PATH tag */
} snat_in2out_trace_t;
40
/* Per-packet trace record consumed by
 * format_snat_in2out_worker_handoff_trace. */
typedef struct {
  u32 next_worker_index;  /* worker index printed in the trace line */
  u8 do_handoff;          /* non-zero prints "next worker", zero "same worker" */
} snat_in2out_worker_handoff_trace_t;
45
46 /* packet trace format function */
47 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
48 {
49   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
50   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
51   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
52   char * tag;
53
54   tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
55
56   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
57               t->sw_if_index, t->next_index, t->session_index);
58
59   return s;
60 }
61
62 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
63 {
64   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
65   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
66   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
67
68   s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
69               t->sw_if_index, t->next_index);
70
71   return s;
72 }
73
74 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
75 {
76   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
77   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
78   snat_in2out_worker_handoff_trace_t * t =
79     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
80   char * m;
81
82   m = t->do_handoff ? "next worker" : "same worker";
83   s = format (s, "NAT44_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
84
85   return s;
86 }
87
/* Per-packet trace record consumed by format_nat44_in2out_reass_trace. */
typedef struct {
  u32 sw_if_index;  /* printed as "sw_if_index" */
  u32 next_index;   /* printed as "next index" */
  u8 cached;        /* non-zero prints status "cached", zero "translated" */
} nat44_in2out_reass_trace_t;
93
94 static u8 * format_nat44_in2out_reass_trace (u8 * s, va_list * args)
95 {
96   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
97   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
98   nat44_in2out_reass_trace_t * t = va_arg (*args, nat44_in2out_reass_trace_t *);
99
100   s = format (s, "NAT44_IN2OUT_REASS: sw_if_index %d, next index %d, status %s",
101               t->sw_if_index, t->next_index,
102               t->cached ? "cached" : "translated");
103
104   return s;
105 }
106
/*
 * Declarations of the vlib node registrations used by the in2out code, so
 * they can be referenced before the point where each one is initialized.
 * NOTE(review): the initializers are outside this part of the file.
 */
vlib_node_registration_t snat_in2out_node;
vlib_node_registration_t snat_in2out_slowpath_node;
vlib_node_registration_t snat_in2out_fast_node;
vlib_node_registration_t snat_in2out_worker_handoff_node;
vlib_node_registration_t snat_det_in2out_node;
vlib_node_registration_t snat_in2out_output_node;
vlib_node_registration_t snat_in2out_output_slowpath_node;
vlib_node_registration_t snat_in2out_output_worker_handoff_node;
vlib_node_registration_t snat_hairpin_dst_node;
vlib_node_registration_t snat_hairpin_src_node;
vlib_node_registration_t nat44_hairpinning_node;
vlib_node_registration_t nat44_in2out_reass_node;
vlib_node_registration_t nat44_ed_in2out_node;
vlib_node_registration_t nat44_ed_in2out_slowpath_node;
vlib_node_registration_t nat44_ed_in2out_output_node;
vlib_node_registration_t nat44_ed_in2out_output_slowpath_node;
vlib_node_registration_t nat44_ed_hairpin_dst_node;
vlib_node_registration_t nat44_ed_hairpin_src_node;
vlib_node_registration_t nat44_ed_hairpinning_node;
126
/*
 * X-macro listing the in2out error counters. Each _(SYM, "text") entry is
 * expanded twice below: once into the enumerator SNAT_IN2OUT_ERROR_<SYM>
 * and once into the matching entry of snat_in2out_error_strings, which
 * keeps the enum values and the string table index-aligned.
 */
#define foreach_snat_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(IN2OUT_PACKETS, "Good in2out packets processed")      \
_(OUT_OF_PORTS, "Out of ports")                         \
_(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(NO_TRANSLATION, "No translation")                     \
_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")   \
_(DROP_FRAGMENT, "Drop fragment")                       \
_(MAX_REASS, "Maximum reassemblies exceeded")           \
_(MAX_FRAG, "Maximum fragments per reassembly exceeded")\
_(FQ_CONGESTED, "Handoff frame queue congested")

/* Error codes; SNAT_IN2OUT_N_ERROR is the number of entries. */
typedef enum {
#define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
  foreach_snat_in2out_error
#undef _
  SNAT_IN2OUT_N_ERROR,
} snat_in2out_error_t;

/* Human-readable counter names, indexed by snat_in2out_error_t. */
static char * snat_in2out_error_strings[] = {
#define _(sym,string) string,
  foreach_snat_in2out_error
#undef _
};
152
/* Next-node indices returned by the in2out processing in this file;
 * SNAT_IN2OUT_N_NEXT is the number of entries. */
typedef enum {
  SNAT_IN2OUT_NEXT_LOOKUP,
  SNAT_IN2OUT_NEXT_DROP,        /* returned on every drop path in this file */
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_NEXT_REASS,
  SNAT_IN2OUT_N_NEXT,
} snat_in2out_next_t;
161
/* Next-node indices for the hairpin source processing;
 * SNAT_HAIRPIN_SRC_N_NEXT is the number of entries. */
typedef enum {
  SNAT_HAIRPIN_SRC_NEXT_DROP,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,   /* NOTE(review): "WH" presumably means worker handoff - confirm */
  SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
  SNAT_HAIRPIN_SRC_N_NEXT,
} snat_hairpin_next_t;
169
170 /**
171  * @brief Check if packet should be translated
172  *
173  * Packets aimed at outside interface and external address with active session
174  * should be translated.
175  *
176  * @param sm            NAT main
177  * @param rt            NAT runtime data
178  * @param sw_if_index0  index of the inside interface
179  * @param ip0           IPv4 header
180  * @param proto0        NAT protocol
181  * @param rx_fib_index0 RX FIB index
182  *
183  * @returns 0 if packet should be translated otherwise 1
184  */
185 static inline int
186 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
187                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
188                          u32 rx_fib_index0)
189 {
190   if (sm->out2in_dpo)
191     return 0;
192
193   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
194   nat_outside_fib_t *outside_fib;
195   fib_prefix_t pfx = {
196     .fp_proto = FIB_PROTOCOL_IP4,
197     .fp_len = 32,
198     .fp_addr = {
199         .ip4.as_u32 = ip0->dst_address.as_u32,
200     },
201   };
202
203   /* Don't NAT packet aimed at the intfc address */
204   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
205                                       ip0->dst_address.as_u32)))
206     return 1;
207
208   fei = fib_table_lookup (rx_fib_index0, &pfx);
209   if (FIB_NODE_INDEX_INVALID != fei)
210     {
211       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
212       if (sw_if_index == ~0)
213         {
214           vec_foreach (outside_fib, sm->outside_fibs)
215             {
216               fei = fib_table_lookup (outside_fib->fib_index, &pfx);
217               if (FIB_NODE_INDEX_INVALID != fei)
218                 {
219                   sw_if_index = fib_entry_get_resolving_interface (fei);
220                   if (sw_if_index != ~0)
221                     break;
222                 }
223             }
224         }
225       if (sw_if_index == ~0)
226         return 1;
227
228       snat_interface_t *i;
229       pool_foreach (i, sm->interfaces,
230       ({
231         /* NAT packet aimed at outside interface */
232         if ((nat_interface_is_outside(i)) && (sw_if_index == i->sw_if_index))
233           return 0;
234       }));
235     }
236
237   return 1;
238 }
239
240 static inline int
241 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
242                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
243                     u32 rx_fib_index0, u32 thread_index)
244 {
245   udp_header_t * udp0 = ip4_next_header (ip0);
246   snat_session_key_t key0, sm0;
247   clib_bihash_kv_8_8_t kv0, value0;
248
249   key0.addr = ip0->dst_address;
250   key0.port = udp0->dst_port;
251   key0.protocol = proto0;
252   key0.fib_index = sm->outside_fib_index;
253   kv0.key = key0.as_u64;
254
255   /* NAT packet aimed at external address if */
256   /* has active sessions */
257   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
258                               &value0))
259     {
260       /* or is static mappings */
261       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
262         return 0;
263     }
264   else
265     return 0;
266
267   if (sm->forwarding_enabled)
268     return 1;
269
270   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
271                                  rx_fib_index0);
272 }
273
274 static inline int
275 nat_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip0,
276                                   u32 proto0, u16 src_port, u16 dst_port,
277                                   u32 thread_index, u32 sw_if_index)
278 {
279   snat_session_key_t key0;
280   clib_bihash_kv_8_8_t kv0, value0;
281   snat_interface_t *i;
282
283   /* src NAT check */
284   key0.addr = ip0->src_address;
285   key0.port = src_port;
286   key0.protocol = proto0;
287   key0.fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
288   kv0.key = key0.as_u64;
289
290   if (!clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
291                                &value0))
292     return 1;
293
294   /* dst NAT check */
295   key0.addr = ip0->dst_address;
296   key0.port = dst_port;
297   key0.protocol = proto0;
298   kv0.key = key0.as_u64;
299   if (!clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
300                                &value0))
301   {
302     /* hairpinning */
303     pool_foreach (i, sm->output_feature_interfaces,
304     ({
305       if ((nat_interface_is_inside(i)) && (sw_if_index == i->sw_if_index))
306         return 0;
307     }));
308     return 1;
309   }
310
311   return 0;
312 }
313
/**
 * @brief Stale-entry callback for the in2out session hash
 *
 * Passed to clib_bihash_add_or_overwrite_stale_8_8() when an in2out key is
 * added. If the session referenced by the visited entry has timed out, the
 * session is torn down here.
 *
 * @param kv   visited hash entry; kv->value is the session pool index
 * @param arg  nat44_is_idle_session_ctx_t with the current time and thread
 *
 * @returns 1 if the session was idle and has been deleted, otherwise 0
 *
 * NOTE(review): only the out2in key is removed explicitly; presumably the
 * bihash reuses the visited in2out entry when 1 is returned - confirm
 * against the bihash implementation.
 */
int
nat44_i2o_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void * arg)
{
  snat_main_t *sm = &snat_main;
  nat44_is_idle_session_ctx_t *ctx = arg;
  snat_session_t *s;
  u64 sess_timeout_time;
  snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
                                                       ctx->thread_index);
  clib_bihash_kv_8_8_t s_kv;

  s = pool_elt_at_index (tsm->sessions, kv->value);
  /* expiry time = last activity + per-session timeout; the f64 sum is
   * stored in a u64, i.e. converted to an integer */
  sess_timeout_time = s->last_heard + (f64)nat44_session_get_timeout(sm, s);
  if (ctx->now >= sess_timeout_time)
    {
      /* drop the reverse (out2in) lookup entry of the expired session */
      s_kv.key = s->out2in.as_u64;
      if (clib_bihash_add_del_8_8 (&tsm->out2in, &s_kv, 0))
        nat_log_warn ("out2in key del failed");

      snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
                                          s->out2in.addr.as_u32,
                                          s->in2out.protocol,
                                          s->in2out.port,
                                          s->out2in.port,
                                          s->in2out.fib_index);

      /* the outside address/port is released only for sessions that are
       * not flagged as static */
      if (!snat_is_session_static (s))
        snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
                                            &s->out2in);

      nat44_delete_session (sm, s, ctx->thread_index);
      return 1;
    }

  return 0;
}
350
351 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
352                       ip4_header_t * ip0,
353                       u32 rx_fib_index0,
354                       snat_session_key_t * key0,
355                       snat_session_t ** sessionp,
356                       vlib_node_runtime_t * node,
357                       u32 next0,
358                       u32 thread_index,
359                       f64 now)
360 {
361   snat_user_t *u;
362   snat_session_t *s;
363   clib_bihash_kv_8_8_t kv0;
364   snat_session_key_t key1;
365   u32 address_index = ~0;
366   udp_header_t * udp0 = ip4_next_header (ip0);
367   u8 is_sm = 0;
368   nat_outside_fib_t *outside_fib;
369   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
370   fib_prefix_t pfx = {
371     .fp_proto = FIB_PROTOCOL_IP4,
372     .fp_len = 32,
373     .fp_addr = {
374         .ip4.as_u32 = ip0->dst_address.as_u32,
375     },
376   };
377   nat44_is_idle_session_ctx_t ctx0;
378
379   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
380     {
381       b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
382       nat_ipfix_logging_max_sessions(sm->max_translations);
383       nat_log_notice ("maximum sessions exceeded");
384       return SNAT_IN2OUT_NEXT_DROP;
385     }
386
387   key1.protocol = key0->protocol;
388
389   /* First try to match static mapping by local address and port */
390   if (snat_static_mapping_match (sm, *key0, &key1, 0, 0, 0, 0, 0))
391     {
392       /* Try to create dynamic translation */
393       if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index0,
394                                                thread_index, &key1,
395                                                &address_index,
396                                                sm->port_per_thread,
397                                                sm->per_thread_data[thread_index].snat_thread_index))
398         {
399           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
400           return SNAT_IN2OUT_NEXT_DROP;
401         }
402     }
403   else
404     is_sm = 1;
405
406   u = nat_user_get_or_create (sm, &ip0->src_address, rx_fib_index0,
407                               thread_index);
408   if (!u)
409     {
410       nat_log_warn ("create NAT user failed");
411       return SNAT_IN2OUT_NEXT_DROP;
412     }
413
414   s = nat_session_alloc_or_recycle (sm, u, thread_index);
415   if (!s)
416     {
417       nat44_delete_user_with_no_session (sm, u, thread_index);
418       nat_log_warn ("create NAT session failed");
419       return SNAT_IN2OUT_NEXT_DROP;
420     }
421
422   if (is_sm)
423     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
424   user_session_increment (sm, u, is_sm);
425   s->outside_address_index = address_index;
426   s->in2out = *key0;
427   s->out2in = key1;
428   s->out2in.protocol = key0->protocol;
429   s->out2in.fib_index = sm->outside_fib_index;
430   switch (vec_len (sm->outside_fibs))
431     {
432     case 0:
433       s->out2in.fib_index = sm->outside_fib_index;
434       break;
435     case 1:
436       s->out2in.fib_index = sm->outside_fibs[0].fib_index;
437       break;
438     default:
439       vec_foreach (outside_fib, sm->outside_fibs)
440         {
441           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
442           if (FIB_NODE_INDEX_INVALID != fei)
443             {
444               if (fib_entry_get_resolving_interface (fei) != ~0)
445                 {
446                   s->out2in.fib_index = outside_fib->fib_index;
447                   break;
448                 }
449             }
450         }
451       break;
452     }
453   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
454   s->ext_host_port = udp0->dst_port;
455   *sessionp = s;
456
457   /* Add to translation hashes */
458   ctx0.now = now;
459   ctx0.thread_index = thread_index;
460   kv0.key = s->in2out.as_u64;
461   kv0.value = s - sm->per_thread_data[thread_index].sessions;
462   if (clib_bihash_add_or_overwrite_stale_8_8 (
463         &sm->per_thread_data[thread_index].in2out, &kv0,
464         nat44_i2o_is_idle_session_cb, &ctx0))
465       nat_log_notice ("in2out key add failed");
466
467   kv0.key = s->out2in.as_u64;
468   kv0.value = s - sm->per_thread_data[thread_index].sessions;
469
470   if (clib_bihash_add_or_overwrite_stale_8_8 (
471         &sm->per_thread_data[thread_index].out2in, &kv0,
472         nat44_o2i_is_idle_session_cb, &ctx0))
473       nat_log_notice ("out2in key add failed");
474
475   /* log NAT event */
476   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
477                                       s->out2in.addr.as_u32,
478                                       s->in2out.protocol,
479                                       s->in2out.port,
480                                       s->out2in.port,
481                                       s->in2out.fib_index);
482   return next0;
483 }
484
485 static_always_inline
486 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
487                                  snat_session_key_t *p_key0)
488 {
489   icmp46_header_t *icmp0;
490   snat_session_key_t key0;
491   icmp_echo_header_t *echo0, *inner_echo0 = 0;
492   ip4_header_t *inner_ip0 = 0;
493   void *l4_header = 0;
494   icmp46_header_t *inner_icmp0;
495
496   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
497   echo0 = (icmp_echo_header_t *)(icmp0+1);
498
499   if (!icmp_is_error_message (icmp0))
500     {
501       key0.protocol = SNAT_PROTOCOL_ICMP;
502       key0.addr = ip0->src_address;
503       key0.port = echo0->identifier;
504     }
505   else
506     {
507       inner_ip0 = (ip4_header_t *)(echo0+1);
508       l4_header = ip4_next_header (inner_ip0);
509       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
510       key0.addr = inner_ip0->dst_address;
511       switch (key0.protocol)
512         {
513         case SNAT_PROTOCOL_ICMP:
514           inner_icmp0 = (icmp46_header_t*)l4_header;
515           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
516           key0.port = inner_echo0->identifier;
517           break;
518         case SNAT_PROTOCOL_UDP:
519         case SNAT_PROTOCOL_TCP:
520           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
521           break;
522         default:
523           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
524         }
525     }
526   *p_key0 = key0;
527   return -1; /* success */
528 }
529
530 /**
531  * Get address and port values to be used for ICMP packet translation
532  * and create session if needed
533  *
534  * @param[in,out] sm             NAT main
535  * @param[in,out] node           NAT node runtime
536  * @param[in] thread_index       thread index
537  * @param[in,out] b0             buffer containing packet to be translated
538  * @param[out] p_proto           protocol used for matching
539  * @param[out] p_value           address and port after NAT translation
540  * @param[out] p_dont_translate  if packet should not be translated
541  * @param d                      optional parameter
542  * @param e                      optional parameter
543  */
544 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
545                            u32 thread_index, vlib_buffer_t *b0,
546                            ip4_header_t *ip0, u8 *p_proto,
547                            snat_session_key_t *p_value,
548                            u8 *p_dont_translate, void *d, void *e)
549 {
550   icmp46_header_t *icmp0;
551   u32 sw_if_index0;
552   u32 rx_fib_index0;
553   snat_session_key_t key0;
554   snat_session_t *s0 = 0;
555   u8 dont_translate = 0;
556   clib_bihash_kv_8_8_t kv0, value0;
557   u32 next0 = ~0;
558   int err;
559
560   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
561   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
562   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
563
564   err = icmp_get_key (ip0, &key0);
565   if (err != -1)
566     {
567       b0->error = node->errors[err];
568       next0 = SNAT_IN2OUT_NEXT_DROP;
569       goto out;
570     }
571   key0.fib_index = rx_fib_index0;
572
573   kv0.key = key0.as_u64;
574
575   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
576                               &value0))
577     {
578       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] != ~0)
579         {
580           if (PREDICT_FALSE(nat_not_translate_output_feature(sm, ip0,
581               key0.protocol, key0.port, key0.port, thread_index, sw_if_index0)))
582             {
583               dont_translate = 1;
584               goto out;
585             }
586         }
587       else
588         {
589           if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
590               ip0, SNAT_PROTOCOL_ICMP, rx_fib_index0, thread_index)))
591             {
592               dont_translate = 1;
593               goto out;
594             }
595         }
596
597       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
598         {
599           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
600           next0 = SNAT_IN2OUT_NEXT_DROP;
601           goto out;
602         }
603
604       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, &s0, node, next0,
605                          thread_index, vlib_time_now (sm->vlib_main));
606
607       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
608         goto out;
609     }
610   else
611     {
612       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
613                         icmp0->type != ICMP4_echo_reply &&
614                         !icmp_is_error_message (icmp0)))
615         {
616           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
617           next0 = SNAT_IN2OUT_NEXT_DROP;
618           goto out;
619         }
620
621       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
622                               value0.value);
623     }
624
625 out:
626   *p_proto = key0.protocol;
627   if (s0)
628     *p_value = s0->out2in;
629   *p_dont_translate = dont_translate;
630   if (d)
631     *(snat_session_t**)d = s0;
632   return next0;
633 }
634
635 /**
636  * Get address and port values to be used for ICMP packet translation
637  *
638  * @param[in] sm                 NAT main
639  * @param[in,out] node           NAT node runtime
640  * @param[in] thread_index       thread index
641  * @param[in,out] b0             buffer containing packet to be translated
642  * @param[out] p_proto           protocol used for matching
643  * @param[out] p_value           address and port after NAT translation
644  * @param[out] p_dont_translate  if packet should not be translated
645  * @param d                      optional parameter
646  * @param e                      optional parameter
647  */
648 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
649                            u32 thread_index, vlib_buffer_t *b0,
650                            ip4_header_t *ip0, u8 *p_proto,
651                            snat_session_key_t *p_value,
652                            u8 *p_dont_translate, void *d, void *e)
653 {
654   icmp46_header_t *icmp0;
655   u32 sw_if_index0;
656   u32 rx_fib_index0;
657   snat_session_key_t key0;
658   snat_session_key_t sm0;
659   u8 dont_translate = 0;
660   u8 is_addr_only;
661   u32 next0 = ~0;
662   int err;
663
664   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
665   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
666   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
667
668   err = icmp_get_key (ip0, &key0);
669   if (err != -1)
670     {
671       b0->error = node->errors[err];
672       next0 = SNAT_IN2OUT_NEXT_DROP;
673       goto out2;
674     }
675   key0.fib_index = rx_fib_index0;
676
677   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only, 0, 0, 0))
678     {
679       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
680           IP_PROTOCOL_ICMP, rx_fib_index0)))
681         {
682           dont_translate = 1;
683           goto out;
684         }
685
686       if (icmp_is_error_message (icmp0))
687         {
688           next0 = SNAT_IN2OUT_NEXT_DROP;
689           goto out;
690         }
691
692       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
693       next0 = SNAT_IN2OUT_NEXT_DROP;
694       goto out;
695     }
696
697   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
698                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
699                     !icmp_is_error_message (icmp0)))
700     {
701       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
702       next0 = SNAT_IN2OUT_NEXT_DROP;
703       goto out;
704     }
705
706 out:
707   *p_value = sm0;
708 out2:
709   *p_proto = key0.protocol;
710   *p_dont_translate = dont_translate;
711   return next0;
712 }
713
714 static inline u32 icmp_in2out (snat_main_t *sm,
715                                vlib_buffer_t * b0,
716                                ip4_header_t * ip0,
717                                icmp46_header_t * icmp0,
718                                u32 sw_if_index0,
719                                u32 rx_fib_index0,
720                                vlib_node_runtime_t * node,
721                                u32 next0,
722                                u32 thread_index,
723                                void *d,
724                                void *e)
725 {
726   snat_session_key_t sm0;
727   u8 protocol;
728   icmp_echo_header_t *echo0, *inner_echo0 = 0;
729   ip4_header_t *inner_ip0;
730   void *l4_header = 0;
731   icmp46_header_t *inner_icmp0;
732   u8 dont_translate;
733   u32 new_addr0, old_addr0;
734   u16 old_id0, new_id0;
735   ip_csum_t sum0;
736   u16 checksum0;
737   u32 next0_tmp;
738
739   echo0 = (icmp_echo_header_t *)(icmp0+1);
740
741   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0, ip0,
742                                        &protocol, &sm0, &dont_translate, d, e);
743   if (next0_tmp != ~0)
744     next0 = next0_tmp;
745   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
746     goto out;
747
748   sum0 = ip_incremental_checksum (0, icmp0,
749                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
750   checksum0 = ~ip_csum_fold (sum0);
751   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
752     {
753       next0 = SNAT_IN2OUT_NEXT_DROP;
754       goto out;
755     }
756
757   old_addr0 = ip0->src_address.as_u32;
758   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
759   if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
760     vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
761
762   sum0 = ip0->checksum;
763   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
764                          src_address /* changed member */);
765   ip0->checksum = ip_csum_fold (sum0);
766
767   if (icmp0->checksum == 0)
768     icmp0->checksum = 0xffff;
769
770   if (!icmp_is_error_message (icmp0))
771     {
772       new_id0 = sm0.port;
773       if (PREDICT_FALSE(new_id0 != echo0->identifier))
774         {
775           old_id0 = echo0->identifier;
776           new_id0 = sm0.port;
777           echo0->identifier = new_id0;
778
779           sum0 = icmp0->checksum;
780           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
781                                  identifier);
782           icmp0->checksum = ip_csum_fold (sum0);
783         }
784     }
785   else
786     {
787       inner_ip0 = (ip4_header_t *)(echo0+1);
788       l4_header = ip4_next_header (inner_ip0);
789
790       if (!ip4_header_checksum_is_valid (inner_ip0))
791         {
792           next0 = SNAT_IN2OUT_NEXT_DROP;
793           goto out;
794         }
795
796       old_addr0 = inner_ip0->dst_address.as_u32;
797       inner_ip0->dst_address = sm0.addr;
798       new_addr0 = inner_ip0->dst_address.as_u32;
799
800       sum0 = icmp0->checksum;
801       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
802                              dst_address /* changed member */);
803       icmp0->checksum = ip_csum_fold (sum0);
804
805       switch (protocol)
806         {
807           case SNAT_PROTOCOL_ICMP:
808             inner_icmp0 = (icmp46_header_t*)l4_header;
809             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
810
811             old_id0 = inner_echo0->identifier;
812             new_id0 = sm0.port;
813             inner_echo0->identifier = new_id0;
814
815             sum0 = icmp0->checksum;
816             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
817                                    identifier);
818             icmp0->checksum = ip_csum_fold (sum0);
819             break;
820           case SNAT_PROTOCOL_UDP:
821           case SNAT_PROTOCOL_TCP:
822             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
823             new_id0 = sm0.port;
824             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
825
826             sum0 = icmp0->checksum;
827             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
828                                    dst_port);
829             icmp0->checksum = ip_csum_fold (sum0);
830             break;
831           default:
832             ASSERT(0);
833         }
834     }
835
836 out:
837   return next0;
838 }
839
840 /**
841  * @brief Hairpinning
842  *
843  * Hairpinning allows two endpoints on the internal side of the NAT to
844  * communicate even if they only use each other's external IP addresses
845  * and ports.
846  *
847  * @param sm     NAT main.
848  * @param b0     Vlib buffer.
849  * @param ip0    IP header.
850  * @param udp0   UDP header.
851  * @param tcp0   TCP header.
852  * @param proto0 NAT protocol.
853  */
854 static inline int
855 snat_hairpinning (snat_main_t *sm,
856                   vlib_buffer_t * b0,
857                   ip4_header_t * ip0,
858                   udp_header_t * udp0,
859                   tcp_header_t * tcp0,
860                   u32 proto0,
861                   int is_ed)
862 {
863   snat_session_key_t key0, sm0;
864   snat_session_t * s0;
865   clib_bihash_kv_8_8_t kv0, value0;
866   ip_csum_t sum0;
867   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
868   u16 new_dst_port0, old_dst_port0;
869   int rv;
870
871   key0.addr = ip0->dst_address;
872   key0.port = udp0->dst_port;
873   key0.protocol = proto0;
874   key0.fib_index = sm->outside_fib_index;
875   kv0.key = key0.as_u64;
876
877   /* Check if destination is static mappings */
878   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
879     {
880       new_dst_addr0 = sm0.addr.as_u32;
881       new_dst_port0 = sm0.port;
882       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
883     }
884   /* or active session */
885   else
886     {
887       if (sm->num_workers > 1)
888         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
889       else
890         ti = sm->num_workers;
891
892       if (is_ed)
893         {
894           clib_bihash_kv_16_8_t ed_kv, ed_value;
895           make_ed_kv (&ed_kv, &ip0->dst_address, &ip0->src_address,
896                       ip0->protocol, sm->outside_fib_index, udp0->dst_port,
897                       udp0->src_port);
898           rv = clib_bihash_search_16_8 (&sm->per_thread_data[ti].out2in_ed,
899                                         &ed_kv, &ed_value);
900           si = ed_value.value;
901         }
902       else
903         {
904           rv = clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0,
905                                        &value0);
906           si = value0.value;
907         }
908       if (rv)
909         return 0;
910
911       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
912       new_dst_addr0 = s0->in2out.addr.as_u32;
913       new_dst_port0 = s0->in2out.port;
914       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
915     }
916
917   /* Destination is behind the same NAT, use internal address and port */
918   if (new_dst_addr0)
919     {
920       old_dst_addr0 = ip0->dst_address.as_u32;
921       ip0->dst_address.as_u32 = new_dst_addr0;
922       sum0 = ip0->checksum;
923       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
924                              ip4_header_t, dst_address);
925       ip0->checksum = ip_csum_fold (sum0);
926
927       old_dst_port0 = tcp0->dst;
928       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
929         {
930           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
931             {
932               tcp0->dst = new_dst_port0;
933               sum0 = tcp0->checksum;
934               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
935                                      ip4_header_t, dst_address);
936               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
937                                      ip4_header_t /* cheat */, length);
938               tcp0->checksum = ip_csum_fold(sum0);
939             }
940           else
941             {
942               udp0->dst_port = new_dst_port0;
943               udp0->checksum = 0;
944             }
945         }
946       else
947         {
948           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
949             {
950               sum0 = tcp0->checksum;
951               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
952                                      ip4_header_t, dst_address);
953               tcp0->checksum = ip_csum_fold(sum0);
954             }
955         }
956       return 1;
957     }
958   return 0;
959 }
960
961 static inline void
962 snat_icmp_hairpinning (snat_main_t *sm,
963                        vlib_buffer_t * b0,
964                        ip4_header_t * ip0,
965                        icmp46_header_t * icmp0,
966                        int is_ed)
967 {
968   snat_session_key_t key0, sm0;
969   clib_bihash_kv_8_8_t kv0, value0;
970   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
971   ip_csum_t sum0;
972   snat_session_t *s0;
973   int rv;
974
975   if (!icmp_is_error_message (icmp0))
976     {
977       icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
978       u16 icmp_id0 = echo0->identifier;
979       key0.addr = ip0->dst_address;
980       key0.port = icmp_id0;
981       key0.protocol = SNAT_PROTOCOL_ICMP;
982       key0.fib_index = sm->outside_fib_index;
983       kv0.key = key0.as_u64;
984
985       if (sm->num_workers > 1)
986         ti = (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread;
987       else
988         ti = sm->num_workers;
989
990       /* Check if destination is in active sessions */
991       if (is_ed)
992         {
993           clib_bihash_kv_16_8_t ed_kv, ed_value;
994           make_ed_kv (&ed_kv, &ip0->dst_address, &ip0->src_address,
995                       IP_PROTOCOL_ICMP, sm->outside_fib_index, icmp_id0, 0);
996           rv = clib_bihash_search_16_8 (&sm->per_thread_data[ti].out2in_ed,
997                                         &ed_kv, &ed_value);
998           si = ed_value.value;
999         }
1000       else
1001         {
1002           rv = clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0,
1003                                        &value0);
1004           si = value0.value;
1005         }
1006       if (rv)
1007         {
1008           /* or static mappings */
1009           if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
1010             {
1011               new_dst_addr0 = sm0.addr.as_u32;
1012               vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1013             }
1014         }
1015       else
1016         {
1017           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
1018           new_dst_addr0 = s0->in2out.addr.as_u32;
1019           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1020           echo0->identifier = s0->in2out.port;
1021           sum0 = icmp0->checksum;
1022           sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
1023                                  icmp_echo_header_t, identifier);
1024           icmp0->checksum = ip_csum_fold (sum0);
1025         }
1026
1027       /* Destination is behind the same NAT, use internal address and port */
1028       if (new_dst_addr0)
1029         {
1030           old_dst_addr0 = ip0->dst_address.as_u32;
1031           ip0->dst_address.as_u32 = new_dst_addr0;
1032           sum0 = ip0->checksum;
1033           sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
1034                                  ip4_header_t, dst_address);
1035           ip0->checksum = ip_csum_fold (sum0);
1036         }
1037     }
1038
1039 }
1040
1041 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
1042                                          vlib_buffer_t * b0,
1043                                          ip4_header_t * ip0,
1044                                          icmp46_header_t * icmp0,
1045                                          u32 sw_if_index0,
1046                                          u32 rx_fib_index0,
1047                                          vlib_node_runtime_t * node,
1048                                          u32 next0,
1049                                          f64 now,
1050                                          u32 thread_index,
1051                                          snat_session_t ** p_s0)
1052 {
1053   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1054                       next0, thread_index, p_s0, 0);
1055   snat_session_t * s0 = *p_s0;
1056   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
1057     {
1058       /* Hairpinning */
1059       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == 0)
1060         snat_icmp_hairpinning(sm, b0, ip0, icmp0, sm->endpoint_dependent);
1061       /* Accounting */
1062       nat44_session_update_counters (s0, now,
1063                                      vlib_buffer_length_in_chain (sm->vlib_main, b0));
1064       /* Per-user LRU list maintenance */
1065       nat44_session_update_lru (sm, s0, thread_index);
1066     }
1067   return next0;
1068 }
1069
1070 static inline void
1071 nat_hairpinning_sm_unknown_proto (snat_main_t * sm,
1072                                   vlib_buffer_t * b,
1073                                   ip4_header_t * ip)
1074 {
1075   clib_bihash_kv_8_8_t kv, value;
1076   snat_static_mapping_t *m;
1077   u32 old_addr, new_addr;
1078   ip_csum_t sum;
1079
1080   make_sm_kv (&kv, &ip->dst_address, 0, 0, 0);
1081   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1082     return;
1083
1084   m = pool_elt_at_index (sm->static_mappings, value.value);
1085
1086   old_addr = ip->dst_address.as_u32;
1087   new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
1088   sum = ip->checksum;
1089   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
1090   ip->checksum = ip_csum_fold (sum);
1091
1092   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1093     vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
1094 }
1095
1096 static int
1097 nat_in2out_sm_unknown_proto (snat_main_t *sm,
1098                              vlib_buffer_t * b,
1099                              ip4_header_t * ip,
1100                              u32 rx_fib_index)
1101 {
1102   clib_bihash_kv_8_8_t kv, value;
1103   snat_static_mapping_t *m;
1104   snat_session_key_t m_key;
1105   u32 old_addr, new_addr;
1106   ip_csum_t sum;
1107
1108   m_key.addr = ip->src_address;
1109   m_key.port = 0;
1110   m_key.protocol = 0;
1111   m_key.fib_index = rx_fib_index;
1112   kv.key = m_key.as_u64;
1113   if (clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
1114     return 1;
1115
1116   m = pool_elt_at_index (sm->static_mappings, value.value);
1117
1118   old_addr = ip->src_address.as_u32;
1119   new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
1120   sum = ip->checksum;
1121   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1122   ip->checksum = ip_csum_fold (sum);
1123
1124
1125   /* Hairpinning */
1126   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1127     {
1128       vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
1129       nat_hairpinning_sm_unknown_proto (sm, b, ip);
1130     }
1131
1132   return 0;
1133 }
1134
1135 static inline uword
1136 snat_in2out_node_fn_inline (vlib_main_t * vm,
1137                             vlib_node_runtime_t * node,
1138                             vlib_frame_t * frame, int is_slow_path,
1139                             int is_output_feature)
1140 {
1141   u32 n_left_from, * from, * to_next;
1142   snat_in2out_next_t next_index;
1143   u32 pkts_processed = 0;
1144   snat_main_t * sm = &snat_main;
1145   f64 now = vlib_time_now (vm);
1146   u32 stats_node_index;
1147   u32 thread_index = vm->thread_index;
1148
1149   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
1150     snat_in2out_node.index;
1151
1152   from = vlib_frame_vector_args (frame);
1153   n_left_from = frame->n_vectors;
1154   next_index = node->cached_next_index;
1155
1156   while (n_left_from > 0)
1157     {
1158       u32 n_left_to_next;
1159
1160       vlib_get_next_frame (vm, node, next_index,
1161                            to_next, n_left_to_next);
1162
1163       while (n_left_from >= 4 && n_left_to_next >= 2)
1164         {
1165           u32 bi0, bi1;
1166           vlib_buffer_t * b0, * b1;
1167           u32 next0, next1;
1168           u32 sw_if_index0, sw_if_index1;
1169           ip4_header_t * ip0, * ip1;
1170           ip_csum_t sum0, sum1;
1171           u32 new_addr0, old_addr0, new_addr1, old_addr1;
1172           u16 old_port0, new_port0, old_port1, new_port1;
1173           udp_header_t * udp0, * udp1;
1174           tcp_header_t * tcp0, * tcp1;
1175           icmp46_header_t * icmp0, * icmp1;
1176           snat_session_key_t key0, key1;
1177           u32 rx_fib_index0, rx_fib_index1;
1178           u32 proto0, proto1;
1179           snat_session_t * s0 = 0, * s1 = 0;
1180           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1181           u32 iph_offset0 = 0, iph_offset1 = 0;
1182
1183           /* Prefetch next iteration. */
1184           {
1185             vlib_buffer_t * p2, * p3;
1186
1187             p2 = vlib_get_buffer (vm, from[2]);
1188             p3 = vlib_get_buffer (vm, from[3]);
1189
1190             vlib_prefetch_buffer_header (p2, LOAD);
1191             vlib_prefetch_buffer_header (p3, LOAD);
1192
1193             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1194             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1195           }
1196
1197           /* speculatively enqueue b0 and b1 to the current next frame */
1198           to_next[0] = bi0 = from[0];
1199           to_next[1] = bi1 = from[1];
1200           from += 2;
1201           to_next += 2;
1202           n_left_from -= 2;
1203           n_left_to_next -= 2;
1204
1205           b0 = vlib_get_buffer (vm, bi0);
1206           b1 = vlib_get_buffer (vm, bi1);
1207
1208           if (is_output_feature)
1209             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1210
1211           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1212                  iph_offset0);
1213
1214           udp0 = ip4_next_header (ip0);
1215           tcp0 = (tcp_header_t *) udp0;
1216           icmp0 = (icmp46_header_t *) udp0;
1217
1218           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1219           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1220                                    sw_if_index0);
1221
1222           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1223
1224           if (PREDICT_FALSE(ip0->ttl == 1))
1225             {
1226               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1227               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1228                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1229                                            0);
1230               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1231               goto trace00;
1232             }
1233
1234           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1235
1236           /* Next configured feature, probably ip4-lookup */
1237           if (is_slow_path)
1238             {
1239               if (PREDICT_FALSE (proto0 == ~0))
1240                 {
1241                   if (nat_in2out_sm_unknown_proto (sm, b0, ip0, rx_fib_index0))
1242                     {
1243                       next0 = SNAT_IN2OUT_NEXT_DROP;
1244                       b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
1245                     }
1246                   goto trace00;
1247                 }
1248
1249               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1250                 {
1251                   next0 = icmp_in2out_slow_path
1252                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
1253                      node, next0, now, thread_index, &s0);
1254                   goto trace00;
1255                 }
1256             }
1257           else
1258             {
1259               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1260                 {
1261                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1262                   goto trace00;
1263                 }
1264
1265               if (ip4_is_fragment (ip0))
1266                 {
1267                   next0 = SNAT_IN2OUT_NEXT_REASS;
1268                   goto trace00;
1269                 }
1270             }
1271
1272           key0.addr = ip0->src_address;
1273           key0.port = udp0->src_port;
1274           key0.protocol = proto0;
1275           key0.fib_index = rx_fib_index0;
1276
1277           kv0.key = key0.as_u64;
1278
1279           if (PREDICT_FALSE (clib_bihash_search_8_8 (
1280               &sm->per_thread_data[thread_index].in2out, &kv0, &value0) != 0))
1281             {
1282               if (is_slow_path)
1283                 {
1284                   if (is_output_feature)
1285                     {
1286                       if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
1287                           ip0, proto0, udp0->src_port, udp0->dst_port, thread_index, sw_if_index0)))
1288                         goto trace00;
1289                     }
1290                   else
1291                     {
1292                       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1293                           ip0, proto0, rx_fib_index0, thread_index)))
1294                         goto trace00;
1295                     }
1296
1297                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1298                                      &s0, node, next0, thread_index, now);
1299                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1300                     goto trace00;
1301                 }
1302               else
1303                 {
1304                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1305                   goto trace00;
1306                 }
1307             }
1308           else
1309             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1310                                     value0.value);
1311
1312           b0->flags |= VNET_BUFFER_F_IS_NATED;
1313
1314           old_addr0 = ip0->src_address.as_u32;
1315           ip0->src_address = s0->out2in.addr;
1316           new_addr0 = ip0->src_address.as_u32;
1317           if (!is_output_feature)
1318             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1319
1320           sum0 = ip0->checksum;
1321           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1322                                  ip4_header_t,
1323                                  src_address /* changed member */);
1324           ip0->checksum = ip_csum_fold (sum0);
1325
1326           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1327             {
1328               old_port0 = tcp0->src_port;
1329               tcp0->src_port = s0->out2in.port;
1330               new_port0 = tcp0->src_port;
1331
1332               sum0 = tcp0->checksum;
1333               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1334                                      ip4_header_t,
1335                                      dst_address /* changed member */);
1336               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1337                                      ip4_header_t /* cheat */,
1338                                      length /* changed member */);
1339               tcp0->checksum = ip_csum_fold(sum0);
1340             }
1341           else
1342             {
1343               old_port0 = udp0->src_port;
1344               udp0->src_port = s0->out2in.port;
1345               udp0->checksum = 0;
1346             }
1347
1348           /* Accounting */
1349           nat44_session_update_counters (s0, now,
1350                                          vlib_buffer_length_in_chain (vm, b0));
1351           /* Per-user LRU list maintenance */
1352           nat44_session_update_lru (sm, s0, thread_index);
1353         trace00:
1354
1355           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1356                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1357             {
1358               snat_in2out_trace_t *t =
1359                  vlib_add_trace (vm, node, b0, sizeof (*t));
1360               t->is_slow_path = is_slow_path;
1361               t->sw_if_index = sw_if_index0;
1362               t->next_index = next0;
1363                   t->session_index = ~0;
1364               if (s0)
1365                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1366             }
1367
1368           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1369
1370           if (is_output_feature)
1371             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1372
1373           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1374                  iph_offset1);
1375
1376           udp1 = ip4_next_header (ip1);
1377           tcp1 = (tcp_header_t *) udp1;
1378           icmp1 = (icmp46_header_t *) udp1;
1379
1380           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1381           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1382                                    sw_if_index1);
1383
1384           if (PREDICT_FALSE(ip1->ttl == 1))
1385             {
1386               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1387               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1388                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1389                                            0);
1390               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1391               goto trace01;
1392             }
1393
1394           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1395
1396           /* Next configured feature, probably ip4-lookup */
1397           if (is_slow_path)
1398             {
1399               if (PREDICT_FALSE (proto1 == ~0))
1400                 {
1401                   if (nat_in2out_sm_unknown_proto (sm, b1, ip1, rx_fib_index1))
1402                     {
1403                       next1 = SNAT_IN2OUT_NEXT_DROP;
1404                       b1->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
1405                     }
1406                   goto trace01;
1407                 }
1408
1409               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1410                 {
1411                   next1 = icmp_in2out_slow_path
1412                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1413                      next1, now, thread_index, &s1);
1414                   goto trace01;
1415                 }
1416             }
1417           else
1418             {
1419               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1420                 {
1421                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1422                   goto trace01;
1423                 }
1424
1425               if (ip4_is_fragment (ip1))
1426                 {
1427                   next1 = SNAT_IN2OUT_NEXT_REASS;
1428                   goto trace01;
1429                 }
1430             }
1431
1432           key1.addr = ip1->src_address;
1433           key1.port = udp1->src_port;
1434           key1.protocol = proto1;
1435           key1.fib_index = rx_fib_index1;
1436
1437           kv1.key = key1.as_u64;
1438
1439             if (PREDICT_FALSE(clib_bihash_search_8_8 (
1440                 &sm->per_thread_data[thread_index].in2out, &kv1, &value1) != 0))
1441             {
1442               if (is_slow_path)
1443                 {
1444                   if (is_output_feature)
1445                     {
1446                       if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
1447                           ip1, proto1, udp1->src_port, udp1->dst_port, thread_index, sw_if_index1)))
1448                         goto trace01;
1449                     }
1450                   else
1451                     {
1452                       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
1453                           ip1, proto1, rx_fib_index1, thread_index)))
1454                         goto trace01;
1455                     }
1456
1457                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1458                                      &s1, node, next1, thread_index, now);
1459                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1460                     goto trace01;
1461                 }
1462               else
1463                 {
1464                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1465                   goto trace01;
1466                 }
1467             }
1468           else
1469             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1470                                     value1.value);
1471
1472           b1->flags |= VNET_BUFFER_F_IS_NATED;
1473
1474           old_addr1 = ip1->src_address.as_u32;
1475           ip1->src_address = s1->out2in.addr;
1476           new_addr1 = ip1->src_address.as_u32;
1477           if (!is_output_feature)
1478             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1479
1480           sum1 = ip1->checksum;
1481           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1482                                  ip4_header_t,
1483                                  src_address /* changed member */);
1484           ip1->checksum = ip_csum_fold (sum1);
1485
1486           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1487             {
1488               old_port1 = tcp1->src_port;
1489               tcp1->src_port = s1->out2in.port;
1490               new_port1 = tcp1->src_port;
1491
1492               sum1 = tcp1->checksum;
1493               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1494                                      ip4_header_t,
1495                                      dst_address /* changed member */);
1496               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1497                                      ip4_header_t /* cheat */,
1498                                      length /* changed member */);
1499               tcp1->checksum = ip_csum_fold(sum1);
1500             }
1501           else
1502             {
1503               old_port1 = udp1->src_port;
1504               udp1->src_port = s1->out2in.port;
1505               udp1->checksum = 0;
1506             }
1507
1508           /* Accounting */
1509           nat44_session_update_counters (s1, now,
1510                                          vlib_buffer_length_in_chain (vm, b1));
1511           /* Per-user LRU list maintenance */
1512           nat44_session_update_lru (sm, s1, thread_index);
1513         trace01:
1514
1515           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1516                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1517             {
1518               snat_in2out_trace_t *t =
1519                  vlib_add_trace (vm, node, b1, sizeof (*t));
1520               t->sw_if_index = sw_if_index1;
1521               t->next_index = next1;
1522               t->session_index = ~0;
1523               if (s1)
1524                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1525             }
1526
1527           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1528
1529           /* verify speculative enqueues, maybe switch current next frame */
1530           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1531                                            to_next, n_left_to_next,
1532                                            bi0, bi1, next0, next1);
1533         }
1534
1535       while (n_left_from > 0 && n_left_to_next > 0)
1536         {
1537           u32 bi0;
1538           vlib_buffer_t * b0;
1539           u32 next0;
1540           u32 sw_if_index0;
1541           ip4_header_t * ip0;
1542           ip_csum_t sum0;
1543           u32 new_addr0, old_addr0;
1544           u16 old_port0, new_port0;
1545           udp_header_t * udp0;
1546           tcp_header_t * tcp0;
1547           icmp46_header_t * icmp0;
1548           snat_session_key_t key0;
1549           u32 rx_fib_index0;
1550           u32 proto0;
1551           snat_session_t * s0 = 0;
1552           clib_bihash_kv_8_8_t kv0, value0;
1553           u32 iph_offset0 = 0;
1554
1555           /* speculatively enqueue b0 to the current next frame */
1556           bi0 = from[0];
1557           to_next[0] = bi0;
1558           from += 1;
1559           to_next += 1;
1560           n_left_from -= 1;
1561           n_left_to_next -= 1;
1562
1563           b0 = vlib_get_buffer (vm, bi0);
1564           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1565
1566           if (is_output_feature)
1567             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1568
1569           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1570                  iph_offset0);
1571
1572           udp0 = ip4_next_header (ip0);
1573           tcp0 = (tcp_header_t *) udp0;
1574           icmp0 = (icmp46_header_t *) udp0;
1575
1576           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1577           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1578                                    sw_if_index0);
1579
1580           if (PREDICT_FALSE(ip0->ttl == 1))
1581             {
1582               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1583               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1584                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1585                                            0);
1586               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1587               goto trace0;
1588             }
1589
1590           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1591
1592           /* Next configured feature, probably ip4-lookup */
1593           if (is_slow_path)
1594             {
1595               if (PREDICT_FALSE (proto0 == ~0))
1596                 {
1597                   if (nat_in2out_sm_unknown_proto (sm, b0, ip0, rx_fib_index0))
1598                     {
1599                       next0 = SNAT_IN2OUT_NEXT_DROP;
1600                       b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
1601                     }
1602                   goto trace0;
1603                 }
1604
1605               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1606                 {
1607                   next0 = icmp_in2out_slow_path
1608                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1609                      next0, now, thread_index, &s0);
1610                   goto trace0;
1611                 }
1612             }
1613           else
1614             {
1615               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1616                 {
1617                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1618                   goto trace0;
1619                 }
1620
1621               if (ip4_is_fragment (ip0))
1622                 {
1623                   next0 = SNAT_IN2OUT_NEXT_REASS;
1624                   goto trace0;
1625                 }
1626             }
1627
1628           key0.addr = ip0->src_address;
1629           key0.port = udp0->src_port;
1630           key0.protocol = proto0;
1631           key0.fib_index = rx_fib_index0;
1632
1633           kv0.key = key0.as_u64;
1634
1635           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out,
1636                                       &kv0, &value0))
1637             {
1638               if (is_slow_path)
1639                 {
1640                   if (is_output_feature)
1641                     {
1642                       if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
1643                           ip0, proto0, udp0->src_port, udp0->dst_port, thread_index, sw_if_index0)))
1644                         goto trace0;
1645                     }
1646                   else
1647                     {
1648                       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1649                           ip0, proto0, rx_fib_index0, thread_index)))
1650                         goto trace0;
1651                     }
1652
1653                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1654                                      &s0, node, next0, thread_index, now);
1655
1656                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1657                     goto trace0;
1658                 }
1659               else
1660                 {
1661                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1662                   goto trace0;
1663                 }
1664             }
1665           else
1666           s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1667                                   value0.value);
1668
1669           b0->flags |= VNET_BUFFER_F_IS_NATED;
1670
1671           old_addr0 = ip0->src_address.as_u32;
1672           ip0->src_address = s0->out2in.addr;
1673           new_addr0 = ip0->src_address.as_u32;
1674           if (!is_output_feature)
1675             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1676
1677           sum0 = ip0->checksum;
1678           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1679                                  ip4_header_t,
1680                                  src_address /* changed member */);
1681           ip0->checksum = ip_csum_fold (sum0);
1682
1683           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1684             {
1685               old_port0 = tcp0->src_port;
1686               tcp0->src_port = s0->out2in.port;
1687               new_port0 = tcp0->src_port;
1688
1689               sum0 = tcp0->checksum;
1690               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1691                                      ip4_header_t,
1692                                      dst_address /* changed member */);
1693               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1694                                      ip4_header_t /* cheat */,
1695                                      length /* changed member */);
1696               tcp0->checksum = ip_csum_fold(sum0);
1697             }
1698           else
1699             {
1700               old_port0 = udp0->src_port;
1701               udp0->src_port = s0->out2in.port;
1702               udp0->checksum = 0;
1703             }
1704
1705           /* Accounting */
1706           nat44_session_update_counters (s0, now,
1707                                          vlib_buffer_length_in_chain (vm, b0));
1708           /* Per-user LRU list maintenance */
1709           nat44_session_update_lru (sm, s0, thread_index);
1710
1711         trace0:
1712           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1713                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1714             {
1715               snat_in2out_trace_t *t =
1716                  vlib_add_trace (vm, node, b0, sizeof (*t));
1717               t->is_slow_path = is_slow_path;
1718               t->sw_if_index = sw_if_index0;
1719               t->next_index = next0;
1720                   t->session_index = ~0;
1721               if (s0)
1722                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1723             }
1724
1725           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1726
1727           /* verify speculative enqueue, maybe switch current next frame */
1728           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1729                                            to_next, n_left_to_next,
1730                                            bi0, next0);
1731         }
1732
1733       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1734     }
1735
1736   vlib_node_increment_counter (vm, stats_node_index,
1737                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1738                                pkts_processed);
1739   return frame->n_vectors;
1740 }
1741
1742 static uword
1743 snat_in2out_fast_path_fn (vlib_main_t * vm,
1744                           vlib_node_runtime_t * node,
1745                           vlib_frame_t * frame)
1746 {
1747   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0);
1748 }
1749
1750 VLIB_REGISTER_NODE (snat_in2out_node) = {
1751   .function = snat_in2out_fast_path_fn,
1752   .name = "nat44-in2out",
1753   .vector_size = sizeof (u32),
1754   .format_trace = format_snat_in2out_trace,
1755   .type = VLIB_NODE_TYPE_INTERNAL,
1756
1757   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1758   .error_strings = snat_in2out_error_strings,
1759
1760   .runtime_data_bytes = sizeof (snat_runtime_t),
1761
1762   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1763
1764   /* edit / add dispositions here */
1765   .next_nodes = {
1766     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1767     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1768     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
1769     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1770     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
1771   },
1772 };
1773
1774 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1775
1776 static uword
1777 snat_in2out_output_fast_path_fn (vlib_main_t * vm,
1778                                  vlib_node_runtime_t * node,
1779                                  vlib_frame_t * frame)
1780 {
1781   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1);
1782 }
1783
1784 VLIB_REGISTER_NODE (snat_in2out_output_node) = {
1785   .function = snat_in2out_output_fast_path_fn,
1786   .name = "nat44-in2out-output",
1787   .vector_size = sizeof (u32),
1788   .format_trace = format_snat_in2out_trace,
1789   .type = VLIB_NODE_TYPE_INTERNAL,
1790
1791   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1792   .error_strings = snat_in2out_error_strings,
1793
1794   .runtime_data_bytes = sizeof (snat_runtime_t),
1795
1796   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1797
1798   /* edit / add dispositions here */
1799   .next_nodes = {
1800     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1801     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
1802     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
1803     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1804     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
1805   },
1806 };
1807
1808 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
1809                               snat_in2out_output_fast_path_fn);
1810
1811 static uword
1812 snat_in2out_slow_path_fn (vlib_main_t * vm,
1813                           vlib_node_runtime_t * node,
1814                           vlib_frame_t * frame)
1815 {
1816   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0);
1817 }
1818
1819 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1820   .function = snat_in2out_slow_path_fn,
1821   .name = "nat44-in2out-slowpath",
1822   .vector_size = sizeof (u32),
1823   .format_trace = format_snat_in2out_trace,
1824   .type = VLIB_NODE_TYPE_INTERNAL,
1825
1826   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1827   .error_strings = snat_in2out_error_strings,
1828
1829   .runtime_data_bytes = sizeof (snat_runtime_t),
1830
1831   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1832
1833   /* edit / add dispositions here */
1834   .next_nodes = {
1835     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1836     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1837     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
1838     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1839     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
1840   },
1841 };
1842
1843 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
1844                               snat_in2out_slow_path_fn);
1845
1846 static uword
1847 snat_in2out_output_slow_path_fn (vlib_main_t * vm,
1848                                  vlib_node_runtime_t * node,
1849                                  vlib_frame_t * frame)
1850 {
1851   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1);
1852 }
1853
1854 VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
1855   .function = snat_in2out_output_slow_path_fn,
1856   .name = "nat44-in2out-output-slowpath",
1857   .vector_size = sizeof (u32),
1858   .format_trace = format_snat_in2out_trace,
1859   .type = VLIB_NODE_TYPE_INTERNAL,
1860
1861   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1862   .error_strings = snat_in2out_error_strings,
1863
1864   .runtime_data_bytes = sizeof (snat_runtime_t),
1865
1866   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1867
1868   /* edit / add dispositions here */
1869   .next_nodes = {
1870     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1871     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
1872     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
1873     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1874     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
1875   },
1876 };
1877
1878 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
1879                               snat_in2out_output_slow_path_fn);
1880
1881 extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local;
1882
1883 static inline uword
1884 nat44_hairpinning_fn_inline (vlib_main_t * vm,
1885                              vlib_node_runtime_t * node,
1886                              vlib_frame_t * frame,
1887                              int is_ed)
1888 {
1889   u32 n_left_from, * from, * to_next, stats_node_index;
1890   snat_in2out_next_t next_index;
1891   u32 pkts_processed = 0;
1892   snat_main_t * sm = &snat_main;
1893   vnet_feature_main_t *fm = &feature_main;
1894   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1895   vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index];
1896
1897   stats_node_index = is_ed ? nat44_ed_hairpinning_node.index :
1898     nat44_hairpinning_node.index;
1899   from = vlib_frame_vector_args (frame);
1900   n_left_from = frame->n_vectors;
1901   next_index = node->cached_next_index;
1902
1903   while (n_left_from > 0)
1904     {
1905       u32 n_left_to_next;
1906
1907       vlib_get_next_frame (vm, node, next_index,
1908                            to_next, n_left_to_next);
1909
1910       while (n_left_from > 0 && n_left_to_next > 0)
1911         {
1912           u32 bi0;
1913           vlib_buffer_t * b0;
1914           u32 next0;
1915           ip4_header_t * ip0;
1916           u32 proto0;
1917           udp_header_t * udp0;
1918           tcp_header_t * tcp0;
1919
1920           /* speculatively enqueue b0 to the current next frame */
1921           bi0 = from[0];
1922           to_next[0] = bi0;
1923           from += 1;
1924           to_next += 1;
1925           n_left_from -= 1;
1926           n_left_to_next -= 1;
1927
1928           b0 = vlib_get_buffer (vm, bi0);
1929           ip0 = vlib_buffer_get_current (b0);
1930           udp0 = ip4_next_header (ip0);
1931           tcp0 = (tcp_header_t *) udp0;
1932
1933           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1934
1935           vnet_get_config_data (&cm->config_main, &b0->current_config_index,
1936                                 &next0, 0);
1937
1938           if (snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0, is_ed))
1939             next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1940
1941           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1942
1943           /* verify speculative enqueue, maybe switch current next frame */
1944           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1945                                            to_next, n_left_to_next,
1946                                            bi0, next0);
1947          }
1948
1949       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1950     }
1951
1952   vlib_node_increment_counter (vm, stats_node_index,
1953                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1954                                pkts_processed);
1955   return frame->n_vectors;
1956 }
1957
1958 static uword
1959 nat44_hairpinning_fn (vlib_main_t * vm,
1960                       vlib_node_runtime_t * node,
1961                       vlib_frame_t * frame)
1962 {
1963   return nat44_hairpinning_fn_inline (vm, node, frame, 0);
1964 }
1965
1966 VLIB_REGISTER_NODE (nat44_hairpinning_node) = {
1967   .function = nat44_hairpinning_fn,
1968   .name = "nat44-hairpinning",
1969   .vector_size = sizeof (u32),
1970   .type = VLIB_NODE_TYPE_INTERNAL,
1971   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1972   .error_strings = snat_in2out_error_strings,
1973   .n_next_nodes = 2,
1974   .next_nodes = {
1975     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1976     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1977   },
1978 };
1979
1980 VLIB_NODE_FUNCTION_MULTIARCH (nat44_hairpinning_node,
1981                               nat44_hairpinning_fn);
1982
1983 static uword
1984 nat44_ed_hairpinning_fn (vlib_main_t * vm,
1985                          vlib_node_runtime_t * node,
1986                          vlib_frame_t * frame)
1987 {
1988   return nat44_hairpinning_fn_inline (vm, node, frame, 1);
1989 }
1990
1991 VLIB_REGISTER_NODE (nat44_ed_hairpinning_node) = {
1992   .function = nat44_ed_hairpinning_fn,
1993   .name = "nat44-ed-hairpinning",
1994   .vector_size = sizeof (u32),
1995   .type = VLIB_NODE_TYPE_INTERNAL,
1996   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1997   .error_strings = snat_in2out_error_strings,
1998   .n_next_nodes = 2,
1999   .next_nodes = {
2000     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2001     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2002   },
2003 };
2004
2005 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_hairpinning_node,
2006                               nat44_ed_hairpinning_fn);
2007
2008 static inline void
2009 nat44_reass_hairpinning (snat_main_t *sm,
2010                          vlib_buffer_t * b0,
2011                          ip4_header_t * ip0,
2012                          u16 sport,
2013                          u16 dport,
2014                          u32 proto0)
2015 {
2016   snat_session_key_t key0, sm0;
2017   snat_session_t * s0;
2018   clib_bihash_kv_8_8_t kv0, value0;
2019   ip_csum_t sum0;
2020   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
2021   u16 new_dst_port0, old_dst_port0;
2022   udp_header_t * udp0;
2023   tcp_header_t * tcp0;
2024
2025   key0.addr = ip0->dst_address;
2026   key0.port = dport;
2027   key0.protocol = proto0;
2028   key0.fib_index = sm->outside_fib_index;
2029   kv0.key = key0.as_u64;
2030
2031   udp0 = ip4_next_header (ip0);
2032
2033   /* Check if destination is static mappings */
2034   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
2035     {
2036       new_dst_addr0 = sm0.addr.as_u32;
2037       new_dst_port0 = sm0.port;
2038       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2039     }
2040   /* or active sessions */
2041   else
2042     {
2043       if (sm->num_workers > 1)
2044         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
2045       else
2046         ti = sm->num_workers;
2047
2048       if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
2049         {
2050           si = value0.value;
2051           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
2052           new_dst_addr0 = s0->in2out.addr.as_u32;
2053           new_dst_port0 = s0->in2out.port;
2054           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
2055         }
2056     }
2057
2058   /* Destination is behind the same NAT, use internal address and port */
2059   if (new_dst_addr0)
2060     {
2061       old_dst_addr0 = ip0->dst_address.as_u32;
2062       ip0->dst_address.as_u32 = new_dst_addr0;
2063       sum0 = ip0->checksum;
2064       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2065                              ip4_header_t, dst_address);
2066       ip0->checksum = ip_csum_fold (sum0);
2067
2068       old_dst_port0 = dport;
2069       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0 &&
2070                        ip4_is_first_fragment (ip0)))
2071         {
2072           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2073             {
2074               tcp0 = ip4_next_header (ip0);
2075               tcp0->dst = new_dst_port0;
2076               sum0 = tcp0->checksum;
2077               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2078                                      ip4_header_t, dst_address);
2079               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
2080                                      ip4_header_t /* cheat */, length);
2081               tcp0->checksum = ip_csum_fold(sum0);
2082             }
2083           else
2084             {
2085               udp0->dst_port = new_dst_port0;
2086               udp0->checksum = 0;
2087             }
2088         }
2089       else
2090         {
2091           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2092             {
2093               tcp0 = ip4_next_header (ip0);
2094               sum0 = tcp0->checksum;
2095               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2096                                      ip4_header_t, dst_address);
2097               tcp0->checksum = ip_csum_fold(sum0);
2098             }
2099         }
2100     }
2101 }
2102
/*
 * Node function of "nat44-in2out-reass": in2out translation of IPv4
 * fragments.
 *
 * Each buffer is matched to a reassembly context with
 * nat_ip4_reass_find_or_create().  A first fragment looks its session up in
 * the per-thread in2out table (calling slow_path() on a miss) and stores the
 * session index in the context.  Any other fragment reuses that index; while
 * the index is still ~0 the fragment is handed to
 * nat_ip4_reass_add_fragment() and taken back out of the next frame
 * (cached0).
 *
 * Translated buffers get the source address and IPv4 checksum rewritten, and
 * on the first fragment also the source port / L4 checksum.  They then go
 * through nat44_reass_hairpinning(), session accounting and LRU maintenance.
 *
 * Buffer indices collected in fragments_to_loopback are copied back into the
 * input frame once it has been drained.  fragments_to_drop is passed to
 * nat_send_all_to_node() with SNAT_IN2OUT_NEXT_DROP before returning.
 *
 * Returns frame->n_vectors.
 */
2103 static uword
2104 nat44_in2out_reass_node_fn (vlib_main_t * vm,
2105                             vlib_node_runtime_t * node,
2106                             vlib_frame_t * frame)
2107 {
2108   u32 n_left_from, *from, *to_next;
2109   snat_in2out_next_t next_index;
2110   u32 pkts_processed = 0;
2111   snat_main_t *sm = &snat_main;
2112   f64 now = vlib_time_now (vm);
2113   u32 thread_index = vm->thread_index;
2114   snat_main_per_thread_data_t *per_thread_data =
2115     &sm->per_thread_data[thread_index];
2116   u32 *fragments_to_drop = 0;
2117   u32 *fragments_to_loopback = 0;
2118
2119   from = vlib_frame_vector_args (frame);
2120   n_left_from = frame->n_vectors;
2121   next_index = node->cached_next_index;
2122
2123   while (n_left_from > 0)
2124     {
2125       u32 n_left_to_next;
2126
2127       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2128
2129       while (n_left_from > 0 && n_left_to_next > 0)
2130        {
2131           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
2132           vlib_buffer_t *b0;
2133           u32 next0;
/* cached0 is set when the buffer is kept by the reassembly context */
2134           u8 cached0 = 0;
2135           ip4_header_t *ip0;
2136           nat_reass_ip4_t *reass0;
2137           udp_header_t * udp0;
2138           tcp_header_t * tcp0;
2139           snat_session_key_t key0;
2140           clib_bihash_kv_8_8_t kv0, value0;
2141           snat_session_t * s0 = 0;
2142           u16 old_port0, new_port0;
2143           ip_csum_t sum0;
2144
2145           /* speculatively enqueue b0 to the current next frame */
2146           bi0 = from[0];
2147           to_next[0] = bi0;
2148           from += 1;
2149           to_next += 1;
2150           n_left_from -= 1;
2151           n_left_to_next -= 1;
2152
2153           b0 = vlib_get_buffer (vm, bi0);
2154           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2155
2156           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2157           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2158                                                                sw_if_index0);
2159
/* drop every fragment when nat_reass_is_drop_frag() says so */
2160           if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
2161             {
2162               next0 = SNAT_IN2OUT_NEXT_DROP;
2163               b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
2164               goto trace0;
2165             }
2166
2167           ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
2168           udp0 = ip4_next_header (ip0);
2169           tcp0 = (tcp_header_t *) udp0;
2170           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2171
/* context keyed by src, dst, fragment id and protocol */
2172           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
2173                                                  ip0->dst_address,
2174                                                  ip0->fragment_id,
2175                                                  ip0->protocol,
2176                                                  1,
2177                                                  &fragments_to_drop);
2178
2179           if (PREDICT_FALSE (!reass0))
2180             {
2181               next0 = SNAT_IN2OUT_NEXT_DROP;
2182               b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_REASS];
2183               nat_log_notice ("maximum reassemblies exceeded");
2184               goto trace0;
2185             }
2186
/* first fragment: find or create the session and record its index */
2187           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
2188             {
2189               key0.addr = ip0->src_address;
2190               key0.port = udp0->src_port;
2191               key0.protocol = proto0;
2192               key0.fib_index = rx_fib_index0;
2193               kv0.key = key0.as_u64;
2194
2195               if (clib_bihash_search_8_8 (&per_thread_data->in2out, &kv0, &value0))
2196                 {
2197                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
2198                       ip0, proto0, rx_fib_index0, thread_index)))
2199                     goto trace0;
2200
2201                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
2202                                      &s0, node, next0, thread_index, now);
2203
2204                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
2205                     goto trace0;
2206
2207                   reass0->sess_index = s0 - per_thread_data->sessions;
2208                 }
2209               else
2210                 {
2211                   s0 = pool_elt_at_index (per_thread_data->sessions,
2212                                           value0.value);
2213                   reass0->sess_index = value0.value;
2214                 }
/* presumably moves fragments cached so far into the loopback vector */
2215               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
2216             }
2217           else
2218             {
/* session not known yet: hand the fragment to the context */
2219               if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
2220                 {
2221                   if (nat_ip4_reass_add_fragment (reass0, bi0))
2222                     {
2223                       b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_FRAG];
2224                       nat_log_notice ("maximum fragments per reassembly exceeded");
2225                       next0 = SNAT_IN2OUT_NEXT_DROP;
2226                       goto trace0;
2227                     }
2228                   cached0 = 1;
2229                   goto trace0;
2230                 }
2231               s0 = pool_elt_at_index (per_thread_data->sessions,
2232                                       reass0->sess_index);
2233             }
2234
/* rewrite source address; done for every translated fragment */
2235           old_addr0 = ip0->src_address.as_u32;
2236           ip0->src_address = s0->out2in.addr;
2237           new_addr0 = ip0->src_address.as_u32;
2238           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
2239
2240           sum0 = ip0->checksum;
2241           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2242                                  ip4_header_t,
2243                                  src_address /* changed member */);
2244           ip0->checksum = ip_csum_fold (sum0);
2245
/* L4 port and checksum are rewritten on the first fragment only */
2246           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
2247             {
2248               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2249                 {
2250                   old_port0 = tcp0->src_port;
2251                   tcp0->src_port = s0->out2in.port;
2252                   new_port0 = tcp0->src_port;
2253
2254                   sum0 = tcp0->checksum;
2255                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2256                                          ip4_header_t,
2257                                          dst_address /* changed member */);
2258                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2259                                          ip4_header_t /* cheat */,
2260                                          length /* changed member */);
2261                   tcp0->checksum = ip_csum_fold(sum0);
2262                 }
2263               else
2264                 {
2265                   old_port0 = udp0->src_port;
2266                   udp0->src_port = s0->out2in.port;
2267                   udp0->checksum = 0;
2268                 }
2269             }
2270
2271           /* Hairpinning */
2272           nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port,
2273                                    s0->ext_host_port, proto0);
2274
2275           /* Accounting */
2276           nat44_session_update_counters (s0, now,
2277                                          vlib_buffer_length_in_chain (vm, b0));
2278           /* Per-user LRU list maintenance */
2279           nat44_session_update_lru (sm, s0, thread_index);
2280
2281         trace0:
2282           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2283                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2284             {
2285               nat44_in2out_reass_trace_t *t =
2286                  vlib_add_trace (vm, node, b0, sizeof (*t));
2287               t->cached = cached0;
2288               t->sw_if_index = sw_if_index0;
2289               t->next_index = next0;
2290             }
2291
/* cached buffer: undo the speculative enqueue done above */
2292           if (cached0)
2293             {
2294               n_left_to_next++;
2295               to_next--;
2296             }
2297           else
2298             {
2299               pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2300
2301               /* verify speculative enqueue, maybe switch current next frame */
2302               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2303                                                to_next, n_left_to_next,
2304                                                bi0, next0);
2305             }
2306
/*
 * Input drained: refill the input frame from fragments_to_loopback,
 * at most VLIB_FRAME_SIZE indices at a time, taken from the vector's tail
 * when it holds more than that.
 */
2307           if (n_left_from == 0 && vec_len (fragments_to_loopback))
2308             {
2309               from = vlib_frame_vector_args (frame);
2310               u32 len = vec_len (fragments_to_loopback);
2311               if (len <= VLIB_FRAME_SIZE)
2312                 {
2313                   clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
2314                   n_left_from = len;
2315                   vec_reset_length (fragments_to_loopback);
2316                 }
2317               else
2318                 {
2319                   clib_memcpy (from,
2320                                fragments_to_loopback + (len - VLIB_FRAME_SIZE),
2321                                sizeof (u32) * VLIB_FRAME_SIZE);
2322                   n_left_from = VLIB_FRAME_SIZE;
2323                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
2324                 }
2325             }
2326        }
2327
2328       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2329     }
2330
2331   vlib_node_increment_counter (vm, nat44_in2out_reass_node.index,
2332                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2333                                pkts_processed);
2334
2335   nat_send_all_to_node (vm, fragments_to_drop, node,
2336                         &node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT],
2337                         SNAT_IN2OUT_NEXT_DROP);
2338
2339   vec_free (fragments_to_drop);
2340   vec_free (fragments_to_loopback);
2341   return frame->n_vectors;
2342 }
2343
2344 VLIB_REGISTER_NODE (nat44_in2out_reass_node) = {
2345   .function = nat44_in2out_reass_node_fn,
2346   .name = "nat44-in2out-reass",
2347   .vector_size = sizeof (u32),
2348   .format_trace = format_nat44_in2out_reass_trace,
2349   .type = VLIB_NODE_TYPE_INTERNAL,
2350
2351   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2352   .error_strings = snat_in2out_error_strings,
2353
2354   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2355   .next_nodes = {
2356     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2357     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2358     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2359     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2360     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2361   },
2362 };
2363
2364 VLIB_NODE_FUNCTION_MULTIARCH (nat44_in2out_reass_node,
2365                               nat44_in2out_reass_node_fn);
2366
2367 /*******************************/
2368 /*** endpoint-dependent mode ***/
2369 /*******************************/
2370
2371 static_always_inline int
2372 icmp_get_ed_key(ip4_header_t *ip0, nat_ed_ses_key_t *p_key0)
2373 {
2374   icmp46_header_t *icmp0;
2375   nat_ed_ses_key_t key0;
2376   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2377   ip4_header_t *inner_ip0 = 0;
2378   void *l4_header = 0;
2379   icmp46_header_t *inner_icmp0;
2380
2381   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2382   echo0 = (icmp_echo_header_t *)(icmp0+1);
2383
2384   if (!icmp_is_error_message (icmp0))
2385     {
2386       key0.proto = IP_PROTOCOL_ICMP;
2387       key0.l_addr = ip0->src_address;
2388       key0.r_addr = ip0->dst_address;
2389       key0.l_port = echo0->identifier;
2390       key0.r_port = 0;
2391     }
2392   else
2393     {
2394       inner_ip0 = (ip4_header_t *)(echo0+1);
2395       l4_header = ip4_next_header (inner_ip0);
2396       key0.proto = inner_ip0->protocol;
2397       key0.r_addr = inner_ip0->src_address;
2398       key0.l_addr = inner_ip0->dst_address;
2399       switch (ip_proto_to_snat_proto (inner_ip0->protocol))
2400         {
2401         case SNAT_PROTOCOL_ICMP:
2402           inner_icmp0 = (icmp46_header_t*)l4_header;
2403           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2404           key0.r_port = 0;
2405           key0.l_port = inner_echo0->identifier;
2406           break;
2407         case SNAT_PROTOCOL_UDP:
2408         case SNAT_PROTOCOL_TCP:
2409           key0.l_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2410           key0.r_port = ((tcp_udp_header_t*)l4_header)->src_port;
2411           break;
2412         default:
2413           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
2414         }
2415     }
2416   *p_key0 = key0;
2417   return 0;
2418 }
2419
2420 int
2421 nat44_i2o_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void * arg)
2422 {
2423   snat_main_t *sm = &snat_main;
2424   nat44_is_idle_session_ctx_t *ctx = arg;
2425   snat_session_t *s;
2426   u64 sess_timeout_time;
2427   nat_ed_ses_key_t ed_key;
2428   clib_bihash_kv_16_8_t ed_kv;
2429   int i;
2430   snat_address_t *a;
2431   snat_session_key_t key;
2432   snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
2433                                                        ctx->thread_index);
2434
2435   s = pool_elt_at_index (tsm->sessions, kv->value);
2436   sess_timeout_time = s->last_heard + (f64)nat44_session_get_timeout(sm, s);
2437   if (ctx->now >= sess_timeout_time)
2438     {
2439       if (is_fwd_bypass_session (s))
2440         goto delete;
2441
2442       ed_key.l_addr = s->out2in.addr;
2443       ed_key.r_addr = s->ext_host_addr;
2444       ed_key.fib_index = s->out2in.fib_index;
2445       if (snat_is_unk_proto_session (s))
2446         {
2447           ed_key.proto = s->in2out.port;
2448           ed_key.r_port = 0;
2449           ed_key.l_port = 0;
2450         }
2451       else
2452         {
2453           ed_key.proto = snat_proto_to_ip_proto (s->in2out.protocol);
2454           ed_key.l_port = s->out2in.port;
2455           ed_key.r_port = s->ext_host_port;
2456         }
2457       ed_kv.key[0] = ed_key.as_u64[0];
2458       ed_kv.key[1] = ed_key.as_u64[1];
2459       if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &ed_kv, 0))
2460         nat_log_warn ("out2in_ed key del failed");
2461
2462       if (snat_is_unk_proto_session (s))
2463         goto delete;
2464
2465       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
2466                                           s->out2in.addr.as_u32,
2467                                           s->in2out.protocol,
2468                                           s->in2out.port,
2469                                           s->out2in.port,
2470                                           s->in2out.fib_index);
2471
2472       if (is_twice_nat_session (s))
2473         {
2474           for (i = 0; i < vec_len (sm->twice_nat_addresses); i++)
2475             {
2476               key.protocol = s->in2out.protocol;
2477               key.port = s->ext_host_nat_port;
2478               a = sm->twice_nat_addresses + i;
2479               if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32)
2480                 {
2481                   snat_free_outside_address_and_port (sm->twice_nat_addresses,
2482                                                       ctx->thread_index, &key);
2483                   break;
2484                 }
2485             }
2486         }
2487
2488       if (snat_is_session_static (s))
2489         goto delete;
2490
2491       if (s->outside_address_index != ~0)
2492         snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
2493                                             &s->out2in);
2494     delete:
2495       nat44_delete_session (sm, s, ctx->thread_index);
2496       return 1;
2497     }
2498
2499   return 0;
2500 }
2501
2502 static inline u32
2503 icmp_in2out_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0,
2504                           ip4_header_t * ip0, icmp46_header_t * icmp0,
2505                           u32 sw_if_index0, u32 rx_fib_index0,
2506                           vlib_node_runtime_t * node, u32 next0, f64 now,
2507                           u32 thread_index, snat_session_t ** p_s0)
2508 {
2509   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
2510                       next0, thread_index, p_s0, 0);
2511   snat_session_t * s0 = *p_s0;
2512   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
2513     {
2514       /* Hairpinning */
2515       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
2516         snat_icmp_hairpinning(sm, b0, ip0, icmp0, sm->endpoint_dependent);
2517       /* Accounting */
2518       nat44_session_update_counters (s0, now,
2519                                      vlib_buffer_length_in_chain (sm->vlib_main, b0));
2520     }
2521   return next0;
2522 }
2523
/*
 * Create a new endpoint-dependent NAT44 session for an in2out packet.
 *
 * sm            NAT main structure
 * b             packet buffer; only b->error is written here, on drop paths
 * rx_fib_index  FIB index of the receiving (inside) interface
 * kv            in: 6-tuple in2out key of the packet; it is reused and
 *               rewritten in place to build the out2in key, so on return it
 *               no longer holds the in2out key
 * sessionp      out: set to the newly created session on success
 * node          node runtime, used to look up error counters
 * next          next-node index returned unchanged on success
 * thread_index  index of the per-thread NAT data to operate on
 * now           current time, handed to the stale-entry callbacks
 *
 * Returns `next` on success or SNAT_IN2OUT_NEXT_DROP when the session limit
 * is reached, no outside address/port is available, or the user/session
 * could not be created.
 */
static u32
slow_path_ed (snat_main_t *sm,
              vlib_buffer_t *b,
              u32 rx_fib_index,
              clib_bihash_kv_16_8_t *kv,
              snat_session_t ** sessionp,
              vlib_node_runtime_t * node,
              u32 next,
              u32 thread_index,
              f64 now)
{
  snat_session_t *s;
  snat_user_t *u;
  snat_session_key_t key0, key1;
  lb_nat_type_t lb = 0, is_sm = 0;
  u32 address_index = ~0;
  snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
  /* key is a view onto kv->key, not a copy: it changes when kv is rewritten */
  nat_ed_ses_key_t *key = (nat_ed_ses_key_t *) kv->key;
  u32 proto = ip_proto_to_snat_proto (key->proto);
  nat_outside_fib_t *outside_fib;
  fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
  /* /32 prefix of the remote (external host) address, used below to choose
     among several outside FIBs */
  fib_prefix_t pfx = {
    .fp_proto = FIB_PROTOCOL_IP4,
    .fp_len = 32,
    .fp_addr = {
        .ip4.as_u32 = key->r_addr.as_u32,
    },
  };
  nat44_is_idle_session_ctx_t ctx;

  /* Refuse to create a session once the per-thread limit is reached */
  if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index)))
    {
      b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
      nat_ipfix_logging_max_sessions(sm->max_translations);
      nat_log_notice ("maximum sessions exceeded");
      return SNAT_IN2OUT_NEXT_DROP;
    }

  /* key0 = inside (local) side of the session, key1 = outside side */
  key0.addr = key->l_addr;
  key0.port = key->l_port;
  key1.protocol = key0.protocol = proto;
  key0.fib_index = rx_fib_index;
  key1.fib_index = sm->outside_fib_index;
  /* First try to match static mapping by local address and port;
     a non-zero return means no static mapping matched */
  if (snat_static_mapping_match (sm, key0, &key1, 0, 0, 0, &lb, 0))
    {
      /* Try to create dynamic translation */
      if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index,
                                               thread_index, &key1,
                                               &address_index,
                                               sm->port_per_thread,
                                               tsm->snat_thread_index))
        {
          nat_log_notice ("addresses exhausted");
          b->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
          return SNAT_IN2OUT_NEXT_DROP;
        }
    }
  else
    is_sm = 1;

  u = nat_user_get_or_create (sm, &key->l_addr, rx_fib_index, thread_index);
  if (!u)
    {
      nat_log_warn ("create NAT user failed");
      /* Give back the dynamically allocated outside address/port */
      if (!is_sm)
        snat_free_outside_address_and_port (sm->addresses,
                                            thread_index, &key1);
      return SNAT_IN2OUT_NEXT_DROP;
    }

  s = nat_ed_session_alloc (sm, u, thread_index);
  if (!s)
    {
      /* NOTE(review): presumably removes the user only if it owns no
         sessions - confirm against nat44_delete_user_with_no_session */
      nat44_delete_user_with_no_session (sm, u, thread_index);
      nat_log_warn ("create NAT session failed");
      if (!is_sm)
        snat_free_outside_address_and_port (sm->addresses,
                                            thread_index, &key1);
      return SNAT_IN2OUT_NEXT_DROP;
    }

  /* Fill in the new session */
  user_session_increment (sm, u, is_sm);
  if (is_sm)
    s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
  if (lb)
    s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
  s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
  s->outside_address_index = address_index;
  s->ext_host_addr = key->r_addr;
  s->ext_host_port = key->r_port;
  s->in2out = key0;
  s->out2in = key1;
  s->out2in.protocol = key0.protocol;

  /* Select the outside FIB: the default one when none is configured, the
     only one when exactly one is configured, otherwise the first configured
     FIB whose lookup of the remote address yields an entry with a resolving
     interface.  If none qualifies, the fib_index copied from key1 stays. */
  switch (vec_len (sm->outside_fibs))
    {
    case 0:
      s->out2in.fib_index = sm->outside_fib_index;
      break;
    case 1:
      s->out2in.fib_index = sm->outside_fibs[0].fib_index;
      break;
    default:
      vec_foreach (outside_fib, sm->outside_fibs)
        {
          fei = fib_table_lookup (outside_fib->fib_index, &pfx);
          if (FIB_NODE_INDEX_INVALID != fei)
            {
              if (fib_entry_get_resolving_interface (fei) != ~0)
                {
                  s->out2in.fib_index = outside_fib->fib_index;
                  break;
                }
            }
        }
      break;
    }

  /* Add to lookup tables: kv still holds the in2out key at this point.
     A failed insertion is only logged; the session is kept. */
  kv->value = s - tsm->sessions;
  ctx.now = now;
  ctx.thread_index = thread_index;
  if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, kv,
                                               nat44_i2o_ed_is_idle_session_cb,
                                               &ctx))
    nat_log_notice ("in2out-ed key add failed");

  /* Rewrite kv in place into the out2in key.  &key->r_addr points into the
     very kv being rewritten.
     NOTE(review): relies on make_ed_kv tolerating this aliasing - confirm. */
  make_ed_kv (kv, &key1.addr, &key->r_addr, key->proto, s->out2in.fib_index,
              key1.port, key->r_port);
  kv->value = s - tsm->sessions;
  if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->out2in_ed, kv,
                                               nat44_o2i_ed_is_idle_session_cb,
                                               &ctx))
    nat_log_notice ("out2in-ed key add failed");

  *sessionp = s;

  /* log NAT event */
  snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
                                      s->out2in.addr.as_u32,
                                      s->in2out.protocol,
                                      s->in2out.port,
                                      s->out2in.port,
                                      s->in2out.fib_index);
  return next;
}
2671
2672 static_always_inline int
2673 nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
2674                         u32 sw_if_index, ip4_header_t * ip, u32 proto,
2675                         u32 rx_fib_index, u32 thread_index)
2676 {
2677   udp_header_t *udp = ip4_next_header (ip);
2678   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2679   clib_bihash_kv_16_8_t kv, value;
2680   snat_session_key_t key0, key1;
2681
2682   make_ed_kv (&kv, &ip->dst_address, &ip->src_address, ip->protocol,
2683               sm->outside_fib_index, udp->dst_port, udp->src_port);
2684
2685   /* NAT packet aimed at external address if */
2686   /* has active sessions */
2687   if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
2688     {
2689       key0.addr = ip->dst_address;
2690       key0.port = udp->dst_port;
2691       key0.protocol = proto;
2692       key0.fib_index = sm->outside_fib_index;
2693       /* or is static mappings */
2694       if (!snat_static_mapping_match(sm, key0, &key1, 1, 0, 0, 0, 0))
2695         return 0;
2696     }
2697   else
2698     return 0;
2699
2700   if (sm->forwarding_enabled)
2701     return 1;
2702
2703   return snat_not_translate_fast(sm, node, sw_if_index, ip, proto, rx_fib_index);
2704 }
2705
2706 static_always_inline int
2707 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
2708                                       u32 thread_index, f64 now,
2709                                       vlib_main_t * vm, vlib_buffer_t * b)
2710 {
2711   nat_ed_ses_key_t key;
2712   clib_bihash_kv_16_8_t kv, value;
2713   udp_header_t *udp;
2714   snat_session_t *s = 0;
2715   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2716
2717   if (!sm->forwarding_enabled)
2718     return 0;
2719
2720   if (ip->protocol == IP_PROTOCOL_ICMP)
2721     {
2722       key.as_u64[0] = key.as_u64[1] = 0;
2723       if (icmp_get_ed_key (ip, &key))
2724         return 0;
2725       key.fib_index = 0;
2726       kv.key[0] = key.as_u64[0];
2727       kv.key[1] = key.as_u64[1];
2728     }
2729   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
2730     {
2731       udp = ip4_next_header(ip);
2732       make_ed_kv (&kv, &ip->src_address, &ip->dst_address, ip->protocol, 0,
2733                   udp->src_port, udp->dst_port);
2734     }
2735   else
2736     {
2737       make_ed_kv (&kv, &ip->src_address, &ip->dst_address, ip->protocol, 0, 0,
2738                   0);
2739     }
2740
2741   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
2742     {
2743       s = pool_elt_at_index (tsm->sessions, value.value);
2744       if (is_fwd_bypass_session (s))
2745         {
2746           if (ip->protocol == IP_PROTOCOL_TCP)
2747             {
2748               tcp_header_t *tcp = ip4_next_header(ip);
2749               if (nat44_set_tcp_session_state_i2o (sm, s, tcp, thread_index))
2750                 return 1;
2751             }
2752           /* Accounting */
2753           nat44_session_update_counters (s, now,
2754                                          vlib_buffer_length_in_chain (vm, b));
2755           return 1;
2756         }
2757       else
2758         return 0;
2759     }
2760
2761   return 0;
2762 }
2763
2764 static_always_inline int
2765 nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip,
2766                                        u8 proto, u16 src_port, u16 dst_port,
2767                                        u32 thread_index, u32 rx_sw_if_index,
2768                                        u32 tx_sw_if_index)
2769 {
2770   clib_bihash_kv_16_8_t kv, value;
2771   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2772   snat_interface_t *i;
2773   snat_session_t *s;
2774   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
2775   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
2776
2777   /* src NAT check */
2778   make_ed_kv (&kv, &ip->src_address, &ip->dst_address, proto, tx_fib_index,
2779               src_port, dst_port);
2780   if (!clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
2781     return 1;
2782
2783   /* dst NAT check */
2784   make_ed_kv (&kv, &ip->dst_address, &ip->src_address, proto, rx_fib_index,
2785               dst_port, src_port);
2786   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
2787   {
2788     s = pool_elt_at_index (tsm->sessions, value.value);
2789     if (is_fwd_bypass_session (s))
2790       return 0;
2791
2792     /* hairpinning */
2793     pool_foreach (i, sm->output_feature_interfaces,
2794     ({
2795       if ((nat_interface_is_inside(i)) && (rx_sw_if_index == i->sw_if_index))
2796         return 0;
2797     }));
2798     return 1;
2799   }
2800
2801   return 0;
2802 }
2803
2804 u32
2805 icmp_match_in2out_ed(snat_main_t *sm, vlib_node_runtime_t *node,
2806                      u32 thread_index, vlib_buffer_t *b, ip4_header_t *ip,
2807                      u8 *p_proto, snat_session_key_t *p_value,
2808                      u8 *p_dont_translate, void *d, void *e)
2809 {
2810   icmp46_header_t *icmp;
2811   u32 sw_if_index;
2812   u32 rx_fib_index;
2813   nat_ed_ses_key_t key;
2814   snat_session_t *s = 0;
2815   u8 dont_translate = 0;
2816   clib_bihash_kv_16_8_t kv, value;
2817   u32 next = ~0;
2818   int err;
2819   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2820
2821   icmp = (icmp46_header_t *) ip4_next_header (ip);
2822   sw_if_index = vnet_buffer(b)->sw_if_index[VLIB_RX];
2823   rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
2824
2825   key.as_u64[0] = key.as_u64[1] = 0;
2826   err = icmp_get_ed_key (ip, &key);
2827   if (err != 0)
2828     {
2829       b->error = node->errors[err];
2830       next = SNAT_IN2OUT_NEXT_DROP;
2831       goto out;
2832     }
2833   key.fib_index = rx_fib_index;
2834
2835   kv.key[0] = key.as_u64[0];
2836   kv.key[1] = key.as_u64[1];
2837
2838   if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
2839     {
2840       if (vnet_buffer(b)->sw_if_index[VLIB_TX] != ~0)
2841         {
2842           if (PREDICT_FALSE(nat44_ed_not_translate_output_feature(sm, ip,
2843               key.proto, key.l_port, key.r_port, thread_index, sw_if_index,
2844               vnet_buffer(b)->sw_if_index[VLIB_TX])))
2845             {
2846               dont_translate = 1;
2847               goto out;
2848             }
2849         }
2850       else
2851         {
2852           if (PREDICT_FALSE(nat44_ed_not_translate(sm, node, sw_if_index,
2853               ip, SNAT_PROTOCOL_ICMP, rx_fib_index, thread_index)))
2854             {
2855               dont_translate = 1;
2856               goto out;
2857             }
2858         }
2859
2860       if (PREDICT_FALSE(icmp_is_error_message (icmp)))
2861         {
2862           b->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2863           next = SNAT_IN2OUT_NEXT_DROP;
2864           goto out;
2865         }
2866
2867       next = slow_path_ed (sm, b, rx_fib_index, &kv, &s, node, next,
2868                            thread_index, vlib_time_now (sm->vlib_main));
2869
2870       if (PREDICT_FALSE (next == SNAT_IN2OUT_NEXT_DROP))
2871         goto out;
2872     }
2873   else
2874     {
2875       if (PREDICT_FALSE(icmp->type != ICMP4_echo_request &&
2876                         icmp->type != ICMP4_echo_reply &&
2877                         !icmp_is_error_message (icmp)))
2878         {
2879           b->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2880           next = SNAT_IN2OUT_NEXT_DROP;
2881           goto out;
2882         }
2883
2884       s = pool_elt_at_index (tsm->sessions, value.value);
2885     }
2886
2887   *p_proto = ip_proto_to_snat_proto (key.proto);
2888 out:
2889   if (s)
2890     *p_value = s->out2in;
2891   *p_dont_translate = dont_translate;
2892   if (d)
2893     *(snat_session_t**)d = s;
2894   return next;
2895 }
2896
2897 static inline void
2898 nat44_ed_hairpinning_unknown_proto (snat_main_t *sm,
2899                                     vlib_buffer_t * b,
2900                                     ip4_header_t * ip)
2901 {
2902   u32 old_addr, new_addr = 0, ti = 0;
2903   clib_bihash_kv_8_8_t kv, value;
2904   clib_bihash_kv_16_8_t s_kv, s_value;
2905   snat_static_mapping_t *m;
2906   ip_csum_t sum;
2907   snat_session_t *s;
2908   snat_main_per_thread_data_t *tsm;
2909
2910   if (sm->num_workers > 1)
2911     ti = sm->worker_out2in_cb (ip, sm->outside_fib_index);
2912   else
2913     ti = sm->num_workers;
2914   tsm = &sm->per_thread_data[ti];
2915
2916   old_addr = ip->dst_address.as_u32;
2917   make_ed_kv (&s_kv, &ip->dst_address, &ip->src_address, ip->protocol,
2918               sm->outside_fib_index, 0, 0);
2919   if (clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
2920     {
2921       make_sm_kv (&kv, &ip->dst_address, 0, 0, 0);
2922       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
2923         return;
2924
2925       m = pool_elt_at_index (sm->static_mappings, value.value);
2926       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
2927         vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
2928       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
2929     }
2930   else
2931     {
2932       s = pool_elt_at_index (sm->per_thread_data[ti].sessions, s_value.value);
2933       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
2934         vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
2935       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
2936     }
2937   sum = ip->checksum;
2938   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
2939   ip->checksum = ip_csum_fold (sum);
2940 }
2941
2942 static snat_session_t *
2943 nat44_ed_in2out_unknown_proto (snat_main_t *sm,
2944                                vlib_buffer_t * b,
2945                                ip4_header_t * ip,
2946                                u32 rx_fib_index,
2947                                u32 thread_index,
2948                                f64 now,
2949                                vlib_main_t * vm,
2950                                vlib_node_runtime_t * node)
2951 {
2952   clib_bihash_kv_8_8_t kv, value;
2953   clib_bihash_kv_16_8_t s_kv, s_value;
2954   snat_static_mapping_t *m;
2955   u32 old_addr, new_addr = 0;
2956   ip_csum_t sum;
2957   snat_user_t *u;
2958   dlist_elt_t *head, *elt;
2959   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2960   u32 elt_index, head_index, ses_index;
2961   snat_session_t * s;
2962   u32 address_index = ~0, outside_fib_index = sm->outside_fib_index;
2963   int i;
2964   u8 is_sm = 0;
2965   nat_outside_fib_t *outside_fib;
2966   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
2967   fib_prefix_t pfx = {
2968     .fp_proto = FIB_PROTOCOL_IP4,
2969     .fp_len = 32,
2970     .fp_addr = {
2971         .ip4.as_u32 = ip->dst_address.as_u32,
2972     },
2973   };
2974
2975   switch (vec_len (sm->outside_fibs))
2976     {
2977     case 0:
2978       outside_fib_index = sm->outside_fib_index;
2979       break;
2980     case 1:
2981       outside_fib_index = sm->outside_fibs[0].fib_index;
2982       break;
2983     default:
2984       vec_foreach (outside_fib, sm->outside_fibs)
2985         {
2986           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
2987           if (FIB_NODE_INDEX_INVALID != fei)
2988             {
2989               if (fib_entry_get_resolving_interface (fei) != ~0)
2990                 {
2991                   outside_fib_index = outside_fib->fib_index;
2992                   break;
2993                 }
2994             }
2995         }
2996       break;
2997     }
2998   old_addr = ip->src_address.as_u32;
2999
3000   make_ed_kv (&s_kv, &ip->src_address, &ip->dst_address, ip->protocol,
3001               rx_fib_index, 0, 0);
3002
3003   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &s_kv, &s_value))
3004     {
3005       s = pool_elt_at_index (tsm->sessions, s_value.value);
3006       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
3007     }
3008   else
3009     {
3010       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
3011         {
3012           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
3013           nat_ipfix_logging_max_sessions(sm->max_translations);
3014           nat_log_notice ("maximum sessions exceeded");
3015           return 0;
3016         }
3017
3018       u = nat_user_get_or_create (sm, &ip->src_address, rx_fib_index,
3019                                   thread_index);
3020       if (!u)
3021         {
3022           nat_log_warn ("create NAT user failed");
3023           return 0;
3024         }
3025
3026       make_sm_kv (&kv, &ip->src_address, 0, rx_fib_index, 0);
3027
3028       /* Try to find static mapping first */
3029       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
3030         {
3031           m = pool_elt_at_index (sm->static_mappings, value.value);
3032           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
3033           is_sm = 1;
3034           goto create_ses;
3035         }
3036       /* Fallback to 3-tuple key */
3037       else
3038         {
3039           /* Choose same out address as for TCP/UDP session to same destination */
3040           head_index = u->sessions_per_user_list_head_index;
3041           head = pool_elt_at_index (tsm->list_pool, head_index);
3042           elt_index = head->next;
3043           if (PREDICT_FALSE (elt_index == ~0))
3044             ses_index = ~0;
3045           else
3046             {
3047               elt = pool_elt_at_index (tsm->list_pool, elt_index);
3048               ses_index = elt->value;
3049             }
3050
3051           while (ses_index != ~0)
3052             {
3053               s =  pool_elt_at_index (tsm->sessions, ses_index);
3054               elt_index = elt->next;
3055               elt = pool_elt_at_index (tsm->list_pool, elt_index);
3056               ses_index = elt->value;
3057
3058               if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
3059                 {
3060                   new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
3061                   address_index = s->outside_address_index;
3062
3063                   make_ed_kv (&s_kv, &s->out2in.addr, &ip->dst_address,
3064                               ip->protocol, outside_fib_index, 0, 0);
3065                   if (clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
3066                     goto create_ses;
3067
3068                   break;
3069                 }
3070             }
3071
3072           for (i = 0; i < vec_len (sm->addresses); i++)
3073             {
3074               make_ed_kv (&s_kv, &sm->addresses[i].addr, &ip->dst_address,
3075                           ip->protocol, outside_fib_index, 0, 0);
3076               if (clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
3077                 {
3078                   new_addr = ip->src_address.as_u32 =
3079                     sm->addresses[i].addr.as_u32;
3080                   address_index = i;
3081                   goto create_ses;
3082                 }
3083             }
3084           return 0;
3085         }
3086
3087 create_ses:
3088       s = nat_ed_session_alloc (sm, u, thread_index);
3089       if (!s)
3090         {
3091           nat44_delete_user_with_no_session (sm, u, thread_index);
3092           nat_log_warn ("create NAT session failed");
3093           return 0;
3094         }
3095
3096       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
3097       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
3098       s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
3099       s->outside_address_index = address_index;
3100       s->out2in.addr.as_u32 = new_addr;
3101       s->out2in.fib_index = outside_fib_index;
3102       s->in2out.addr.as_u32 = old_addr;
3103       s->in2out.fib_index = rx_fib_index;
3104       s->in2out.port = s->out2in.port = ip->protocol;
3105       if (is_sm)
3106         s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
3107       user_session_increment (sm, u, is_sm);
3108
3109       /* Add to lookup tables */
3110       make_ed_kv (&s_kv, &s->in2out.addr, &ip->dst_address, ip->protocol,
3111                   rx_fib_index, 0, 0);
3112       s_kv.value = s - tsm->sessions;
3113       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1))
3114         nat_log_notice ("in2out key add failed");
3115
3116       make_ed_kv (&s_kv, &s->out2in.addr, &ip->dst_address, ip->protocol,
3117                   outside_fib_index, 0, 0);
3118       s_kv.value = s - tsm->sessions;
3119       if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &s_kv, 1))
3120         nat_log_notice ("out2in key add failed");
3121   }
3122
3123   /* Update IP checksum */
3124   sum = ip->checksum;
3125   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
3126   ip->checksum = ip_csum_fold (sum);
3127
3128   /* Accounting */
3129   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b));
3130
3131   /* Hairpinning */
3132   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
3133     nat44_ed_hairpinning_unknown_proto(sm, b, ip);
3134
3135   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
3136     vnet_buffer(b)->sw_if_index[VLIB_TX] = outside_fib_index;
3137
3138   return s;
3139 }
3140
3141 static inline uword
3142 nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
3143                                 vlib_node_runtime_t * node,
3144                                 vlib_frame_t * frame, int is_slow_path,
3145                                 int is_output_feature)
3146 {
3147   u32 n_left_from, *from, *to_next, pkts_processed = 0, stats_node_index;
3148   snat_in2out_next_t next_index;
3149   snat_main_t *sm = &snat_main;
3150   f64 now = vlib_time_now (vm);
3151   u32 thread_index = vm->thread_index;
3152   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
3153
3154   stats_node_index = is_slow_path ? nat44_ed_in2out_slowpath_node.index :
3155     nat44_ed_in2out_node.index;
3156
3157   from = vlib_frame_vector_args (frame);
3158   n_left_from = frame->n_vectors;
3159   next_index = node->cached_next_index;
3160
3161   while (n_left_from > 0)
3162     {
3163       u32 n_left_to_next;
3164
3165       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
3166
3167       while (n_left_from >= 4 && n_left_to_next >= 2)
3168         {
3169           u32 bi0, bi1;
3170           vlib_buffer_t *b0, *b1;
3171           u32 next0, sw_if_index0, rx_fib_index0, iph_offset0 = 0, proto0,
3172               new_addr0, old_addr0;
3173           u32 next1, sw_if_index1, rx_fib_index1, iph_offset1 = 0, proto1,
3174               new_addr1, old_addr1;
3175           u16 old_port0, new_port0, old_port1, new_port1;
3176           ip4_header_t *ip0, *ip1;
3177           udp_header_t *udp0, *udp1;
3178           tcp_header_t *tcp0, *tcp1;
3179           icmp46_header_t *icmp0, *icmp1;
3180           snat_session_t *s0 = 0, *s1 = 0;
3181           clib_bihash_kv_16_8_t kv0, value0, kv1, value1;
3182           ip_csum_t sum0, sum1;
3183
3184           /* Prefetch next iteration. */
3185           {
3186             vlib_buffer_t * p2, * p3;
3187
3188             p2 = vlib_get_buffer (vm, from[2]);
3189             p3 = vlib_get_buffer (vm, from[3]);
3190
3191             vlib_prefetch_buffer_header (p2, LOAD);
3192             vlib_prefetch_buffer_header (p3, LOAD);
3193
3194             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
3195             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
3196           }
3197
3198           /* speculatively enqueue b0 and b1 to the current next frame */
3199           to_next[0] = bi0 = from[0];
3200           to_next[1] = bi1 = from[1];
3201           from += 2;
3202           to_next += 2;
3203           n_left_from -= 2;
3204           n_left_to_next -= 2;
3205
3206           b0 = vlib_get_buffer (vm, bi0);
3207           b1 = vlib_get_buffer (vm, bi1);
3208
3209           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3210
3211           if (is_output_feature)
3212             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
3213
3214           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
3215                  iph_offset0);
3216
3217           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3218           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3219                                                                sw_if_index0);
3220
3221           if (PREDICT_FALSE(ip0->ttl == 1))
3222             {
3223               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3224               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3225                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3226                                            0);
3227               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3228               goto trace00;
3229             }
3230
3231           udp0 = ip4_next_header (ip0);
3232           tcp0 = (tcp_header_t *) udp0;
3233           icmp0 = (icmp46_header_t *) udp0;
3234           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3235
3236           if (is_slow_path)
3237             {
3238               if (PREDICT_FALSE (proto0 == ~0))
3239                 {
3240                   s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0,
3241                                                       rx_fib_index0,
3242                                                       thread_index, now, vm,
3243                                                       node);
3244                   if (!s0)
3245                     next0 = SNAT_IN2OUT_NEXT_DROP;
3246                   goto trace00;
3247                 }
3248
3249               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3250                 {
3251                   next0 = icmp_in2out_ed_slow_path
3252                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
3253                      next0, now, thread_index, &s0);
3254                   goto trace00;
3255                 }
3256             }
3257           else
3258             {
3259                if (is_output_feature)
3260                 {
3261                   if (PREDICT_FALSE(nat_not_translate_output_feature_fwd(
3262                       sm, ip0, thread_index, now, vm, b0)))
3263                     goto trace00;
3264                 }
3265
3266               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
3267                 {
3268                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3269                   goto trace00;
3270                 }
3271
3272               if (ip4_is_fragment (ip0))
3273                 {
3274                   b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
3275                   next0 = SNAT_IN2OUT_NEXT_DROP;
3276                   goto trace00;
3277                 }
3278             }
3279
3280           make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address, ip0->protocol,
3281                       rx_fib_index0, udp0->src_port, udp0->dst_port);
3282
3283           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
3284             {
3285               if (is_slow_path)
3286                 {
3287                   if (is_output_feature)
3288                     {
3289                       if (PREDICT_FALSE(nat44_ed_not_translate_output_feature(
3290                           sm, ip0, ip0->protocol, udp0->src_port,
3291                           udp0->dst_port, thread_index, sw_if_index0,
3292                           vnet_buffer(b0)->sw_if_index[VLIB_TX])))
3293                         goto trace00;
3294                     }
3295                   else
3296                     {
3297                       if (PREDICT_FALSE(nat44_ed_not_translate(sm, node,
3298                           sw_if_index0, ip0, proto0, rx_fib_index0,
3299                           thread_index)))
3300                         goto trace00;
3301                     }
3302
3303                   next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node,
3304                                         next0, thread_index, now);
3305
3306                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
3307                     goto trace00;
3308                 }
3309               else
3310                 {
3311                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3312                   goto trace00;
3313                 }
3314             }
3315           else
3316             {
3317               s0 = pool_elt_at_index (tsm->sessions, value0.value);
3318             }
3319
3320           b0->flags |= VNET_BUFFER_F_IS_NATED;
3321
3322           if (!is_output_feature)
3323             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
3324
3325           old_addr0 = ip0->src_address.as_u32;
3326           new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
3327           sum0 = ip0->checksum;
3328           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
3329                                  src_address);
3330           if (PREDICT_FALSE (is_twice_nat_session (s0)))
3331             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
3332                                    s0->ext_host_addr.as_u32, ip4_header_t,
3333                                    dst_address);
3334           ip0->checksum = ip_csum_fold (sum0);
3335
3336           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
3337             {
3338               old_port0 = tcp0->src_port;
3339               new_port0 = tcp0->src_port = s0->out2in.port;
3340
3341               sum0 = tcp0->checksum;
3342               sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
3343                                      dst_address);
3344               sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
3345                                      length);
3346               if (PREDICT_FALSE (is_twice_nat_session (s0)))
3347                 {
3348                   sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
3349                                          s0->ext_host_addr.as_u32,
3350                                          ip4_header_t, dst_address);
3351                   sum0 = ip_csum_update (sum0, tcp0->dst_port,
3352                                          s0->ext_host_port, ip4_header_t,
3353                                          length);
3354                   tcp0->dst_port = s0->ext_host_port;
3355                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
3356                 }
3357               tcp0->checksum = ip_csum_fold(sum0);
3358               if (nat44_set_tcp_session_state_i2o (sm, s0, tcp0, thread_index))
3359                 goto trace00;
3360             }
3361           else
3362             {
3363               udp0->src_port = s0->out2in.port;
3364               udp0->checksum = 0;
3365               if (PREDICT_FALSE (is_twice_nat_session (s0)))
3366                 {
3367                   udp0->dst_port = s0->ext_host_port;
3368                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
3369                 }
3370             }
3371
3372           /* Accounting */
3373           nat44_session_update_counters (s0, now,
3374                                          vlib_buffer_length_in_chain (vm, b0));
3375
3376         trace00:
3377           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3378                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3379             {
3380               snat_in2out_trace_t *t =
3381                 vlib_add_trace (vm, node, b0, sizeof (*t));
3382               t->is_slow_path = is_slow_path;
3383               t->sw_if_index = sw_if_index0;
3384               t->next_index = next0;
3385               t->session_index = ~0;
3386               if (s0)
3387                 t->session_index = s0 - tsm->sessions;
3388             }
3389
3390           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3391
3392
3393           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
3394
3395           if (is_output_feature)
3396             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
3397
3398           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
3399                  iph_offset1);
3400
3401           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
3402           rx_fib_index1 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3403                                                                sw_if_index1);
3404
3405           if (PREDICT_FALSE(ip1->ttl == 1))
3406             {
3407               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3408               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
3409                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3410                                            0);
3411               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3412               goto trace01;
3413             }
3414
3415           udp1 = ip4_next_header (ip1);
3416           tcp1 = (tcp_header_t *) udp1;
3417           icmp1 = (icmp46_header_t *) udp1;
3418           proto1 = ip_proto_to_snat_proto (ip1->protocol);
3419
3420           if (is_slow_path)
3421             {
3422               if (PREDICT_FALSE (proto1 == ~0))
3423                 {
3424                   s1 = nat44_ed_in2out_unknown_proto (sm, b1, ip1,
3425                                                       rx_fib_index1,
3426                                                       thread_index, now, vm,
3427                                                       node);
3428                   if (!s1)
3429                     next1 = SNAT_IN2OUT_NEXT_DROP;
3430                   goto trace01;
3431                 }
3432
3433               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
3434                 {
3435                   next1 = icmp_in2out_ed_slow_path
3436                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
3437                      next1, now, thread_index, &s1);
3438                   goto trace01;
3439                 }
3440             }
3441           else
3442             {
3443                if (is_output_feature)
3444                 {
3445                   if (PREDICT_FALSE(nat_not_translate_output_feature_fwd(
3446                       sm, ip1, thread_index, now, vm, b1)))
3447                     goto trace01;
3448                 }
3449
3450               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
3451                 {
3452                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3453                   goto trace01;
3454                 }
3455
3456               if (ip4_is_fragment (ip1))
3457                 {
3458                   b1->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
3459                   next1 = SNAT_IN2OUT_NEXT_DROP;
3460                   goto trace01;
3461                 }
3462             }
3463
3464           make_ed_kv (&kv1, &ip1->src_address, &ip1->dst_address, ip1->protocol,
3465                       rx_fib_index1, udp1->src_port, udp1->dst_port);
3466
3467           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv1, &value1))
3468             {
3469               if (is_slow_path)
3470                 {
3471                   if (is_output_feature)
3472                     {
3473                       if (PREDICT_FALSE(nat44_ed_not_translate_output_feature(
3474                           sm, ip1, ip1->protocol, udp1->src_port,
3475                           udp1->dst_port, thread_index, sw_if_index1,
3476                           vnet_buffer(b1)->sw_if_index[VLIB_TX])))
3477                         goto trace01;
3478                     }
3479                   else
3480                     {
3481                       if (PREDICT_FALSE(nat44_ed_not_translate(sm, node,
3482                           sw_if_index1, ip1, proto1, rx_fib_index1,
3483                           thread_index)))
3484                         goto trace01;
3485                     }
3486
3487                   next1 = slow_path_ed (sm, b1, rx_fib_index1, &kv1, &s1, node,
3488                                         next1, thread_index, now);
3489
3490                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
3491                     goto trace01;
3492                 }
3493               else
3494                 {
3495                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3496                   goto trace01;
3497                 }
3498             }
3499           else
3500             {
3501               s1 = pool_elt_at_index (tsm->sessions, value1.value);
3502             }
3503
3504           b1->flags |= VNET_BUFFER_F_IS_NATED;
3505
3506           if (!is_output_feature)
3507             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
3508
3509           old_addr1 = ip1->src_address.as_u32;
3510           new_addr1 = ip1->src_address.as_u32 = s1->out2in.addr.as_u32;
3511           sum1 = ip1->checksum;
3512           sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
3513                                  src_address);
3514           if (PREDICT_FALSE (is_twice_nat_session (s1)))
3515             sum1 = ip_csum_update (sum1, ip1->dst_address.as_u32,
3516                                    s1->ext_host_addr.as_u32, ip4_header_t,
3517                                    dst_address);
3518           ip1->checksum = ip_csum_fold (sum1);
3519
3520           if (PREDICT_TRUE (proto1 == SNAT_PROTOCOL_TCP))
3521             {
3522               old_port1 = tcp1->src_port;
3523               new_port1 = tcp1->src_port = s1->out2in.port;
3524
3525               sum1 = tcp1->checksum;
3526               sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
3527                                      dst_address);
3528               sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t,
3529                                      length);
3530               if (PREDICT_FALSE (is_twice_nat_session (s1)))
3531                 {
3532                   sum1 = ip_csum_update (sum1, ip1->dst_address.as_u32,
3533                                          s1->ext_host_addr.as_u32,
3534                                          ip4_header_t, dst_address);
3535                   sum1 = ip_csum_update (sum1, tcp1->dst_port,
3536                                          s1->ext_host_port, ip4_header_t,
3537                                          length);
3538                   tcp1->dst_port = s1->ext_host_port;
3539                   ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32;
3540                 }
3541               tcp1->checksum = ip_csum_fold(sum1);
3542               if (nat44_set_tcp_session_state_i2o (sm, s1, tcp1, thread_index))
3543                 goto trace01;
3544             }
3545           else
3546             {
3547               udp1->src_port = s1->out2in.port;
3548               udp1->checksum = 0;
3549               if (PREDICT_FALSE (is_twice_nat_session (s1)))
3550                 {
3551                   udp1->dst_port = s1->ext_host_port;
3552                   ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32;
3553                 }
3554             }
3555
3556           /* Accounting */
3557           nat44_session_update_counters (s1, now,
3558                                          vlib_buffer_length_in_chain (vm, b1));
3559
3560         trace01:
3561           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3562                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
3563             {
3564               snat_in2out_trace_t *t =
3565                 vlib_add_trace (vm, node, b1, sizeof (*t));
3566               t->is_slow_path = is_slow_path;
3567               t->sw_if_index = sw_if_index1;
3568               t->next_index = next1;
3569               t->session_index = ~0;
3570               if (s1)
3571                 t->session_index = s1 - tsm->sessions;
3572             }
3573
3574           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
3575
3576           /* verify speculative enqueues, maybe switch current next frame */
3577           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
3578                                            to_next, n_left_to_next,
3579                                            bi0, bi1, next0, next1);
3580         }
3581
3582       while (n_left_from > 0 && n_left_to_next > 0)
3583         {
3584           u32 bi0;
3585           vlib_buffer_t *b0;
3586           u32 next0, sw_if_index0, rx_fib_index0, iph_offset0 = 0, proto0,
3587               new_addr0, old_addr0;
3588           u16 old_port0, new_port0;
3589           ip4_header_t *ip0;
3590           udp_header_t *udp0;
3591           tcp_header_t *tcp0;
3592           icmp46_header_t * icmp0;
3593           snat_session_t *s0 = 0;
3594           clib_bihash_kv_16_8_t kv0, value0;
3595           ip_csum_t sum0;
3596
3597           /* speculatively enqueue b0 to the current next frame */
3598           bi0 = from[0];
3599           to_next[0] = bi0;
3600           from += 1;
3601           to_next += 1;
3602           n_left_from -= 1;
3603           n_left_to_next -= 1;
3604
3605           b0 = vlib_get_buffer (vm, bi0);
3606           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3607
3608           if (is_output_feature)
3609             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
3610
3611           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
3612                  iph_offset0);
3613
3614           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3615           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3616                                                                sw_if_index0);
3617
3618           if (PREDICT_FALSE(ip0->ttl == 1))
3619             {
3620               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3621               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3622                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3623                                            0);
3624               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3625               goto trace0;
3626             }
3627
3628           udp0 = ip4_next_header (ip0);
3629           tcp0 = (tcp_header_t *) udp0;
3630           icmp0 = (icmp46_header_t *) udp0;
3631           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3632
3633           if (is_slow_path)
3634             {
3635               if (PREDICT_FALSE (proto0 == ~0))
3636                 {
3637                   s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0,
3638                                                       rx_fib_index0,
3639                                                       thread_index, now, vm,
3640                                                       node);
3641                   if (!s0)
3642                     next0 = SNAT_IN2OUT_NEXT_DROP;
3643                   goto trace0;
3644                 }
3645
3646               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3647                 {
3648                   next0 = icmp_in2out_ed_slow_path
3649                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
3650                      next0, now, thread_index, &s0);
3651                   goto trace0;
3652                 }
3653             }
3654           else
3655             {
3656                if (is_output_feature)
3657                 {
3658                   if (PREDICT_FALSE(nat_not_translate_output_feature_fwd(
3659                       sm, ip0, thread_index, now, vm, b0)))
3660                     goto trace0;
3661                 }
3662
3663               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
3664                 {
3665                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3666                   goto trace0;
3667                 }
3668
3669               if (ip4_is_fragment (ip0))
3670                 {
3671                   b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
3672                   next0 = SNAT_IN2OUT_NEXT_DROP;
3673                   goto trace0;
3674                 }
3675             }
3676
3677           make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address, ip0->protocol,
3678                       rx_fib_index0, udp0->src_port, udp0->dst_port);
3679
3680           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
3681             {
3682               if (is_slow_path)
3683                 {
3684                   if (is_output_feature)
3685                     {
3686                       if (PREDICT_FALSE(nat44_ed_not_translate_output_feature(
3687                           sm, ip0, ip0->protocol, udp0->src_port,
3688                           udp0->dst_port, thread_index, sw_if_index0,
3689                           vnet_buffer(b0)->sw_if_index[VLIB_TX])))
3690                         goto trace0;
3691                     }
3692                   else
3693                     {
3694                       if (PREDICT_FALSE(nat44_ed_not_translate(sm, node,
3695                           sw_if_index0, ip0, proto0, rx_fib_index0,
3696                           thread_index)))
3697                         goto trace0;
3698                     }
3699
3700                   next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node,
3701                                         next0, thread_index, now);
3702
3703                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
3704                     goto trace0;
3705                 }
3706               else
3707                 {
3708                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3709                   goto trace0;
3710                 }
3711             }
3712           else
3713             {
3714               s0 = pool_elt_at_index (tsm->sessions, value0.value);
3715             }
3716
3717           b0->flags |= VNET_BUFFER_F_IS_NATED;
3718
3719           if (!is_output_feature)
3720             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
3721
3722           old_addr0 = ip0->src_address.as_u32;
3723           new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
3724           sum0 = ip0->checksum;
3725           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
3726                                  src_address);
3727           if (PREDICT_FALSE (is_twice_nat_session (s0)))
3728             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
3729                                    s0->ext_host_addr.as_u32, ip4_header_t,
3730                                    dst_address);
3731           ip0->checksum = ip_csum_fold (sum0);
3732
3733           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
3734             {
3735               old_port0 = tcp0->src_port;
3736               new_port0 = tcp0->src_port = s0->out2in.port;
3737
3738               sum0 = tcp0->checksum;
3739               sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
3740                                      dst_address);
3741               sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
3742                                      length);
3743               if (PREDICT_FALSE (is_twice_nat_session (s0)))
3744                 {
3745                   sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
3746                                          s0->ext_host_addr.as_u32,
3747                                          ip4_header_t, dst_address);
3748                   sum0 = ip_csum_update (sum0, tcp0->dst_port,
3749                                          s0->ext_host_port, ip4_header_t,
3750                                          length);
3751                   tcp0->dst_port = s0->ext_host_port;
3752                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
3753                 }
3754               tcp0->checksum = ip_csum_fold(sum0);
3755               if (nat44_set_tcp_session_state_i2o (sm, s0, tcp0, thread_index))
3756                 goto trace0;
3757             }
3758           else
3759             {
3760               udp0->src_port = s0->out2in.port;
3761               udp0->checksum = 0;
3762               if (PREDICT_FALSE (is_twice_nat_session (s0)))
3763                 {
3764                   udp0->dst_port = s0->ext_host_port;
3765                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
3766                 }
3767             }
3768
3769           /* Accounting */
3770           nat44_session_update_counters (s0, now,
3771                                          vlib_buffer_length_in_chain (vm, b0));
3772
3773         trace0:
3774           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3775                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3776             {
3777               snat_in2out_trace_t *t =
3778                 vlib_add_trace (vm, node, b0, sizeof (*t));
3779               t->is_slow_path = is_slow_path;
3780               t->sw_if_index = sw_if_index0;
3781               t->next_index = next0;
3782               t->session_index = ~0;
3783               if (s0)
3784                 t->session_index = s0 - tsm->sessions;
3785             }
3786
3787           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3788
3789           /* verify speculative enqueue, maybe switch current next frame */
3790           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3791                                            to_next, n_left_to_next,
3792                                            bi0, next0);
3793         }
3794
3795       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3796     }
3797
3798   vlib_node_increment_counter (vm, stats_node_index,
3799                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3800                                pkts_processed);
3801   return frame->n_vectors;
3802 }
3803
3804 static uword
3805 nat44_ed_in2out_fast_path_fn (vlib_main_t * vm,
3806                               vlib_node_runtime_t * node,
3807                               vlib_frame_t * frame)
3808 {
3809   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 0, 0);
3810 }
3811
3812 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
3813   .function = nat44_ed_in2out_fast_path_fn,
3814   .name = "nat44-ed-in2out",
3815   .vector_size = sizeof (u32),
3816   .format_trace = format_snat_in2out_trace,
3817   .type = VLIB_NODE_TYPE_INTERNAL,
3818
3819   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3820   .error_strings = snat_in2out_error_strings,
3821
3822   .runtime_data_bytes = sizeof (snat_runtime_t),
3823
3824   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3825
3826   /* edit / add dispositions here */
3827   .next_nodes = {
3828     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3829     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3830     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-ed-in2out-slowpath",
3831     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3832     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
3833   },
3834 };
3835
3836 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_in2out_node, nat44_ed_in2out_fast_path_fn);
3837
3838 static uword
3839 nat44_ed_in2out_output_fast_path_fn (vlib_main_t * vm,
3840                                      vlib_node_runtime_t * node,
3841                                      vlib_frame_t * frame)
3842 {
3843   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 0, 1);
3844 }
3845
3846 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
3847   .function = nat44_ed_in2out_output_fast_path_fn,
3848   .name = "nat44-ed-in2out-output",
3849   .vector_size = sizeof (u32),
3850   .format_trace = format_snat_in2out_trace,
3851   .type = VLIB_NODE_TYPE_INTERNAL,
3852
3853   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3854   .error_strings = snat_in2out_error_strings,
3855
3856   .runtime_data_bytes = sizeof (snat_runtime_t),
3857
3858   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3859
3860   /* edit / add dispositions here */
3861   .next_nodes = {
3862     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3863     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
3864     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
3865     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3866     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
3867   },
3868 };
3869
3870 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_in2out_output_node,
3871                               nat44_ed_in2out_output_fast_path_fn);
3872
3873 static uword
3874 nat44_ed_in2out_slow_path_fn (vlib_main_t * vm,
3875                               vlib_node_runtime_t * node,
3876                               vlib_frame_t * frame)
3877 {
3878   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 1, 0);
3879 }
3880
3881 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
3882   .function = nat44_ed_in2out_slow_path_fn,
3883   .name = "nat44-ed-in2out-slowpath",
3884   .vector_size = sizeof (u32),
3885   .format_trace = format_snat_in2out_trace,
3886   .type = VLIB_NODE_TYPE_INTERNAL,
3887
3888   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3889   .error_strings = snat_in2out_error_strings,
3890
3891   .runtime_data_bytes = sizeof (snat_runtime_t),
3892
3893   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3894
3895   /* edit / add dispositions here */
3896   .next_nodes = {
3897     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3898     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3899     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-ed-in2out-slowpath",
3900     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3901     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
3902   },
3903 };
3904
3905 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_in2out_slowpath_node,
3906                               nat44_ed_in2out_slow_path_fn);
3907
3908 static uword
3909 nat44_ed_in2out_output_slow_path_fn (vlib_main_t * vm,
3910                                      vlib_node_runtime_t * node,
3911                                      vlib_frame_t * frame)
3912 {
3913   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 1, 1);
3914 }
3915
3916 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
3917   .function = nat44_ed_in2out_output_slow_path_fn,
3918   .name = "nat44-ed-in2out-output-slowpath",
3919   .vector_size = sizeof (u32),
3920   .format_trace = format_snat_in2out_trace,
3921   .type = VLIB_NODE_TYPE_INTERNAL,
3922
3923   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3924   .error_strings = snat_in2out_error_strings,
3925
3926   .runtime_data_bytes = sizeof (snat_runtime_t),
3927
3928   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3929
3930   /* edit / add dispositions here */
3931   .next_nodes = {
3932     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3933     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
3934     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
3935     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3936     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
3937   },
3938 };
3939
3940 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_in2out_output_slowpath_node,
3941                               nat44_ed_in2out_output_slow_path_fn);
3942
3943 /**************************/
3944 /*** deterministic mode ***/
3945 /**************************/
3946 static uword
3947 snat_det_in2out_node_fn (vlib_main_t * vm,
3948                          vlib_node_runtime_t * node,
3949                          vlib_frame_t * frame)
3950 {
3951   u32 n_left_from, * from, * to_next;
3952   snat_in2out_next_t next_index;
3953   u32 pkts_processed = 0;
3954   snat_main_t * sm = &snat_main;
3955   u32 now = (u32) vlib_time_now (vm);
3956   u32 thread_index = vm->thread_index;
3957
3958   from = vlib_frame_vector_args (frame);
3959   n_left_from = frame->n_vectors;
3960   next_index = node->cached_next_index;
3961
3962   while (n_left_from > 0)
3963     {
3964       u32 n_left_to_next;
3965
3966       vlib_get_next_frame (vm, node, next_index,
3967                            to_next, n_left_to_next);
3968
3969       while (n_left_from >= 4 && n_left_to_next >= 2)
3970         {
3971           u32 bi0, bi1;
3972           vlib_buffer_t * b0, * b1;
3973           u32 next0, next1;
3974           u32 sw_if_index0, sw_if_index1;
3975           ip4_header_t * ip0, * ip1;
3976           ip_csum_t sum0, sum1;
3977           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
3978           u16 old_port0, new_port0, lo_port0, i0;
3979           u16 old_port1, new_port1, lo_port1, i1;
3980           udp_header_t * udp0, * udp1;
3981           tcp_header_t * tcp0, * tcp1;
3982           u32 proto0, proto1;
3983           snat_det_out_key_t key0, key1;
3984           snat_det_map_t * dm0, * dm1;
3985           snat_det_session_t * ses0 = 0, * ses1 = 0;
3986           u32 rx_fib_index0, rx_fib_index1;
3987           icmp46_header_t * icmp0, * icmp1;
3988
3989           /* Prefetch next iteration. */
3990           {
3991             vlib_buffer_t * p2, * p3;
3992
3993             p2 = vlib_get_buffer (vm, from[2]);
3994             p3 = vlib_get_buffer (vm, from[3]);
3995
3996             vlib_prefetch_buffer_header (p2, LOAD);
3997             vlib_prefetch_buffer_header (p3, LOAD);
3998
3999             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
4000             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
4001           }
4002
4003           /* speculatively enqueue b0 and b1 to the current next frame */
4004           to_next[0] = bi0 = from[0];
4005           to_next[1] = bi1 = from[1];
4006           from += 2;
4007           to_next += 2;
4008           n_left_from -= 2;
4009           n_left_to_next -= 2;
4010
4011           b0 = vlib_get_buffer (vm, bi0);
4012           b1 = vlib_get_buffer (vm, bi1);
4013
4014           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
4015           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
4016
4017           ip0 = vlib_buffer_get_current (b0);
4018           udp0 = ip4_next_header (ip0);
4019           tcp0 = (tcp_header_t *) udp0;
4020
4021           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
4022
4023           if (PREDICT_FALSE(ip0->ttl == 1))
4024             {
4025               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
4026               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
4027                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
4028                                            0);
4029               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
4030               goto trace0;
4031             }
4032
4033           proto0 = ip_proto_to_snat_proto (ip0->protocol);
4034
4035           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
4036             {
4037               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
4038               icmp0 = (icmp46_header_t *) udp0;
4039
4040               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
4041                                   rx_fib_index0, node, next0, thread_index,
4042                                   &ses0, &dm0);
4043               goto trace0;
4044             }
4045
4046           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
4047           if (PREDICT_FALSE(!dm0))
4048             {
4049               nat_log_info ("no match for internal host %U",
4050                             format_ip4_address, &ip0->src_address);
4051               next0 = SNAT_IN2OUT_NEXT_DROP;
4052               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
4053               goto trace0;
4054             }
4055
4056           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
4057
4058           key0.ext_host_addr = ip0->dst_address;
4059           key0.ext_host_port = tcp0->dst;
4060
4061           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
4062           if (PREDICT_FALSE(!ses0))
4063             {
4064               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
4065                 {
4066                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
4067                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
4068
4069                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
4070                     continue;
4071
4072                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
4073                   break;
4074                 }
4075               if (PREDICT_FALSE(!ses0))
4076                 {
4077                   /* too many sessions for user, send ICMP error packet */
4078
4079                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
4080                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
4081                                                ICMP4_destination_unreachable_destination_unreachable_host,
4082                                                0);
4083                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
4084                   goto trace0;
4085                 }
4086             }
4087
4088           new_port0 = ses0->out.out_port;
4089
4090           old_addr0.as_u32 = ip0->src_address.as_u32;
4091           ip0->src_address.as_u32 = new_addr0.as_u32;
4092           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
4093
4094           sum0 = ip0->checksum;
4095           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
4096                                  ip4_header_t,
4097                                  src_address /* changed member */);
4098           ip0->checksum = ip_csum_fold (sum0);
4099
4100           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
4101             {
4102               if (tcp0->flags & TCP_FLAG_SYN)
4103                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
4104               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
4105                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
4106               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
4107                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
4108               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
4109                 snat_det_ses_close(dm0, ses0);
4110               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
4111                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
4112               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
4113                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
4114
4115               old_port0 = tcp0->src;
4116               tcp0->src = new_port0;
4117
4118               sum0 = tcp0->checksum;
4119               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
4120                                      ip4_header_t,
4121                                      dst_address /* changed member */);
4122               sum0 = ip_csum_update (sum0, old_port0, new_port0,
4123                                      ip4_header_t /* cheat */,
4124                                      length /* changed member */);
4125               tcp0->checksum = ip_csum_fold(sum0);
4126             }
4127           else
4128             {
4129               ses0->state = SNAT_SESSION_UDP_ACTIVE;
4130               old_port0 = udp0->src_port;
4131               udp0->src_port = new_port0;
4132               udp0->checksum = 0;
4133             }
4134
4135           switch(ses0->state)
4136             {
4137             case SNAT_SESSION_UDP_ACTIVE:
4138                 ses0->expire = now + sm->udp_timeout;
4139                 break;
4140             case SNAT_SESSION_TCP_SYN_SENT:
4141             case SNAT_SESSION_TCP_FIN_WAIT:
4142             case SNAT_SESSION_TCP_CLOSE_WAIT:
4143             case SNAT_SESSION_TCP_LAST_ACK:
4144                 ses0->expire = now + sm->tcp_transitory_timeout;
4145                 break;
4146             case SNAT_SESSION_TCP_ESTABLISHED:
4147                 ses0->expire = now + sm->tcp_established_timeout;
4148                 break;
4149             }
4150
4151         trace0:
4152           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
4153                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
4154             {
4155               snat_in2out_trace_t *t =
4156                  vlib_add_trace (vm, node, b0, sizeof (*t));
4157               t->is_slow_path = 0;
4158               t->sw_if_index = sw_if_index0;
4159               t->next_index = next0;
4160               t->session_index = ~0;
4161               if (ses0)
4162                 t->session_index = ses0 - dm0->sessions;
4163             }
4164
4165           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
4166
4167           ip1 = vlib_buffer_get_current (b1);
4168           udp1 = ip4_next_header (ip1);
4169           tcp1 = (tcp_header_t *) udp1;
4170
4171           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
4172
4173           if (PREDICT_FALSE(ip1->ttl == 1))
4174             {
4175               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
4176               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
4177                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
4178                                            0);
4179               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
4180               goto trace1;
4181             }
4182
4183           proto1 = ip_proto_to_snat_proto (ip1->protocol);
4184
4185           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
4186             {
4187               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
4188               icmp1 = (icmp46_header_t *) udp1;
4189
4190               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
4191                                   rx_fib_index1, node, next1, thread_index,
4192                                   &ses1, &dm1);
4193               goto trace1;
4194             }
4195
4196           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
4197           if (PREDICT_FALSE(!dm1))
4198             {
4199               nat_log_info ("no match for internal host %U",
4200                             format_ip4_address, &ip0->src_address);
4201               next1 = SNAT_IN2OUT_NEXT_DROP;
4202               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
4203               goto trace1;
4204             }
4205
4206           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
4207
4208           key1.ext_host_addr = ip1->dst_address;
4209           key1.ext_host_port = tcp1->dst;
4210
4211           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
4212           if (PREDICT_FALSE(!ses1))
4213             {
4214               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
4215                 {
4216                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
4217                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
4218
4219                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
4220                     continue;
4221
4222                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
4223                   break;
4224                 }
4225               if (PREDICT_FALSE(!ses1))
4226                 {
4227                   /* too many sessions for user, send ICMP error packet */
4228
4229                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
4230                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
4231                                                ICMP4_destination_unreachable_destination_unreachable_host,
4232                                                0);
4233                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
4234                   goto trace1;
4235                 }
4236             }
4237
4238           new_port1 = ses1->out.out_port;
4239
4240           old_addr1.as_u32 = ip1->src_address.as_u32;
4241           ip1->src_address.as_u32 = new_addr1.as_u32;
4242           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
4243
4244           sum1 = ip1->checksum;
4245           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
4246                                  ip4_header_t,
4247                                  src_address /* changed member */);
4248           ip1->checksum = ip_csum_fold (sum1);
4249
4250           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
4251             {
4252               if (tcp1->flags & TCP_FLAG_SYN)
4253                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
4254               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
4255                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
4256               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
4257                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
4258               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
4259                 snat_det_ses_close(dm1, ses1);
4260               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
4261                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
4262               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
4263                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
4264
4265               old_port1 = tcp1->src;
4266               tcp1->src = new_port1;
4267
4268               sum1 = tcp1->checksum;
4269               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
4270                                      ip4_header_t,
4271                                      dst_address /* changed member */);
4272               sum1 = ip_csum_update (sum1, old_port1, new_port1,
4273                                      ip4_header_t /* cheat */,
4274                                      length /* changed member */);
4275               tcp1->checksum = ip_csum_fold(sum1);
4276             }
4277           else
4278             {
4279               ses1->state = SNAT_SESSION_UDP_ACTIVE;
4280               old_port1 = udp1->src_port;
4281               udp1->src_port = new_port1;
4282               udp1->checksum = 0;
4283             }
4284
4285           switch(ses1->state)
4286             {
4287             case SNAT_SESSION_UDP_ACTIVE:
4288                 ses1->expire = now + sm->udp_timeout;
4289                 break;
4290             case SNAT_SESSION_TCP_SYN_SENT:
4291             case SNAT_SESSION_TCP_FIN_WAIT:
4292             case SNAT_SESSION_TCP_CLOSE_WAIT:
4293             case SNAT_SESSION_TCP_LAST_ACK:
4294                 ses1->expire = now + sm->tcp_transitory_timeout;
4295                 break;
4296             case SNAT_SESSION_TCP_ESTABLISHED:
4297                 ses1->expire = now + sm->tcp_established_timeout;
4298                 break;
4299             }
4300
4301         trace1:
4302           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
4303                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
4304             {
4305               snat_in2out_trace_t *t =
4306                  vlib_add_trace (vm, node, b1, sizeof (*t));
4307               t->is_slow_path = 0;
4308               t->sw_if_index = sw_if_index1;
4309               t->next_index = next1;
4310               t->session_index = ~0;
4311               if (ses1)
4312                 t->session_index = ses1 - dm1->sessions;
4313             }
4314
4315           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
4316
4317           /* verify speculative enqueues, maybe switch current next frame */
4318           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
4319                                            to_next, n_left_to_next,
4320                                            bi0, bi1, next0, next1);
4321          }
4322
4323       while (n_left_from > 0 && n_left_to_next > 0)
4324         {
4325           u32 bi0;
4326           vlib_buffer_t * b0;
4327           u32 next0;
4328           u32 sw_if_index0;
4329           ip4_header_t * ip0;
4330           ip_csum_t sum0;
4331           ip4_address_t new_addr0, old_addr0;
4332           u16 old_port0, new_port0, lo_port0, i0;
4333           udp_header_t * udp0;
4334           tcp_header_t * tcp0;
4335           u32 proto0;
4336           snat_det_out_key_t key0;
4337           snat_det_map_t * dm0;
4338           snat_det_session_t * ses0 = 0;
4339           u32 rx_fib_index0;
4340           icmp46_header_t * icmp0;
4341
4342           /* speculatively enqueue b0 to the current next frame */
4343           bi0 = from[0];
4344           to_next[0] = bi0;
4345           from += 1;
4346           to_next += 1;
4347           n_left_from -= 1;
4348           n_left_to_next -= 1;
4349
4350           b0 = vlib_get_buffer (vm, bi0);
4351           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
4352
4353           ip0 = vlib_buffer_get_current (b0);
4354           udp0 = ip4_next_header (ip0);
4355           tcp0 = (tcp_header_t *) udp0;
4356
4357           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
4358
4359           if (PREDICT_FALSE(ip0->ttl == 1))
4360             {
4361               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
4362               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
4363                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
4364                                            0);
4365               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
4366               goto trace00;
4367             }
4368
4369           proto0 = ip_proto_to_snat_proto (ip0->protocol);
4370
4371           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
4372             {
4373               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
4374               icmp0 = (icmp46_header_t *) udp0;
4375
4376               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
4377                                   rx_fib_index0, node, next0, thread_index,
4378                                   &ses0, &dm0);
4379               goto trace00;
4380             }
4381
4382           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
4383           if (PREDICT_FALSE(!dm0))
4384             {
4385               nat_log_info ("no match for internal host %U",
4386                             format_ip4_address, &ip0->src_address);
4387               next0 = SNAT_IN2OUT_NEXT_DROP;
4388               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
4389               goto trace00;
4390             }
4391
4392           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
4393
4394           key0.ext_host_addr = ip0->dst_address;
4395           key0.ext_host_port = tcp0->dst;
4396
4397           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
4398           if (PREDICT_FALSE(!ses0))
4399             {
4400               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
4401                 {
4402                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
4403                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
4404
4405                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
4406                     continue;
4407
4408                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
4409                   break;
4410                 }
4411               if (PREDICT_FALSE(!ses0))
4412                 {
4413                   /* too many sessions for user, send ICMP error packet */
4414
4415                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
4416                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
4417                                                ICMP4_destination_unreachable_destination_unreachable_host,
4418                                                0);
4419                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
4420                   goto trace00;
4421                 }
4422             }
4423
4424           new_port0 = ses0->out.out_port;
4425
4426           old_addr0.as_u32 = ip0->src_address.as_u32;
4427           ip0->src_address.as_u32 = new_addr0.as_u32;
4428           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
4429
4430           sum0 = ip0->checksum;
4431           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
4432                                  ip4_header_t,
4433                                  src_address /* changed member */);
4434           ip0->checksum = ip_csum_fold (sum0);
4435
4436           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
4437             {
4438               if (tcp0->flags & TCP_FLAG_SYN)
4439                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
4440               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
4441                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
4442               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
4443                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
4444               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
4445                 snat_det_ses_close(dm0, ses0);
4446               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
4447                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
4448               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
4449                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
4450
4451               old_port0 = tcp0->src;
4452               tcp0->src = new_port0;
4453
4454               sum0 = tcp0->checksum;
4455               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
4456                                      ip4_header_t,
4457                                      dst_address /* changed member */);
4458               sum0 = ip_csum_update (sum0, old_port0, new_port0,
4459                                      ip4_header_t /* cheat */,
4460                                      length /* changed member */);
4461               tcp0->checksum = ip_csum_fold(sum0);
4462             }
4463           else
4464             {
4465               ses0->state = SNAT_SESSION_UDP_ACTIVE;
4466               old_port0 = udp0->src_port;
4467               udp0->src_port = new_port0;
4468               udp0->checksum = 0;
4469             }
4470
4471           switch(ses0->state)
4472             {
4473             case SNAT_SESSION_UDP_ACTIVE:
4474                 ses0->expire = now + sm->udp_timeout;
4475                 break;
4476             case SNAT_SESSION_TCP_SYN_SENT:
4477             case SNAT_SESSION_TCP_FIN_WAIT:
4478             case SNAT_SESSION_TCP_CLOSE_WAIT:
4479             case SNAT_SESSION_TCP_LAST_ACK:
4480                 ses0->expire = now + sm->tcp_transitory_timeout;
4481                 break;
4482             case SNAT_SESSION_TCP_ESTABLISHED:
4483                 ses0->expire = now + sm->tcp_established_timeout;
4484                 break;
4485             }
4486
4487         trace00:
4488           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
4489                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
4490             {
4491               snat_in2out_trace_t *t =
4492                  vlib_add_trace (vm, node, b0, sizeof (*t));
4493               t->is_slow_path = 0;
4494               t->sw_if_index = sw_if_index0;
4495               t->next_index = next0;
4496               t->session_index = ~0;
4497               if (ses0)
4498                 t->session_index = ses0 - dm0->sessions;
4499             }
4500
4501           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
4502
4503           /* verify speculative enqueue, maybe switch current next frame */
4504           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
4505                                            to_next, n_left_to_next,
4506                                            bi0, next0);
4507         }
4508
4509       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
4510     }
4511
4512   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
4513                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
4514                                pkts_processed);
4515   return frame->n_vectors;
4516 }
4517
4518 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
4519   .function = snat_det_in2out_node_fn,
4520   .name = "nat44-det-in2out",
4521   .vector_size = sizeof (u32),
4522   .format_trace = format_snat_in2out_trace,
4523   .type = VLIB_NODE_TYPE_INTERNAL,
4524
4525   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
4526   .error_strings = snat_in2out_error_strings,
4527
4528   .runtime_data_bytes = sizeof (snat_runtime_t),
4529
4530   .n_next_nodes = 3,
4531
4532   /* edit / add dispositions here */
4533   .next_nodes = {
4534     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
4535     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
4536     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
4537   },
4538 };
4539
4540 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
4541
4542 /**
4543  * Get address and port values to be used for ICMP packet translation
4544  * and create session if needed
4545  *
4546  * @param[in,out] sm             NAT main
4547  * @param[in,out] node           NAT node runtime
4548  * @param[in] thread_index       thread index
4549  * @param[in,out] b0             buffer containing packet to be translated
4550  * @param[out] p_proto           protocol used for matching
4551  * @param[out] p_value           address and port after NAT translation
4552  * @param[out] p_dont_translate  if packet should not be translated
4553  * @param d                      optional parameter
4554  * @param e                      optional parameter
4555  */
4556 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
4557                           u32 thread_index, vlib_buffer_t *b0,
4558                           ip4_header_t *ip0, u8 *p_proto,
4559                           snat_session_key_t *p_value,
4560                           u8 *p_dont_translate, void *d, void *e)
4561 {
4562   icmp46_header_t *icmp0;
4563   u32 sw_if_index0;
4564   u32 rx_fib_index0;
4565   u8 protocol;
4566   snat_det_out_key_t key0;
4567   u8 dont_translate = 0;
4568   u32 next0 = ~0;
4569   icmp_echo_header_t *echo0, *inner_echo0 = 0;
4570   ip4_header_t *inner_ip0;
4571   void *l4_header = 0;
4572   icmp46_header_t *inner_icmp0;
4573   snat_det_map_t * dm0 = 0;
4574   ip4_address_t new_addr0;
4575   u16 lo_port0, i0;
4576   snat_det_session_t * ses0 = 0;
4577   ip4_address_t in_addr;
4578   u16 in_port;
4579
4580   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
4581   echo0 = (icmp_echo_header_t *)(icmp0+1);
4582   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
4583   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
4584
4585   if (!icmp_is_error_message (icmp0))
4586     {
4587       protocol = SNAT_PROTOCOL_ICMP;
4588       in_addr = ip0->src_address;
4589       in_port = echo0->identifier;
4590     }
4591   else
4592     {
4593       inner_ip0 = (ip4_header_t *)(echo0+1);
4594       l4_header = ip4_next_header (inner_ip0);
4595       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
4596       in_addr = inner_ip0->dst_address;
4597       switch (protocol)
4598         {
4599         case SNAT_PROTOCOL_ICMP:
4600           inner_icmp0 = (icmp46_header_t*)l4_header;
4601           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
4602           in_port = inner_echo0->identifier;
4603           break;
4604         case SNAT_PROTOCOL_UDP:
4605         case SNAT_PROTOCOL_TCP:
4606           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
4607           break;
4608         default:
4609           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
4610           next0 = SNAT_IN2OUT_NEXT_DROP;
4611           goto out;
4612         }
4613     }
4614
4615   dm0 = snat_det_map_by_user(sm, &in_addr);
4616   if (PREDICT_FALSE(!dm0))
4617     {
4618       nat_log_info ("no match for internal host %U",
4619                     format_ip4_address, &in_addr);
4620       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
4621           IP_PROTOCOL_ICMP, rx_fib_index0)))
4622         {
4623           dont_translate = 1;
4624           goto out;
4625         }
4626       next0 = SNAT_IN2OUT_NEXT_DROP;
4627       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
4628       goto out;
4629     }
4630
4631   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
4632
4633   key0.ext_host_addr = ip0->dst_address;
4634   key0.ext_host_port = 0;
4635
4636   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
4637   if (PREDICT_FALSE(!ses0))
4638     {
4639       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
4640           IP_PROTOCOL_ICMP, rx_fib_index0)))
4641         {
4642           dont_translate = 1;
4643           goto out;
4644         }
4645       if (icmp0->type != ICMP4_echo_request)
4646         {
4647           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
4648           next0 = SNAT_IN2OUT_NEXT_DROP;
4649           goto out;
4650         }
4651       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
4652         {
4653           key0.out_port = clib_host_to_net_u16 (lo_port0 +
4654             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
4655
4656           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
4657             continue;
4658
4659           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
4660           break;
4661         }
4662       if (PREDICT_FALSE(!ses0))
4663         {
4664           next0 = SNAT_IN2OUT_NEXT_DROP;
4665           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
4666           goto out;
4667         }
4668     }
4669
4670   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
4671                     !icmp_is_error_message (icmp0)))
4672     {
4673       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
4674       next0 = SNAT_IN2OUT_NEXT_DROP;
4675       goto out;
4676     }
4677
4678   u32 now = (u32) vlib_time_now (sm->vlib_main);
4679
4680   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
4681   ses0->expire = now + sm->icmp_timeout;
4682
4683 out:
4684   *p_proto = protocol;
4685   if (ses0)
4686     {
4687       p_value->addr = new_addr0;
4688       p_value->fib_index = sm->outside_fib_index;
4689       p_value->port = ses0->out.out_port;
4690     }
4691   *p_dont_translate = dont_translate;
4692   if (d)
4693     *(snat_det_session_t**)d = ses0;
4694   if (e)
4695     *(snat_det_map_t**)e = dm0;
4696   return next0;
4697 }
4698
4699 /**********************/
4700 /*** worker handoff ***/
4701 /**********************/
4702 static inline uword
4703 snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
4704                                       vlib_node_runtime_t * node,
4705                                       vlib_frame_t * frame,
4706                                       u8 is_output)
4707 {
4708   snat_main_t *sm = &snat_main;
4709   vlib_thread_main_t *tm = vlib_get_thread_main ();
4710   u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0;
4711   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
4712   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
4713     = 0;
4714   vlib_frame_queue_elt_t *hf = 0;
4715   vlib_frame_queue_t *fq;
4716   vlib_frame_t *f = 0;
4717   int i;
4718   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
4719   u32 next_worker_index = 0;
4720   u32 current_worker_index = ~0;
4721   u32 thread_index = vm->thread_index;
4722   u32 fq_index;
4723   u32 to_node_index;
4724   vlib_frame_t *d = 0;
4725
4726   ASSERT (vec_len (sm->workers));
4727
4728   if (is_output)
4729     {
4730       fq_index = sm->fq_in2out_output_index;
4731       to_node_index = sm->in2out_output_node_index;
4732     }
4733   else
4734     {
4735       fq_index = sm->fq_in2out_index;
4736       to_node_index = sm->in2out_node_index;
4737     }
4738
4739   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
4740     {
4741       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
4742
4743       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
4744                                tm->n_vlib_mains - 1,
4745                                (vlib_frame_queue_t *) (~0));
4746     }
4747
4748   from = vlib_frame_vector_args (frame);
4749   n_left_from = frame->n_vectors;
4750
4751   while (n_left_from > 0)
4752     {
4753       u32 bi0;
4754       vlib_buffer_t *b0;
4755       u32 sw_if_index0;
4756       u32 rx_fib_index0;
4757       ip4_header_t * ip0;
4758       u8 do_handoff;
4759
4760       bi0 = from[0];
4761       from += 1;
4762       n_left_from -= 1;
4763
4764       b0 = vlib_get_buffer (vm, bi0);
4765
4766       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
4767       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
4768
4769       ip0 = vlib_buffer_get_current (b0);
4770
4771       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
4772
4773       if (PREDICT_FALSE (next_worker_index != thread_index))
4774         {
4775           do_handoff = 1;
4776
4777           if (next_worker_index != current_worker_index)
4778             {
4779               fq = is_vlib_frame_queue_congested (
4780                 fq_index, next_worker_index, NAT_FQ_NELTS - 2,
4781                 congested_handoff_queue_by_worker_index);
4782
4783               if (fq)
4784                 {
4785                   /* if this is 1st frame */
4786                   if (!d)
4787                     {
4788                       d = vlib_get_frame_to_node (vm, sm->error_node_index);
4789                       to_next_drop = vlib_frame_vector_args (d);
4790                     }
4791
4792                   to_next_drop[0] = bi0;
4793                   to_next_drop += 1;
4794                   d->n_vectors++;
4795                   b0->error = node->errors[SNAT_IN2OUT_ERROR_FQ_CONGESTED];
4796                   goto trace0;
4797                 }
4798
4799               if (hf)
4800                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
4801
4802               hf = vlib_get_worker_handoff_queue_elt (fq_index,
4803                                                       next_worker_index,
4804                                                       handoff_queue_elt_by_worker_index);
4805
4806               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
4807               to_next_worker = &hf->buffer_index[hf->n_vectors];
4808               current_worker_index = next_worker_index;
4809             }
4810
4811           /* enqueue to correct worker thread */
4812           to_next_worker[0] = bi0;
4813           to_next_worker++;
4814           n_left_to_next_worker--;
4815
4816           if (n_left_to_next_worker == 0)
4817             {
4818               hf->n_vectors = VLIB_FRAME_SIZE;
4819               vlib_put_frame_queue_elt (hf);
4820               current_worker_index = ~0;
4821               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
4822               hf = 0;
4823             }
4824         }
4825       else
4826         {
4827           do_handoff = 0;
4828           /* if this is 1st frame */
4829           if (!f)
4830             {
4831               f = vlib_get_frame_to_node (vm, to_node_index);
4832               to_next = vlib_frame_vector_args (f);
4833             }
4834
4835           to_next[0] = bi0;
4836           to_next += 1;
4837           f->n_vectors++;
4838         }
4839
4840 trace0:
4841       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
4842                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
4843         {
4844           snat_in2out_worker_handoff_trace_t *t =
4845             vlib_add_trace (vm, node, b0, sizeof (*t));
4846           t->next_worker_index = next_worker_index;
4847           t->do_handoff = do_handoff;
4848         }
4849     }
4850
4851   if (f)
4852     vlib_put_frame_to_node (vm, to_node_index, f);
4853
4854   if (d)
4855     vlib_put_frame_to_node (vm, sm->error_node_index, d);
4856
4857   if (hf)
4858     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
4859
4860   /* Ship frames to the worker nodes */
4861   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
4862     {
4863       if (handoff_queue_elt_by_worker_index[i])
4864         {
4865           hf = handoff_queue_elt_by_worker_index[i];
4866           /*
4867            * It works better to let the handoff node
4868            * rate-adapt, always ship the handoff queue element.
4869            */
4870           if (1 || hf->n_vectors == hf->last_n_vectors)
4871             {
4872               vlib_put_frame_queue_elt (hf);
4873               handoff_queue_elt_by_worker_index[i] = 0;
4874             }
4875           else
4876             hf->last_n_vectors = hf->n_vectors;
4877         }
4878       congested_handoff_queue_by_worker_index[i] =
4879         (vlib_frame_queue_t *) (~0);
4880     }
4881   hf = 0;
4882   current_worker_index = ~0;
4883   return frame->n_vectors;
4884 }
4885
4886 static uword
4887 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
4888                                vlib_node_runtime_t * node,
4889                                vlib_frame_t * frame)
4890 {
4891   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
4892 }
4893
4894 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
4895   .function = snat_in2out_worker_handoff_fn,
4896   .name = "nat44-in2out-worker-handoff",
4897   .vector_size = sizeof (u32),
4898   .format_trace = format_snat_in2out_worker_handoff_trace,
4899   .type = VLIB_NODE_TYPE_INTERNAL,
4900
4901   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
4902   .error_strings = snat_in2out_error_strings,
4903
4904   .n_next_nodes = 1,
4905
4906   .next_nodes = {
4907     [0] = "error-drop",
4908   },
4909 };
4910
4911 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
4912                               snat_in2out_worker_handoff_fn);
4913
4914 static uword
4915 snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
4916                                       vlib_node_runtime_t * node,
4917                                       vlib_frame_t * frame)
4918 {
4919   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
4920 }
4921
4922 VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
4923   .function = snat_in2out_output_worker_handoff_fn,
4924   .name = "nat44-in2out-output-worker-handoff",
4925   .vector_size = sizeof (u32),
4926   .format_trace = format_snat_in2out_worker_handoff_trace,
4927   .type = VLIB_NODE_TYPE_INTERNAL,
4928
4929   .n_next_nodes = 1,
4930
4931   .next_nodes = {
4932     [0] = "error-drop",
4933   },
4934 };
4935
4936 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
4937                               snat_in2out_output_worker_handoff_fn);
4938
4939 static_always_inline int
4940 is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
4941 {
4942   snat_address_t * ap;
4943   clib_bihash_kv_8_8_t kv, value;
4944   snat_session_key_t m_key;
4945
4946   vec_foreach (ap, sm->addresses)
4947     {
4948       if (ap->addr.as_u32 == dst_addr->as_u32)
4949         return 1;
4950     }
4951
4952   m_key.addr.as_u32 = dst_addr->as_u32;
4953   m_key.fib_index = 0;
4954   m_key.port = 0;
4955   m_key.protocol = 0;
4956   kv.key = m_key.as_u64;
4957   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
4958     return 1;
4959
4960   return 0;
4961 }
4962
4963 static inline uword
4964 snat_hairpin_dst_fn_inline (vlib_main_t * vm,
4965                             vlib_node_runtime_t * node,
4966                             vlib_frame_t * frame,
4967                             int is_ed)
4968 {
4969   u32 n_left_from, * from, * to_next, stats_node_index;
4970   snat_in2out_next_t next_index;
4971   u32 pkts_processed = 0;
4972   snat_main_t * sm = &snat_main;
4973
4974   stats_node_index = is_ed ? nat44_ed_hairpin_dst_node.index :
4975     snat_hairpin_dst_node.index;
4976
4977   from = vlib_frame_vector_args (frame);
4978   n_left_from = frame->n_vectors;
4979   next_index = node->cached_next_index;
4980
4981   while (n_left_from > 0)
4982     {
4983       u32 n_left_to_next;
4984
4985       vlib_get_next_frame (vm, node, next_index,
4986                            to_next, n_left_to_next);
4987
4988       while (n_left_from > 0 && n_left_to_next > 0)
4989         {
4990           u32 bi0;
4991           vlib_buffer_t * b0;
4992           u32 next0;
4993           ip4_header_t * ip0;
4994           u32 proto0;
4995
4996           /* speculatively enqueue b0 to the current next frame */
4997           bi0 = from[0];
4998           to_next[0] = bi0;
4999           from += 1;
5000           to_next += 1;
5001           n_left_from -= 1;
5002           n_left_to_next -= 1;
5003
5004           b0 = vlib_get_buffer (vm, bi0);
5005           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
5006           ip0 = vlib_buffer_get_current (b0);
5007
5008           proto0 = ip_proto_to_snat_proto (ip0->protocol);
5009
5010           vnet_buffer (b0)->snat.flags = 0;
5011           if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
5012             {
5013               if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
5014                 {
5015                   udp_header_t * udp0 = ip4_next_header (ip0);
5016                   tcp_header_t * tcp0 = (tcp_header_t *) udp0;
5017
5018                   snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0, is_ed);
5019                 }
5020               else if (proto0 == SNAT_PROTOCOL_ICMP)
5021                 {
5022                   icmp46_header_t * icmp0 = ip4_next_header (ip0);
5023
5024                   snat_icmp_hairpinning (sm, b0, ip0, icmp0, is_ed);
5025                 }
5026               else
5027                 {
5028                   if (is_ed)
5029                     nat44_ed_hairpinning_unknown_proto (sm, b0, ip0);
5030                   else
5031                     nat_hairpinning_sm_unknown_proto (sm, b0, ip0);
5032                 }
5033
5034               vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
5035             }
5036
5037           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
5038
5039           /* verify speculative enqueue, maybe switch current next frame */
5040           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
5041                                            to_next, n_left_to_next,
5042                                            bi0, next0);
5043          }
5044
5045       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
5046     }
5047
5048   vlib_node_increment_counter (vm, stats_node_index,
5049                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
5050                                pkts_processed);
5051   return frame->n_vectors;
5052 }
5053
5054 static uword
5055 snat_hairpin_dst_fn (vlib_main_t * vm,
5056                      vlib_node_runtime_t * node,
5057                      vlib_frame_t * frame)
5058 {
5059   return snat_hairpin_dst_fn_inline (vm, node, frame, 0);
5060 }
5061
5062 VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
5063   .function = snat_hairpin_dst_fn,
5064   .name = "nat44-hairpin-dst",
5065   .vector_size = sizeof (u32),
5066   .type = VLIB_NODE_TYPE_INTERNAL,
5067   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
5068   .error_strings = snat_in2out_error_strings,
5069   .n_next_nodes = 2,
5070   .next_nodes = {
5071     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
5072     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
5073   },
5074 };
5075
5076 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node,
5077                               snat_hairpin_dst_fn);
5078
5079 static uword
5080 nat44_ed_hairpin_dst_fn (vlib_main_t * vm,
5081                          vlib_node_runtime_t * node,
5082                          vlib_frame_t * frame)
5083 {
5084   return snat_hairpin_dst_fn_inline (vm, node, frame, 1);
5085 }
5086
5087 VLIB_REGISTER_NODE (nat44_ed_hairpin_dst_node) = {
5088   .function = nat44_ed_hairpin_dst_fn,
5089   .name = "nat44-ed-hairpin-dst",
5090   .vector_size = sizeof (u32),
5091   .type = VLIB_NODE_TYPE_INTERNAL,
5092   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
5093   .error_strings = snat_in2out_error_strings,
5094   .n_next_nodes = 2,
5095   .next_nodes = {
5096     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
5097     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
5098   },
5099 };
5100
5101 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_hairpin_dst_node,
5102                               nat44_ed_hairpin_dst_fn);
5103
5104 static inline uword
5105 snat_hairpin_src_fn_inline (vlib_main_t * vm,
5106                             vlib_node_runtime_t * node,
5107                             vlib_frame_t * frame,
5108                             int is_ed)
5109 {
5110   u32 n_left_from, * from, * to_next, stats_node_index;
5111   snat_in2out_next_t next_index;
5112   u32 pkts_processed = 0;
5113   snat_main_t *sm = &snat_main;
5114
5115   stats_node_index = is_ed ? nat44_ed_hairpin_src_node.index :
5116     snat_hairpin_src_node.index;
5117
5118   from = vlib_frame_vector_args (frame);
5119   n_left_from = frame->n_vectors;
5120   next_index = node->cached_next_index;
5121
5122   while (n_left_from > 0)
5123     {
5124       u32 n_left_to_next;
5125
5126       vlib_get_next_frame (vm, node, next_index,
5127                            to_next, n_left_to_next);
5128
5129       while (n_left_from > 0 && n_left_to_next > 0)
5130         {
5131           u32 bi0;
5132           vlib_buffer_t * b0;
5133           u32 next0;
5134           snat_interface_t *i;
5135           u32 sw_if_index0;
5136
5137           /* speculatively enqueue b0 to the current next frame */
5138           bi0 = from[0];
5139           to_next[0] = bi0;
5140           from += 1;
5141           to_next += 1;
5142           n_left_from -= 1;
5143           n_left_to_next -= 1;
5144
5145           b0 = vlib_get_buffer (vm, bi0);
5146           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
5147           next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT;
5148
5149           pool_foreach (i, sm->output_feature_interfaces,
5150           ({
5151             /* Only packets from NAT inside interface */
5152             if ((nat_interface_is_inside(i)) && (sw_if_index0 == i->sw_if_index))
5153               {
5154                 if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) &
5155                                     SNAT_FLAG_HAIRPINNING))
5156                   {
5157                     if (PREDICT_TRUE (sm->num_workers > 1))
5158                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
5159                     else
5160                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
5161                   }
5162                 break;
5163               }
5164           }));
5165
5166           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
5167
5168           /* verify speculative enqueue, maybe switch current next frame */
5169           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
5170                                            to_next, n_left_to_next,
5171                                            bi0, next0);
5172          }
5173
5174       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
5175     }
5176
5177   vlib_node_increment_counter (vm, stats_node_index,
5178                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
5179                                pkts_processed);
5180   return frame->n_vectors;
5181 }
5182
5183 static uword
5184 snat_hairpin_src_fn (vlib_main_t * vm,
5185                      vlib_node_runtime_t * node,
5186                      vlib_frame_t * frame)
5187 {
5188   return snat_hairpin_src_fn_inline (vm, node, frame, 0);
5189 }
5190
5191 VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
5192   .function = snat_hairpin_src_fn,
5193   .name = "nat44-hairpin-src",
5194   .vector_size = sizeof (u32),
5195   .type = VLIB_NODE_TYPE_INTERNAL,
5196   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
5197   .error_strings = snat_in2out_error_strings,
5198   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
5199   .next_nodes = {
5200      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
5201      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-in2out-output",
5202      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
5203      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
5204   },
5205 };
5206
5207 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node,
5208                               snat_hairpin_src_fn);
5209
5210 static uword
5211 nat44_ed_hairpin_src_fn (vlib_main_t * vm,
5212                          vlib_node_runtime_t * node,
5213                          vlib_frame_t * frame)
5214 {
5215   return snat_hairpin_src_fn_inline (vm, node, frame, 1);
5216 }
5217
5218 VLIB_REGISTER_NODE (nat44_ed_hairpin_src_node) = {
5219   .function = nat44_ed_hairpin_src_fn,
5220   .name = "nat44-ed-hairpin-src",
5221   .vector_size = sizeof (u32),
5222   .type = VLIB_NODE_TYPE_INTERNAL,
5223   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
5224   .error_strings = snat_in2out_error_strings,
5225   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
5226   .next_nodes = {
5227      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
5228      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-ed-in2out-output",
5229      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
5230      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
5231   },
5232 };
5233
5234 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_hairpin_src_node,
5235                               nat44_ed_hairpin_src_fn);
5236
5237 static uword
5238 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
5239                                 vlib_node_runtime_t * node,
5240                                 vlib_frame_t * frame)
5241 {
5242   u32 n_left_from, * from, * to_next;
5243   snat_in2out_next_t next_index;
5244   u32 pkts_processed = 0;
5245   snat_main_t * sm = &snat_main;
5246   u32 stats_node_index;
5247
5248   stats_node_index = snat_in2out_fast_node.index;
5249
5250   from = vlib_frame_vector_args (frame);
5251   n_left_from = frame->n_vectors;
5252   next_index = node->cached_next_index;
5253
5254   while (n_left_from > 0)
5255     {
5256       u32 n_left_to_next;
5257
5258       vlib_get_next_frame (vm, node, next_index,
5259                            to_next, n_left_to_next);
5260
5261       while (n_left_from > 0 && n_left_to_next > 0)
5262         {
5263           u32 bi0;
5264           vlib_buffer_t * b0;
5265           u32 next0;
5266           u32 sw_if_index0;
5267           ip4_header_t * ip0;
5268           ip_csum_t sum0;
5269           u32 new_addr0, old_addr0;
5270           u16 old_port0, new_port0;
5271           udp_header_t * udp0;
5272           tcp_header_t * tcp0;
5273           icmp46_header_t * icmp0;
5274           snat_session_key_t key0, sm0;
5275           u32 proto0;
5276           u32 rx_fib_index0;
5277
5278           /* speculatively enqueue b0 to the current next frame */
5279           bi0 = from[0];
5280           to_next[0] = bi0;
5281           from += 1;
5282           to_next += 1;
5283           n_left_from -= 1;
5284           n_left_to_next -= 1;
5285
5286           b0 = vlib_get_buffer (vm, bi0);
5287           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
5288
5289           ip0 = vlib_buffer_get_current (b0);
5290           udp0 = ip4_next_header (ip0);
5291           tcp0 = (tcp_header_t *) udp0;
5292           icmp0 = (icmp46_header_t *) udp0;
5293
5294           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
5295           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
5296
5297           if (PREDICT_FALSE(ip0->ttl == 1))
5298             {
5299               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
5300               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
5301                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
5302                                            0);
5303               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
5304               goto trace0;
5305             }
5306
5307           proto0 = ip_proto_to_snat_proto (ip0->protocol);
5308
5309           if (PREDICT_FALSE (proto0 == ~0))
5310               goto trace0;
5311
5312           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
5313             {
5314               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
5315                                   rx_fib_index0, node, next0, ~0, 0, 0);
5316               goto trace0;
5317             }
5318
5319           key0.addr = ip0->src_address;
5320           key0.protocol = proto0;
5321           key0.port = udp0->src_port;
5322           key0.fib_index = rx_fib_index0;
5323
5324           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0, 0, 0, 0))
5325             {
5326               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
5327               next0= SNAT_IN2OUT_NEXT_DROP;
5328               goto trace0;
5329             }
5330
5331           new_addr0 = sm0.addr.as_u32;
5332           new_port0 = sm0.port;
5333           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
5334           old_addr0 = ip0->src_address.as_u32;
5335           ip0->src_address.as_u32 = new_addr0;
5336
5337           sum0 = ip0->checksum;
5338           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
5339                                  ip4_header_t,
5340                                  src_address /* changed member */);
5341           ip0->checksum = ip_csum_fold (sum0);
5342
5343           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
5344             {
5345               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
5346                 {
5347                   old_port0 = tcp0->src_port;
5348                   tcp0->src_port = new_port0;
5349
5350                   sum0 = tcp0->checksum;
5351                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
5352                                          ip4_header_t,
5353                                          dst_address /* changed member */);
5354                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
5355                                          ip4_header_t /* cheat */,
5356                                          length /* changed member */);
5357                   tcp0->checksum = ip_csum_fold(sum0);
5358                 }
5359               else
5360                 {
5361                   old_port0 = udp0->src_port;
5362                   udp0->src_port = new_port0;
5363                   udp0->checksum = 0;
5364                 }
5365             }
5366           else
5367             {
5368               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
5369                 {
5370                   sum0 = tcp0->checksum;
5371                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
5372                                          ip4_header_t,
5373                                          dst_address /* changed member */);
5374                   tcp0->checksum = ip_csum_fold(sum0);
5375                 }
5376             }
5377
5378           /* Hairpinning */
5379           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0, 0);
5380
5381         trace0:
5382           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
5383                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
5384             {
5385               snat_in2out_trace_t *t =
5386                  vlib_add_trace (vm, node, b0, sizeof (*t));
5387               t->sw_if_index = sw_if_index0;
5388               t->next_index = next0;
5389             }
5390
5391           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
5392
5393           /* verify speculative enqueue, maybe switch current next frame */
5394           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
5395                                            to_next, n_left_to_next,
5396                                            bi0, next0);
5397         }
5398
5399       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
5400     }
5401
5402   vlib_node_increment_counter (vm, stats_node_index,
5403                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
5404                                pkts_processed);
5405   return frame->n_vectors;
5406 }
5407
5408
5409 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
5410   .function = snat_in2out_fast_static_map_fn,
5411   .name = "nat44-in2out-fast",
5412   .vector_size = sizeof (u32),
5413   .format_trace = format_snat_in2out_fast_trace,
5414   .type = VLIB_NODE_TYPE_INTERNAL,
5415
5416   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
5417   .error_strings = snat_in2out_error_strings,
5418
5419   .runtime_data_bytes = sizeof (snat_runtime_t),
5420
5421   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
5422
5423   /* edit / add dispositions here */
5424   .next_nodes = {
5425     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
5426     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
5427     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
5428     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
5429     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
5430   },
5431 };
5432
5433 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);