NAT: TCP MSS clamping
[vpp.git] / src / plugins / nat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <nat/nat.h>
25 #include <nat/nat_ipfix_logging.h>
26 #include <nat/nat_det.h>
27 #include <nat/nat_reass.h>
28 #include <nat/nat_inlines.h>
29
30 #include <vppinfra/hash.h>
31 #include <vppinfra/error.h>
32 #include <vppinfra/elog.h>
33
34 typedef struct {
35   u32 sw_if_index;
36   u32 next_index;
37   u32 session_index;
38   u32 is_slow_path;
39 } snat_in2out_trace_t;
40
41 typedef struct {
42   u32 next_worker_index;
43   u8 do_handoff;
44 } snat_in2out_worker_handoff_trace_t;
45
46 /* packet trace format function */
47 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
48 {
49   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
50   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
51   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
52   char * tag;
53
54   tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
55
56   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
57               t->sw_if_index, t->next_index, t->session_index);
58
59   return s;
60 }
61
62 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
63 {
64   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
65   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
66   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
67
68   s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
69               t->sw_if_index, t->next_index);
70
71   return s;
72 }
73
74 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
75 {
76   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
77   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
78   snat_in2out_worker_handoff_trace_t * t =
79     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
80   char * m;
81
82   m = t->do_handoff ? "next worker" : "same worker";
83   s = format (s, "NAT44_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
84
85   return s;
86 }
87
88 typedef struct {
89   u32 sw_if_index;
90   u32 next_index;
91   u8 cached;
92 } nat44_in2out_reass_trace_t;
93
94 static u8 * format_nat44_in2out_reass_trace (u8 * s, va_list * args)
95 {
96   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
97   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
98   nat44_in2out_reass_trace_t * t = va_arg (*args, nat44_in2out_reass_trace_t *);
99
100   s = format (s, "NAT44_IN2OUT_REASS: sw_if_index %d, next index %d, status %s",
101               t->sw_if_index, t->next_index,
102               t->cached ? "cached" : "translated");
103
104   return s;
105 }
106
107 vlib_node_registration_t snat_in2out_node;
108 vlib_node_registration_t snat_in2out_slowpath_node;
109 vlib_node_registration_t snat_in2out_fast_node;
110 vlib_node_registration_t snat_in2out_worker_handoff_node;
111 vlib_node_registration_t snat_det_in2out_node;
112 vlib_node_registration_t snat_in2out_output_node;
113 vlib_node_registration_t snat_in2out_output_slowpath_node;
114 vlib_node_registration_t snat_in2out_output_worker_handoff_node;
115 vlib_node_registration_t snat_hairpin_dst_node;
116 vlib_node_registration_t snat_hairpin_src_node;
117 vlib_node_registration_t nat44_hairpinning_node;
118 vlib_node_registration_t nat44_in2out_reass_node;
119 vlib_node_registration_t nat44_ed_in2out_node;
120 vlib_node_registration_t nat44_ed_in2out_slowpath_node;
121 vlib_node_registration_t nat44_ed_in2out_output_node;
122 vlib_node_registration_t nat44_ed_in2out_output_slowpath_node;
123 vlib_node_registration_t nat44_ed_hairpin_dst_node;
124 vlib_node_registration_t nat44_ed_hairpin_src_node;
125 vlib_node_registration_t nat44_ed_hairpinning_node;
126
/*
 * X-macro list of the in2out error counters.
 * Each entry is _(SYMBOL, "description"); users define _ before expanding.
 */
#define foreach_snat_in2out_error                               \
  _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")               \
  _(IN2OUT_PACKETS, "Good in2out packets processed")            \
  _(OUT_OF_PORTS, "Out of ports")                               \
  _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")                \
  _(BAD_ICMP_TYPE, "unsupported ICMP type")                     \
  _(NO_TRANSLATION, "No translation")                           \
  _(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")         \
  _(DROP_FRAGMENT, "Drop fragment")                             \
  _(MAX_REASS, "Maximum reassemblies exceeded")                 \
  _(MAX_FRAG, "Maximum fragments per reassembly exceeded")      \
  _(FQ_CONGESTED, "Handoff frame queue congested")
139
140 typedef enum {
141 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
142   foreach_snat_in2out_error
143 #undef _
144   SNAT_IN2OUT_N_ERROR,
145 } snat_in2out_error_t;
146
147 static char * snat_in2out_error_strings[] = {
148 #define _(sym,string) string,
149   foreach_snat_in2out_error
150 #undef _
151 };
152
/* Next-node indices for the in2out nodes; SNAT_IN2OUT_N_NEXT is the count. */
typedef enum
{
  SNAT_IN2OUT_NEXT_LOOKUP = 0,
  SNAT_IN2OUT_NEXT_DROP,
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_NEXT_REASS,
  SNAT_IN2OUT_N_NEXT,
} snat_in2out_next_t;
161
/*
 * Next-node indices for the hairpin source node;
 * SNAT_HAIRPIN_SRC_N_NEXT is the count.
 */
typedef enum
{
  SNAT_HAIRPIN_SRC_NEXT_DROP = 0,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
  SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
  SNAT_HAIRPIN_SRC_N_NEXT,
} snat_hairpin_next_t;
169
170 /**
171  * @brief Check if packet should be translated
172  *
173  * Packets aimed at outside interface and external address with active session
174  * should be translated.
175  *
176  * @param sm            NAT main
177  * @param rt            NAT runtime data
178  * @param sw_if_index0  index of the inside interface
179  * @param ip0           IPv4 header
180  * @param proto0        NAT protocol
181  * @param rx_fib_index0 RX FIB index
182  *
183  * @returns 0 if packet should be translated otherwise 1
184  */
185 static inline int
186 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
187                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
188                          u32 rx_fib_index0)
189 {
190   if (sm->out2in_dpo)
191     return 0;
192
193   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
194   nat_outside_fib_t *outside_fib;
195   fib_prefix_t pfx = {
196     .fp_proto = FIB_PROTOCOL_IP4,
197     .fp_len = 32,
198     .fp_addr = {
199         .ip4.as_u32 = ip0->dst_address.as_u32,
200     },
201   };
202
203   /* Don't NAT packet aimed at the intfc address */
204   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
205                                       ip0->dst_address.as_u32)))
206     return 1;
207
208   fei = fib_table_lookup (rx_fib_index0, &pfx);
209   if (FIB_NODE_INDEX_INVALID != fei)
210     {
211       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
212       if (sw_if_index == ~0)
213         {
214           vec_foreach (outside_fib, sm->outside_fibs)
215             {
216               fei = fib_table_lookup (outside_fib->fib_index, &pfx);
217               if (FIB_NODE_INDEX_INVALID != fei)
218                 {
219                   sw_if_index = fib_entry_get_resolving_interface (fei);
220                   if (sw_if_index != ~0)
221                     break;
222                 }
223             }
224         }
225       if (sw_if_index == ~0)
226         return 1;
227
228       snat_interface_t *i;
229       pool_foreach (i, sm->interfaces,
230       ({
231         /* NAT packet aimed at outside interface */
232         if ((nat_interface_is_outside(i)) && (sw_if_index == i->sw_if_index))
233           return 0;
234       }));
235     }
236
237   return 1;
238 }
239
240 static inline int
241 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
242                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
243                     u32 rx_fib_index0, u32 thread_index)
244 {
245   udp_header_t * udp0 = ip4_next_header (ip0);
246   snat_session_key_t key0, sm0;
247   clib_bihash_kv_8_8_t kv0, value0;
248
249   key0.addr = ip0->dst_address;
250   key0.port = udp0->dst_port;
251   key0.protocol = proto0;
252   key0.fib_index = sm->outside_fib_index;
253   kv0.key = key0.as_u64;
254
255   /* NAT packet aimed at external address if */
256   /* has active sessions */
257   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
258                               &value0))
259     {
260       /* or is static mappings */
261       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
262         return 0;
263     }
264   else
265     return 0;
266
267   if (sm->forwarding_enabled)
268     return 1;
269
270   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
271                                  rx_fib_index0);
272 }
273
274 static inline int
275 nat_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip0,
276                                   u32 proto0, u16 src_port, u16 dst_port,
277                                   u32 thread_index, u32 sw_if_index)
278 {
279   snat_session_key_t key0;
280   clib_bihash_kv_8_8_t kv0, value0;
281   snat_interface_t *i;
282
283   /* src NAT check */
284   key0.addr = ip0->src_address;
285   key0.port = src_port;
286   key0.protocol = proto0;
287   key0.fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
288   kv0.key = key0.as_u64;
289
290   if (!clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
291                                &value0))
292     return 1;
293
294   /* dst NAT check */
295   key0.addr = ip0->dst_address;
296   key0.port = dst_port;
297   key0.protocol = proto0;
298   kv0.key = key0.as_u64;
299   if (!clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
300                                &value0))
301   {
302     /* hairpinning */
303     pool_foreach (i, sm->output_feature_interfaces,
304     ({
305       if ((nat_interface_is_inside(i)) && (sw_if_index == i->sw_if_index))
306         return 0;
307     }));
308     return 1;
309   }
310
311   return 0;
312 }
313
314 int
315 nat44_i2o_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void * arg)
316 {
317   snat_main_t *sm = &snat_main;
318   nat44_is_idle_session_ctx_t *ctx = arg;
319   snat_session_t *s;
320   u64 sess_timeout_time;
321   snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
322                                                        ctx->thread_index);
323   clib_bihash_kv_8_8_t s_kv;
324
325   s = pool_elt_at_index (tsm->sessions, kv->value);
326   sess_timeout_time = s->last_heard + (f64)nat44_session_get_timeout(sm, s);
327   if (ctx->now >= sess_timeout_time)
328     {
329       s_kv.key = s->out2in.as_u64;
330       if (clib_bihash_add_del_8_8 (&tsm->out2in, &s_kv, 0))
331         nat_log_warn ("out2in key del failed");
332
333       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
334                                           s->out2in.addr.as_u32,
335                                           s->in2out.protocol,
336                                           s->in2out.port,
337                                           s->out2in.port,
338                                           s->in2out.fib_index);
339
340       if (!snat_is_session_static (s))
341         snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
342                                             &s->out2in);
343
344       nat44_delete_session (sm, s, ctx->thread_index);
345       return 1;
346     }
347
348   return 0;
349 }
350
351 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
352                       ip4_header_t * ip0,
353                       u32 rx_fib_index0,
354                       snat_session_key_t * key0,
355                       snat_session_t ** sessionp,
356                       vlib_node_runtime_t * node,
357                       u32 next0,
358                       u32 thread_index,
359                       f64 now)
360 {
361   snat_user_t *u;
362   snat_session_t *s;
363   clib_bihash_kv_8_8_t kv0;
364   snat_session_key_t key1;
365   u32 address_index = ~0;
366   udp_header_t * udp0 = ip4_next_header (ip0);
367   u8 is_sm = 0;
368   nat_outside_fib_t *outside_fib;
369   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
370   fib_prefix_t pfx = {
371     .fp_proto = FIB_PROTOCOL_IP4,
372     .fp_len = 32,
373     .fp_addr = {
374         .ip4.as_u32 = ip0->dst_address.as_u32,
375     },
376   };
377   nat44_is_idle_session_ctx_t ctx0;
378
379   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
380     {
381       b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
382       nat_ipfix_logging_max_sessions(sm->max_translations);
383       nat_log_notice ("maximum sessions exceeded");
384       return SNAT_IN2OUT_NEXT_DROP;
385     }
386
387   key1.protocol = key0->protocol;
388
389   /* First try to match static mapping by local address and port */
390   if (snat_static_mapping_match (sm, *key0, &key1, 0, 0, 0, 0, 0))
391     {
392       /* Try to create dynamic translation */
393       if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index0,
394                                                thread_index, &key1,
395                                                &address_index,
396                                                sm->port_per_thread,
397                                                sm->per_thread_data[thread_index].snat_thread_index))
398         {
399           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
400           return SNAT_IN2OUT_NEXT_DROP;
401         }
402     }
403   else
404     is_sm = 1;
405
406   u = nat_user_get_or_create (sm, &ip0->src_address, rx_fib_index0,
407                               thread_index);
408   if (!u)
409     {
410       nat_log_warn ("create NAT user failed");
411       return SNAT_IN2OUT_NEXT_DROP;
412     }
413
414   s = nat_session_alloc_or_recycle (sm, u, thread_index);
415   if (!s)
416     {
417       nat44_delete_user_with_no_session (sm, u, thread_index);
418       nat_log_warn ("create NAT session failed");
419       return SNAT_IN2OUT_NEXT_DROP;
420     }
421
422   if (is_sm)
423     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
424   user_session_increment (sm, u, is_sm);
425   s->outside_address_index = address_index;
426   s->in2out = *key0;
427   s->out2in = key1;
428   s->out2in.protocol = key0->protocol;
429   s->out2in.fib_index = sm->outside_fib_index;
430   switch (vec_len (sm->outside_fibs))
431     {
432     case 0:
433       s->out2in.fib_index = sm->outside_fib_index;
434       break;
435     case 1:
436       s->out2in.fib_index = sm->outside_fibs[0].fib_index;
437       break;
438     default:
439       vec_foreach (outside_fib, sm->outside_fibs)
440         {
441           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
442           if (FIB_NODE_INDEX_INVALID != fei)
443             {
444               if (fib_entry_get_resolving_interface (fei) != ~0)
445                 {
446                   s->out2in.fib_index = outside_fib->fib_index;
447                   break;
448                 }
449             }
450         }
451       break;
452     }
453   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
454   s->ext_host_port = udp0->dst_port;
455   *sessionp = s;
456
457   /* Add to translation hashes */
458   ctx0.now = now;
459   ctx0.thread_index = thread_index;
460   kv0.key = s->in2out.as_u64;
461   kv0.value = s - sm->per_thread_data[thread_index].sessions;
462   if (clib_bihash_add_or_overwrite_stale_8_8 (
463         &sm->per_thread_data[thread_index].in2out, &kv0,
464         nat44_i2o_is_idle_session_cb, &ctx0))
465       nat_log_notice ("in2out key add failed");
466
467   kv0.key = s->out2in.as_u64;
468   kv0.value = s - sm->per_thread_data[thread_index].sessions;
469
470   if (clib_bihash_add_or_overwrite_stale_8_8 (
471         &sm->per_thread_data[thread_index].out2in, &kv0,
472         nat44_o2i_is_idle_session_cb, &ctx0))
473       nat_log_notice ("out2in key add failed");
474
475   /* log NAT event */
476   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
477                                       s->out2in.addr.as_u32,
478                                       s->in2out.protocol,
479                                       s->in2out.port,
480                                       s->out2in.port,
481                                       s->in2out.fib_index);
482   return next0;
483 }
484
485 static_always_inline
486 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
487                                  snat_session_key_t *p_key0)
488 {
489   icmp46_header_t *icmp0;
490   snat_session_key_t key0;
491   icmp_echo_header_t *echo0, *inner_echo0 = 0;
492   ip4_header_t *inner_ip0 = 0;
493   void *l4_header = 0;
494   icmp46_header_t *inner_icmp0;
495
496   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
497   echo0 = (icmp_echo_header_t *)(icmp0+1);
498
499   if (!icmp_is_error_message (icmp0))
500     {
501       key0.protocol = SNAT_PROTOCOL_ICMP;
502       key0.addr = ip0->src_address;
503       key0.port = echo0->identifier;
504     }
505   else
506     {
507       inner_ip0 = (ip4_header_t *)(echo0+1);
508       l4_header = ip4_next_header (inner_ip0);
509       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
510       key0.addr = inner_ip0->dst_address;
511       switch (key0.protocol)
512         {
513         case SNAT_PROTOCOL_ICMP:
514           inner_icmp0 = (icmp46_header_t*)l4_header;
515           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
516           key0.port = inner_echo0->identifier;
517           break;
518         case SNAT_PROTOCOL_UDP:
519         case SNAT_PROTOCOL_TCP:
520           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
521           break;
522         default:
523           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
524         }
525     }
526   *p_key0 = key0;
527   return -1; /* success */
528 }
529
530 /**
531  * Get address and port values to be used for ICMP packet translation
532  * and create session if needed
533  *
534  * @param[in,out] sm             NAT main
535  * @param[in,out] node           NAT node runtime
536  * @param[in] thread_index       thread index
537  * @param[in,out] b0             buffer containing packet to be translated
538  * @param[out] p_proto           protocol used for matching
539  * @param[out] p_value           address and port after NAT translation
540  * @param[out] p_dont_translate  if packet should not be translated
541  * @param d                      optional parameter
542  * @param e                      optional parameter
543  */
544 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
545                            u32 thread_index, vlib_buffer_t *b0,
546                            ip4_header_t *ip0, u8 *p_proto,
547                            snat_session_key_t *p_value,
548                            u8 *p_dont_translate, void *d, void *e)
549 {
550   icmp46_header_t *icmp0;
551   u32 sw_if_index0;
552   u32 rx_fib_index0;
553   snat_session_key_t key0;
554   snat_session_t *s0 = 0;
555   u8 dont_translate = 0;
556   clib_bihash_kv_8_8_t kv0, value0;
557   u32 next0 = ~0;
558   int err;
559
560   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
561   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
562   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
563
564   err = icmp_get_key (ip0, &key0);
565   if (err != -1)
566     {
567       b0->error = node->errors[err];
568       next0 = SNAT_IN2OUT_NEXT_DROP;
569       goto out;
570     }
571   key0.fib_index = rx_fib_index0;
572
573   kv0.key = key0.as_u64;
574
575   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
576                               &value0))
577     {
578       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] != ~0)
579         {
580           if (PREDICT_FALSE(nat_not_translate_output_feature(sm, ip0,
581               key0.protocol, key0.port, key0.port, thread_index, sw_if_index0)))
582             {
583               dont_translate = 1;
584               goto out;
585             }
586         }
587       else
588         {
589           if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
590               ip0, SNAT_PROTOCOL_ICMP, rx_fib_index0, thread_index)))
591             {
592               dont_translate = 1;
593               goto out;
594             }
595         }
596
597       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
598         {
599           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
600           next0 = SNAT_IN2OUT_NEXT_DROP;
601           goto out;
602         }
603
604       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, &s0, node, next0,
605                          thread_index, vlib_time_now (sm->vlib_main));
606
607       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
608         goto out;
609     }
610   else
611     {
612       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
613                         icmp0->type != ICMP4_echo_reply &&
614                         !icmp_is_error_message (icmp0)))
615         {
616           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
617           next0 = SNAT_IN2OUT_NEXT_DROP;
618           goto out;
619         }
620
621       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
622                               value0.value);
623     }
624
625 out:
626   *p_proto = key0.protocol;
627   if (s0)
628     *p_value = s0->out2in;
629   *p_dont_translate = dont_translate;
630   if (d)
631     *(snat_session_t**)d = s0;
632   return next0;
633 }
634
635 /**
636  * Get address and port values to be used for ICMP packet translation
637  *
638  * @param[in] sm                 NAT main
639  * @param[in,out] node           NAT node runtime
640  * @param[in] thread_index       thread index
641  * @param[in,out] b0             buffer containing packet to be translated
642  * @param[out] p_proto           protocol used for matching
643  * @param[out] p_value           address and port after NAT translation
644  * @param[out] p_dont_translate  if packet should not be translated
645  * @param d                      optional parameter
646  * @param e                      optional parameter
647  */
648 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
649                            u32 thread_index, vlib_buffer_t *b0,
650                            ip4_header_t *ip0, u8 *p_proto,
651                            snat_session_key_t *p_value,
652                            u8 *p_dont_translate, void *d, void *e)
653 {
654   icmp46_header_t *icmp0;
655   u32 sw_if_index0;
656   u32 rx_fib_index0;
657   snat_session_key_t key0;
658   snat_session_key_t sm0;
659   u8 dont_translate = 0;
660   u8 is_addr_only;
661   u32 next0 = ~0;
662   int err;
663
664   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
665   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
666   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
667
668   err = icmp_get_key (ip0, &key0);
669   if (err != -1)
670     {
671       b0->error = node->errors[err];
672       next0 = SNAT_IN2OUT_NEXT_DROP;
673       goto out2;
674     }
675   key0.fib_index = rx_fib_index0;
676
677   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only, 0, 0, 0))
678     {
679       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
680           IP_PROTOCOL_ICMP, rx_fib_index0)))
681         {
682           dont_translate = 1;
683           goto out;
684         }
685
686       if (icmp_is_error_message (icmp0))
687         {
688           next0 = SNAT_IN2OUT_NEXT_DROP;
689           goto out;
690         }
691
692       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
693       next0 = SNAT_IN2OUT_NEXT_DROP;
694       goto out;
695     }
696
697   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
698                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
699                     !icmp_is_error_message (icmp0)))
700     {
701       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
702       next0 = SNAT_IN2OUT_NEXT_DROP;
703       goto out;
704     }
705
706 out:
707   *p_value = sm0;
708 out2:
709   *p_proto = key0.protocol;
710   *p_dont_translate = dont_translate;
711   return next0;
712 }
713
714 static inline u32 icmp_in2out (snat_main_t *sm,
715                                vlib_buffer_t * b0,
716                                ip4_header_t * ip0,
717                                icmp46_header_t * icmp0,
718                                u32 sw_if_index0,
719                                u32 rx_fib_index0,
720                                vlib_node_runtime_t * node,
721                                u32 next0,
722                                u32 thread_index,
723                                void *d,
724                                void *e)
725 {
726   snat_session_key_t sm0;
727   u8 protocol;
728   icmp_echo_header_t *echo0, *inner_echo0 = 0;
729   ip4_header_t *inner_ip0;
730   void *l4_header = 0;
731   icmp46_header_t *inner_icmp0;
732   u8 dont_translate;
733   u32 new_addr0, old_addr0;
734   u16 old_id0, new_id0;
735   ip_csum_t sum0;
736   u16 checksum0;
737   u32 next0_tmp;
738
739   echo0 = (icmp_echo_header_t *)(icmp0+1);
740
741   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0, ip0,
742                                        &protocol, &sm0, &dont_translate, d, e);
743   if (next0_tmp != ~0)
744     next0 = next0_tmp;
745   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
746     goto out;
747
748   sum0 = ip_incremental_checksum (0, icmp0,
749                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
750   checksum0 = ~ip_csum_fold (sum0);
751   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
752     {
753       next0 = SNAT_IN2OUT_NEXT_DROP;
754       goto out;
755     }
756
757   old_addr0 = ip0->src_address.as_u32;
758   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
759   if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
760     vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
761
762   sum0 = ip0->checksum;
763   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
764                          src_address /* changed member */);
765   ip0->checksum = ip_csum_fold (sum0);
766
767   if (icmp0->checksum == 0)
768     icmp0->checksum = 0xffff;
769
770   if (!icmp_is_error_message (icmp0))
771     {
772       new_id0 = sm0.port;
773       if (PREDICT_FALSE(new_id0 != echo0->identifier))
774         {
775           old_id0 = echo0->identifier;
776           new_id0 = sm0.port;
777           echo0->identifier = new_id0;
778
779           sum0 = icmp0->checksum;
780           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
781                                  identifier);
782           icmp0->checksum = ip_csum_fold (sum0);
783         }
784     }
785   else
786     {
787       inner_ip0 = (ip4_header_t *)(echo0+1);
788       l4_header = ip4_next_header (inner_ip0);
789
790       if (!ip4_header_checksum_is_valid (inner_ip0))
791         {
792           next0 = SNAT_IN2OUT_NEXT_DROP;
793           goto out;
794         }
795
796       old_addr0 = inner_ip0->dst_address.as_u32;
797       inner_ip0->dst_address = sm0.addr;
798       new_addr0 = inner_ip0->dst_address.as_u32;
799
800       sum0 = icmp0->checksum;
801       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
802                              dst_address /* changed member */);
803       icmp0->checksum = ip_csum_fold (sum0);
804
805       switch (protocol)
806         {
807           case SNAT_PROTOCOL_ICMP:
808             inner_icmp0 = (icmp46_header_t*)l4_header;
809             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
810
811             old_id0 = inner_echo0->identifier;
812             new_id0 = sm0.port;
813             inner_echo0->identifier = new_id0;
814
815             sum0 = icmp0->checksum;
816             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
817                                    identifier);
818             icmp0->checksum = ip_csum_fold (sum0);
819             break;
820           case SNAT_PROTOCOL_UDP:
821           case SNAT_PROTOCOL_TCP:
822             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
823             new_id0 = sm0.port;
824             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
825
826             sum0 = icmp0->checksum;
827             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
828                                    dst_port);
829             icmp0->checksum = ip_csum_fold (sum0);
830             break;
831           default:
832             ASSERT(0);
833         }
834     }
835
836 out:
837   return next0;
838 }
839
840 /**
841  * @brief Hairpinning
842  *
843  * Hairpinning allows two endpoints on the internal side of the NAT to
844  * communicate even if they only use each other's external IP addresses
845  * and ports.
846  *
847  * @param sm     NAT main.
848  * @param b0     Vlib buffer.
849  * @param ip0    IP header.
850  * @param udp0   UDP header.
851  * @param tcp0   TCP header.
852  * @param proto0 NAT protocol.
853  */
854 static inline int
855 snat_hairpinning (snat_main_t *sm,
856                   vlib_buffer_t * b0,
857                   ip4_header_t * ip0,
858                   udp_header_t * udp0,
859                   tcp_header_t * tcp0,
860                   u32 proto0,
861                   int is_ed)
862 {
      /*
       * is_ed selects which per-thread session table is searched: the
       * out2in_ed (16_8) table when non-zero, the out2in (8_8) table otherwise.
       *
       * Returns 1 when the destination address was rewritten, 0 when no
       * mapping/session matched (or the matched internal address is 0).
       */
863   snat_session_key_t key0, sm0;
864   snat_session_t * s0;
865   clib_bihash_kv_8_8_t kv0, value0;
866   ip_csum_t sum0;
867   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
868   u16 new_dst_port0, old_dst_port0;
869   int rv;
870
      /* Lookup key: the packet's destination (address, port, protocol) in the
       * outside FIB. The port is read through udp0 for every protocol. */
871   key0.addr = ip0->dst_address;
872   key0.port = udp0->dst_port;
873   key0.protocol = proto0;
874   key0.fib_index = sm->outside_fib_index;
875   kv0.key = key0.as_u64;
876
877   /* Check if destination is static mappings */
      /* A zero return is treated as a match; sm0 then holds the internal
       * address/port/fib. NOTE(review): meaning of the trailing
       * "1, 0, 0, 0, 0" arguments is not visible here - confirm against
       * snat_static_mapping_match(). */
878   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
879     {
880       new_dst_addr0 = sm0.addr.as_u32;
881       new_dst_port0 = sm0.port;
          /* The TX slot is loaded with a FIB index, not an interface index. */
882       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
883     }
884   /* or active session */
885   else
886     {
          /* Pick the per-thread data to search. With several workers the index
           * is derived from the (host order) destination port; presumably 1024
           * is the first dynamically allocated port - TODO confirm. */
887       if (sm->num_workers > 1)
888         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
889       else
890         ti = sm->num_workers;
891
892       if (is_ed)
893         {
894           clib_bihash_kv_16_8_t ed_kv, ed_value;
895           make_ed_kv (&ed_kv, &ip0->dst_address, &ip0->src_address,
896                       ip0->protocol, sm->outside_fib_index, udp0->dst_port,
897                       udp0->src_port);
898           rv = clib_bihash_search_16_8 (&sm->per_thread_data[ti].out2in_ed,
899                                         &ed_kv, &ed_value);
              /* si is copied before rv is checked; it is only used when rv == 0. */
900           si = ed_value.value;
901         }
902       else
903         {
904           rv = clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0,
905                                        &value0);
906           si = value0.value;
907         }
          /* Non-zero rv: no session found, leave the packet untouched. */
908       if (rv)
909         return 0;
910
911       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
912       new_dst_addr0 = s0->in2out.addr.as_u32;
913       new_dst_port0 = s0->in2out.port;
914       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
915     }
916
917   /* Destination is behind the same NAT, use internal address and port */
918   if (new_dst_addr0)
919     {
          /* Rewrite the IP destination and patch the IP header checksum
           * incrementally. */
920       old_dst_addr0 = ip0->dst_address.as_u32;
921       ip0->dst_address.as_u32 = new_dst_addr0;
922       sum0 = ip0->checksum;
923       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
924                              ip4_header_t, dst_address);
925       ip0->checksum = ip_csum_fold (sum0);
926
          /* The old port is read through tcp0 regardless of protocol;
           * presumably tcp0 and udp0 point at the same L4 header - verify
           * against caller. */
927       old_dst_port0 = tcp0->dst;
928       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
929         {
930           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
931             {
                  /* TCP: account for both the address change and the port
                   * change in the TCP checksum. The "cheat" borrows the
                   * ip4_header_t length field as the type argument for the
                   * port update. */
932               tcp0->dst = new_dst_port0;
933               sum0 = tcp0->checksum;
934               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
935                                      ip4_header_t, dst_address);
936               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
937                                      ip4_header_t /* cheat */, length);
938               tcp0->checksum = ip_csum_fold(sum0);
939             }
940           else
941             {
                  /* Non-TCP: rewrite the port and zero the checksum field
                   * instead of recomputing it. */
942               udp0->dst_port = new_dst_port0;
943               udp0->checksum = 0;
944             }
945         }
946       else
947         {
              /* Port unchanged: only the address change has to be reflected in
               * the TCP checksum.
               * NOTE(review): for non-TCP traffic the L4 checksum is left
               * untouched on this path even though the destination address
               * changed - confirm the checksum was already cleared upstream. */
948           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
949             {
950               sum0 = tcp0->checksum;
951               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
952                                      ip4_header_t, dst_address);
953               tcp0->checksum = ip_csum_fold(sum0);
954             }
955         }
956       return 1;
957     }
958   return 0;
959 }
960
/**
 * @brief Hairpinning for ICMP.
 *
 * For a non-error ICMP message whose destination matches an active session
 * or a static mapping, rewrite the destination address (and, for a session
 * match, the identifier) to the internal values and set the TX FIB index.
 * ICMP error messages are left untouched.
 *
 * @param sm     NAT main.
 * @param b0     Vlib buffer.
 * @param ip0    IP header.
 * @param icmp0  ICMP header.
 * @param is_ed  Non-zero to search the out2in_ed (16_8) session table,
 *               zero to search the out2in (8_8) table.
 */
961 static inline void
962 snat_icmp_hairpinning (snat_main_t *sm,
963                        vlib_buffer_t * b0,
964                        ip4_header_t * ip0,
965                        icmp46_header_t * icmp0,
966                        int is_ed)
967 {
968   snat_session_key_t key0, sm0;
969   clib_bihash_kv_8_8_t kv0, value0;
970   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
971   ip_csum_t sum0;
972   snat_session_t *s0;
973   int rv;
974
975   if (!icmp_is_error_message (icmp0))
976     {
          /* Every non-error message is treated as carrying an echo header
           * directly after the ICMP header; its identifier plays the role of
           * the port in the lookup key. */
977       icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
978       u16 icmp_id0 = echo0->identifier;
979       key0.addr = ip0->dst_address;
980       key0.port = icmp_id0;
981       key0.protocol = SNAT_PROTOCOL_ICMP;
982       key0.fib_index = sm->outside_fib_index;
983       kv0.key = key0.as_u64;
984
          /* Pick the per-thread data to search; with several workers the index
           * is derived from the (host order) identifier. */
985       if (sm->num_workers > 1)
986         ti = (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread;
987       else
988         ti = sm->num_workers;
989
990       /* Check if destination is in active sessions */
991       if (is_ed)
992         {
993           clib_bihash_kv_16_8_t ed_kv, ed_value;
994           make_ed_kv (&ed_kv, &ip0->dst_address, &ip0->src_address,
995                       IP_PROTOCOL_ICMP, sm->outside_fib_index, icmp_id0, 0);
996           rv = clib_bihash_search_16_8 (&sm->per_thread_data[ti].out2in_ed,
997                                         &ed_kv, &ed_value);
              /* si is only used further down when rv == 0. */
998           si = ed_value.value;
999         }
1000       else
1001         {
1002           rv = clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0,
1003                                        &value0);
1004           si = value0.value;
1005         }
1006       if (rv)
1007         {
1008           /* or static mappings */
               /* On a static mapping match only the address and TX FIB index
                * are taken; the identifier is not rewritten on this path. */
1009           if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
1010             {
1011               new_dst_addr0 = sm0.addr.as_u32;
1012               vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1013             }
1014         }
1015       else
1016         {
               /* Session hit: use the internal address, rewrite the identifier
                * to the internal one and patch the ICMP checksum accordingly. */
1017           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
1018           new_dst_addr0 = s0->in2out.addr.as_u32;
1019           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1020           echo0->identifier = s0->in2out.port;
1021           sum0 = icmp0->checksum;
1022           sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
1023                                  icmp_echo_header_t, identifier);
1024           icmp0->checksum = ip_csum_fold (sum0);
1025         }
1026
1027       /* Destination is behind the same NAT, use internal address and port */
1028       if (new_dst_addr0)
1029         {
               /* Only the IP header checksum is patched for the address change. */
1030           old_dst_addr0 = ip0->dst_address.as_u32;
1031           ip0->dst_address.as_u32 = new_dst_addr0;
1032           sum0 = ip0->checksum;
1033           sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
1034                                  ip4_header_t, dst_address);
1035           ip0->checksum = ip_csum_fold (sum0);
1036         }
1037     }
1038
1039 }
1040
/**
 * @brief Slow-path handling of an ICMP packet in the in2out direction.
 *
 * Runs icmp_in2out() and, when the packet is not dropped and a session is
 * available through p_s0, performs hairpinning, session accounting and
 * per-user LRU maintenance.
 *
 * @param sm            NAT main.
 * @param b0            Vlib buffer.
 * @param ip0           IP header.
 * @param icmp0         ICMP header.
 * @param sw_if_index0  RX interface index, passed through to icmp_in2out().
 * @param rx_fib_index0 RX FIB index, passed through to icmp_in2out().
 * @param node          Node runtime, passed through to icmp_in2out().
 * @param next0         Current next index, passed through to icmp_in2out().
 * @param now           Timestamp used for the session counters update.
 * @param thread_index  Thread index.
 * @param p_s0          Session pointer slot; read after icmp_in2out()
 *                      returns (presumably filled in by it - verify).
 *
 * @return Next index as returned by icmp_in2out().
 */
1041 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
1042                                          vlib_buffer_t * b0,
1043                                          ip4_header_t * ip0,
1044                                          icmp46_header_t * icmp0,
1045                                          u32 sw_if_index0,
1046                                          u32 rx_fib_index0,
1047                                          vlib_node_runtime_t * node,
1048                                          u32 next0,
1049                                          f64 now,
1050                                          u32 thread_index,
1051                                          snat_session_t ** p_s0)
1052 {
1053   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1054                       next0, thread_index, p_s0, 0);
1055   snat_session_t * s0 = *p_s0;
       /* Post-processing only applies to packets that are forwarded and have
        * a session attached. */
1056   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
1057     {
1058       /* Hairpinning */
           /* NOTE(review): this compares the TX slot with 0, whereas
            * nat_hairpinning_sm_unknown_proto() and
            * nat_in2out_sm_unknown_proto() in this file compare it with ~0 -
            * confirm which "unset" value is intended here. */
1059       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == 0)
1060         snat_icmp_hairpinning(sm, b0, ip0, icmp0, sm->endpoint_dependent);
1061       /* Accounting */
1062       nat44_session_update_counters (s0, now,
1063                                      vlib_buffer_length_in_chain (sm->vlib_main, b0));
1064       /* Per-user LRU list maintenance */
1065       nat44_session_update_lru (sm, s0, thread_index);
1066     }
1067   return next0;
1068 }
1069
/**
 * @brief Hairpinning for packets of a protocol NAT does not track by port.
 *
 * Looks the destination address up in the external static mappings (port,
 * protocol and FIB index all 0 in the key) and, on a match, rewrites the
 * destination to the mapping's local address.
 *
 * @param sm  NAT main.
 * @param b   Vlib buffer.
 * @param ip  IP header.
 */
1070 static inline void
1071 nat_hairpinning_sm_unknown_proto (snat_main_t * sm,
1072                                   vlib_buffer_t * b,
1073                                   ip4_header_t * ip)
1074 {
1075   clib_bihash_kv_8_8_t kv, value;
1076   snat_static_mapping_t *m;
1077   u32 old_addr, new_addr;
1078   ip_csum_t sum;
1079
       /* Address-only key; a non-zero search result means no mapping, in
        * which case the packet is left as is. */
1080   make_sm_kv (&kv, &ip->dst_address, 0, 0, 0);
1081   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1082     return;
1083
1084   m = pool_elt_at_index (sm->static_mappings, value.value);
1085
       /* Rewrite the destination and patch the IP header checksum
        * incrementally. */
1086   old_addr = ip->dst_address.as_u32;
1087   new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
1088   sum = ip->checksum;
1089   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
1090   ip->checksum = ip_csum_fold (sum);
1091
       /* Only set the TX FIB index when it still holds ~0. */
1092   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1093     vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
1094 }
1095
/**
 * @brief in2out translation via static mapping for packets of a protocol
 *        NAT does not track by port.
 *
 * Looks the source address up in the local static mappings (port 0,
 * protocol 0, RX FIB index) and, on a match, rewrites the source to the
 * mapping's external address and attempts hairpinning.
 *
 * @param sm            NAT main.
 * @param b             Vlib buffer.
 * @param ip            IP header.
 * @param rx_fib_index  FIB index used in the lookup key.
 *
 * @return 0 if the packet was translated, 1 if no static mapping matched.
 */
1096 static int
1097 nat_in2out_sm_unknown_proto (snat_main_t *sm,
1098                              vlib_buffer_t * b,
1099                              ip4_header_t * ip,
1100                              u32 rx_fib_index)
1101 {
1102   clib_bihash_kv_8_8_t kv, value;
1103   snat_static_mapping_t *m;
1104   snat_session_key_t m_key;
1105   u32 old_addr, new_addr;
1106   ip_csum_t sum;
1107
       /* Address-only key in the RX FIB. */
1108   m_key.addr = ip->src_address;
1109   m_key.port = 0;
1110   m_key.protocol = 0;
1111   m_key.fib_index = rx_fib_index;
1112   kv.key = m_key.as_u64;
1113   if (clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
1114     return 1;
1115
1116   m = pool_elt_at_index (sm->static_mappings, value.value);
1117
       /* Rewrite the source and patch the IP header checksum incrementally. */
1118   old_addr = ip->src_address.as_u32;
1119   new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
1120   sum = ip->checksum;
1121   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1122   ip->checksum = ip_csum_fold (sum);
1123
1124
1125   /* Hairpinning */
       /* Runs only when the TX slot still holds ~0. Because the slot is
        * assigned m->fib_index before the helper is called, the helper's own
        * "== ~0" check will not overwrite it unless m->fib_index is itself ~0. */
1126   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1127     {
1128       vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
1129       nat_hairpinning_sm_unknown_proto (sm, b, ip);
1130     }
1131
1132   return 0;
1133 }
1134
1135 static inline uword
1136 snat_in2out_node_fn_inline (vlib_main_t * vm,
1137                             vlib_node_runtime_t * node,
1138                             vlib_frame_t * frame, int is_slow_path,
1139                             int is_output_feature)
1140 {
1141   u32 n_left_from, * from, * to_next;
1142   snat_in2out_next_t next_index;
1143   u32 pkts_processed = 0;
1144   snat_main_t * sm = &snat_main;
1145   f64 now = vlib_time_now (vm);
1146   u32 stats_node_index;
1147   u32 thread_index = vm->thread_index;
1148
1149   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
1150     snat_in2out_node.index;
1151
1152   from = vlib_frame_vector_args (frame);
1153   n_left_from = frame->n_vectors;
1154   next_index = node->cached_next_index;
1155
1156   while (n_left_from > 0)
1157     {
1158       u32 n_left_to_next;
1159
1160       vlib_get_next_frame (vm, node, next_index,
1161                            to_next, n_left_to_next);
1162
1163       while (n_left_from >= 4 && n_left_to_next >= 2)
1164         {
1165           u32 bi0, bi1;
1166           vlib_buffer_t * b0, * b1;
1167           u32 next0, next1;
1168           u32 sw_if_index0, sw_if_index1;
1169           ip4_header_t * ip0, * ip1;
1170           ip_csum_t sum0, sum1;
1171           u32 new_addr0, old_addr0, new_addr1, old_addr1;
1172           u16 old_port0, new_port0, old_port1, new_port1;
1173           udp_header_t * udp0, * udp1;
1174           tcp_header_t * tcp0, * tcp1;
1175           icmp46_header_t * icmp0, * icmp1;
1176           snat_session_key_t key0, key1;
1177           u32 rx_fib_index0, rx_fib_index1;
1178           u32 proto0, proto1;
1179           snat_session_t * s0 = 0, * s1 = 0;
1180           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1181           u32 iph_offset0 = 0, iph_offset1 = 0;
1182
1183           /* Prefetch next iteration. */
1184           {
1185             vlib_buffer_t * p2, * p3;
1186
1187             p2 = vlib_get_buffer (vm, from[2]);
1188             p3 = vlib_get_buffer (vm, from[3]);
1189
1190             vlib_prefetch_buffer_header (p2, LOAD);
1191             vlib_prefetch_buffer_header (p3, LOAD);
1192
1193             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1194             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1195           }
1196
1197           /* speculatively enqueue b0 and b1 to the current next frame */
1198           to_next[0] = bi0 = from[0];
1199           to_next[1] = bi1 = from[1];
1200           from += 2;
1201           to_next += 2;
1202           n_left_from -= 2;
1203           n_left_to_next -= 2;
1204
1205           b0 = vlib_get_buffer (vm, bi0);
1206           b1 = vlib_get_buffer (vm, bi1);
1207
1208           if (is_output_feature)
1209             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1210
1211           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1212                  iph_offset0);
1213
1214           udp0 = ip4_next_header (ip0);
1215           tcp0 = (tcp_header_t *) udp0;
1216           icmp0 = (icmp46_header_t *) udp0;
1217
1218           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1219           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1220                                    sw_if_index0);
1221
1222           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1223
1224           if (PREDICT_FALSE(ip0->ttl == 1))
1225             {
1226               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1227               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1228                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1229                                            0);
1230               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1231               goto trace00;
1232             }
1233
1234           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1235
1236           /* Next configured feature, probably ip4-lookup */
1237           if (is_slow_path)
1238             {
1239               if (PREDICT_FALSE (proto0 == ~0))
1240                 {
1241                   if (nat_in2out_sm_unknown_proto (sm, b0, ip0, rx_fib_index0))
1242                     {
1243                       next0 = SNAT_IN2OUT_NEXT_DROP;
1244                       b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
1245                     }
1246                   goto trace00;
1247                 }
1248
1249               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1250                 {
1251                   next0 = icmp_in2out_slow_path
1252                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
1253                      node, next0, now, thread_index, &s0);
1254                   goto trace00;
1255                 }
1256             }
1257           else
1258             {
1259               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1260                 {
1261                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1262                   goto trace00;
1263                 }
1264
1265               if (ip4_is_fragment (ip0))
1266                 {
1267                   next0 = SNAT_IN2OUT_NEXT_REASS;
1268                   goto trace00;
1269                 }
1270             }
1271
1272           key0.addr = ip0->src_address;
1273           key0.port = udp0->src_port;
1274           key0.protocol = proto0;
1275           key0.fib_index = rx_fib_index0;
1276
1277           kv0.key = key0.as_u64;
1278
1279           if (PREDICT_FALSE (clib_bihash_search_8_8 (
1280               &sm->per_thread_data[thread_index].in2out, &kv0, &value0) != 0))
1281             {
1282               if (is_slow_path)
1283                 {
1284                   if (is_output_feature)
1285                     {
1286                       if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
1287                           ip0, proto0, udp0->src_port, udp0->dst_port, thread_index, sw_if_index0)))
1288                         goto trace00;
1289                     }
1290                   else
1291                     {
1292                       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1293                           ip0, proto0, rx_fib_index0, thread_index)))
1294                         goto trace00;
1295                     }
1296
1297                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1298                                      &s0, node, next0, thread_index, now);
1299                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1300                     goto trace00;
1301                 }
1302               else
1303                 {
1304                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1305                   goto trace00;
1306                 }
1307             }
1308           else
1309             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1310                                     value0.value);
1311
1312           b0->flags |= VNET_BUFFER_F_IS_NATED;
1313
1314           old_addr0 = ip0->src_address.as_u32;
1315           ip0->src_address = s0->out2in.addr;
1316           new_addr0 = ip0->src_address.as_u32;
1317           if (!is_output_feature)
1318             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1319
1320           sum0 = ip0->checksum;
1321           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1322                                  ip4_header_t,
1323                                  src_address /* changed member */);
1324           ip0->checksum = ip_csum_fold (sum0);
1325
1326           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1327             {
1328               old_port0 = tcp0->src_port;
1329               tcp0->src_port = s0->out2in.port;
1330               new_port0 = tcp0->src_port;
1331
1332               sum0 = tcp0->checksum;
1333               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1334                                      ip4_header_t,
1335                                      dst_address /* changed member */);
1336               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1337                                      ip4_header_t /* cheat */,
1338                                      length /* changed member */);
1339               mss_clamping (sm, tcp0, &sum0);
1340               tcp0->checksum = ip_csum_fold(sum0);
1341             }
1342           else
1343             {
1344               old_port0 = udp0->src_port;
1345               udp0->src_port = s0->out2in.port;
1346               udp0->checksum = 0;
1347             }
1348
1349           /* Accounting */
1350           nat44_session_update_counters (s0, now,
1351                                          vlib_buffer_length_in_chain (vm, b0));
1352           /* Per-user LRU list maintenance */
1353           nat44_session_update_lru (sm, s0, thread_index);
1354         trace00:
1355
1356           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1357                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1358             {
1359               snat_in2out_trace_t *t =
1360                  vlib_add_trace (vm, node, b0, sizeof (*t));
1361               t->is_slow_path = is_slow_path;
1362               t->sw_if_index = sw_if_index0;
1363               t->next_index = next0;
1364                   t->session_index = ~0;
1365               if (s0)
1366                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1367             }
1368
1369           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1370
1371           if (is_output_feature)
1372             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1373
1374           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1375                  iph_offset1);
1376
1377           udp1 = ip4_next_header (ip1);
1378           tcp1 = (tcp_header_t *) udp1;
1379           icmp1 = (icmp46_header_t *) udp1;
1380
1381           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1382           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1383                                    sw_if_index1);
1384
1385           if (PREDICT_FALSE(ip1->ttl == 1))
1386             {
1387               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1388               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1389                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1390                                            0);
1391               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1392               goto trace01;
1393             }
1394
1395           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1396
1397           /* Next configured feature, probably ip4-lookup */
1398           if (is_slow_path)
1399             {
1400               if (PREDICT_FALSE (proto1 == ~0))
1401                 {
1402                   if (nat_in2out_sm_unknown_proto (sm, b1, ip1, rx_fib_index1))
1403                     {
1404                       next1 = SNAT_IN2OUT_NEXT_DROP;
1405                       b1->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
1406                     }
1407                   goto trace01;
1408                 }
1409
1410               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1411                 {
1412                   next1 = icmp_in2out_slow_path
1413                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1414                      next1, now, thread_index, &s1);
1415                   goto trace01;
1416                 }
1417             }
1418           else
1419             {
1420               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1421                 {
1422                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1423                   goto trace01;
1424                 }
1425
1426               if (ip4_is_fragment (ip1))
1427                 {
1428                   next1 = SNAT_IN2OUT_NEXT_REASS;
1429                   goto trace01;
1430                 }
1431             }
1432
1433           key1.addr = ip1->src_address;
1434           key1.port = udp1->src_port;
1435           key1.protocol = proto1;
1436           key1.fib_index = rx_fib_index1;
1437
1438           kv1.key = key1.as_u64;
1439
1440             if (PREDICT_FALSE(clib_bihash_search_8_8 (
1441                 &sm->per_thread_data[thread_index].in2out, &kv1, &value1) != 0))
1442             {
1443               if (is_slow_path)
1444                 {
1445                   if (is_output_feature)
1446                     {
1447                       if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
1448                           ip1, proto1, udp1->src_port, udp1->dst_port, thread_index, sw_if_index1)))
1449                         goto trace01;
1450                     }
1451                   else
1452                     {
1453                       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
1454                           ip1, proto1, rx_fib_index1, thread_index)))
1455                         goto trace01;
1456                     }
1457
1458                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1459                                      &s1, node, next1, thread_index, now);
1460                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1461                     goto trace01;
1462                 }
1463               else
1464                 {
1465                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1466                   goto trace01;
1467                 }
1468             }
1469           else
1470             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1471                                     value1.value);
1472
1473           b1->flags |= VNET_BUFFER_F_IS_NATED;
1474
1475           old_addr1 = ip1->src_address.as_u32;
1476           ip1->src_address = s1->out2in.addr;
1477           new_addr1 = ip1->src_address.as_u32;
1478           if (!is_output_feature)
1479             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1480
1481           sum1 = ip1->checksum;
1482           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1483                                  ip4_header_t,
1484                                  src_address /* changed member */);
1485           ip1->checksum = ip_csum_fold (sum1);
1486
1487           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1488             {
1489               old_port1 = tcp1->src_port;
1490               tcp1->src_port = s1->out2in.port;
1491               new_port1 = tcp1->src_port;
1492
1493               sum1 = tcp1->checksum;
1494               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1495                                      ip4_header_t,
1496                                      dst_address /* changed member */);
1497               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1498                                      ip4_header_t /* cheat */,
1499                                      length /* changed member */);
1500               mss_clamping (sm, tcp1, &sum1);
1501               tcp1->checksum = ip_csum_fold(sum1);
1502             }
1503           else
1504             {
1505               old_port1 = udp1->src_port;
1506               udp1->src_port = s1->out2in.port;
1507               udp1->checksum = 0;
1508             }
1509
1510           /* Accounting */
1511           nat44_session_update_counters (s1, now,
1512                                          vlib_buffer_length_in_chain (vm, b1));
1513           /* Per-user LRU list maintenance */
1514           nat44_session_update_lru (sm, s1, thread_index);
1515         trace01:
1516
1517           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1518                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1519             {
1520               snat_in2out_trace_t *t =
1521                  vlib_add_trace (vm, node, b1, sizeof (*t));
1522               t->sw_if_index = sw_if_index1;
1523               t->next_index = next1;
1524               t->session_index = ~0;
1525               if (s1)
1526                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1527             }
1528
1529           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1530
1531           /* verify speculative enqueues, maybe switch current next frame */
1532           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1533                                            to_next, n_left_to_next,
1534                                            bi0, bi1, next0, next1);
1535         }
1536
1537       while (n_left_from > 0 && n_left_to_next > 0)
1538         {
1539           u32 bi0;
1540           vlib_buffer_t * b0;
1541           u32 next0;
1542           u32 sw_if_index0;
1543           ip4_header_t * ip0;
1544           ip_csum_t sum0;
1545           u32 new_addr0, old_addr0;
1546           u16 old_port0, new_port0;
1547           udp_header_t * udp0;
1548           tcp_header_t * tcp0;
1549           icmp46_header_t * icmp0;
1550           snat_session_key_t key0;
1551           u32 rx_fib_index0;
1552           u32 proto0;
1553           snat_session_t * s0 = 0;
1554           clib_bihash_kv_8_8_t kv0, value0;
1555           u32 iph_offset0 = 0;
1556
1557           /* speculatively enqueue b0 to the current next frame */
1558           bi0 = from[0];
1559           to_next[0] = bi0;
1560           from += 1;
1561           to_next += 1;
1562           n_left_from -= 1;
1563           n_left_to_next -= 1;
1564
1565           b0 = vlib_get_buffer (vm, bi0);
1566           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1567
1568           if (is_output_feature)
1569             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1570
1571           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1572                  iph_offset0);
1573
1574           udp0 = ip4_next_header (ip0);
1575           tcp0 = (tcp_header_t *) udp0;
1576           icmp0 = (icmp46_header_t *) udp0;
1577
1578           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1579           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1580                                    sw_if_index0);
1581
1582           if (PREDICT_FALSE(ip0->ttl == 1))
1583             {
1584               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1585               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1586                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1587                                            0);
1588               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1589               goto trace0;
1590             }
1591
1592           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1593
1594           /* Next configured feature, probably ip4-lookup */
1595           if (is_slow_path)
1596             {
1597               if (PREDICT_FALSE (proto0 == ~0))
1598                 {
1599                   if (nat_in2out_sm_unknown_proto (sm, b0, ip0, rx_fib_index0))
1600                     {
1601                       next0 = SNAT_IN2OUT_NEXT_DROP;
1602                       b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
1603                     }
1604                   goto trace0;
1605                 }
1606
1607               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1608                 {
1609                   next0 = icmp_in2out_slow_path
1610                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1611                      next0, now, thread_index, &s0);
1612                   goto trace0;
1613                 }
1614             }
1615           else
1616             {
1617               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1618                 {
1619                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1620                   goto trace0;
1621                 }
1622
1623               if (ip4_is_fragment (ip0))
1624                 {
1625                   next0 = SNAT_IN2OUT_NEXT_REASS;
1626                   goto trace0;
1627                 }
1628             }
1629
1630           key0.addr = ip0->src_address;
1631           key0.port = udp0->src_port;
1632           key0.protocol = proto0;
1633           key0.fib_index = rx_fib_index0;
1634
1635           kv0.key = key0.as_u64;
1636
1637           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out,
1638                                       &kv0, &value0))
1639             {
1640               if (is_slow_path)
1641                 {
1642                   if (is_output_feature)
1643                     {
1644                       if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
1645                           ip0, proto0, udp0->src_port, udp0->dst_port, thread_index, sw_if_index0)))
1646                         goto trace0;
1647                     }
1648                   else
1649                     {
1650                       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1651                           ip0, proto0, rx_fib_index0, thread_index)))
1652                         goto trace0;
1653                     }
1654
1655                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1656                                      &s0, node, next0, thread_index, now);
1657
1658                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1659                     goto trace0;
1660                 }
1661               else
1662                 {
1663                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1664                   goto trace0;
1665                 }
1666             }
1667           else
1668           s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1669                                   value0.value);
1670
1671           b0->flags |= VNET_BUFFER_F_IS_NATED;
1672
1673           old_addr0 = ip0->src_address.as_u32;
1674           ip0->src_address = s0->out2in.addr;
1675           new_addr0 = ip0->src_address.as_u32;
1676           if (!is_output_feature)
1677             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1678
1679           sum0 = ip0->checksum;
1680           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1681                                  ip4_header_t,
1682                                  src_address /* changed member */);
1683           ip0->checksum = ip_csum_fold (sum0);
1684
1685           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1686             {
1687               old_port0 = tcp0->src_port;
1688               tcp0->src_port = s0->out2in.port;
1689               new_port0 = tcp0->src_port;
1690
1691               sum0 = tcp0->checksum;
1692               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1693                                      ip4_header_t,
1694                                      dst_address /* changed member */);
1695               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1696                                      ip4_header_t /* cheat */,
1697                                      length /* changed member */);
1698               mss_clamping (sm, tcp0, &sum0);
1699               tcp0->checksum = ip_csum_fold(sum0);
1700             }
1701           else
1702             {
1703               old_port0 = udp0->src_port;
1704               udp0->src_port = s0->out2in.port;
1705               udp0->checksum = 0;
1706             }
1707
1708           /* Accounting */
1709           nat44_session_update_counters (s0, now,
1710                                          vlib_buffer_length_in_chain (vm, b0));
1711           /* Per-user LRU list maintenance */
1712           nat44_session_update_lru (sm, s0, thread_index);
1713
1714         trace0:
1715           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1716                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1717             {
1718               snat_in2out_trace_t *t =
1719                  vlib_add_trace (vm, node, b0, sizeof (*t));
1720               t->is_slow_path = is_slow_path;
1721               t->sw_if_index = sw_if_index0;
1722               t->next_index = next0;
1723                   t->session_index = ~0;
1724               if (s0)
1725                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1726             }
1727
1728           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1729
1730           /* verify speculative enqueue, maybe switch current next frame */
1731           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1732                                            to_next, n_left_to_next,
1733                                            bi0, next0);
1734         }
1735
1736       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1737     }
1738
1739   vlib_node_increment_counter (vm, stats_node_index,
1740                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1741                                pkts_processed);
1742   return frame->n_vectors;
1743 }
1744
1745 static uword
1746 snat_in2out_fast_path_fn (vlib_main_t * vm,
1747                           vlib_node_runtime_t * node,
1748                           vlib_frame_t * frame)
1749 {
1750   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0);
1751 }
1752
1753 VLIB_REGISTER_NODE (snat_in2out_node) = {
1754   .function = snat_in2out_fast_path_fn,
1755   .name = "nat44-in2out",
1756   .vector_size = sizeof (u32),
1757   .format_trace = format_snat_in2out_trace,
1758   .type = VLIB_NODE_TYPE_INTERNAL,
1759
1760   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1761   .error_strings = snat_in2out_error_strings,
1762
1763   .runtime_data_bytes = sizeof (snat_runtime_t),
1764
1765   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1766
1767   /* edit / add dispositions here */
1768   .next_nodes = {
1769     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1770     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1771     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
1772     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1773     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
1774   },
1775 };
1776
1777 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1778
1779 static uword
1780 snat_in2out_output_fast_path_fn (vlib_main_t * vm,
1781                                  vlib_node_runtime_t * node,
1782                                  vlib_frame_t * frame)
1783 {
1784   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1);
1785 }
1786
1787 VLIB_REGISTER_NODE (snat_in2out_output_node) = {
1788   .function = snat_in2out_output_fast_path_fn,
1789   .name = "nat44-in2out-output",
1790   .vector_size = sizeof (u32),
1791   .format_trace = format_snat_in2out_trace,
1792   .type = VLIB_NODE_TYPE_INTERNAL,
1793
1794   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1795   .error_strings = snat_in2out_error_strings,
1796
1797   .runtime_data_bytes = sizeof (snat_runtime_t),
1798
1799   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1800
1801   /* edit / add dispositions here */
1802   .next_nodes = {
1803     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1804     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
1805     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
1806     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1807     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
1808   },
1809 };
1810
1811 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
1812                               snat_in2out_output_fast_path_fn);
1813
1814 static uword
1815 snat_in2out_slow_path_fn (vlib_main_t * vm,
1816                           vlib_node_runtime_t * node,
1817                           vlib_frame_t * frame)
1818 {
1819   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0);
1820 }
1821
1822 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1823   .function = snat_in2out_slow_path_fn,
1824   .name = "nat44-in2out-slowpath",
1825   .vector_size = sizeof (u32),
1826   .format_trace = format_snat_in2out_trace,
1827   .type = VLIB_NODE_TYPE_INTERNAL,
1828
1829   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1830   .error_strings = snat_in2out_error_strings,
1831
1832   .runtime_data_bytes = sizeof (snat_runtime_t),
1833
1834   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1835
1836   /* edit / add dispositions here */
1837   .next_nodes = {
1838     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1839     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1840     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
1841     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1842     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
1843   },
1844 };
1845
1846 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
1847                               snat_in2out_slow_path_fn);
1848
1849 static uword
1850 snat_in2out_output_slow_path_fn (vlib_main_t * vm,
1851                                  vlib_node_runtime_t * node,
1852                                  vlib_frame_t * frame)
1853 {
1854   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1);
1855 }
1856
1857 VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
1858   .function = snat_in2out_output_slow_path_fn,
1859   .name = "nat44-in2out-output-slowpath",
1860   .vector_size = sizeof (u32),
1861   .format_trace = format_snat_in2out_trace,
1862   .type = VLIB_NODE_TYPE_INTERNAL,
1863
1864   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1865   .error_strings = snat_in2out_error_strings,
1866
1867   .runtime_data_bytes = sizeof (snat_runtime_t),
1868
1869   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1870
1871   /* edit / add dispositions here */
1872   .next_nodes = {
1873     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1874     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
1875     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
1876     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1877     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
1878   },
1879 };
1880
1881 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
1882                               snat_in2out_output_slow_path_fn);
1883
1884 extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local;
1885
1886 static inline uword
1887 nat44_hairpinning_fn_inline (vlib_main_t * vm,
1888                              vlib_node_runtime_t * node,
1889                              vlib_frame_t * frame,
1890                              int is_ed)
1891 {
1892   u32 n_left_from, * from, * to_next, stats_node_index;
1893   snat_in2out_next_t next_index;
1894   u32 pkts_processed = 0;
1895   snat_main_t * sm = &snat_main;
1896   vnet_feature_main_t *fm = &feature_main;
1897   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1898   vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index];
1899
1900   stats_node_index = is_ed ? nat44_ed_hairpinning_node.index :
1901     nat44_hairpinning_node.index;
1902   from = vlib_frame_vector_args (frame);
1903   n_left_from = frame->n_vectors;
1904   next_index = node->cached_next_index;
1905
1906   while (n_left_from > 0)
1907     {
1908       u32 n_left_to_next;
1909
1910       vlib_get_next_frame (vm, node, next_index,
1911                            to_next, n_left_to_next);
1912
1913       while (n_left_from > 0 && n_left_to_next > 0)
1914         {
1915           u32 bi0;
1916           vlib_buffer_t * b0;
1917           u32 next0;
1918           ip4_header_t * ip0;
1919           u32 proto0;
1920           udp_header_t * udp0;
1921           tcp_header_t * tcp0;
1922
1923           /* speculatively enqueue b0 to the current next frame */
1924           bi0 = from[0];
1925           to_next[0] = bi0;
1926           from += 1;
1927           to_next += 1;
1928           n_left_from -= 1;
1929           n_left_to_next -= 1;
1930
1931           b0 = vlib_get_buffer (vm, bi0);
1932           ip0 = vlib_buffer_get_current (b0);
1933           udp0 = ip4_next_header (ip0);
1934           tcp0 = (tcp_header_t *) udp0;
1935
1936           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1937
1938           vnet_get_config_data (&cm->config_main, &b0->current_config_index,
1939                                 &next0, 0);
1940
1941           if (snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0, is_ed))
1942             next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1943
1944           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1945
1946           /* verify speculative enqueue, maybe switch current next frame */
1947           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1948                                            to_next, n_left_to_next,
1949                                            bi0, next0);
1950          }
1951
1952       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1953     }
1954
1955   vlib_node_increment_counter (vm, stats_node_index,
1956                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1957                                pkts_processed);
1958   return frame->n_vectors;
1959 }
1960
1961 static uword
1962 nat44_hairpinning_fn (vlib_main_t * vm,
1963                       vlib_node_runtime_t * node,
1964                       vlib_frame_t * frame)
1965 {
1966   return nat44_hairpinning_fn_inline (vm, node, frame, 0);
1967 }
1968
1969 VLIB_REGISTER_NODE (nat44_hairpinning_node) = {
1970   .function = nat44_hairpinning_fn,
1971   .name = "nat44-hairpinning",
1972   .vector_size = sizeof (u32),
1973   .type = VLIB_NODE_TYPE_INTERNAL,
1974   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1975   .error_strings = snat_in2out_error_strings,
1976   .n_next_nodes = 2,
1977   .next_nodes = {
1978     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1979     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1980   },
1981 };
1982
1983 VLIB_NODE_FUNCTION_MULTIARCH (nat44_hairpinning_node,
1984                               nat44_hairpinning_fn);
1985
1986 static uword
1987 nat44_ed_hairpinning_fn (vlib_main_t * vm,
1988                          vlib_node_runtime_t * node,
1989                          vlib_frame_t * frame)
1990 {
1991   return nat44_hairpinning_fn_inline (vm, node, frame, 1);
1992 }
1993
1994 VLIB_REGISTER_NODE (nat44_ed_hairpinning_node) = {
1995   .function = nat44_ed_hairpinning_fn,
1996   .name = "nat44-ed-hairpinning",
1997   .vector_size = sizeof (u32),
1998   .type = VLIB_NODE_TYPE_INTERNAL,
1999   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2000   .error_strings = snat_in2out_error_strings,
2001   .n_next_nodes = 2,
2002   .next_nodes = {
2003     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2004     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2005   },
2006 };
2007
2008 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_hairpinning_node,
2009                               nat44_ed_hairpinning_fn);
2010
2011 static inline void
2012 nat44_reass_hairpinning (snat_main_t *sm,
2013                          vlib_buffer_t * b0,
2014                          ip4_header_t * ip0,
2015                          u16 sport,
2016                          u16 dport,
2017                          u32 proto0)
2018 {
2019   snat_session_key_t key0, sm0;
2020   snat_session_t * s0;
2021   clib_bihash_kv_8_8_t kv0, value0;
2022   ip_csum_t sum0;
2023   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
2024   u16 new_dst_port0, old_dst_port0;
2025   udp_header_t * udp0;
2026   tcp_header_t * tcp0;
2027
2028   key0.addr = ip0->dst_address;
2029   key0.port = dport;
2030   key0.protocol = proto0;
2031   key0.fib_index = sm->outside_fib_index;
2032   kv0.key = key0.as_u64;
2033
2034   udp0 = ip4_next_header (ip0);
2035
2036   /* Check if destination is static mappings */
2037   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
2038     {
2039       new_dst_addr0 = sm0.addr.as_u32;
2040       new_dst_port0 = sm0.port;
2041       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2042     }
2043   /* or active sessions */
2044   else
2045     {
2046       if (sm->num_workers > 1)
2047         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
2048       else
2049         ti = sm->num_workers;
2050
2051       if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
2052         {
2053           si = value0.value;
2054           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
2055           new_dst_addr0 = s0->in2out.addr.as_u32;
2056           new_dst_port0 = s0->in2out.port;
2057           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
2058         }
2059     }
2060
2061   /* Destination is behind the same NAT, use internal address and port */
2062   if (new_dst_addr0)
2063     {
2064       old_dst_addr0 = ip0->dst_address.as_u32;
2065       ip0->dst_address.as_u32 = new_dst_addr0;
2066       sum0 = ip0->checksum;
2067       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2068                              ip4_header_t, dst_address);
2069       ip0->checksum = ip_csum_fold (sum0);
2070
2071       old_dst_port0 = dport;
2072       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0 &&
2073                        ip4_is_first_fragment (ip0)))
2074         {
2075           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2076             {
2077               tcp0 = ip4_next_header (ip0);
2078               tcp0->dst = new_dst_port0;
2079               sum0 = tcp0->checksum;
2080               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2081                                      ip4_header_t, dst_address);
2082               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
2083                                      ip4_header_t /* cheat */, length);
2084               tcp0->checksum = ip_csum_fold(sum0);
2085             }
2086           else
2087             {
2088               udp0->dst_port = new_dst_port0;
2089               udp0->checksum = 0;
2090             }
2091         }
2092       else
2093         {
2094           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2095             {
2096               tcp0 = ip4_next_header (ip0);
2097               sum0 = tcp0->checksum;
2098               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2099                                      ip4_header_t, dst_address);
2100               tcp0->checksum = ip_csum_fold(sum0);
2101             }
2102         }
2103     }
2104 }
2105
/*
 * Node function of the "nat44-in2out-reass" node: in2out translation of
 * IPv4 fragments.
 *
 * For each buffer:
 *  - drop it when nat_reass_is_drop_frag(0) is true or when no reassembly
 *    context can be found/created;
 *  - on the first fragment, look the session up in the per-thread in2out
 *    hash (falling back to slow_path() on a miss), remember its index in the
 *    reassembly context and collect the context's fragments into
 *    fragments_to_loopback;
 *  - on any other fragment, use the session recorded in the reassembly
 *    context, or cache the buffer when no session is known yet;
 *  - rewrite the source address (and, on the first fragment, the L4 source
 *    port), fix checksums, run hairpinning and update session accounting.
 *
 * When the input is exhausted and fragments_to_loopback is not empty, the
 * frame's vector is refilled from it so those buffers pass through the same
 * loop.
 *
 * Returns frame->n_vectors.
 */
2106 static uword
2107 nat44_in2out_reass_node_fn (vlib_main_t * vm,
2108                             vlib_node_runtime_t * node,
2109                             vlib_frame_t * frame)
2110 {
2111   u32 n_left_from, *from, *to_next;
2112   snat_in2out_next_t next_index;
2113   u32 pkts_processed = 0;
2114   snat_main_t *sm = &snat_main;
2115   f64 now = vlib_time_now (vm);
2116   u32 thread_index = vm->thread_index;
2117   snat_main_per_thread_data_t *per_thread_data =
2118     &sm->per_thread_data[thread_index];
  /* buffer-index vectors filled by the reassembly helpers; freed on exit */
2119   u32 *fragments_to_drop = 0;
2120   u32 *fragments_to_loopback = 0;
2121
2122   from = vlib_frame_vector_args (frame);
2123   n_left_from = frame->n_vectors;
2124   next_index = node->cached_next_index;
2125
2126   while (n_left_from > 0)
2127     {
2128       u32 n_left_to_next;
2129
2130       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2131
2132       while (n_left_from > 0 && n_left_to_next > 0)
2133        {
2134           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
2135           vlib_buffer_t *b0;
2136           u32 next0;
          /* set when the buffer is kept by the reassembly context instead of
             being forwarded */
2137           u8 cached0 = 0;
2138           ip4_header_t *ip0;
2139           nat_reass_ip4_t *reass0;
2140           udp_header_t * udp0;
2141           tcp_header_t * tcp0;
2142           snat_session_key_t key0;
2143           clib_bihash_kv_8_8_t kv0, value0;
2144           snat_session_t * s0 = 0;
2145           u16 old_port0, new_port0;
2146           ip_csum_t sum0;
2147
2148           /* speculatively enqueue b0 to the current next frame */
2149           bi0 = from[0];
2150           to_next[0] = bi0;
2151           from += 1;
2152           to_next += 1;
2153           n_left_from -= 1;
2154           n_left_to_next -= 1;
2155
2156           b0 = vlib_get_buffer (vm, bi0);
2157           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2158
2159           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2160           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2161                                                                sw_if_index0);
2162
2163           if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
2164             {
2165               next0 = SNAT_IN2OUT_NEXT_DROP;
2166               b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
2167               goto trace0;
2168             }
2169
2170           ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
2171           udp0 = ip4_next_header (ip0);
2172           tcp0 = (tcp_header_t *) udp0;
2173           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2174
2175           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
2176                                                  ip0->dst_address,
2177                                                  ip0->fragment_id,
2178                                                  ip0->protocol,
2179                                                  1,
2180                                                  &fragments_to_drop);
2181
2182           if (PREDICT_FALSE (!reass0))
2183             {
2184               next0 = SNAT_IN2OUT_NEXT_DROP;
2185               b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_REASS];
2186               nat_log_notice ("maximum reassemblies exceeded");
2187               goto trace0;
2188             }
2189
2190           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
2191             {
              /* the first fragment carries the L4 header, so the session key
                 can be built from the packet itself */
2192               key0.addr = ip0->src_address;
2193               key0.port = udp0->src_port;
2194               key0.protocol = proto0;
2195               key0.fib_index = rx_fib_index0;
2196               kv0.key = key0.as_u64;
2197
2198               if (clib_bihash_search_8_8 (&per_thread_data->in2out, &kv0, &value0))
2199                 {
                  /* no session yet: either leave the packet untranslated
                     (next0 is still SNAT_IN2OUT_NEXT_LOOKUP) or go through
                     slow_path(), which sets s0 */
2200                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
2201                       ip0, proto0, rx_fib_index0, thread_index)))
2202                     goto trace0;
2203
2204                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
2205                                      &s0, node, next0, thread_index, now);
2206
2207                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
2208                     goto trace0;
2209
2210                   reass0->sess_index = s0 - per_thread_data->sessions;
2211                 }
2212               else
2213                 {
2214                   s0 = pool_elt_at_index (per_thread_data->sessions,
2215                                           value0.value);
2216                   reass0->sess_index = value0.value;
2217                 }
              /* NOTE(review): presumably hands back the fragments cached for
                 this reassembly so they can be looped through below -
                 confirm against nat_reass */
2218               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
2219             }
2220           else
2221             {
              /* sess_index == ~0: no session recorded for this reassembly
                 yet, so the fragment cannot be translated now */
2222               if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
2223                 {
2224                   if (nat_ip4_reass_add_fragment (reass0, bi0, &fragments_to_drop))
2225                     {
2226                       b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_FRAG];
2227                       nat_log_notice ("maximum fragments per reassembly exceeded");
2228                       next0 = SNAT_IN2OUT_NEXT_DROP;
2229                       goto trace0;
2230                     }
2231                   cached0 = 1;
2232                   goto trace0;
2233                 }
2234               s0 = pool_elt_at_index (per_thread_data->sessions,
2235                                       reass0->sess_index);
2236             }
2237
          /* translate the source address and patch the IP header checksum */
2238           old_addr0 = ip0->src_address.as_u32;
2239           ip0->src_address = s0->out2in.addr;
2240           new_addr0 = ip0->src_address.as_u32;
2241           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
2242
2243           sum0 = ip0->checksum;
2244           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2245                                  ip4_header_t,
2246                                  src_address /* changed member */);
2247           ip0->checksum = ip_csum_fold (sum0);
2248
          /* the L4 header is only rewritten on the first fragment */
2249           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
2250             {
2251               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2252                 {
2253                   old_port0 = tcp0->src_port;
2254                   tcp0->src_port = s0->out2in.port;
2255                   new_port0 = tcp0->src_port;
2256
2257                   sum0 = tcp0->checksum;
2258                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2259                                          ip4_header_t,
2260                                          dst_address /* changed member */);
2261                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2262                                          ip4_header_t /* cheat */,
2263                                          length /* changed member */);
2264                   tcp0->checksum = ip_csum_fold(sum0);
2265                 }
2266               else
2267                 {
                  /* old_port0 is stored but not read again on this path */
2268                   old_port0 = udp0->src_port;
2269                   udp0->src_port = s0->out2in.port;
2270                   udp0->checksum = 0;
2271                 }
2272             }
2273
2274           /* Hairpinning */
2275           nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port,
2276                                    s0->ext_host_port, proto0);
2277
2278           /* Accounting */
2279           nat44_session_update_counters (s0, now,
2280                                          vlib_buffer_length_in_chain (vm, b0));
2281           /* Per-user LRU list maintenance */
2282           nat44_session_update_lru (sm, s0, thread_index);
2283
2284         trace0:
2285           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2286                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2287             {
2288               nat44_in2out_reass_trace_t *t =
2289                  vlib_add_trace (vm, node, b0, sizeof (*t));
2290               t->cached = cached0;
2291               t->sw_if_index = sw_if_index0;
2292               t->next_index = next0;
2293             }
2294
2295           if (cached0)
2296             {
              /* undo the speculative enqueue: the buffer stays cached */
2297               n_left_to_next++;
2298               to_next--;
2299             }
2300           else
2301             {
2302               pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2303
2304               /* verify speculative enqueue, maybe switch current next frame */
2305               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2306                                                to_next, n_left_to_next,
2307                                                bi0, next0);
2308             }
2309
          /* input exhausted: refill the frame's vector with looped-back
             fragments, at most VLIB_FRAME_SIZE at a time (taken from the
             tail of the vector when there are more) */
2310           if (n_left_from == 0 && vec_len (fragments_to_loopback))
2311             {
2312               from = vlib_frame_vector_args (frame);
2313               u32 len = vec_len (fragments_to_loopback);
2314               if (len <= VLIB_FRAME_SIZE)
2315                 {
2316                   clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
2317                   n_left_from = len;
2318                   vec_reset_length (fragments_to_loopback);
2319                 }
2320               else
2321                 {
2322                   clib_memcpy (from,
2323                                fragments_to_loopback + (len - VLIB_FRAME_SIZE),
2324                                sizeof (u32) * VLIB_FRAME_SIZE);
2325                   n_left_from = VLIB_FRAME_SIZE;
2326                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
2327                 }
2328             }
2329        }
2330
2331       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2332     }
2333
2334   vlib_node_increment_counter (vm, nat44_in2out_reass_node.index,
2335                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2336                                pkts_processed);
2337
  /* buffers collected in fragments_to_drop are sent to the drop
     disposition with the DROP_FRAGMENT error */
2338   nat_send_all_to_node (vm, fragments_to_drop, node,
2339                         &node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT],
2340                         SNAT_IN2OUT_NEXT_DROP);
2341
2342   vec_free (fragments_to_drop);
2343   vec_free (fragments_to_loopback);
2344   return frame->n_vectors;
2345 }
2346
2347 VLIB_REGISTER_NODE (nat44_in2out_reass_node) = {
2348   .function = nat44_in2out_reass_node_fn,
2349   .name = "nat44-in2out-reass",
2350   .vector_size = sizeof (u32),
2351   .format_trace = format_nat44_in2out_reass_trace,
2352   .type = VLIB_NODE_TYPE_INTERNAL,
2353
2354   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2355   .error_strings = snat_in2out_error_strings,
2356
2357   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2358   .next_nodes = {
2359     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2360     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2361     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2362     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2363     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2364   },
2365 };
2366
2367 VLIB_NODE_FUNCTION_MULTIARCH (nat44_in2out_reass_node,
2368                               nat44_in2out_reass_node_fn);
2369
2370 /*******************************/
2371 /*** endpoint-dependent mode ***/
2372 /*******************************/
2373
2374 static_always_inline int
2375 icmp_get_ed_key(ip4_header_t *ip0, nat_ed_ses_key_t *p_key0)
2376 {
2377   icmp46_header_t *icmp0;
2378   nat_ed_ses_key_t key0;
2379   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2380   ip4_header_t *inner_ip0 = 0;
2381   void *l4_header = 0;
2382   icmp46_header_t *inner_icmp0;
2383
2384   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2385   echo0 = (icmp_echo_header_t *)(icmp0+1);
2386
2387   if (!icmp_is_error_message (icmp0))
2388     {
2389       key0.proto = IP_PROTOCOL_ICMP;
2390       key0.l_addr = ip0->src_address;
2391       key0.r_addr = ip0->dst_address;
2392       key0.l_port = echo0->identifier;
2393       key0.r_port = 0;
2394     }
2395   else
2396     {
2397       inner_ip0 = (ip4_header_t *)(echo0+1);
2398       l4_header = ip4_next_header (inner_ip0);
2399       key0.proto = inner_ip0->protocol;
2400       key0.r_addr = inner_ip0->src_address;
2401       key0.l_addr = inner_ip0->dst_address;
2402       switch (ip_proto_to_snat_proto (inner_ip0->protocol))
2403         {
2404         case SNAT_PROTOCOL_ICMP:
2405           inner_icmp0 = (icmp46_header_t*)l4_header;
2406           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2407           key0.r_port = 0;
2408           key0.l_port = inner_echo0->identifier;
2409           break;
2410         case SNAT_PROTOCOL_UDP:
2411         case SNAT_PROTOCOL_TCP:
2412           key0.l_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2413           key0.r_port = ((tcp_udp_header_t*)l4_header)->src_port;
2414           break;
2415         default:
2416           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
2417         }
2418     }
2419   *p_key0 = key0;
2420   return 0;
2421 }
2422
2423 int
2424 nat44_i2o_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void * arg)
2425 {
2426   snat_main_t *sm = &snat_main;
2427   nat44_is_idle_session_ctx_t *ctx = arg;
2428   snat_session_t *s;
2429   u64 sess_timeout_time;
2430   nat_ed_ses_key_t ed_key;
2431   clib_bihash_kv_16_8_t ed_kv;
2432   int i;
2433   snat_address_t *a;
2434   snat_session_key_t key;
2435   snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
2436                                                        ctx->thread_index);
2437
2438   s = pool_elt_at_index (tsm->sessions, kv->value);
2439   sess_timeout_time = s->last_heard + (f64)nat44_session_get_timeout(sm, s);
2440   if (ctx->now >= sess_timeout_time)
2441     {
2442       if (is_fwd_bypass_session (s))
2443         goto delete;
2444
2445       ed_key.l_addr = s->out2in.addr;
2446       ed_key.r_addr = s->ext_host_addr;
2447       ed_key.fib_index = s->out2in.fib_index;
2448       if (snat_is_unk_proto_session (s))
2449         {
2450           ed_key.proto = s->in2out.port;
2451           ed_key.r_port = 0;
2452           ed_key.l_port = 0;
2453         }
2454       else
2455         {
2456           ed_key.proto = snat_proto_to_ip_proto (s->in2out.protocol);
2457           ed_key.l_port = s->out2in.port;
2458           ed_key.r_port = s->ext_host_port;
2459         }
2460       ed_kv.key[0] = ed_key.as_u64[0];
2461       ed_kv.key[1] = ed_key.as_u64[1];
2462       if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &ed_kv, 0))
2463         nat_log_warn ("out2in_ed key del failed");
2464
2465       if (snat_is_unk_proto_session (s))
2466         goto delete;
2467
2468       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
2469                                           s->out2in.addr.as_u32,
2470                                           s->in2out.protocol,
2471                                           s->in2out.port,
2472                                           s->out2in.port,
2473                                           s->in2out.fib_index);
2474
2475       if (is_twice_nat_session (s))
2476         {
2477           for (i = 0; i < vec_len (sm->twice_nat_addresses); i++)
2478             {
2479               key.protocol = s->in2out.protocol;
2480               key.port = s->ext_host_nat_port;
2481               a = sm->twice_nat_addresses + i;
2482               if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32)
2483                 {
2484                   snat_free_outside_address_and_port (sm->twice_nat_addresses,
2485                                                       ctx->thread_index, &key);
2486                   break;
2487                 }
2488             }
2489         }
2490
2491       if (snat_is_session_static (s))
2492         goto delete;
2493
2494       if (s->outside_address_index != ~0)
2495         snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
2496                                             &s->out2in);
2497     delete:
2498       nat44_delete_session (sm, s, ctx->thread_index);
2499       return 1;
2500     }
2501
2502   return 0;
2503 }
2504
2505 static inline u32
2506 icmp_in2out_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0,
2507                           ip4_header_t * ip0, icmp46_header_t * icmp0,
2508                           u32 sw_if_index0, u32 rx_fib_index0,
2509                           vlib_node_runtime_t * node, u32 next0, f64 now,
2510                           u32 thread_index, snat_session_t ** p_s0)
2511 {
2512   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
2513                       next0, thread_index, p_s0, 0);
2514   snat_session_t * s0 = *p_s0;
2515   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
2516     {
2517       /* Hairpinning */
2518       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
2519         snat_icmp_hairpinning(sm, b0, ip0, icmp0, sm->endpoint_dependent);
2520       /* Accounting */
2521       nat44_session_update_counters (s0, now,
2522                                      vlib_buffer_length_in_chain (sm->vlib_main, b0));
2523     }
2524   return next0;
2525 }
2526
2527 static u32
2528 slow_path_ed (snat_main_t *sm,
2529               vlib_buffer_t *b,
2530               u32 rx_fib_index,
2531               clib_bihash_kv_16_8_t *kv,
2532               snat_session_t ** sessionp,
2533               vlib_node_runtime_t * node,
2534               u32 next,
2535               u32 thread_index,
2536               f64 now)
2537 {
2538   snat_session_t *s;
2539   snat_user_t *u;
2540   snat_session_key_t key0, key1;
2541   lb_nat_type_t lb = 0, is_sm = 0;
2542   u32 address_index = ~0;
2543   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2544   nat_ed_ses_key_t *key = (nat_ed_ses_key_t *) kv->key;
2545   u32 proto = ip_proto_to_snat_proto (key->proto);
2546   nat_outside_fib_t *outside_fib;
2547   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
2548   fib_prefix_t pfx = {
2549     .fp_proto = FIB_PROTOCOL_IP4,
2550     .fp_len = 32,
2551     .fp_addr = {
2552         .ip4.as_u32 = key->r_addr.as_u32,
2553     },
2554   };
2555   nat44_is_idle_session_ctx_t ctx;
2556
2557   if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index)))
2558     {
2559       b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
2560       nat_ipfix_logging_max_sessions(sm->max_translations);
2561       nat_log_notice ("maximum sessions exceeded");
2562       return SNAT_IN2OUT_NEXT_DROP;
2563     }
2564
2565   key0.addr = key->l_addr;
2566   key0.port = key->l_port;
2567   key1.protocol = key0.protocol = proto;
2568   key0.fib_index = rx_fib_index;
2569   key1.fib_index = sm->outside_fib_index;
2570   /* First try to match static mapping by local address and port */
2571   if (snat_static_mapping_match (sm, key0, &key1, 0, 0, 0, &lb, 0))
2572     {
2573       /* Try to create dynamic translation */
2574       if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index,
2575                                                thread_index, &key1,
2576                                                &address_index,
2577                                                sm->port_per_thread,
2578                                                tsm->snat_thread_index))
2579         {
2580           nat_log_notice ("addresses exhausted");
2581           b->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
2582           return SNAT_IN2OUT_NEXT_DROP;
2583         }
2584     }
2585   else
2586     is_sm = 1;
2587
2588   u = nat_user_get_or_create (sm, &key->l_addr, rx_fib_index, thread_index);
2589   if (!u)
2590     {
2591       nat_log_warn ("create NAT user failed");
2592       if (!is_sm)
2593         snat_free_outside_address_and_port (sm->addresses,
2594                                             thread_index, &key1);
2595       return SNAT_IN2OUT_NEXT_DROP;
2596     }
2597
2598   s = nat_ed_session_alloc (sm, u, thread_index);
2599   if (!s)
2600     {
2601       nat44_delete_user_with_no_session (sm, u, thread_index);
2602       nat_log_warn ("create NAT session failed");
2603       if (!is_sm)
2604         snat_free_outside_address_and_port (sm->addresses,
2605                                             thread_index, &key1);
2606       return SNAT_IN2OUT_NEXT_DROP;
2607     }
2608
2609   user_session_increment (sm, u, is_sm);
2610   if (is_sm)
2611     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
2612   if (lb)
2613     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
2614   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
2615   s->outside_address_index = address_index;
2616   s->ext_host_addr = key->r_addr;
2617   s->ext_host_port = key->r_port;
2618   s->in2out = key0;
2619   s->out2in = key1;
2620   s->out2in.protocol = key0.protocol;
2621
2622   switch (vec_len (sm->outside_fibs))
2623     {
2624     case 0:
2625       s->out2in.fib_index = sm->outside_fib_index;
2626       break;
2627     case 1:
2628       s->out2in.fib_index = sm->outside_fibs[0].fib_index;
2629       break;
2630     default:
2631       vec_foreach (outside_fib, sm->outside_fibs)
2632         {
2633           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
2634           if (FIB_NODE_INDEX_INVALID != fei)
2635             {
2636               if (fib_entry_get_resolving_interface (fei) != ~0)
2637                 {
2638                   s->out2in.fib_index = outside_fib->fib_index;
2639                   break;
2640                 }
2641             }
2642         }
2643       break;
2644     }
2645
2646   /* Add to lookup tables */
2647   kv->value = s - tsm->sessions;
2648   ctx.now = now;
2649   ctx.thread_index = thread_index;
2650   if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, kv,
2651                                                nat44_i2o_ed_is_idle_session_cb,
2652                                                &ctx))
2653     nat_log_notice ("in2out-ed key add failed");
2654
2655   make_ed_kv (kv, &key1.addr, &key->r_addr, key->proto, s->out2in.fib_index,
2656               key1.port, key->r_port);
2657   kv->value = s - tsm->sessions;
2658   if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->out2in_ed, kv,
2659                                                nat44_o2i_ed_is_idle_session_cb,
2660                                                &ctx))
2661     nat_log_notice ("out2in-ed key add failed");
2662
2663   *sessionp = s;
2664
2665   /* log NAT event */
2666   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
2667                                       s->out2in.addr.as_u32,
2668                                       s->in2out.protocol,
2669                                       s->in2out.port,
2670                                       s->out2in.port,
2671                                       s->in2out.fib_index);
2672   return next;
2673 }
2674
2675 static_always_inline int
2676 nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
2677                         u32 sw_if_index, ip4_header_t * ip, u32 proto,
2678                         u32 rx_fib_index, u32 thread_index)
2679 {
2680   udp_header_t *udp = ip4_next_header (ip);
2681   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2682   clib_bihash_kv_16_8_t kv, value;
2683   snat_session_key_t key0, key1;
2684
2685   make_ed_kv (&kv, &ip->dst_address, &ip->src_address, ip->protocol,
2686               sm->outside_fib_index, udp->dst_port, udp->src_port);
2687
2688   /* NAT packet aimed at external address if */
2689   /* has active sessions */
2690   if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
2691     {
2692       key0.addr = ip->dst_address;
2693       key0.port = udp->dst_port;
2694       key0.protocol = proto;
2695       key0.fib_index = sm->outside_fib_index;
2696       /* or is static mappings */
2697       if (!snat_static_mapping_match(sm, key0, &key1, 1, 0, 0, 0, 0))
2698         return 0;
2699     }
2700   else
2701     return 0;
2702
2703   if (sm->forwarding_enabled)
2704     return 1;
2705
2706   return snat_not_translate_fast(sm, node, sw_if_index, ip, proto, rx_fib_index);
2707 }
2708
2709 static_always_inline int
2710 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
2711                                       u32 thread_index, f64 now,
2712                                       vlib_main_t * vm, vlib_buffer_t * b)
2713 {
2714   nat_ed_ses_key_t key;
2715   clib_bihash_kv_16_8_t kv, value;
2716   udp_header_t *udp;
2717   snat_session_t *s = 0;
2718   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2719
2720   if (!sm->forwarding_enabled)
2721     return 0;
2722
2723   if (ip->protocol == IP_PROTOCOL_ICMP)
2724     {
2725       key.as_u64[0] = key.as_u64[1] = 0;
2726       if (icmp_get_ed_key (ip, &key))
2727         return 0;
2728       key.fib_index = 0;
2729       kv.key[0] = key.as_u64[0];
2730       kv.key[1] = key.as_u64[1];
2731     }
2732   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
2733     {
2734       udp = ip4_next_header(ip);
2735       make_ed_kv (&kv, &ip->src_address, &ip->dst_address, ip->protocol, 0,
2736                   udp->src_port, udp->dst_port);
2737     }
2738   else
2739     {
2740       make_ed_kv (&kv, &ip->src_address, &ip->dst_address, ip->protocol, 0, 0,
2741                   0);
2742     }
2743
2744   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
2745     {
2746       s = pool_elt_at_index (tsm->sessions, value.value);
2747       if (is_fwd_bypass_session (s))
2748         {
2749           if (ip->protocol == IP_PROTOCOL_TCP)
2750             {
2751               tcp_header_t *tcp = ip4_next_header(ip);
2752               if (nat44_set_tcp_session_state_i2o (sm, s, tcp, thread_index))
2753                 return 1;
2754             }
2755           /* Accounting */
2756           nat44_session_update_counters (s, now,
2757                                          vlib_buffer_length_in_chain (vm, b));
2758           return 1;
2759         }
2760       else
2761         return 0;
2762     }
2763
2764   return 0;
2765 }
2766
2767 static_always_inline int
2768 nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip,
2769                                        u8 proto, u16 src_port, u16 dst_port,
2770                                        u32 thread_index, u32 rx_sw_if_index,
2771                                        u32 tx_sw_if_index)
2772 {
2773   clib_bihash_kv_16_8_t kv, value;
2774   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2775   snat_interface_t *i;
2776   snat_session_t *s;
2777   u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index);
2778   u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
2779
2780   /* src NAT check */
2781   make_ed_kv (&kv, &ip->src_address, &ip->dst_address, proto, tx_fib_index,
2782               src_port, dst_port);
2783   if (!clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
2784     return 1;
2785
2786   /* dst NAT check */
2787   make_ed_kv (&kv, &ip->dst_address, &ip->src_address, proto, rx_fib_index,
2788               dst_port, src_port);
2789   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
2790   {
2791     s = pool_elt_at_index (tsm->sessions, value.value);
2792     if (is_fwd_bypass_session (s))
2793       return 0;
2794
2795     /* hairpinning */
2796     pool_foreach (i, sm->output_feature_interfaces,
2797     ({
2798       if ((nat_interface_is_inside(i)) && (rx_sw_if_index == i->sw_if_index))
2799         return 0;
2800     }));
2801     return 1;
2802   }
2803
2804   return 0;
2805 }
2806
2807 u32
2808 icmp_match_in2out_ed(snat_main_t *sm, vlib_node_runtime_t *node,
2809                      u32 thread_index, vlib_buffer_t *b, ip4_header_t *ip,
2810                      u8 *p_proto, snat_session_key_t *p_value,
2811                      u8 *p_dont_translate, void *d, void *e)
2812 {
2813   icmp46_header_t *icmp;
2814   u32 sw_if_index;
2815   u32 rx_fib_index;
2816   nat_ed_ses_key_t key;
2817   snat_session_t *s = 0;
2818   u8 dont_translate = 0;
2819   clib_bihash_kv_16_8_t kv, value;
2820   u32 next = ~0;
2821   int err;
2822   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2823
2824   icmp = (icmp46_header_t *) ip4_next_header (ip);
2825   sw_if_index = vnet_buffer(b)->sw_if_index[VLIB_RX];
2826   rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
2827
2828   key.as_u64[0] = key.as_u64[1] = 0;
2829   err = icmp_get_ed_key (ip, &key);
2830   if (err != 0)
2831     {
2832       b->error = node->errors[err];
2833       next = SNAT_IN2OUT_NEXT_DROP;
2834       goto out;
2835     }
2836   key.fib_index = rx_fib_index;
2837
2838   kv.key[0] = key.as_u64[0];
2839   kv.key[1] = key.as_u64[1];
2840
2841   if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
2842     {
2843       if (vnet_buffer(b)->sw_if_index[VLIB_TX] != ~0)
2844         {
2845           if (PREDICT_FALSE(nat44_ed_not_translate_output_feature(sm, ip,
2846               key.proto, key.l_port, key.r_port, thread_index, sw_if_index,
2847               vnet_buffer(b)->sw_if_index[VLIB_TX])))
2848             {
2849               dont_translate = 1;
2850               goto out;
2851             }
2852         }
2853       else
2854         {
2855           if (PREDICT_FALSE(nat44_ed_not_translate(sm, node, sw_if_index,
2856               ip, SNAT_PROTOCOL_ICMP, rx_fib_index, thread_index)))
2857             {
2858               dont_translate = 1;
2859               goto out;
2860             }
2861         }
2862
2863       if (PREDICT_FALSE(icmp_is_error_message (icmp)))
2864         {
2865           b->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2866           next = SNAT_IN2OUT_NEXT_DROP;
2867           goto out;
2868         }
2869
2870       next = slow_path_ed (sm, b, rx_fib_index, &kv, &s, node, next,
2871                            thread_index, vlib_time_now (sm->vlib_main));
2872
2873       if (PREDICT_FALSE (next == SNAT_IN2OUT_NEXT_DROP))
2874         goto out;
2875     }
2876   else
2877     {
2878       if (PREDICT_FALSE(icmp->type != ICMP4_echo_request &&
2879                         icmp->type != ICMP4_echo_reply &&
2880                         !icmp_is_error_message (icmp)))
2881         {
2882           b->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2883           next = SNAT_IN2OUT_NEXT_DROP;
2884           goto out;
2885         }
2886
2887       s = pool_elt_at_index (tsm->sessions, value.value);
2888     }
2889
2890   *p_proto = ip_proto_to_snat_proto (key.proto);
2891 out:
2892   if (s)
2893     *p_value = s->out2in;
2894   *p_dont_translate = dont_translate;
2895   if (d)
2896     *(snat_session_t**)d = s;
2897   return next;
2898 }
2899
2900 static inline void
2901 nat44_ed_hairpinning_unknown_proto (snat_main_t *sm,
2902                                     vlib_buffer_t * b,
2903                                     ip4_header_t * ip)
2904 {
2905   u32 old_addr, new_addr = 0, ti = 0;
2906   clib_bihash_kv_8_8_t kv, value;
2907   clib_bihash_kv_16_8_t s_kv, s_value;
2908   snat_static_mapping_t *m;
2909   ip_csum_t sum;
2910   snat_session_t *s;
2911   snat_main_per_thread_data_t *tsm;
2912
2913   if (sm->num_workers > 1)
2914     ti = sm->worker_out2in_cb (ip, sm->outside_fib_index);
2915   else
2916     ti = sm->num_workers;
2917   tsm = &sm->per_thread_data[ti];
2918
2919   old_addr = ip->dst_address.as_u32;
2920   make_ed_kv (&s_kv, &ip->dst_address, &ip->src_address, ip->protocol,
2921               sm->outside_fib_index, 0, 0);
2922   if (clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
2923     {
2924       make_sm_kv (&kv, &ip->dst_address, 0, 0, 0);
2925       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
2926         return;
2927
2928       m = pool_elt_at_index (sm->static_mappings, value.value);
2929       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
2930         vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
2931       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
2932     }
2933   else
2934     {
2935       s = pool_elt_at_index (sm->per_thread_data[ti].sessions, s_value.value);
2936       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
2937         vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
2938       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
2939     }
2940   sum = ip->checksum;
2941   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
2942   ip->checksum = ip_csum_fold (sum);
2943 }
2944
2945 static snat_session_t *
2946 nat44_ed_in2out_unknown_proto (snat_main_t *sm,
2947                                vlib_buffer_t * b,
2948                                ip4_header_t * ip,
2949                                u32 rx_fib_index,
2950                                u32 thread_index,
2951                                f64 now,
2952                                vlib_main_t * vm,
2953                                vlib_node_runtime_t * node)
2954 {
2955   clib_bihash_kv_8_8_t kv, value;
2956   clib_bihash_kv_16_8_t s_kv, s_value;
2957   snat_static_mapping_t *m;
2958   u32 old_addr, new_addr = 0;
2959   ip_csum_t sum;
2960   snat_user_t *u;
2961   dlist_elt_t *head, *elt;
2962   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2963   u32 elt_index, head_index, ses_index;
2964   snat_session_t * s;
2965   u32 address_index = ~0, outside_fib_index = sm->outside_fib_index;
2966   int i;
2967   u8 is_sm = 0;
2968   nat_outside_fib_t *outside_fib;
2969   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
2970   fib_prefix_t pfx = {
2971     .fp_proto = FIB_PROTOCOL_IP4,
2972     .fp_len = 32,
2973     .fp_addr = {
2974         .ip4.as_u32 = ip->dst_address.as_u32,
2975     },
2976   };
2977
2978   switch (vec_len (sm->outside_fibs))
2979     {
2980     case 0:
2981       outside_fib_index = sm->outside_fib_index;
2982       break;
2983     case 1:
2984       outside_fib_index = sm->outside_fibs[0].fib_index;
2985       break;
2986     default:
2987       vec_foreach (outside_fib, sm->outside_fibs)
2988         {
2989           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
2990           if (FIB_NODE_INDEX_INVALID != fei)
2991             {
2992               if (fib_entry_get_resolving_interface (fei) != ~0)
2993                 {
2994                   outside_fib_index = outside_fib->fib_index;
2995                   break;
2996                 }
2997             }
2998         }
2999       break;
3000     }
3001   old_addr = ip->src_address.as_u32;
3002
3003   make_ed_kv (&s_kv, &ip->src_address, &ip->dst_address, ip->protocol,
3004               rx_fib_index, 0, 0);
3005
3006   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &s_kv, &s_value))
3007     {
3008       s = pool_elt_at_index (tsm->sessions, s_value.value);
3009       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
3010     }
3011   else
3012     {
3013       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
3014         {
3015           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
3016           nat_ipfix_logging_max_sessions(sm->max_translations);
3017           nat_log_notice ("maximum sessions exceeded");
3018           return 0;
3019         }
3020
3021       u = nat_user_get_or_create (sm, &ip->src_address, rx_fib_index,
3022                                   thread_index);
3023       if (!u)
3024         {
3025           nat_log_warn ("create NAT user failed");
3026           return 0;
3027         }
3028
3029       make_sm_kv (&kv, &ip->src_address, 0, rx_fib_index, 0);
3030
3031       /* Try to find static mapping first */
3032       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
3033         {
3034           m = pool_elt_at_index (sm->static_mappings, value.value);
3035           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
3036           is_sm = 1;
3037           goto create_ses;
3038         }
3039       /* Fallback to 3-tuple key */
3040       else
3041         {
3042           /* Choose same out address as for TCP/UDP session to same destination */
3043           head_index = u->sessions_per_user_list_head_index;
3044           head = pool_elt_at_index (tsm->list_pool, head_index);
3045           elt_index = head->next;
3046           if (PREDICT_FALSE (elt_index == ~0))
3047             ses_index = ~0;
3048           else
3049             {
3050               elt = pool_elt_at_index (tsm->list_pool, elt_index);
3051               ses_index = elt->value;
3052             }
3053
3054           while (ses_index != ~0)
3055             {
3056               s =  pool_elt_at_index (tsm->sessions, ses_index);
3057               elt_index = elt->next;
3058               elt = pool_elt_at_index (tsm->list_pool, elt_index);
3059               ses_index = elt->value;
3060
3061               if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
3062                 {
3063                   new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
3064                   address_index = s->outside_address_index;
3065
3066                   make_ed_kv (&s_kv, &s->out2in.addr, &ip->dst_address,
3067                               ip->protocol, outside_fib_index, 0, 0);
3068                   if (clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
3069                     goto create_ses;
3070
3071                   break;
3072                 }
3073             }
3074
3075           for (i = 0; i < vec_len (sm->addresses); i++)
3076             {
3077               make_ed_kv (&s_kv, &sm->addresses[i].addr, &ip->dst_address,
3078                           ip->protocol, outside_fib_index, 0, 0);
3079               if (clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
3080                 {
3081                   new_addr = ip->src_address.as_u32 =
3082                     sm->addresses[i].addr.as_u32;
3083                   address_index = i;
3084                   goto create_ses;
3085                 }
3086             }
3087           return 0;
3088         }
3089
3090 create_ses:
3091       s = nat_ed_session_alloc (sm, u, thread_index);
3092       if (!s)
3093         {
3094           nat44_delete_user_with_no_session (sm, u, thread_index);
3095           nat_log_warn ("create NAT session failed");
3096           return 0;
3097         }
3098
3099       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
3100       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
3101       s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
3102       s->outside_address_index = address_index;
3103       s->out2in.addr.as_u32 = new_addr;
3104       s->out2in.fib_index = outside_fib_index;
3105       s->in2out.addr.as_u32 = old_addr;
3106       s->in2out.fib_index = rx_fib_index;
3107       s->in2out.port = s->out2in.port = ip->protocol;
3108       if (is_sm)
3109         s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
3110       user_session_increment (sm, u, is_sm);
3111
3112       /* Add to lookup tables */
3113       make_ed_kv (&s_kv, &s->in2out.addr, &ip->dst_address, ip->protocol,
3114                   rx_fib_index, 0, 0);
3115       s_kv.value = s - tsm->sessions;
3116       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1))
3117         nat_log_notice ("in2out key add failed");
3118
3119       make_ed_kv (&s_kv, &s->out2in.addr, &ip->dst_address, ip->protocol,
3120                   outside_fib_index, 0, 0);
3121       s_kv.value = s - tsm->sessions;
3122       if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &s_kv, 1))
3123         nat_log_notice ("out2in key add failed");
3124   }
3125
3126   /* Update IP checksum */
3127   sum = ip->checksum;
3128   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
3129   ip->checksum = ip_csum_fold (sum);
3130
3131   /* Accounting */
3132   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b));
3133
3134   /* Hairpinning */
3135   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
3136     nat44_ed_hairpinning_unknown_proto(sm, b, ip);
3137
3138   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
3139     vnet_buffer(b)->sw_if_index[VLIB_TX] = outside_fib_index;
3140
3141   return s;
3142 }
3143
3144 static inline uword
3145 nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
3146                                 vlib_node_runtime_t * node,
3147                                 vlib_frame_t * frame, int is_slow_path,
3148                                 int is_output_feature)
3149 {
3150   u32 n_left_from, *from, *to_next, pkts_processed = 0, stats_node_index;
3151   snat_in2out_next_t next_index;
3152   snat_main_t *sm = &snat_main;
3153   f64 now = vlib_time_now (vm);
3154   u32 thread_index = vm->thread_index;
3155   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
3156
3157   stats_node_index = is_slow_path ? nat44_ed_in2out_slowpath_node.index :
3158     nat44_ed_in2out_node.index;
3159
3160   from = vlib_frame_vector_args (frame);
3161   n_left_from = frame->n_vectors;
3162   next_index = node->cached_next_index;
3163
3164   while (n_left_from > 0)
3165     {
3166       u32 n_left_to_next;
3167
3168       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
3169
3170       while (n_left_from >= 4 && n_left_to_next >= 2)
3171         {
3172           u32 bi0, bi1;
3173           vlib_buffer_t *b0, *b1;
3174           u32 next0, sw_if_index0, rx_fib_index0, iph_offset0 = 0, proto0,
3175               new_addr0, old_addr0;
3176           u32 next1, sw_if_index1, rx_fib_index1, iph_offset1 = 0, proto1,
3177               new_addr1, old_addr1;
3178           u16 old_port0, new_port0, old_port1, new_port1;
3179           ip4_header_t *ip0, *ip1;
3180           udp_header_t *udp0, *udp1;
3181           tcp_header_t *tcp0, *tcp1;
3182           icmp46_header_t *icmp0, *icmp1;
3183           snat_session_t *s0 = 0, *s1 = 0;
3184           clib_bihash_kv_16_8_t kv0, value0, kv1, value1;
3185           ip_csum_t sum0, sum1;
3186
3187           /* Prefetch next iteration. */
3188           {
3189             vlib_buffer_t * p2, * p3;
3190
3191             p2 = vlib_get_buffer (vm, from[2]);
3192             p3 = vlib_get_buffer (vm, from[3]);
3193
3194             vlib_prefetch_buffer_header (p2, LOAD);
3195             vlib_prefetch_buffer_header (p3, LOAD);
3196
3197             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
3198             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
3199           }
3200
3201           /* speculatively enqueue b0 and b1 to the current next frame */
3202           to_next[0] = bi0 = from[0];
3203           to_next[1] = bi1 = from[1];
3204           from += 2;
3205           to_next += 2;
3206           n_left_from -= 2;
3207           n_left_to_next -= 2;
3208
3209           b0 = vlib_get_buffer (vm, bi0);
3210           b1 = vlib_get_buffer (vm, bi1);
3211
3212           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3213
3214           if (is_output_feature)
3215             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
3216
3217           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
3218                  iph_offset0);
3219
3220           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3221           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3222                                                                sw_if_index0);
3223
3224           if (PREDICT_FALSE(ip0->ttl == 1))
3225             {
3226               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3227               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3228                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3229                                            0);
3230               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3231               goto trace00;
3232             }
3233
3234           udp0 = ip4_next_header (ip0);
3235           tcp0 = (tcp_header_t *) udp0;
3236           icmp0 = (icmp46_header_t *) udp0;
3237           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3238
3239           if (is_slow_path)
3240             {
3241               if (PREDICT_FALSE (proto0 == ~0))
3242                 {
3243                   s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0,
3244                                                       rx_fib_index0,
3245                                                       thread_index, now, vm,
3246                                                       node);
3247                   if (!s0)
3248                     next0 = SNAT_IN2OUT_NEXT_DROP;
3249                   goto trace00;
3250                 }
3251
3252               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3253                 {
3254                   next0 = icmp_in2out_ed_slow_path
3255                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
3256                      next0, now, thread_index, &s0);
3257                   goto trace00;
3258                 }
3259             }
3260           else
3261             {
3262                if (is_output_feature)
3263                 {
3264                   if (PREDICT_FALSE(nat_not_translate_output_feature_fwd(
3265                       sm, ip0, thread_index, now, vm, b0)))
3266                     goto trace00;
3267                 }
3268
3269               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
3270                 {
3271                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3272                   goto trace00;
3273                 }
3274
3275               if (ip4_is_fragment (ip0))
3276                 {
3277                   b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
3278                   next0 = SNAT_IN2OUT_NEXT_DROP;
3279                   goto trace00;
3280                 }
3281             }
3282
3283           make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address, ip0->protocol,
3284                       rx_fib_index0, udp0->src_port, udp0->dst_port);
3285
3286           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
3287             {
3288               if (is_slow_path)
3289                 {
3290                   if (is_output_feature)
3291                     {
3292                       if (PREDICT_FALSE(nat44_ed_not_translate_output_feature(
3293                           sm, ip0, ip0->protocol, udp0->src_port,
3294                           udp0->dst_port, thread_index, sw_if_index0,
3295                           vnet_buffer(b0)->sw_if_index[VLIB_TX])))
3296                         goto trace00;
3297                     }
3298                   else
3299                     {
3300                       if (PREDICT_FALSE(nat44_ed_not_translate(sm, node,
3301                           sw_if_index0, ip0, proto0, rx_fib_index0,
3302                           thread_index)))
3303                         goto trace00;
3304                     }
3305
3306                   next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node,
3307                                         next0, thread_index, now);
3308
3309                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
3310                     goto trace00;
3311                 }
3312               else
3313                 {
3314                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3315                   goto trace00;
3316                 }
3317             }
3318           else
3319             {
3320               s0 = pool_elt_at_index (tsm->sessions, value0.value);
3321             }
3322
3323           b0->flags |= VNET_BUFFER_F_IS_NATED;
3324
3325           if (!is_output_feature)
3326             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
3327
3328           old_addr0 = ip0->src_address.as_u32;
3329           new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
3330           sum0 = ip0->checksum;
3331           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
3332                                  src_address);
3333           if (PREDICT_FALSE (is_twice_nat_session (s0)))
3334             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
3335                                    s0->ext_host_addr.as_u32, ip4_header_t,
3336                                    dst_address);
3337           ip0->checksum = ip_csum_fold (sum0);
3338
3339           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
3340             {
3341               old_port0 = tcp0->src_port;
3342               new_port0 = tcp0->src_port = s0->out2in.port;
3343
3344               sum0 = tcp0->checksum;
3345               sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
3346                                      dst_address);
3347               sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
3348                                      length);
3349               if (PREDICT_FALSE (is_twice_nat_session (s0)))
3350                 {
3351                   sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
3352                                          s0->ext_host_addr.as_u32,
3353                                          ip4_header_t, dst_address);
3354                   sum0 = ip_csum_update (sum0, tcp0->dst_port,
3355                                          s0->ext_host_port, ip4_header_t,
3356                                          length);
3357                   tcp0->dst_port = s0->ext_host_port;
3358                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
3359                 }
3360               mss_clamping (sm, tcp0, &sum0);
3361               tcp0->checksum = ip_csum_fold(sum0);
3362               if (nat44_set_tcp_session_state_i2o (sm, s0, tcp0, thread_index))
3363                 goto trace00;
3364             }
3365           else
3366             {
3367               udp0->src_port = s0->out2in.port;
3368               udp0->checksum = 0;
3369               if (PREDICT_FALSE (is_twice_nat_session (s0)))
3370                 {
3371                   udp0->dst_port = s0->ext_host_port;
3372                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
3373                 }
3374             }
3375
3376           /* Accounting */
3377           nat44_session_update_counters (s0, now,
3378                                          vlib_buffer_length_in_chain (vm, b0));
3379
3380         trace00:
3381           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3382                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3383             {
3384               snat_in2out_trace_t *t =
3385                 vlib_add_trace (vm, node, b0, sizeof (*t));
3386               t->is_slow_path = is_slow_path;
3387               t->sw_if_index = sw_if_index0;
3388               t->next_index = next0;
3389               t->session_index = ~0;
3390               if (s0)
3391                 t->session_index = s0 - tsm->sessions;
3392             }
3393
3394           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3395
3396
3397           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
3398
3399           if (is_output_feature)
3400             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
3401
3402           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
3403                  iph_offset1);
3404
3405           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
3406           rx_fib_index1 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3407                                                                sw_if_index1);
3408
3409           if (PREDICT_FALSE(ip1->ttl == 1))
3410             {
3411               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3412               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
3413                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3414                                            0);
3415               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3416               goto trace01;
3417             }
3418
3419           udp1 = ip4_next_header (ip1);
3420           tcp1 = (tcp_header_t *) udp1;
3421           icmp1 = (icmp46_header_t *) udp1;
3422           proto1 = ip_proto_to_snat_proto (ip1->protocol);
3423
3424           if (is_slow_path)
3425             {
3426               if (PREDICT_FALSE (proto1 == ~0))
3427                 {
3428                   s1 = nat44_ed_in2out_unknown_proto (sm, b1, ip1,
3429                                                       rx_fib_index1,
3430                                                       thread_index, now, vm,
3431                                                       node);
3432                   if (!s1)
3433                     next1 = SNAT_IN2OUT_NEXT_DROP;
3434                   goto trace01;
3435                 }
3436
3437               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
3438                 {
3439                   next1 = icmp_in2out_ed_slow_path
3440                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
3441                      next1, now, thread_index, &s1);
3442                   goto trace01;
3443                 }
3444             }
3445           else
3446             {
3447                if (is_output_feature)
3448                 {
3449                   if (PREDICT_FALSE(nat_not_translate_output_feature_fwd(
3450                       sm, ip1, thread_index, now, vm, b1)))
3451                     goto trace01;
3452                 }
3453
3454               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
3455                 {
3456                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3457                   goto trace01;
3458                 }
3459
3460               if (ip4_is_fragment (ip1))
3461                 {
3462                   b1->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
3463                   next1 = SNAT_IN2OUT_NEXT_DROP;
3464                   goto trace01;
3465                 }
3466             }
3467
3468           make_ed_kv (&kv1, &ip1->src_address, &ip1->dst_address, ip1->protocol,
3469                       rx_fib_index1, udp1->src_port, udp1->dst_port);
3470
3471           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv1, &value1))
3472             {
3473               if (is_slow_path)
3474                 {
3475                   if (is_output_feature)
3476                     {
3477                       if (PREDICT_FALSE(nat44_ed_not_translate_output_feature(
3478                           sm, ip1, ip1->protocol, udp1->src_port,
3479                           udp1->dst_port, thread_index, sw_if_index1,
3480                           vnet_buffer(b1)->sw_if_index[VLIB_TX])))
3481                         goto trace01;
3482                     }
3483                   else
3484                     {
3485                       if (PREDICT_FALSE(nat44_ed_not_translate(sm, node,
3486                           sw_if_index1, ip1, proto1, rx_fib_index1,
3487                           thread_index)))
3488                         goto trace01;
3489                     }
3490
3491                   next1 = slow_path_ed (sm, b1, rx_fib_index1, &kv1, &s1, node,
3492                                         next1, thread_index, now);
3493
3494                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
3495                     goto trace01;
3496                 }
3497               else
3498                 {
3499                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3500                   goto trace01;
3501                 }
3502             }
3503           else
3504             {
3505               s1 = pool_elt_at_index (tsm->sessions, value1.value);
3506             }
3507
3508           b1->flags |= VNET_BUFFER_F_IS_NATED;
3509
3510           if (!is_output_feature)
3511             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
3512
3513           old_addr1 = ip1->src_address.as_u32;
3514           new_addr1 = ip1->src_address.as_u32 = s1->out2in.addr.as_u32;
3515           sum1 = ip1->checksum;
3516           sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
3517                                  src_address);
3518           if (PREDICT_FALSE (is_twice_nat_session (s1)))
3519             sum1 = ip_csum_update (sum1, ip1->dst_address.as_u32,
3520                                    s1->ext_host_addr.as_u32, ip4_header_t,
3521                                    dst_address);
3522           ip1->checksum = ip_csum_fold (sum1);
3523
3524           if (PREDICT_TRUE (proto1 == SNAT_PROTOCOL_TCP))
3525             {
                   /* TCP: rewrite the source port and patch the TCP checksum
                    * incrementally for every field changed by the translation. */
3526               old_port1 = tcp1->src_port;
3527               new_port1 = tcp1->src_port = s1->out2in.port;
3528
3529               sum1 = tcp1->checksum;
3530               sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
3531                                      dst_address);
3532               sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t,
3533                                      length);
3534               if (PREDICT_FALSE (is_twice_nat_session (s1)))
3535                 {
                       /* Twice-NAT: destination address and port are rewritten
                        * too, so fold those deltas into the same running sum. */
3536                   sum1 = ip_csum_update (sum1, ip1->dst_address.as_u32,
3537                                          s1->ext_host_addr.as_u32,
3538                                          ip4_header_t, dst_address);
3539                   sum1 = ip_csum_update (sum1, tcp1->dst_port,
3540                                          s1->ext_host_port, ip4_header_t,
3541                                          length);
3542                   tcp1->dst_port = s1->ext_host_port;
3543                   ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32;
3544                 }
                   /* mss_clamping () receives the running sum by pointer, so it
                    * has to run BEFORE the sum is folded into the header;
                    * otherwise whatever it adds to sum1 is lost.  Same order as
                    * the packet-0 path and the single-packet loop.
                    * NOTE(review): mss_clamping is defined outside this excerpt. */
3545               mss_clamping (sm, tcp1, &sum1);
3546               tcp1->checksum = ip_csum_fold(sum1);
                   /* A non-zero return skips the accounting step and jumps
                    * straight to tracing. */
3547               if (nat44_set_tcp_session_state_i2o (sm, s1, tcp1, thread_index))
3548                 goto trace01;
3549             }
3550           else
3551             {
3552               udp1->src_port = s1->out2in.port;
3553               udp1->checksum = 0;
3554               if (PREDICT_FALSE (is_twice_nat_session (s1)))
3555                 {
3556                   udp1->dst_port = s1->ext_host_port;
3557                   ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32;
3558                 }
3559             }
3560
3561           /* Accounting */
3562           nat44_session_update_counters (s1, now,
3563                                          vlib_buffer_length_in_chain (vm, b1));
3564
3565         trace01:
3566           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3567                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
3568             {
3569               snat_in2out_trace_t *t =
3570                 vlib_add_trace (vm, node, b1, sizeof (*t));
3571               t->is_slow_path = is_slow_path;
3572               t->sw_if_index = sw_if_index1;
3573               t->next_index = next1;
3574               t->session_index = ~0;
3575               if (s1)
3576                 t->session_index = s1 - tsm->sessions;
3577             }
3578
3579           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
3580
3581           /* verify speculative enqueues, maybe switch current next frame */
3582           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
3583                                            to_next, n_left_to_next,
3584                                            bi0, bi1, next0, next1);
3585         }
3586
3587       while (n_left_from > 0 && n_left_to_next > 0)
3588         {
3589           u32 bi0;
3590           vlib_buffer_t *b0;
3591           u32 next0, sw_if_index0, rx_fib_index0, iph_offset0 = 0, proto0,
3592               new_addr0, old_addr0;
3593           u16 old_port0, new_port0;
3594           ip4_header_t *ip0;
3595           udp_header_t *udp0;
3596           tcp_header_t *tcp0;
3597           icmp46_header_t * icmp0;
3598           snat_session_t *s0 = 0;
3599           clib_bihash_kv_16_8_t kv0, value0;
3600           ip_csum_t sum0;
3601
3602           /* speculatively enqueue b0 to the current next frame */
3603           bi0 = from[0];
3604           to_next[0] = bi0;
3605           from += 1;
3606           to_next += 1;
3607           n_left_from -= 1;
3608           n_left_to_next -= 1;
3609
3610           b0 = vlib_get_buffer (vm, bi0);
3611           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3612
3613           if (is_output_feature)
3614             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
3615
3616           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
3617                  iph_offset0);
3618
3619           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3620           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3621                                                                sw_if_index0);
3622
3623           if (PREDICT_FALSE(ip0->ttl == 1))
3624             {
3625               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3626               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3627                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3628                                            0);
3629               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3630               goto trace0;
3631             }
3632
3633           udp0 = ip4_next_header (ip0);
3634           tcp0 = (tcp_header_t *) udp0;
3635           icmp0 = (icmp46_header_t *) udp0;
3636           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3637
3638           if (is_slow_path)
3639             {
3640               if (PREDICT_FALSE (proto0 == ~0))
3641                 {
3642                   s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0,
3643                                                       rx_fib_index0,
3644                                                       thread_index, now, vm,
3645                                                       node);
3646                   if (!s0)
3647                     next0 = SNAT_IN2OUT_NEXT_DROP;
3648                   goto trace0;
3649                 }
3650
3651               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3652                 {
3653                   next0 = icmp_in2out_ed_slow_path
3654                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
3655                      next0, now, thread_index, &s0);
3656                   goto trace0;
3657                 }
3658             }
3659           else
3660             {
3661                if (is_output_feature)
3662                 {
3663                   if (PREDICT_FALSE(nat_not_translate_output_feature_fwd(
3664                       sm, ip0, thread_index, now, vm, b0)))
3665                     goto trace0;
3666                 }
3667
3668               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
3669                 {
3670                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3671                   goto trace0;
3672                 }
3673
3674               if (ip4_is_fragment (ip0))
3675                 {
3676                   b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
3677                   next0 = SNAT_IN2OUT_NEXT_DROP;
3678                   goto trace0;
3679                 }
3680             }
3681
3682           make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address, ip0->protocol,
3683                       rx_fib_index0, udp0->src_port, udp0->dst_port);
3684
3685           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
3686             {
3687               if (is_slow_path)
3688                 {
3689                   if (is_output_feature)
3690                     {
3691                       if (PREDICT_FALSE(nat44_ed_not_translate_output_feature(
3692                           sm, ip0, ip0->protocol, udp0->src_port,
3693                           udp0->dst_port, thread_index, sw_if_index0,
3694                           vnet_buffer(b0)->sw_if_index[VLIB_TX])))
3695                         goto trace0;
3696                     }
3697                   else
3698                     {
3699                       if (PREDICT_FALSE(nat44_ed_not_translate(sm, node,
3700                           sw_if_index0, ip0, proto0, rx_fib_index0,
3701                           thread_index)))
3702                         goto trace0;
3703                     }
3704
3705                   next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node,
3706                                         next0, thread_index, now);
3707
3708                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
3709                     goto trace0;
3710                 }
3711               else
3712                 {
3713                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3714                   goto trace0;
3715                 }
3716             }
3717           else
3718             {
3719               s0 = pool_elt_at_index (tsm->sessions, value0.value);
3720             }
3721
3722           b0->flags |= VNET_BUFFER_F_IS_NATED;
3723
3724           if (!is_output_feature)
3725             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
3726
3727           old_addr0 = ip0->src_address.as_u32;
3728           new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
3729           sum0 = ip0->checksum;
3730           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
3731                                  src_address);
3732           if (PREDICT_FALSE (is_twice_nat_session (s0)))
3733             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
3734                                    s0->ext_host_addr.as_u32, ip4_header_t,
3735                                    dst_address);
3736           ip0->checksum = ip_csum_fold (sum0);
3737
3738           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
3739             {
3740               old_port0 = tcp0->src_port;
3741               new_port0 = tcp0->src_port = s0->out2in.port;
3742
3743               sum0 = tcp0->checksum;
3744               sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
3745                                      dst_address);
3746               sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
3747                                      length);
3748               if (PREDICT_FALSE (is_twice_nat_session (s0)))
3749                 {
3750                   sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
3751                                          s0->ext_host_addr.as_u32,
3752                                          ip4_header_t, dst_address);
3753                   sum0 = ip_csum_update (sum0, tcp0->dst_port,
3754                                          s0->ext_host_port, ip4_header_t,
3755                                          length);
3756                   tcp0->dst_port = s0->ext_host_port;
3757                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
3758                 }
3759               mss_clamping (sm, tcp0, &sum0);
3760               tcp0->checksum = ip_csum_fold(sum0);
3761               if (nat44_set_tcp_session_state_i2o (sm, s0, tcp0, thread_index))
3762                 goto trace0;
3763             }
3764           else
3765             {
3766               udp0->src_port = s0->out2in.port;
3767               udp0->checksum = 0;
3768               if (PREDICT_FALSE (is_twice_nat_session (s0)))
3769                 {
3770                   udp0->dst_port = s0->ext_host_port;
3771                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
3772                 }
3773             }
3774
3775           /* Accounting */
3776           nat44_session_update_counters (s0, now,
3777                                          vlib_buffer_length_in_chain (vm, b0));
3778
3779         trace0:
3780           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3781                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3782             {
3783               snat_in2out_trace_t *t =
3784                 vlib_add_trace (vm, node, b0, sizeof (*t));
3785               t->is_slow_path = is_slow_path;
3786               t->sw_if_index = sw_if_index0;
3787               t->next_index = next0;
3788               t->session_index = ~0;
3789               if (s0)
3790                 t->session_index = s0 - tsm->sessions;
3791             }
3792
3793           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3794
3795           /* verify speculative enqueue, maybe switch current next frame */
3796           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3797                                            to_next, n_left_to_next,
3798                                            bi0, next0);
3799         }
3800
3801       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3802     }
3803
3804   vlib_node_increment_counter (vm, stats_node_index,
3805                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3806                                pkts_processed);
3807   return frame->n_vectors;
3808 }
3809
3810 static uword
3811 nat44_ed_in2out_fast_path_fn (vlib_main_t * vm,
3812                               vlib_node_runtime_t * node,
3813                               vlib_frame_t * frame)
3814 {
       /*
        * Node function of "nat44-ed-in2out".  Hands the frame to the shared
        * inline worker with both trailing constants set to 0.  Judging by the
        * sibling wrappers these are is_slow_path = 0 and is_output_feature = 0
        * (the worker's signature is outside this excerpt -- confirm).
        * Returns the worker's result unchanged.
        */
3815   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 0, 0);
3816 }
3817
3818 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
       /* Graph node "nat44-ed-in2out": fast-path variant for the input feature. */
3819   .function = nat44_ed_in2out_fast_path_fn,
3820   .name = "nat44-ed-in2out",
3821   .vector_size = sizeof (u32),  /* frame entries are u32 buffer indices */
3822   .format_trace = format_snat_in2out_trace,  /* prints tag, sw_if_index, next index, session */
3823   .type = VLIB_NODE_TYPE_INTERNAL,
3824
3825   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3826   .error_strings = snat_in2out_error_strings,
3827
3828   .runtime_data_bytes = sizeof (snat_runtime_t),
3829
3830   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3831
3832   /* edit / add dispositions here */
3833   .next_nodes = {
         /* Notes below describe the part of the node function visible next to
          * this registration; its beginning is outside this excerpt. */
3834     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",  /* e.g. fragments on the fast path */
3835     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",  /* default disposition */
3836     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-ed-in2out-slowpath",  /* unknown proto, ICMP or session miss */
3837     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",  /* TTL == 1, time exceeded */
3838     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",  /* not selected in the visible code */
3839   },
3840 };
3841
     /* NOTE(review): presumably generates per-CPU-architecture variants of the
      * node function; the macro definition is not visible here. */
3842 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_in2out_node, nat44_ed_in2out_fast_path_fn);
3843
3844 static uword
3845 nat44_ed_in2out_output_fast_path_fn (vlib_main_t * vm,
3846                                      vlib_node_runtime_t * node,
3847                                      vlib_frame_t * frame)
3848 {
       /*
        * Node function of "nat44-ed-in2out-output".  Calls the shared inline
        * worker with trailing constants (0, 1).  Judging by the sibling
        * wrappers these are is_slow_path = 0 and is_output_feature = 1 (the
        * worker's signature is outside this excerpt -- confirm).
        * Returns the worker's result unchanged.
        */
3849   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 0, 1);
3850 }
3851
3852 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
       /* Graph node "nat44-ed-in2out-output": fast-path variant for the
        * output feature. */
3853   .function = nat44_ed_in2out_output_fast_path_fn,
3854   .name = "nat44-ed-in2out-output",
3855   .vector_size = sizeof (u32),  /* frame entries are u32 buffer indices */
3856   .format_trace = format_snat_in2out_trace,  /* prints tag, sw_if_index, next index, session */
3857   .type = VLIB_NODE_TYPE_INTERNAL,
3858
3859   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3860   .error_strings = snat_in2out_error_strings,
3861
3862   .runtime_data_bytes = sizeof (snat_runtime_t),
3863
3864   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3865
3866   /* edit / add dispositions here */
3867   .next_nodes = {
         /* Unlike the input-feature nodes, the default (LOOKUP) disposition
          * goes to "interface-output", and the slow path is the output
          * flavour of the slow-path node. */
3868     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3869     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
3870     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
3871     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",  /* TTL == 1, time exceeded */
3872     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",  /* not selected in the visible code */
3873   },
3874 };
3875
     /* NOTE(review): presumably generates per-CPU-architecture variants of the
      * node function; the macro definition is not visible here. */
3876 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_in2out_output_node,
3877                               nat44_ed_in2out_output_fast_path_fn);
3878
3879 static uword
3880 nat44_ed_in2out_slow_path_fn (vlib_main_t * vm,
3881                               vlib_node_runtime_t * node,
3882                               vlib_frame_t * frame)
3883 {
       /*
        * Node function of "nat44-ed-in2out-slowpath".  Calls the shared inline
        * worker with trailing constants (1, 0).  Judging by the sibling
        * wrappers these are is_slow_path = 1 and is_output_feature = 0 (the
        * worker's signature is outside this excerpt -- confirm).
        * Returns the worker's result unchanged.
        */
3884   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 1, 0);
3885 }
3886
3887 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
       /* Graph node "nat44-ed-in2out-slowpath": slow-path variant for the
        * input feature; it is the SLOW_PATH target of "nat44-ed-in2out". */
3888   .function = nat44_ed_in2out_slow_path_fn,
3889   .name = "nat44-ed-in2out-slowpath",
3890   .vector_size = sizeof (u32),  /* frame entries are u32 buffer indices */
3891   .format_trace = format_snat_in2out_trace,  /* prints tag, sw_if_index, next index, session */
3892   .type = VLIB_NODE_TYPE_INTERNAL,
3893
3894   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3895   .error_strings = snat_in2out_error_strings,
3896
3897   .runtime_data_bytes = sizeof (snat_runtime_t),
3898
3899   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3900
3901   /* edit / add dispositions here */
3902   .next_nodes = {
3903     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3904     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",  /* default disposition */
         /* Self-reference: the visible worker code assigns SLOW_PATH only in
          * branches taken when is_slow_path is 0. */
3905     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-ed-in2out-slowpath",
3906     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",  /* TTL == 1, time exceeded */
3907     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",  /* not selected in the visible code */
3908   },
3909 };
3910
     /* NOTE(review): presumably generates per-CPU-architecture variants of the
      * node function; the macro definition is not visible here. */
3911 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_in2out_slowpath_node,
3912                               nat44_ed_in2out_slow_path_fn);
3913
3914 static uword
3915 nat44_ed_in2out_output_slow_path_fn (vlib_main_t * vm,
3916                                      vlib_node_runtime_t * node,
3917                                      vlib_frame_t * frame)
3918 {
       /*
        * Node function of "nat44-ed-in2out-output-slowpath".  Calls the shared
        * inline worker with trailing constants (1, 1).  Judging by the sibling
        * wrappers these are is_slow_path = 1 and is_output_feature = 1 (the
        * worker's signature is outside this excerpt -- confirm).
        * Returns the worker's result unchanged.
        */
3919   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 1, 1);
3920 }
3921
3922 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
       /* Graph node "nat44-ed-in2out-output-slowpath": slow-path variant for
        * the output feature; it is the SLOW_PATH target of
        * "nat44-ed-in2out-output". */
3923   .function = nat44_ed_in2out_output_slow_path_fn,
3924   .name = "nat44-ed-in2out-output-slowpath",
3925   .vector_size = sizeof (u32),  /* frame entries are u32 buffer indices */
3926   .format_trace = format_snat_in2out_trace,  /* prints tag, sw_if_index, next index, session */
3927   .type = VLIB_NODE_TYPE_INTERNAL,
3928
3929   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3930   .error_strings = snat_in2out_error_strings,
3931
3932   .runtime_data_bytes = sizeof (snat_runtime_t),
3933
3934   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3935
3936   /* edit / add dispositions here */
3937   .next_nodes = {
3938     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3939     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",  /* default disposition for the output feature */
         /* Self-reference: the visible worker code assigns SLOW_PATH only in
          * branches taken when is_slow_path is 0. */
3940     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
3941     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",  /* TTL == 1, time exceeded */
3942     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",  /* not selected in the visible code */
3943   },
3944 };
3945
     /* NOTE(review): presumably generates per-CPU-architecture variants of the
      * node function; the macro definition is not visible here. */
3946 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_in2out_output_slowpath_node,
3947                               nat44_ed_in2out_output_slow_path_fn);
3948
3949 /**************************/
3950 /*** deterministic mode ***/
3951 /**************************/
3952 static uword
3953 snat_det_in2out_node_fn (vlib_main_t * vm,
3954                          vlib_node_runtime_t * node,
3955                          vlib_frame_t * frame)
3956 {
3957   u32 n_left_from, * from, * to_next;
3958   snat_in2out_next_t next_index;
3959   u32 pkts_processed = 0;
3960   snat_main_t * sm = &snat_main;
3961   u32 now = (u32) vlib_time_now (vm);
3962   u32 thread_index = vm->thread_index;
3963
3964   from = vlib_frame_vector_args (frame);
3965   n_left_from = frame->n_vectors;
3966   next_index = node->cached_next_index;
3967
3968   while (n_left_from > 0)
3969     {
3970       u32 n_left_to_next;
3971
3972       vlib_get_next_frame (vm, node, next_index,
3973                            to_next, n_left_to_next);
3974
3975       while (n_left_from >= 4 && n_left_to_next >= 2)
3976         {
3977           u32 bi0, bi1;
3978           vlib_buffer_t * b0, * b1;
3979           u32 next0, next1;
3980           u32 sw_if_index0, sw_if_index1;
3981           ip4_header_t * ip0, * ip1;
3982           ip_csum_t sum0, sum1;
3983           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
3984           u16 old_port0, new_port0, lo_port0, i0;
3985           u16 old_port1, new_port1, lo_port1, i1;
3986           udp_header_t * udp0, * udp1;
3987           tcp_header_t * tcp0, * tcp1;
3988           u32 proto0, proto1;
3989           snat_det_out_key_t key0, key1;
3990           snat_det_map_t * dm0, * dm1;
3991           snat_det_session_t * ses0 = 0, * ses1 = 0;
3992           u32 rx_fib_index0, rx_fib_index1;
3993           icmp46_header_t * icmp0, * icmp1;
3994
3995           /* Prefetch next iteration. */
3996           {
3997             vlib_buffer_t * p2, * p3;
3998
3999             p2 = vlib_get_buffer (vm, from[2]);
4000             p3 = vlib_get_buffer (vm, from[3]);
4001
4002             vlib_prefetch_buffer_header (p2, LOAD);
4003             vlib_prefetch_buffer_header (p3, LOAD);
4004
4005             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
4006             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
4007           }
4008
4009           /* speculatively enqueue b0 and b1 to the current next frame */
4010           to_next[0] = bi0 = from[0];
4011           to_next[1] = bi1 = from[1];
4012           from += 2;
4013           to_next += 2;
4014           n_left_from -= 2;
4015           n_left_to_next -= 2;
4016
4017           b0 = vlib_get_buffer (vm, bi0);
4018           b1 = vlib_get_buffer (vm, bi1);
4019
4020           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
4021           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
4022
4023           ip0 = vlib_buffer_get_current (b0);
4024           udp0 = ip4_next_header (ip0);
4025           tcp0 = (tcp_header_t *) udp0;
4026
4027           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
4028
4029           if (PREDICT_FALSE(ip0->ttl == 1))
4030             {
4031               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
4032               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
4033                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
4034                                            0);
4035               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
4036               goto trace0;
4037             }
4038
4039           proto0 = ip_proto_to_snat_proto (ip0->protocol);
4040
4041           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
4042             {
4043               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
4044               icmp0 = (icmp46_header_t *) udp0;
4045
4046               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
4047                                   rx_fib_index0, node, next0, thread_index,
4048                                   &ses0, &dm0);
4049               goto trace0;
4050             }
4051
4052           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
4053           if (PREDICT_FALSE(!dm0))
4054             {
4055               nat_log_info ("no match for internal host %U",
4056                             format_ip4_address, &ip0->src_address);
4057               next0 = SNAT_IN2OUT_NEXT_DROP;
4058               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
4059               goto trace0;
4060             }
4061
4062           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
4063
4064           key0.ext_host_addr = ip0->dst_address;
4065           key0.ext_host_port = tcp0->dst;
4066
4067           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
4068           if (PREDICT_FALSE(!ses0))
4069             {
4070               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
4071                 {
4072                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
4073                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
4074
4075                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
4076                     continue;
4077
4078                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
4079                   break;
4080                 }
4081               if (PREDICT_FALSE(!ses0))
4082                 {
4083                   /* too many sessions for user, send ICMP error packet */
4084
4085                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
4086                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
4087                                                ICMP4_destination_unreachable_destination_unreachable_host,
4088                                                0);
4089                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
4090                   goto trace0;
4091                 }
4092             }
4093
4094           new_port0 = ses0->out.out_port;
4095
4096           old_addr0.as_u32 = ip0->src_address.as_u32;
4097           ip0->src_address.as_u32 = new_addr0.as_u32;
4098           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
4099
4100           sum0 = ip0->checksum;
4101           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
4102                                  ip4_header_t,
4103                                  src_address /* changed member */);
4104           ip0->checksum = ip_csum_fold (sum0);
4105
4106           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
4107             {
4108               if (tcp0->flags & TCP_FLAG_SYN)
4109                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
4110               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
4111                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
4112               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
4113                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
4114               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
4115                 snat_det_ses_close(dm0, ses0);
4116               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
4117                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
4118               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
4119                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
4120
4121               old_port0 = tcp0->src;
4122               tcp0->src = new_port0;
4123
4124               sum0 = tcp0->checksum;
4125               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
4126                                      ip4_header_t,
4127                                      dst_address /* changed member */);
4128               sum0 = ip_csum_update (sum0, old_port0, new_port0,
4129                                      ip4_header_t /* cheat */,
4130                                      length /* changed member */);
4131               mss_clamping (sm, tcp0, &sum0);
4132               tcp0->checksum = ip_csum_fold(sum0);
4133             }
4134           else
4135             {
4136               ses0->state = SNAT_SESSION_UDP_ACTIVE;
4137               old_port0 = udp0->src_port;
4138               udp0->src_port = new_port0;
4139               udp0->checksum = 0;
4140             }
4141
4142           switch(ses0->state)
4143             {
4144             case SNAT_SESSION_UDP_ACTIVE:
4145                 ses0->expire = now + sm->udp_timeout;
4146                 break;
4147             case SNAT_SESSION_TCP_SYN_SENT:
4148             case SNAT_SESSION_TCP_FIN_WAIT:
4149             case SNAT_SESSION_TCP_CLOSE_WAIT:
4150             case SNAT_SESSION_TCP_LAST_ACK:
4151                 ses0->expire = now + sm->tcp_transitory_timeout;
4152                 break;
4153             case SNAT_SESSION_TCP_ESTABLISHED:
4154                 ses0->expire = now + sm->tcp_established_timeout;
4155                 break;
4156             }
4157
4158         trace0:
4159           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
4160                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
4161             {
4162               snat_in2out_trace_t *t =
4163                  vlib_add_trace (vm, node, b0, sizeof (*t));
4164               t->is_slow_path = 0;
4165               t->sw_if_index = sw_if_index0;
4166               t->next_index = next0;
4167               t->session_index = ~0;
4168               if (ses0)
4169                 t->session_index = ses0 - dm0->sessions;
4170             }
4171
4172           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
4173
4174           ip1 = vlib_buffer_get_current (b1);
4175           udp1 = ip4_next_header (ip1);
4176           tcp1 = (tcp_header_t *) udp1;
4177
4178           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
4179
4180           if (PREDICT_FALSE(ip1->ttl == 1))
4181             {
4182               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
4183               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
4184                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
4185                                            0);
4186               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
4187               goto trace1;
4188             }
4189
4190           proto1 = ip_proto_to_snat_proto (ip1->protocol);
4191
4192           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
4193             {
4194               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
4195               icmp1 = (icmp46_header_t *) udp1;
4196
4197               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
4198                                   rx_fib_index1, node, next1, thread_index,
4199                                   &ses1, &dm1);
4200               goto trace1;
4201             }
4202
4203           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
4204           if (PREDICT_FALSE(!dm1))
4205             {
4206               nat_log_info ("no match for internal host %U",
4207                             format_ip4_address, &ip0->src_address);
4208               next1 = SNAT_IN2OUT_NEXT_DROP;
4209               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
4210               goto trace1;
4211             }
4212
4213           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
4214
4215           key1.ext_host_addr = ip1->dst_address;
4216           key1.ext_host_port = tcp1->dst;
4217
4218           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
4219           if (PREDICT_FALSE(!ses1))
4220             {
4221               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
4222                 {
4223                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
4224                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
4225
4226                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
4227                     continue;
4228
4229                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
4230                   break;
4231                 }
4232               if (PREDICT_FALSE(!ses1))
4233                 {
4234                   /* too many sessions for user, send ICMP error packet */
4235
4236                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
4237                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
4238                                                ICMP4_destination_unreachable_destination_unreachable_host,
4239                                                0);
4240                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
4241                   goto trace1;
4242                 }
4243             }
4244
4245           new_port1 = ses1->out.out_port;
4246
4247           old_addr1.as_u32 = ip1->src_address.as_u32;
4248           ip1->src_address.as_u32 = new_addr1.as_u32;
4249           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
4250
4251           sum1 = ip1->checksum;
4252           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
4253                                  ip4_header_t,
4254                                  src_address /* changed member */);
4255           ip1->checksum = ip_csum_fold (sum1);
4256
4257           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
4258             {
4259               if (tcp1->flags & TCP_FLAG_SYN)
4260                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
4261               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
4262                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
4263               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
4264                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
4265               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
4266                 snat_det_ses_close(dm1, ses1);
4267               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
4268                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
4269               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
4270                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
4271
4272               old_port1 = tcp1->src;
4273               tcp1->src = new_port1;
4274
4275               sum1 = tcp1->checksum;
4276               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
4277                                      ip4_header_t,
4278                                      dst_address /* changed member */);
4279               sum1 = ip_csum_update (sum1, old_port1, new_port1,
4280                                      ip4_header_t /* cheat */,
4281                                      length /* changed member */);
4282               mss_clamping (sm, tcp1, &sum1);
4283               tcp1->checksum = ip_csum_fold(sum1);
4284             }
4285           else
4286             {
4287               ses1->state = SNAT_SESSION_UDP_ACTIVE;
4288               old_port1 = udp1->src_port;
4289               udp1->src_port = new_port1;
4290               udp1->checksum = 0;
4291             }
4292
4293           switch(ses1->state)
4294             {
4295             case SNAT_SESSION_UDP_ACTIVE:
4296                 ses1->expire = now + sm->udp_timeout;
4297                 break;
4298             case SNAT_SESSION_TCP_SYN_SENT:
4299             case SNAT_SESSION_TCP_FIN_WAIT:
4300             case SNAT_SESSION_TCP_CLOSE_WAIT:
4301             case SNAT_SESSION_TCP_LAST_ACK:
4302                 ses1->expire = now + sm->tcp_transitory_timeout;
4303                 break;
4304             case SNAT_SESSION_TCP_ESTABLISHED:
4305                 ses1->expire = now + sm->tcp_established_timeout;
4306                 break;
4307             }
4308
4309         trace1:
4310           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
4311                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
4312             {
4313               snat_in2out_trace_t *t =
4314                  vlib_add_trace (vm, node, b1, sizeof (*t));
4315               t->is_slow_path = 0;
4316               t->sw_if_index = sw_if_index1;
4317               t->next_index = next1;
4318               t->session_index = ~0;
4319               if (ses1)
4320                 t->session_index = ses1 - dm1->sessions;
4321             }
4322
4323           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
4324
4325           /* verify speculative enqueues, maybe switch current next frame */
4326           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
4327                                            to_next, n_left_to_next,
4328                                            bi0, bi1, next0, next1);
4329          }
4330
4331       while (n_left_from > 0 && n_left_to_next > 0)
4332         {
4333           u32 bi0;
4334           vlib_buffer_t * b0;
4335           u32 next0;
4336           u32 sw_if_index0;
4337           ip4_header_t * ip0;
4338           ip_csum_t sum0;
4339           ip4_address_t new_addr0, old_addr0;
4340           u16 old_port0, new_port0, lo_port0, i0;
4341           udp_header_t * udp0;
4342           tcp_header_t * tcp0;
4343           u32 proto0;
4344           snat_det_out_key_t key0;
4345           snat_det_map_t * dm0;
4346           snat_det_session_t * ses0 = 0;
4347           u32 rx_fib_index0;
4348           icmp46_header_t * icmp0;
4349
4350           /* speculatively enqueue b0 to the current next frame */
4351           bi0 = from[0];
4352           to_next[0] = bi0;
4353           from += 1;
4354           to_next += 1;
4355           n_left_from -= 1;
4356           n_left_to_next -= 1;
4357
4358           b0 = vlib_get_buffer (vm, bi0);
4359           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
4360
4361           ip0 = vlib_buffer_get_current (b0);
4362           udp0 = ip4_next_header (ip0);
4363           tcp0 = (tcp_header_t *) udp0;
4364
4365           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
4366
4367           if (PREDICT_FALSE(ip0->ttl == 1))
4368             {
4369               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
4370               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
4371                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
4372                                            0);
4373               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
4374               goto trace00;
4375             }
4376
4377           proto0 = ip_proto_to_snat_proto (ip0->protocol);
4378
4379           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
4380             {
4381               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
4382               icmp0 = (icmp46_header_t *) udp0;
4383
4384               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
4385                                   rx_fib_index0, node, next0, thread_index,
4386                                   &ses0, &dm0);
4387               goto trace00;
4388             }
4389
4390           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
4391           if (PREDICT_FALSE(!dm0))
4392             {
4393               nat_log_info ("no match for internal host %U",
4394                             format_ip4_address, &ip0->src_address);
4395               next0 = SNAT_IN2OUT_NEXT_DROP;
4396               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
4397               goto trace00;
4398             }
4399
4400           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
4401
4402           key0.ext_host_addr = ip0->dst_address;
4403           key0.ext_host_port = tcp0->dst;
4404
4405           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
4406           if (PREDICT_FALSE(!ses0))
4407             {
4408               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
4409                 {
4410                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
4411                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
4412
4413                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
4414                     continue;
4415
4416                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
4417                   break;
4418                 }
4419               if (PREDICT_FALSE(!ses0))
4420                 {
4421                   /* too many sessions for user, send ICMP error packet */
4422
4423                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
4424                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
4425                                                ICMP4_destination_unreachable_destination_unreachable_host,
4426                                                0);
4427                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
4428                   goto trace00;
4429                 }
4430             }
4431
4432           new_port0 = ses0->out.out_port;
4433
4434           old_addr0.as_u32 = ip0->src_address.as_u32;
4435           ip0->src_address.as_u32 = new_addr0.as_u32;
4436           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
4437
4438           sum0 = ip0->checksum;
4439           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
4440                                  ip4_header_t,
4441                                  src_address /* changed member */);
4442           ip0->checksum = ip_csum_fold (sum0);
4443
4444           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
4445             {
4446               if (tcp0->flags & TCP_FLAG_SYN)
4447                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
4448               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
4449                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
4450               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
4451                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
4452               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
4453                 snat_det_ses_close(dm0, ses0);
4454               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
4455                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
4456               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
4457                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
4458
4459               old_port0 = tcp0->src;
4460               tcp0->src = new_port0;
4461
4462               sum0 = tcp0->checksum;
4463               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
4464                                      ip4_header_t,
4465                                      dst_address /* changed member */);
4466               sum0 = ip_csum_update (sum0, old_port0, new_port0,
4467                                      ip4_header_t /* cheat */,
4468                                      length /* changed member */);
4469               mss_clamping (sm, tcp0, &sum0);
4470               tcp0->checksum = ip_csum_fold(sum0);
4471             }
4472           else
4473             {
4474               ses0->state = SNAT_SESSION_UDP_ACTIVE;
4475               old_port0 = udp0->src_port;
4476               udp0->src_port = new_port0;
4477               udp0->checksum = 0;
4478             }
4479
4480           switch(ses0->state)
4481             {
4482             case SNAT_SESSION_UDP_ACTIVE:
4483                 ses0->expire = now + sm->udp_timeout;
4484                 break;
4485             case SNAT_SESSION_TCP_SYN_SENT:
4486             case SNAT_SESSION_TCP_FIN_WAIT:
4487             case SNAT_SESSION_TCP_CLOSE_WAIT:
4488             case SNAT_SESSION_TCP_LAST_ACK:
4489                 ses0->expire = now + sm->tcp_transitory_timeout;
4490                 break;
4491             case SNAT_SESSION_TCP_ESTABLISHED:
4492                 ses0->expire = now + sm->tcp_established_timeout;
4493                 break;
4494             }
4495
4496         trace00:
4497           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
4498                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
4499             {
4500               snat_in2out_trace_t *t =
4501                  vlib_add_trace (vm, node, b0, sizeof (*t));
4502               t->is_slow_path = 0;
4503               t->sw_if_index = sw_if_index0;
4504               t->next_index = next0;
4505               t->session_index = ~0;
4506               if (ses0)
4507                 t->session_index = ses0 - dm0->sessions;
4508             }
4509
4510           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
4511
4512           /* verify speculative enqueue, maybe switch current next frame */
4513           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
4514                                            to_next, n_left_to_next,
4515                                            bi0, next0);
4516         }
4517
4518       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
4519     }
4520
4521   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
4522                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
4523                                pkts_processed);
4524   return frame->n_vectors;
4525 }
4526
4527 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
4528   .function = snat_det_in2out_node_fn,
4529   .name = "nat44-det-in2out",
4530   .vector_size = sizeof (u32),
4531   .format_trace = format_snat_in2out_trace,
4532   .type = VLIB_NODE_TYPE_INTERNAL,
4533
4534   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
4535   .error_strings = snat_in2out_error_strings,
4536
4537   .runtime_data_bytes = sizeof (snat_runtime_t),
4538
4539   .n_next_nodes = 3,
4540
4541   /* edit / add dispositions here */
4542   .next_nodes = {
4543     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
4544     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
4545     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
4546   },
4547 };
4548
4549 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
4550
4551 /**
4552  * Get address and port values to be used for ICMP packet translation
4553  * and create session if needed
4554  *
4555  * @param[in,out] sm             NAT main
4556  * @param[in,out] node           NAT node runtime
4557  * @param[in] thread_index       thread index
4558  * @param[in,out] b0             buffer containing packet to be translated
4559  * @param[out] p_proto           protocol used for matching
4560  * @param[out] p_value           address and port after NAT translation
4561  * @param[out] p_dont_translate  if packet should not be translated
4562  * @param d                      optional parameter
4563  * @param e                      optional parameter
4564  */
4565 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
4566                           u32 thread_index, vlib_buffer_t *b0,
4567                           ip4_header_t *ip0, u8 *p_proto,
4568                           snat_session_key_t *p_value,
4569                           u8 *p_dont_translate, void *d, void *e)
4570 {
4571   icmp46_header_t *icmp0;
4572   u32 sw_if_index0;
4573   u32 rx_fib_index0;
4574   u8 protocol;
4575   snat_det_out_key_t key0;
4576   u8 dont_translate = 0;
4577   u32 next0 = ~0;
4578   icmp_echo_header_t *echo0, *inner_echo0 = 0;
4579   ip4_header_t *inner_ip0;
4580   void *l4_header = 0;
4581   icmp46_header_t *inner_icmp0;
4582   snat_det_map_t * dm0 = 0;
4583   ip4_address_t new_addr0;
4584   u16 lo_port0, i0;
4585   snat_det_session_t * ses0 = 0;
4586   ip4_address_t in_addr;
4587   u16 in_port;
4588
4589   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
4590   echo0 = (icmp_echo_header_t *)(icmp0+1);
4591   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
4592   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
4593
4594   if (!icmp_is_error_message (icmp0))
4595     {
4596       protocol = SNAT_PROTOCOL_ICMP;
4597       in_addr = ip0->src_address;
4598       in_port = echo0->identifier;
4599     }
4600   else
4601     {
4602       inner_ip0 = (ip4_header_t *)(echo0+1);
4603       l4_header = ip4_next_header (inner_ip0);
4604       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
4605       in_addr = inner_ip0->dst_address;
4606       switch (protocol)
4607         {
4608         case SNAT_PROTOCOL_ICMP:
4609           inner_icmp0 = (icmp46_header_t*)l4_header;
4610           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
4611           in_port = inner_echo0->identifier;
4612           break;
4613         case SNAT_PROTOCOL_UDP:
4614         case SNAT_PROTOCOL_TCP:
4615           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
4616           break;
4617         default:
4618           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
4619           next0 = SNAT_IN2OUT_NEXT_DROP;
4620           goto out;
4621         }
4622     }
4623
4624   dm0 = snat_det_map_by_user(sm, &in_addr);
4625   if (PREDICT_FALSE(!dm0))
4626     {
4627       nat_log_info ("no match for internal host %U",
4628                     format_ip4_address, &in_addr);
4629       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
4630           IP_PROTOCOL_ICMP, rx_fib_index0)))
4631         {
4632           dont_translate = 1;
4633           goto out;
4634         }
4635       next0 = SNAT_IN2OUT_NEXT_DROP;
4636       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
4637       goto out;
4638     }
4639
4640   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
4641
4642   key0.ext_host_addr = ip0->dst_address;
4643   key0.ext_host_port = 0;
4644
4645   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
4646   if (PREDICT_FALSE(!ses0))
4647     {
4648       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
4649           IP_PROTOCOL_ICMP, rx_fib_index0)))
4650         {
4651           dont_translate = 1;
4652           goto out;
4653         }
4654       if (icmp0->type != ICMP4_echo_request)
4655         {
4656           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
4657           next0 = SNAT_IN2OUT_NEXT_DROP;
4658           goto out;
4659         }
4660       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
4661         {
4662           key0.out_port = clib_host_to_net_u16 (lo_port0 +
4663             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
4664
4665           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
4666             continue;
4667
4668           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
4669           break;
4670         }
4671       if (PREDICT_FALSE(!ses0))
4672         {
4673           next0 = SNAT_IN2OUT_NEXT_DROP;
4674           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
4675           goto out;
4676         }
4677     }
4678
4679   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
4680                     !icmp_is_error_message (icmp0)))
4681     {
4682       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
4683       next0 = SNAT_IN2OUT_NEXT_DROP;
4684       goto out;
4685     }
4686
4687   u32 now = (u32) vlib_time_now (sm->vlib_main);
4688
4689   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
4690   ses0->expire = now + sm->icmp_timeout;
4691
4692 out:
4693   *p_proto = protocol;
4694   if (ses0)
4695     {
4696       p_value->addr = new_addr0;
4697       p_value->fib_index = sm->outside_fib_index;
4698       p_value->port = ses0->out.out_port;
4699     }
4700   *p_dont_translate = dont_translate;
4701   if (d)
4702     *(snat_det_session_t**)d = ses0;
4703   if (e)
4704     *(snat_det_map_t**)e = dm0;
4705   return next0;
4706 }
4707
4708 /**********************/
4709 /*** worker handoff ***/
4710 /**********************/
4711 static inline uword
4712 snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
4713                                       vlib_node_runtime_t * node,
4714                                       vlib_frame_t * frame,
4715                                       u8 is_output)
4716 {
4717   snat_main_t *sm = &snat_main;
4718   vlib_thread_main_t *tm = vlib_get_thread_main ();
4719   u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0;
4720   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
4721   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
4722     = 0;
4723   vlib_frame_queue_elt_t *hf = 0;
4724   vlib_frame_queue_t *fq;
4725   vlib_frame_t *f = 0;
4726   int i;
4727   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
4728   u32 next_worker_index = 0;
4729   u32 current_worker_index = ~0;
4730   u32 thread_index = vm->thread_index;
4731   u32 fq_index;
4732   u32 to_node_index;
4733   vlib_frame_t *d = 0;
4734
4735   ASSERT (vec_len (sm->workers));
4736
4737   if (is_output)
4738     {
4739       fq_index = sm->fq_in2out_output_index;
4740       to_node_index = sm->in2out_output_node_index;
4741     }
4742   else
4743     {
4744       fq_index = sm->fq_in2out_index;
4745       to_node_index = sm->in2out_node_index;
4746     }
4747
4748   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
4749     {
4750       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
4751
4752       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
4753                                tm->n_vlib_mains - 1,
4754                                (vlib_frame_queue_t *) (~0));
4755     }
4756
4757   from = vlib_frame_vector_args (frame);
4758   n_left_from = frame->n_vectors;
4759
4760   while (n_left_from > 0)
4761     {
4762       u32 bi0;
4763       vlib_buffer_t *b0;
4764       u32 sw_if_index0;
4765       u32 rx_fib_index0;
4766       ip4_header_t * ip0;
4767       u8 do_handoff;
4768
4769       bi0 = from[0];
4770       from += 1;
4771       n_left_from -= 1;
4772
4773       b0 = vlib_get_buffer (vm, bi0);
4774
4775       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
4776       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
4777
4778       ip0 = vlib_buffer_get_current (b0);
4779
4780       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
4781
4782       if (PREDICT_FALSE (next_worker_index != thread_index))
4783         {
4784           do_handoff = 1;
4785
4786           if (next_worker_index != current_worker_index)
4787             {
4788               fq = is_vlib_frame_queue_congested (
4789                 fq_index, next_worker_index, NAT_FQ_NELTS - 2,
4790                 congested_handoff_queue_by_worker_index);
4791
4792               if (fq)
4793                 {
4794                   /* if this is 1st frame */
4795                   if (!d)
4796                     {
4797                       d = vlib_get_frame_to_node (vm, sm->error_node_index);
4798                       to_next_drop = vlib_frame_vector_args (d);
4799                     }
4800
4801                   to_next_drop[0] = bi0;
4802                   to_next_drop += 1;
4803                   d->n_vectors++;
4804                   b0->error = node->errors[SNAT_IN2OUT_ERROR_FQ_CONGESTED];
4805                   goto trace0;
4806                 }
4807
4808               if (hf)
4809                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
4810
4811               hf = vlib_get_worker_handoff_queue_elt (fq_index,
4812                                                       next_worker_index,
4813                                                       handoff_queue_elt_by_worker_index);
4814
4815               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
4816               to_next_worker = &hf->buffer_index[hf->n_vectors];
4817               current_worker_index = next_worker_index;
4818             }
4819
4820           /* enqueue to correct worker thread */
4821           to_next_worker[0] = bi0;
4822           to_next_worker++;
4823           n_left_to_next_worker--;
4824
4825           if (n_left_to_next_worker == 0)
4826             {
4827               hf->n_vectors = VLIB_FRAME_SIZE;
4828               vlib_put_frame_queue_elt (hf);
4829               current_worker_index = ~0;
4830               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
4831               hf = 0;
4832             }
4833         }
4834       else
4835         {
4836           do_handoff = 0;
4837           /* if this is 1st frame */
4838           if (!f)
4839             {
4840               f = vlib_get_frame_to_node (vm, to_node_index);
4841               to_next = vlib_frame_vector_args (f);
4842             }
4843
4844           to_next[0] = bi0;
4845           to_next += 1;
4846           f->n_vectors++;
4847         }
4848
4849 trace0:
4850       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
4851                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
4852         {
4853           snat_in2out_worker_handoff_trace_t *t =
4854             vlib_add_trace (vm, node, b0, sizeof (*t));
4855           t->next_worker_index = next_worker_index;
4856           t->do_handoff = do_handoff;
4857         }
4858     }
4859
4860   if (f)
4861     vlib_put_frame_to_node (vm, to_node_index, f);
4862
4863   if (d)
4864     vlib_put_frame_to_node (vm, sm->error_node_index, d);
4865
4866   if (hf)
4867     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
4868
4869   /* Ship frames to the worker nodes */
4870   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
4871     {
4872       if (handoff_queue_elt_by_worker_index[i])
4873         {
4874           hf = handoff_queue_elt_by_worker_index[i];
4875           /*
4876            * It works better to let the handoff node
4877            * rate-adapt, always ship the handoff queue element.
4878            */
4879           if (1 || hf->n_vectors == hf->last_n_vectors)
4880             {
4881               vlib_put_frame_queue_elt (hf);
4882               handoff_queue_elt_by_worker_index[i] = 0;
4883             }
4884           else
4885             hf->last_n_vectors = hf->n_vectors;
4886         }
4887       congested_handoff_queue_by_worker_index[i] =
4888         (vlib_frame_queue_t *) (~0);
4889     }
4890   hf = 0;
4891   current_worker_index = ~0;
4892   return frame->n_vectors;
4893 }
4894
4895 static uword
4896 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
4897                                vlib_node_runtime_t * node,
4898                                vlib_frame_t * frame)
4899 {
4900   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
4901 }
4902
4903 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
4904   .function = snat_in2out_worker_handoff_fn,
4905   .name = "nat44-in2out-worker-handoff",
4906   .vector_size = sizeof (u32),
4907   .format_trace = format_snat_in2out_worker_handoff_trace,
4908   .type = VLIB_NODE_TYPE_INTERNAL,
4909
4910   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
4911   .error_strings = snat_in2out_error_strings,
4912
4913   .n_next_nodes = 1,
4914
4915   .next_nodes = {
4916     [0] = "error-drop",
4917   },
4918 };
4919
4920 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
4921                               snat_in2out_worker_handoff_fn);
4922
4923 static uword
4924 snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
4925                                       vlib_node_runtime_t * node,
4926                                       vlib_frame_t * frame)
4927 {
4928   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
4929 }
4930
4931 VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
4932   .function = snat_in2out_output_worker_handoff_fn,
4933   .name = "nat44-in2out-output-worker-handoff",
4934   .vector_size = sizeof (u32),
4935   .format_trace = format_snat_in2out_worker_handoff_trace,
4936   .type = VLIB_NODE_TYPE_INTERNAL,
4937
4938   .n_next_nodes = 1,
4939
4940   .next_nodes = {
4941     [0] = "error-drop",
4942   },
4943 };
4944
4945 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
4946                               snat_in2out_output_worker_handoff_fn);
4947
4948 static_always_inline int
4949 is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
4950 {
4951   snat_address_t * ap;
4952   clib_bihash_kv_8_8_t kv, value;
4953   snat_session_key_t m_key;
4954
4955   vec_foreach (ap, sm->addresses)
4956     {
4957       if (ap->addr.as_u32 == dst_addr->as_u32)
4958         return 1;
4959     }
4960
4961   m_key.addr.as_u32 = dst_addr->as_u32;
4962   m_key.fib_index = 0;
4963   m_key.port = 0;
4964   m_key.protocol = 0;
4965   kv.key = m_key.as_u64;
4966   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
4967     return 1;
4968
4969   return 0;
4970 }
4971
4972 static inline uword
4973 snat_hairpin_dst_fn_inline (vlib_main_t * vm,
4974                             vlib_node_runtime_t * node,
4975                             vlib_frame_t * frame,
4976                             int is_ed)
4977 {
4978   u32 n_left_from, * from, * to_next, stats_node_index;
4979   snat_in2out_next_t next_index;
4980   u32 pkts_processed = 0;
4981   snat_main_t * sm = &snat_main;
4982
4983   stats_node_index = is_ed ? nat44_ed_hairpin_dst_node.index :
4984     snat_hairpin_dst_node.index;
4985
4986   from = vlib_frame_vector_args (frame);
4987   n_left_from = frame->n_vectors;
4988   next_index = node->cached_next_index;
4989
4990   while (n_left_from > 0)
4991     {
4992       u32 n_left_to_next;
4993
4994       vlib_get_next_frame (vm, node, next_index,
4995                            to_next, n_left_to_next);
4996
4997       while (n_left_from > 0 && n_left_to_next > 0)
4998         {
4999           u32 bi0;
5000           vlib_buffer_t * b0;
5001           u32 next0;
5002           ip4_header_t * ip0;
5003           u32 proto0;
5004
5005           /* speculatively enqueue b0 to the current next frame */
5006           bi0 = from[0];
5007           to_next[0] = bi0;
5008           from += 1;
5009           to_next += 1;
5010           n_left_from -= 1;
5011           n_left_to_next -= 1;
5012
5013           b0 = vlib_get_buffer (vm, bi0);
5014           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
5015           ip0 = vlib_buffer_get_current (b0);
5016
5017           proto0 = ip_proto_to_snat_proto (ip0->protocol);
5018
5019           vnet_buffer (b0)->snat.flags = 0;
5020           if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
5021             {
5022               if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
5023                 {
5024                   udp_header_t * udp0 = ip4_next_header (ip0);
5025                   tcp_header_t * tcp0 = (tcp_header_t *) udp0;
5026
5027                   snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0, is_ed);
5028                 }
5029               else if (proto0 == SNAT_PROTOCOL_ICMP)
5030                 {
5031                   icmp46_header_t * icmp0 = ip4_next_header (ip0);
5032
5033                   snat_icmp_hairpinning (sm, b0, ip0, icmp0, is_ed);
5034                 }
5035               else
5036                 {
5037                   if (is_ed)
5038                     nat44_ed_hairpinning_unknown_proto (sm, b0, ip0);
5039                   else
5040                     nat_hairpinning_sm_unknown_proto (sm, b0, ip0);
5041                 }
5042
5043               vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
5044             }
5045
5046           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
5047
5048           /* verify speculative enqueue, maybe switch current next frame */
5049           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
5050                                            to_next, n_left_to_next,
5051                                            bi0, next0);
5052          }
5053
5054       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
5055     }
5056
5057   vlib_node_increment_counter (vm, stats_node_index,
5058                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
5059                                pkts_processed);
5060   return frame->n_vectors;
5061 }
5062
5063 static uword
5064 snat_hairpin_dst_fn (vlib_main_t * vm,
5065                      vlib_node_runtime_t * node,
5066                      vlib_frame_t * frame)
5067 {
5068   return snat_hairpin_dst_fn_inline (vm, node, frame, 0);
5069 }
5070
5071 VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
5072   .function = snat_hairpin_dst_fn,
5073   .name = "nat44-hairpin-dst",
5074   .vector_size = sizeof (u32),
5075   .type = VLIB_NODE_TYPE_INTERNAL,
5076   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
5077   .error_strings = snat_in2out_error_strings,
5078   .n_next_nodes = 2,
5079   .next_nodes = {
5080     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
5081     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
5082   },
5083 };
5084
5085 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node,
5086                               snat_hairpin_dst_fn);
5087
5088 static uword
5089 nat44_ed_hairpin_dst_fn (vlib_main_t * vm,
5090                          vlib_node_runtime_t * node,
5091                          vlib_frame_t * frame)
5092 {
5093   return snat_hairpin_dst_fn_inline (vm, node, frame, 1);
5094 }
5095
5096 VLIB_REGISTER_NODE (nat44_ed_hairpin_dst_node) = {
5097   .function = nat44_ed_hairpin_dst_fn,
5098   .name = "nat44-ed-hairpin-dst",
5099   .vector_size = sizeof (u32),
5100   .type = VLIB_NODE_TYPE_INTERNAL,
5101   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
5102   .error_strings = snat_in2out_error_strings,
5103   .n_next_nodes = 2,
5104   .next_nodes = {
5105     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
5106     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
5107   },
5108 };
5109
5110 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_hairpin_dst_node,
5111                               nat44_ed_hairpin_dst_fn);
5112
5113 static inline uword
5114 snat_hairpin_src_fn_inline (vlib_main_t * vm,
5115                             vlib_node_runtime_t * node,
5116                             vlib_frame_t * frame,
5117                             int is_ed)
5118 {
5119   u32 n_left_from, * from, * to_next, stats_node_index;
5120   snat_in2out_next_t next_index;
5121   u32 pkts_processed = 0;
5122   snat_main_t *sm = &snat_main;
5123
5124   stats_node_index = is_ed ? nat44_ed_hairpin_src_node.index :
5125     snat_hairpin_src_node.index;
5126
5127   from = vlib_frame_vector_args (frame);
5128   n_left_from = frame->n_vectors;
5129   next_index = node->cached_next_index;
5130
5131   while (n_left_from > 0)
5132     {
5133       u32 n_left_to_next;
5134
5135       vlib_get_next_frame (vm, node, next_index,
5136                            to_next, n_left_to_next);
5137
5138       while (n_left_from > 0 && n_left_to_next > 0)
5139         {
5140           u32 bi0;
5141           vlib_buffer_t * b0;
5142           u32 next0;
5143           snat_interface_t *i;
5144           u32 sw_if_index0;
5145
5146           /* speculatively enqueue b0 to the current next frame */
5147           bi0 = from[0];
5148           to_next[0] = bi0;
5149           from += 1;
5150           to_next += 1;
5151           n_left_from -= 1;
5152           n_left_to_next -= 1;
5153
5154           b0 = vlib_get_buffer (vm, bi0);
5155           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
5156           next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT;
5157
5158           pool_foreach (i, sm->output_feature_interfaces,
5159           ({
5160             /* Only packets from NAT inside interface */
5161             if ((nat_interface_is_inside(i)) && (sw_if_index0 == i->sw_if_index))
5162               {
5163                 if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) &
5164                                     SNAT_FLAG_HAIRPINNING))
5165                   {
5166                     if (PREDICT_TRUE (sm->num_workers > 1))
5167                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
5168                     else
5169                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
5170                   }
5171                 break;
5172               }
5173           }));
5174
5175           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
5176
5177           /* verify speculative enqueue, maybe switch current next frame */
5178           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
5179                                            to_next, n_left_to_next,
5180                                            bi0, next0);
5181          }
5182
5183       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
5184     }
5185
5186   vlib_node_increment_counter (vm, stats_node_index,
5187                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
5188                                pkts_processed);
5189   return frame->n_vectors;
5190 }
5191
5192 static uword
5193 snat_hairpin_src_fn (vlib_main_t * vm,
5194                      vlib_node_runtime_t * node,
5195                      vlib_frame_t * frame)
5196 {
5197   return snat_hairpin_src_fn_inline (vm, node, frame, 0);
5198 }
5199
5200 VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
5201   .function = snat_hairpin_src_fn,
5202   .name = "nat44-hairpin-src",
5203   .vector_size = sizeof (u32),
5204   .type = VLIB_NODE_TYPE_INTERNAL,
5205   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
5206   .error_strings = snat_in2out_error_strings,
5207   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
5208   .next_nodes = {
5209      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
5210      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-in2out-output",
5211      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
5212      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
5213   },
5214 };
5215
5216 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node,
5217                               snat_hairpin_src_fn);
5218
5219 static uword
5220 nat44_ed_hairpin_src_fn (vlib_main_t * vm,
5221                          vlib_node_runtime_t * node,
5222                          vlib_frame_t * frame)
5223 {
5224   return snat_hairpin_src_fn_inline (vm, node, frame, 1);
5225 }
5226
5227 VLIB_REGISTER_NODE (nat44_ed_hairpin_src_node) = {
5228   .function = nat44_ed_hairpin_src_fn,
5229   .name = "nat44-ed-hairpin-src",
5230   .vector_size = sizeof (u32),
5231   .type = VLIB_NODE_TYPE_INTERNAL,
5232   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
5233   .error_strings = snat_in2out_error_strings,
5234   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
5235   .next_nodes = {
5236      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
5237      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-ed-in2out-output",
5238      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
5239      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
5240   },
5241 };
5242
5243 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_hairpin_src_node,
5244                               nat44_ed_hairpin_src_fn);
5245
5246 static uword
5247 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
5248                                 vlib_node_runtime_t * node,
5249                                 vlib_frame_t * frame)
5250 {
5251   u32 n_left_from, * from, * to_next;
5252   snat_in2out_next_t next_index;
5253   u32 pkts_processed = 0;
5254   snat_main_t * sm = &snat_main;
5255   u32 stats_node_index;
5256
5257   stats_node_index = snat_in2out_fast_node.index;
5258
5259   from = vlib_frame_vector_args (frame);
5260   n_left_from = frame->n_vectors;
5261   next_index = node->cached_next_index;
5262
5263   while (n_left_from > 0)
5264     {
5265       u32 n_left_to_next;
5266
5267       vlib_get_next_frame (vm, node, next_index,
5268                            to_next, n_left_to_next);
5269
5270       while (n_left_from > 0 && n_left_to_next > 0)
5271         {
5272           u32 bi0;
5273           vlib_buffer_t * b0;
5274           u32 next0;
5275           u32 sw_if_index0;
5276           ip4_header_t * ip0;
5277           ip_csum_t sum0;
5278           u32 new_addr0, old_addr0;
5279           u16 old_port0, new_port0;
5280           udp_header_t * udp0;
5281           tcp_header_t * tcp0;
5282           icmp46_header_t * icmp0;
5283           snat_session_key_t key0, sm0;
5284           u32 proto0;
5285           u32 rx_fib_index0;
5286
5287           /* speculatively enqueue b0 to the current next frame */
5288           bi0 = from[0];
5289           to_next[0] = bi0;
5290           from += 1;
5291           to_next += 1;
5292           n_left_from -= 1;
5293           n_left_to_next -= 1;
5294
5295           b0 = vlib_get_buffer (vm, bi0);
5296           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
5297
5298           ip0 = vlib_buffer_get_current (b0);
5299           udp0 = ip4_next_header (ip0);
5300           tcp0 = (tcp_header_t *) udp0;
5301           icmp0 = (icmp46_header_t *) udp0;
5302
5303           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
5304           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
5305
5306           if (PREDICT_FALSE(ip0->ttl == 1))
5307             {
5308               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
5309               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
5310                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
5311                                            0);
5312               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
5313               goto trace0;
5314             }
5315
5316           proto0 = ip_proto_to_snat_proto (ip0->protocol);
5317
5318           if (PREDICT_FALSE (proto0 == ~0))
5319               goto trace0;
5320
5321           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
5322             {
5323               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
5324                                   rx_fib_index0, node, next0, ~0, 0, 0);
5325               goto trace0;
5326             }
5327
5328           key0.addr = ip0->src_address;
5329           key0.protocol = proto0;
5330           key0.port = udp0->src_port;
5331           key0.fib_index = rx_fib_index0;
5332
5333           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0, 0, 0, 0))
5334             {
5335               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
5336               next0= SNAT_IN2OUT_NEXT_DROP;
5337               goto trace0;
5338             }
5339
5340           new_addr0 = sm0.addr.as_u32;
5341           new_port0 = sm0.port;
5342           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
5343           old_addr0 = ip0->src_address.as_u32;
5344           ip0->src_address.as_u32 = new_addr0;
5345
5346           sum0 = ip0->checksum;
5347           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
5348                                  ip4_header_t,
5349                                  src_address /* changed member */);
5350           ip0->checksum = ip_csum_fold (sum0);
5351
5352           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
5353             {
5354               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
5355                 {
5356                   old_port0 = tcp0->src_port;
5357                   tcp0->src_port = new_port0;
5358
5359                   sum0 = tcp0->checksum;
5360                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
5361                                          ip4_header_t,
5362                                          dst_address /* changed member */);
5363                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
5364                                          ip4_header_t /* cheat */,
5365                                          length /* changed member */);
5366                   mss_clamping (sm, tcp0, &sum0);
5367                   tcp0->checksum = ip_csum_fold(sum0);
5368                 }
5369               else
5370                 {
5371                   old_port0 = udp0->src_port;
5372                   udp0->src_port = new_port0;
5373                   udp0->checksum = 0;
5374                 }
5375             }
5376           else
5377             {
5378               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
5379                 {
5380                   sum0 = tcp0->checksum;
5381                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
5382                                          ip4_header_t,
5383                                          dst_address /* changed member */);
5384                   mss_clamping (sm, tcp0, &sum0);
5385                   tcp0->checksum = ip_csum_fold(sum0);
5386                 }
5387             }
5388
5389           /* Hairpinning */
5390           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0, 0);
5391
5392         trace0:
5393           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
5394                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
5395             {
5396               snat_in2out_trace_t *t =
5397                  vlib_add_trace (vm, node, b0, sizeof (*t));
5398               t->sw_if_index = sw_if_index0;
5399               t->next_index = next0;
5400             }
5401
5402           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
5403
5404           /* verify speculative enqueue, maybe switch current next frame */
5405           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
5406                                            to_next, n_left_to_next,
5407                                            bi0, next0);
5408         }
5409
5410       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
5411     }
5412
5413   vlib_node_increment_counter (vm, stats_node_index,
5414                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
5415                                pkts_processed);
5416   return frame->n_vectors;
5417 }
5418
5419
5420 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
5421   .function = snat_in2out_fast_static_map_fn,
5422   .name = "nat44-in2out-fast",
5423   .vector_size = sizeof (u32),
5424   .format_trace = format_snat_in2out_fast_trace,
5425   .type = VLIB_NODE_TYPE_INTERNAL,
5426
5427   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
5428   .error_strings = snat_in2out_error_strings,
5429
5430   .runtime_data_bytes = sizeof (snat_runtime_t),
5431
5432   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
5433
5434   /* edit / add dispositions here */
5435   .next_nodes = {
5436     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
5437     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
5438     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
5439     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
5440     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
5441   },
5442 };
5443
5444 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);