NAT44: fix nat44_ed_not_translate_output_feature (VPP-1329)
[vpp.git] / src / plugins / nat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <nat/nat.h>
25 #include <nat/nat_ipfix_logging.h>
26 #include <nat/nat_det.h>
27 #include <nat/nat_reass.h>
28 #include <nat/nat_inlines.h>
29
30 #include <vppinfra/hash.h>
31 #include <vppinfra/error.h>
32 #include <vppinfra/elog.h>
33
34 typedef struct {
35   u32 sw_if_index;
36   u32 next_index;
37   u32 session_index;
38   u32 is_slow_path;
39 } snat_in2out_trace_t;
40
41 typedef struct {
42   u32 next_worker_index;
43   u8 do_handoff;
44 } snat_in2out_worker_handoff_trace_t;
45
46 /* packet trace format function */
47 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
48 {
49   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
50   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
51   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
52   char * tag;
53
54   tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
55
56   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
57               t->sw_if_index, t->next_index, t->session_index);
58
59   return s;
60 }
61
62 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
63 {
64   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
65   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
66   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
67
68   s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
69               t->sw_if_index, t->next_index);
70
71   return s;
72 }
73
74 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
75 {
76   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
77   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
78   snat_in2out_worker_handoff_trace_t * t =
79     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
80   char * m;
81
82   m = t->do_handoff ? "next worker" : "same worker";
83   s = format (s, "NAT44_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
84
85   return s;
86 }
87
88 typedef struct {
89   u32 sw_if_index;
90   u32 next_index;
91   u8 cached;
92 } nat44_in2out_reass_trace_t;
93
94 static u8 * format_nat44_in2out_reass_trace (u8 * s, va_list * args)
95 {
96   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
97   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
98   nat44_in2out_reass_trace_t * t = va_arg (*args, nat44_in2out_reass_trace_t *);
99
100   s = format (s, "NAT44_IN2OUT_REASS: sw_if_index %d, next index %d, status %s",
101               t->sw_if_index, t->next_index,
102               t->cached ? "cached" : "translated");
103
104   return s;
105 }
106
107 vlib_node_registration_t snat_in2out_node;
108 vlib_node_registration_t snat_in2out_slowpath_node;
109 vlib_node_registration_t snat_in2out_fast_node;
110 vlib_node_registration_t snat_in2out_worker_handoff_node;
111 vlib_node_registration_t snat_det_in2out_node;
112 vlib_node_registration_t snat_in2out_output_node;
113 vlib_node_registration_t snat_in2out_output_slowpath_node;
114 vlib_node_registration_t snat_in2out_output_worker_handoff_node;
115 vlib_node_registration_t snat_hairpin_dst_node;
116 vlib_node_registration_t snat_hairpin_src_node;
117 vlib_node_registration_t nat44_hairpinning_node;
118 vlib_node_registration_t nat44_in2out_reass_node;
119 vlib_node_registration_t nat44_ed_in2out_node;
120 vlib_node_registration_t nat44_ed_in2out_slowpath_node;
121 vlib_node_registration_t nat44_ed_in2out_output_node;
122 vlib_node_registration_t nat44_ed_in2out_output_slowpath_node;
123 vlib_node_registration_t nat44_ed_hairpin_dst_node;
124 vlib_node_registration_t nat44_ed_hairpin_src_node;
125 vlib_node_registration_t nat44_ed_hairpinning_node;
126
/*
 * Error table: one _(symbol, description) entry per error. The enum and the
 * string array below are both expanded from this list, so they always stay
 * in the same order.
 */
#define foreach_snat_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(IN2OUT_PACKETS, "Good in2out packets processed")      \
_(OUT_OF_PORTS, "Out of ports")                         \
_(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(NO_TRANSLATION, "No translation")                     \
_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")   \
_(DROP_FRAGMENT, "Drop fragment")                       \
_(MAX_REASS, "Maximum reassemblies exceeded")           \
_(MAX_FRAG, "Maximum fragments per reassembly exceeded")\
_(FQ_CONGESTED, "Handoff frame queue congested")

/* SNAT_IN2OUT_ERROR_<symbol> codes; SNAT_IN2OUT_N_ERROR is the entry count. */
typedef enum {
#define _(name,text) SNAT_IN2OUT_ERROR_##name,
  foreach_snat_in2out_error
#undef _
  SNAT_IN2OUT_N_ERROR,
} snat_in2out_error_t;

/* Descriptions indexed by snat_in2out_error_t. */
static char * snat_in2out_error_strings[] = {
#define _(name,text) text,
  foreach_snat_in2out_error
#undef _
};
152
/* Next-node indices returned by the in2out helpers in this file;
 * SNAT_IN2OUT_N_NEXT is the number of entries. */
typedef enum {
  SNAT_IN2OUT_NEXT_LOOKUP = 0,
  SNAT_IN2OUT_NEXT_DROP,        /* returned whenever a helper rejects a packet */
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_NEXT_REASS,
  SNAT_IN2OUT_N_NEXT,
} snat_in2out_next_t;
161
/* Next-node indices for the hairpin source node family;
 * SNAT_HAIRPIN_SRC_N_NEXT is the number of entries. */
typedef enum {
  SNAT_HAIRPIN_SRC_NEXT_DROP = 0,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
  SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
  SNAT_HAIRPIN_SRC_N_NEXT,
} snat_hairpin_next_t;
169
170 /**
171  * @brief Check if packet should be translated
172  *
173  * Packets aimed at outside interface and external address with active session
174  * should be translated.
175  *
176  * @param sm            NAT main
177  * @param rt            NAT runtime data
178  * @param sw_if_index0  index of the inside interface
179  * @param ip0           IPv4 header
180  * @param proto0        NAT protocol
181  * @param rx_fib_index0 RX FIB index
182  *
183  * @returns 0 if packet should be translated otherwise 1
184  */
185 static inline int
186 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
187                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
188                          u32 rx_fib_index0)
189 {
190   if (sm->out2in_dpo)
191     return 0;
192
193   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
194   fib_prefix_t pfx = {
195     .fp_proto = FIB_PROTOCOL_IP4,
196     .fp_len = 32,
197     .fp_addr = {
198         .ip4.as_u32 = ip0->dst_address.as_u32,
199     },
200   };
201
202   /* Don't NAT packet aimed at the intfc address */
203   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
204                                       ip0->dst_address.as_u32)))
205     return 1;
206
207   fei = fib_table_lookup (rx_fib_index0, &pfx);
208   if (FIB_NODE_INDEX_INVALID != fei)
209     {
210       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
211       if (sw_if_index == ~0)
212         {
213           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
214           if (FIB_NODE_INDEX_INVALID != fei)
215             sw_if_index = fib_entry_get_resolving_interface (fei);
216         }
217       snat_interface_t *i;
218       pool_foreach (i, sm->interfaces,
219       ({
220         /* NAT packet aimed at outside interface */
221         if ((nat_interface_is_outside(i)) && (sw_if_index == i->sw_if_index))
222           return 0;
223       }));
224     }
225
226   return 1;
227 }
228
229 static inline int
230 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
231                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
232                     u32 rx_fib_index0, u32 thread_index)
233 {
234   udp_header_t * udp0 = ip4_next_header (ip0);
235   snat_session_key_t key0, sm0;
236   clib_bihash_kv_8_8_t kv0, value0;
237
238   key0.addr = ip0->dst_address;
239   key0.port = udp0->dst_port;
240   key0.protocol = proto0;
241   key0.fib_index = sm->outside_fib_index;
242   kv0.key = key0.as_u64;
243
244   /* NAT packet aimed at external address if */
245   /* has active sessions */
246   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
247                               &value0))
248     {
249       /* or is static mappings */
250       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
251         return 0;
252     }
253   else
254     return 0;
255
256   if (sm->forwarding_enabled)
257     return 1;
258
259   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
260                                  rx_fib_index0);
261 }
262
263 static inline int
264 nat_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip0,
265                                   u32 proto0, u16 src_port, u16 dst_port,
266                                   u32 thread_index, u32 sw_if_index)
267 {
268   snat_session_key_t key0;
269   clib_bihash_kv_8_8_t kv0, value0;
270   snat_interface_t *i;
271
272   /* src NAT check */
273   key0.addr = ip0->src_address;
274   key0.port = src_port;
275   key0.protocol = proto0;
276   key0.fib_index = sm->outside_fib_index;
277   kv0.key = key0.as_u64;
278
279   if (!clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
280                                &value0))
281     return 1;
282
283   /* dst NAT check */
284   key0.addr = ip0->dst_address;
285   key0.port = dst_port;
286   key0.protocol = proto0;
287   key0.fib_index = sm->inside_fib_index;
288   kv0.key = key0.as_u64;
289   if (!clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
290                                &value0))
291   {
292     /* hairpinning */
293     pool_foreach (i, sm->output_feature_interfaces,
294     ({
295       if ((nat_interface_is_inside(i)) && (sw_if_index == i->sw_if_index))
296         return 0;
297     }));
298     return 1;
299   }
300
301   return 0;
302 }
303
304 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
305                       ip4_header_t * ip0,
306                       u32 rx_fib_index0,
307                       snat_session_key_t * key0,
308                       snat_session_t ** sessionp,
309                       vlib_node_runtime_t * node,
310                       u32 next0,
311                       u32 thread_index)
312 {
313   snat_user_t *u;
314   snat_session_t *s;
315   clib_bihash_kv_8_8_t kv0;
316   snat_session_key_t key1;
317   u32 address_index = ~0;
318   u32 outside_fib_index;
319   uword * p;
320   udp_header_t * udp0 = ip4_next_header (ip0);
321   u8 is_sm = 0;
322
323   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
324     {
325       b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
326       nat_ipfix_logging_max_sessions(sm->max_translations);
327       nat_log_notice ("maximum sessions exceeded");
328       return SNAT_IN2OUT_NEXT_DROP;
329     }
330
331   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
332   if (! p)
333     {
334       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
335       return SNAT_IN2OUT_NEXT_DROP;
336     }
337   outside_fib_index = p[0];
338
339   key1.protocol = key0->protocol;
340
341   u = nat_user_get_or_create (sm, &ip0->src_address, rx_fib_index0,
342                               thread_index);
343   if (!u)
344     {
345       nat_log_warn ("create NAT user failed");
346       return SNAT_IN2OUT_NEXT_DROP;
347     }
348
349   /* First try to match static mapping by local address and port */
350   if (snat_static_mapping_match (sm, *key0, &key1, 0, 0, 0, 0))
351     {
352       /* Try to create dynamic translation */
353       if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index0,
354                                                thread_index, &key1,
355                                                &address_index,
356                                                sm->port_per_thread,
357                                                sm->per_thread_data[thread_index].snat_thread_index))
358         {
359           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
360           return SNAT_IN2OUT_NEXT_DROP;
361         }
362     }
363   else
364     is_sm = 1;
365
366   s = nat_session_alloc_or_recycle (sm, u, thread_index);
367   if (!s)
368     {
369       nat_log_warn ("create NAT session failed");
370       return SNAT_IN2OUT_NEXT_DROP;
371     }
372
373   if (is_sm)
374     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
375   user_session_increment (sm, u, is_sm);
376   s->outside_address_index = address_index;
377   s->in2out = *key0;
378   s->out2in = key1;
379   s->out2in.protocol = key0->protocol;
380   s->out2in.fib_index = outside_fib_index;
381   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
382   s->ext_host_port = udp0->dst_port;
383   *sessionp = s;
384
385   /* Add to translation hashes */
386   kv0.key = s->in2out.as_u64;
387   kv0.value = s - sm->per_thread_data[thread_index].sessions;
388   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
389                                1 /* is_add */))
390       nat_log_notice ("in2out key add failed");
391
392   kv0.key = s->out2in.as_u64;
393   kv0.value = s - sm->per_thread_data[thread_index].sessions;
394
395   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
396                                1 /* is_add */))
397       nat_log_notice ("out2in key add failed");
398
399   /* log NAT event */
400   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
401                                       s->out2in.addr.as_u32,
402                                       s->in2out.protocol,
403                                       s->in2out.port,
404                                       s->out2in.port,
405                                       s->in2out.fib_index);
406   return next0;
407 }
408
409 static_always_inline
410 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
411                                  snat_session_key_t *p_key0)
412 {
413   icmp46_header_t *icmp0;
414   snat_session_key_t key0;
415   icmp_echo_header_t *echo0, *inner_echo0 = 0;
416   ip4_header_t *inner_ip0 = 0;
417   void *l4_header = 0;
418   icmp46_header_t *inner_icmp0;
419
420   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
421   echo0 = (icmp_echo_header_t *)(icmp0+1);
422
423   if (!icmp_is_error_message (icmp0))
424     {
425       key0.protocol = SNAT_PROTOCOL_ICMP;
426       key0.addr = ip0->src_address;
427       key0.port = echo0->identifier;
428     }
429   else
430     {
431       inner_ip0 = (ip4_header_t *)(echo0+1);
432       l4_header = ip4_next_header (inner_ip0);
433       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
434       key0.addr = inner_ip0->dst_address;
435       switch (key0.protocol)
436         {
437         case SNAT_PROTOCOL_ICMP:
438           inner_icmp0 = (icmp46_header_t*)l4_header;
439           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
440           key0.port = inner_echo0->identifier;
441           break;
442         case SNAT_PROTOCOL_UDP:
443         case SNAT_PROTOCOL_TCP:
444           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
445           break;
446         default:
447           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
448         }
449     }
450   *p_key0 = key0;
451   return -1; /* success */
452 }
453
454 /**
455  * Get address and port values to be used for ICMP packet translation
456  * and create session if needed
457  *
458  * @param[in,out] sm             NAT main
459  * @param[in,out] node           NAT node runtime
460  * @param[in] thread_index       thread index
461  * @param[in,out] b0             buffer containing packet to be translated
462  * @param[out] p_proto           protocol used for matching
463  * @param[out] p_value           address and port after NAT translation
464  * @param[out] p_dont_translate  if packet should not be translated
465  * @param d                      optional parameter
466  * @param e                      optional parameter
467  */
468 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
469                            u32 thread_index, vlib_buffer_t *b0,
470                            ip4_header_t *ip0, u8 *p_proto,
471                            snat_session_key_t *p_value,
472                            u8 *p_dont_translate, void *d, void *e)
473 {
474   icmp46_header_t *icmp0;
475   u32 sw_if_index0;
476   u32 rx_fib_index0;
477   snat_session_key_t key0;
478   snat_session_t *s0 = 0;
479   u8 dont_translate = 0;
480   clib_bihash_kv_8_8_t kv0, value0;
481   u32 next0 = ~0;
482   int err;
483
484   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
485   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
486   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
487
488   err = icmp_get_key (ip0, &key0);
489   if (err != -1)
490     {
491       b0->error = node->errors[err];
492       next0 = SNAT_IN2OUT_NEXT_DROP;
493       goto out;
494     }
495   key0.fib_index = rx_fib_index0;
496
497   kv0.key = key0.as_u64;
498
499   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
500                               &value0))
501     {
502       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] != ~0)
503         {
504           if (PREDICT_FALSE(nat_not_translate_output_feature(sm, ip0,
505               key0.protocol, key0.port, key0.port, thread_index, sw_if_index0)))
506             {
507               dont_translate = 1;
508               goto out;
509             }
510         }
511       else
512         {
513           if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
514               ip0, SNAT_PROTOCOL_ICMP, rx_fib_index0, thread_index)))
515             {
516               dont_translate = 1;
517               goto out;
518             }
519         }
520
521       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
522         {
523           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
524           next0 = SNAT_IN2OUT_NEXT_DROP;
525           goto out;
526         }
527
528       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
529                          &s0, node, next0, thread_index);
530
531       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
532         goto out;
533     }
534   else
535     {
536       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
537                         icmp0->type != ICMP4_echo_reply &&
538                         !icmp_is_error_message (icmp0)))
539         {
540           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
541           next0 = SNAT_IN2OUT_NEXT_DROP;
542           goto out;
543         }
544
545       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
546                               value0.value);
547     }
548
549 out:
550   *p_proto = key0.protocol;
551   if (s0)
552     *p_value = s0->out2in;
553   *p_dont_translate = dont_translate;
554   if (d)
555     *(snat_session_t**)d = s0;
556   return next0;
557 }
558
559 /**
560  * Get address and port values to be used for ICMP packet translation
561  *
562  * @param[in] sm                 NAT main
563  * @param[in,out] node           NAT node runtime
564  * @param[in] thread_index       thread index
565  * @param[in,out] b0             buffer containing packet to be translated
566  * @param[out] p_proto           protocol used for matching
567  * @param[out] p_value           address and port after NAT translation
568  * @param[out] p_dont_translate  if packet should not be translated
569  * @param d                      optional parameter
570  * @param e                      optional parameter
571  */
572 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
573                            u32 thread_index, vlib_buffer_t *b0,
574                            ip4_header_t *ip0, u8 *p_proto,
575                            snat_session_key_t *p_value,
576                            u8 *p_dont_translate, void *d, void *e)
577 {
578   icmp46_header_t *icmp0;
579   u32 sw_if_index0;
580   u32 rx_fib_index0;
581   snat_session_key_t key0;
582   snat_session_key_t sm0;
583   u8 dont_translate = 0;
584   u8 is_addr_only;
585   u32 next0 = ~0;
586   int err;
587
588   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
589   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
590   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
591
592   err = icmp_get_key (ip0, &key0);
593   if (err != -1)
594     {
595       b0->error = node->errors[err];
596       next0 = SNAT_IN2OUT_NEXT_DROP;
597       goto out2;
598     }
599   key0.fib_index = rx_fib_index0;
600
601   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only, 0, 0))
602     {
603       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
604           IP_PROTOCOL_ICMP, rx_fib_index0)))
605         {
606           dont_translate = 1;
607           goto out;
608         }
609
610       if (icmp_is_error_message (icmp0))
611         {
612           next0 = SNAT_IN2OUT_NEXT_DROP;
613           goto out;
614         }
615
616       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
617       next0 = SNAT_IN2OUT_NEXT_DROP;
618       goto out;
619     }
620
621   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
622                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
623                     !icmp_is_error_message (icmp0)))
624     {
625       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
626       next0 = SNAT_IN2OUT_NEXT_DROP;
627       goto out;
628     }
629
630 out:
631   *p_value = sm0;
632 out2:
633   *p_proto = key0.protocol;
634   *p_dont_translate = dont_translate;
635   return next0;
636 }
637
638 static inline u32 icmp_in2out (snat_main_t *sm,
639                                vlib_buffer_t * b0,
640                                ip4_header_t * ip0,
641                                icmp46_header_t * icmp0,
642                                u32 sw_if_index0,
643                                u32 rx_fib_index0,
644                                vlib_node_runtime_t * node,
645                                u32 next0,
646                                u32 thread_index,
647                                void *d,
648                                void *e)
649 {
650   snat_session_key_t sm0;
651   u8 protocol;
652   icmp_echo_header_t *echo0, *inner_echo0 = 0;
653   ip4_header_t *inner_ip0;
654   void *l4_header = 0;
655   icmp46_header_t *inner_icmp0;
656   u8 dont_translate;
657   u32 new_addr0, old_addr0;
658   u16 old_id0, new_id0;
659   ip_csum_t sum0;
660   u16 checksum0;
661   u32 next0_tmp;
662
663   echo0 = (icmp_echo_header_t *)(icmp0+1);
664
665   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0, ip0,
666                                        &protocol, &sm0, &dont_translate, d, e);
667   if (next0_tmp != ~0)
668     next0 = next0_tmp;
669   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
670     goto out;
671
672   sum0 = ip_incremental_checksum (0, icmp0,
673                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
674   checksum0 = ~ip_csum_fold (sum0);
675   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
676     {
677       next0 = SNAT_IN2OUT_NEXT_DROP;
678       goto out;
679     }
680
681   old_addr0 = ip0->src_address.as_u32;
682   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
683   if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
684     vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
685
686   sum0 = ip0->checksum;
687   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
688                          src_address /* changed member */);
689   ip0->checksum = ip_csum_fold (sum0);
690
691   if (icmp0->checksum == 0)
692     icmp0->checksum = 0xffff;
693
694   if (!icmp_is_error_message (icmp0))
695     {
696       new_id0 = sm0.port;
697       if (PREDICT_FALSE(new_id0 != echo0->identifier))
698         {
699           old_id0 = echo0->identifier;
700           new_id0 = sm0.port;
701           echo0->identifier = new_id0;
702
703           sum0 = icmp0->checksum;
704           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
705                                  identifier);
706           icmp0->checksum = ip_csum_fold (sum0);
707         }
708     }
709   else
710     {
711       inner_ip0 = (ip4_header_t *)(echo0+1);
712       l4_header = ip4_next_header (inner_ip0);
713
714       if (!ip4_header_checksum_is_valid (inner_ip0))
715         {
716           next0 = SNAT_IN2OUT_NEXT_DROP;
717           goto out;
718         }
719
720       old_addr0 = inner_ip0->dst_address.as_u32;
721       inner_ip0->dst_address = sm0.addr;
722       new_addr0 = inner_ip0->dst_address.as_u32;
723
724       sum0 = icmp0->checksum;
725       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
726                              dst_address /* changed member */);
727       icmp0->checksum = ip_csum_fold (sum0);
728
729       switch (protocol)
730         {
731           case SNAT_PROTOCOL_ICMP:
732             inner_icmp0 = (icmp46_header_t*)l4_header;
733             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
734
735             old_id0 = inner_echo0->identifier;
736             new_id0 = sm0.port;
737             inner_echo0->identifier = new_id0;
738
739             sum0 = icmp0->checksum;
740             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
741                                    identifier);
742             icmp0->checksum = ip_csum_fold (sum0);
743             break;
744           case SNAT_PROTOCOL_UDP:
745           case SNAT_PROTOCOL_TCP:
746             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
747             new_id0 = sm0.port;
748             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
749
750             sum0 = icmp0->checksum;
751             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
752                                    dst_port);
753             icmp0->checksum = ip_csum_fold (sum0);
754             break;
755           default:
756             ASSERT(0);
757         }
758     }
759
760 out:
761   return next0;
762 }
763
764 /**
765  * @brief Hairpinning
766  *
767  * Hairpinning allows two endpoints on the internal side of the NAT to
768  * communicate even if they only use each other's external IP addresses
769  * and ports.
770  *
771  * @param sm     NAT main.
772  * @param b0     Vlib buffer.
773  * @param ip0    IP header.
774  * @param udp0   UDP header.
775  * @param tcp0   TCP header.
776  * @param proto0 NAT protocol.
777  */
778 static inline int
779 snat_hairpinning (snat_main_t *sm,
780                   vlib_buffer_t * b0,
781                   ip4_header_t * ip0,
782                   udp_header_t * udp0,
783                   tcp_header_t * tcp0,
784                   u32 proto0,
785                   int is_ed)
786 {
787   snat_session_key_t key0, sm0;
788   snat_session_t * s0;
789   clib_bihash_kv_8_8_t kv0, value0;
790   ip_csum_t sum0;
791   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
792   u16 new_dst_port0, old_dst_port0;
793   int rv;
794
795   key0.addr = ip0->dst_address;
796   key0.port = udp0->dst_port;
797   key0.protocol = proto0;
798   key0.fib_index = sm->outside_fib_index;
799   kv0.key = key0.as_u64;
800
801   /* Check if destination is static mappings */
802   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
803     {
804       new_dst_addr0 = sm0.addr.as_u32;
805       new_dst_port0 = sm0.port;
806       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
807     }
808   /* or active session */
809   else
810     {
811       if (sm->num_workers > 1)
812         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
813       else
814         ti = sm->num_workers;
815
816       if (is_ed)
817         {
818           clib_bihash_kv_16_8_t ed_kv, ed_value;
819           make_ed_kv (&ed_kv, &ip0->dst_address, &ip0->src_address,
820                       ip0->protocol, sm->outside_fib_index, udp0->dst_port,
821                       udp0->src_port);
822           rv = clib_bihash_search_16_8 (&sm->per_thread_data[ti].out2in_ed,
823                                         &ed_kv, &ed_value);
824           si = ed_value.value;
825         }
826       else
827         {
828           rv = clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0,
829                                        &value0);
830           si = value0.value;
831         }
832       if (rv)
833         return 0;
834
835       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
836       new_dst_addr0 = s0->in2out.addr.as_u32;
837       new_dst_port0 = s0->in2out.port;
838       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
839     }
840
841   /* Destination is behind the same NAT, use internal address and port */
842   if (new_dst_addr0)
843     {
844       old_dst_addr0 = ip0->dst_address.as_u32;
845       ip0->dst_address.as_u32 = new_dst_addr0;
846       sum0 = ip0->checksum;
847       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
848                              ip4_header_t, dst_address);
849       ip0->checksum = ip_csum_fold (sum0);
850
851       old_dst_port0 = tcp0->dst;
852       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
853         {
854           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
855             {
856               tcp0->dst = new_dst_port0;
857               sum0 = tcp0->checksum;
858               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
859                                      ip4_header_t, dst_address);
860               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
861                                      ip4_header_t /* cheat */, length);
862               tcp0->checksum = ip_csum_fold(sum0);
863             }
864           else
865             {
866               udp0->dst_port = new_dst_port0;
867               udp0->checksum = 0;
868             }
869         }
870       else
871         {
872           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
873             {
874               sum0 = tcp0->checksum;
875               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
876                                      ip4_header_t, dst_address);
877               tcp0->checksum = ip_csum_fold(sum0);
878             }
879         }
880       return 1;
881     }
882   return 0;
883 }
884
885 static inline void
886 snat_icmp_hairpinning (snat_main_t *sm,
887                        vlib_buffer_t * b0,
888                        ip4_header_t * ip0,
889                        icmp46_header_t * icmp0,
890                        int is_ed)
891 {
892   snat_session_key_t key0, sm0;
893   clib_bihash_kv_8_8_t kv0, value0;
894   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
895   ip_csum_t sum0;
896   snat_session_t *s0;
897   int rv;
898
899   if (!icmp_is_error_message (icmp0))
900     {
901       icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
902       u16 icmp_id0 = echo0->identifier;
903       key0.addr = ip0->dst_address;
904       key0.port = icmp_id0;
905       key0.protocol = SNAT_PROTOCOL_ICMP;
906       key0.fib_index = sm->outside_fib_index;
907       kv0.key = key0.as_u64;
908
909       if (sm->num_workers > 1)
910         ti = (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread;
911       else
912         ti = sm->num_workers;
913
914       /* Check if destination is in active sessions */
915       if (is_ed)
916         {
917           clib_bihash_kv_16_8_t ed_kv, ed_value;
918           make_ed_kv (&ed_kv, &ip0->dst_address, &ip0->src_address,
919                       IP_PROTOCOL_ICMP, sm->outside_fib_index, icmp_id0, 0);
920           rv = clib_bihash_search_16_8 (&sm->per_thread_data[ti].out2in_ed,
921                                         &ed_kv, &ed_value);
922           si = ed_value.value;
923         }
924       else
925         {
926           rv = clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0,
927                                        &value0);
928           si = value0.value;
929         }
930       if (rv)
931         {
932           /* or static mappings */
933           if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
934             {
935               new_dst_addr0 = sm0.addr.as_u32;
936               vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
937             }
938         }
939       else
940         {
941           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
942           new_dst_addr0 = s0->in2out.addr.as_u32;
943           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
944           echo0->identifier = s0->in2out.port;
945           sum0 = icmp0->checksum;
946           sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
947                                  icmp_echo_header_t, identifier);
948           icmp0->checksum = ip_csum_fold (sum0);
949         }
950
951       /* Destination is behind the same NAT, use internal address and port */
952       if (new_dst_addr0)
953         {
954           old_dst_addr0 = ip0->dst_address.as_u32;
955           ip0->dst_address.as_u32 = new_dst_addr0;
956           sum0 = ip0->checksum;
957           sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
958                                  ip4_header_t, dst_address);
959           ip0->checksum = ip_csum_fold (sum0);
960         }
961     }
962
963 }
964
965 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
966                                          vlib_buffer_t * b0,
967                                          ip4_header_t * ip0,
968                                          icmp46_header_t * icmp0,
969                                          u32 sw_if_index0,
970                                          u32 rx_fib_index0,
971                                          vlib_node_runtime_t * node,
972                                          u32 next0,
973                                          f64 now,
974                                          u32 thread_index,
975                                          snat_session_t ** p_s0)
976 {
977   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
978                       next0, thread_index, p_s0, 0);
979   snat_session_t * s0 = *p_s0;
980   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
981     {
982       /* Hairpinning */
983       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == 0)
984         snat_icmp_hairpinning(sm, b0, ip0, icmp0, sm->endpoint_dependent);
985       /* Accounting */
986       nat44_session_update_counters (s0, now,
987                                      vlib_buffer_length_in_chain (sm->vlib_main, b0));
988       /* Per-user LRU list maintenance */
989       nat44_session_update_lru (sm, s0, thread_index);
990     }
991   return next0;
992 }
993
994 static inline void
995 nat_hairpinning_sm_unknown_proto (snat_main_t * sm,
996                                   vlib_buffer_t * b,
997                                   ip4_header_t * ip)
998 {
999   clib_bihash_kv_8_8_t kv, value;
1000   snat_static_mapping_t *m;
1001   u32 old_addr, new_addr;
1002   ip_csum_t sum;
1003
1004   make_sm_kv (&kv, &ip->dst_address, 0, sm->outside_fib_index, 0);
1005   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1006     return;
1007
1008   m = pool_elt_at_index (sm->static_mappings, value.value);
1009
1010   old_addr = ip->dst_address.as_u32;
1011   new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
1012   sum = ip->checksum;
1013   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
1014   ip->checksum = ip_csum_fold (sum);
1015
1016   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1017     vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
1018 }
1019
1020 static int
1021 nat_in2out_sm_unknown_proto (snat_main_t *sm,
1022                              vlib_buffer_t * b,
1023                              ip4_header_t * ip,
1024                              u32 rx_fib_index)
1025 {
1026   clib_bihash_kv_8_8_t kv, value;
1027   snat_static_mapping_t *m;
1028   snat_session_key_t m_key;
1029   u32 old_addr, new_addr;
1030   ip_csum_t sum;
1031
1032   m_key.addr = ip->src_address;
1033   m_key.port = 0;
1034   m_key.protocol = 0;
1035   m_key.fib_index = rx_fib_index;
1036   kv.key = m_key.as_u64;
1037   if (clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
1038     return 1;
1039
1040   m = pool_elt_at_index (sm->static_mappings, value.value);
1041
1042   old_addr = ip->src_address.as_u32;
1043   new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
1044   sum = ip->checksum;
1045   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1046   ip->checksum = ip_csum_fold (sum);
1047
1048
1049   /* Hairpinning */
1050   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1051     {
1052       nat_hairpinning_sm_unknown_proto (sm, b, ip);
1053       vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1054     }
1055
1056   return 0;
1057 }
1058
1059 static inline uword
1060 snat_in2out_node_fn_inline (vlib_main_t * vm,
1061                             vlib_node_runtime_t * node,
1062                             vlib_frame_t * frame, int is_slow_path,
1063                             int is_output_feature)
1064 {
1065   u32 n_left_from, * from, * to_next;
1066   snat_in2out_next_t next_index;
1067   u32 pkts_processed = 0;
1068   snat_main_t * sm = &snat_main;
1069   f64 now = vlib_time_now (vm);
1070   u32 stats_node_index;
1071   u32 thread_index = vlib_get_thread_index ();
1072
1073   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
1074     snat_in2out_node.index;
1075
1076   from = vlib_frame_vector_args (frame);
1077   n_left_from = frame->n_vectors;
1078   next_index = node->cached_next_index;
1079
1080   while (n_left_from > 0)
1081     {
1082       u32 n_left_to_next;
1083
1084       vlib_get_next_frame (vm, node, next_index,
1085                            to_next, n_left_to_next);
1086
1087       while (n_left_from >= 4 && n_left_to_next >= 2)
1088         {
1089           u32 bi0, bi1;
1090           vlib_buffer_t * b0, * b1;
1091           u32 next0, next1;
1092           u32 sw_if_index0, sw_if_index1;
1093           ip4_header_t * ip0, * ip1;
1094           ip_csum_t sum0, sum1;
1095           u32 new_addr0, old_addr0, new_addr1, old_addr1;
1096           u16 old_port0, new_port0, old_port1, new_port1;
1097           udp_header_t * udp0, * udp1;
1098           tcp_header_t * tcp0, * tcp1;
1099           icmp46_header_t * icmp0, * icmp1;
1100           snat_session_key_t key0, key1;
1101           u32 rx_fib_index0, rx_fib_index1;
1102           u32 proto0, proto1;
1103           snat_session_t * s0 = 0, * s1 = 0;
1104           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1105           u32 iph_offset0 = 0, iph_offset1 = 0;
1106
1107           /* Prefetch next iteration. */
1108           {
1109             vlib_buffer_t * p2, * p3;
1110
1111             p2 = vlib_get_buffer (vm, from[2]);
1112             p3 = vlib_get_buffer (vm, from[3]);
1113
1114             vlib_prefetch_buffer_header (p2, LOAD);
1115             vlib_prefetch_buffer_header (p3, LOAD);
1116
1117             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1118             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1119           }
1120
1121           /* speculatively enqueue b0 and b1 to the current next frame */
1122           to_next[0] = bi0 = from[0];
1123           to_next[1] = bi1 = from[1];
1124           from += 2;
1125           to_next += 2;
1126           n_left_from -= 2;
1127           n_left_to_next -= 2;
1128
1129           b0 = vlib_get_buffer (vm, bi0);
1130           b1 = vlib_get_buffer (vm, bi1);
1131
1132           if (is_output_feature)
1133             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1134
1135           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1136                  iph_offset0);
1137
1138           udp0 = ip4_next_header (ip0);
1139           tcp0 = (tcp_header_t *) udp0;
1140           icmp0 = (icmp46_header_t *) udp0;
1141
1142           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1143           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1144                                    sw_if_index0);
1145
1146           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1147
1148           if (PREDICT_FALSE(ip0->ttl == 1))
1149             {
1150               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1151               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1152                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1153                                            0);
1154               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1155               goto trace00;
1156             }
1157
1158           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1159
1160           /* Next configured feature, probably ip4-lookup */
1161           if (is_slow_path)
1162             {
1163               if (PREDICT_FALSE (proto0 == ~0))
1164                 {
1165                   if (nat_in2out_sm_unknown_proto (sm, b0, ip0, rx_fib_index0))
1166                     {
1167                       next0 = SNAT_IN2OUT_NEXT_DROP;
1168                       b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
1169                     }
1170                   goto trace00;
1171                 }
1172
1173               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1174                 {
1175                   next0 = icmp_in2out_slow_path
1176                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
1177                      node, next0, now, thread_index, &s0);
1178                   goto trace00;
1179                 }
1180             }
1181           else
1182             {
1183               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1184                 {
1185                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1186                   goto trace00;
1187                 }
1188
1189               if (ip4_is_fragment (ip0))
1190                 {
1191                   next0 = SNAT_IN2OUT_NEXT_REASS;
1192                   goto trace00;
1193                 }
1194             }
1195
1196           key0.addr = ip0->src_address;
1197           key0.port = udp0->src_port;
1198           key0.protocol = proto0;
1199           key0.fib_index = rx_fib_index0;
1200
1201           kv0.key = key0.as_u64;
1202
1203           if (PREDICT_FALSE (clib_bihash_search_8_8 (
1204               &sm->per_thread_data[thread_index].in2out, &kv0, &value0) != 0))
1205             {
1206               if (is_slow_path)
1207                 {
1208                   if (is_output_feature)
1209                     {
1210                       if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
1211                           ip0, proto0, udp0->src_port, udp0->dst_port, thread_index, sw_if_index0)))
1212                         goto trace00;
1213                     }
1214                   else
1215                     {
1216                       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1217                           ip0, proto0, rx_fib_index0, thread_index)))
1218                         goto trace00;
1219                     }
1220
1221                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1222                                      &s0, node, next0, thread_index);
1223                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1224                     goto trace00;
1225                 }
1226               else
1227                 {
1228                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1229                   goto trace00;
1230                 }
1231             }
1232           else
1233             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1234                                     value0.value);
1235
1236           b0->flags |= VNET_BUFFER_F_IS_NATED;
1237
1238           old_addr0 = ip0->src_address.as_u32;
1239           ip0->src_address = s0->out2in.addr;
1240           new_addr0 = ip0->src_address.as_u32;
1241           if (!is_output_feature)
1242             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1243
1244           sum0 = ip0->checksum;
1245           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1246                                  ip4_header_t,
1247                                  src_address /* changed member */);
1248           ip0->checksum = ip_csum_fold (sum0);
1249
1250           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1251             {
1252               old_port0 = tcp0->src_port;
1253               tcp0->src_port = s0->out2in.port;
1254               new_port0 = tcp0->src_port;
1255
1256               sum0 = tcp0->checksum;
1257               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1258                                      ip4_header_t,
1259                                      dst_address /* changed member */);
1260               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1261                                      ip4_header_t /* cheat */,
1262                                      length /* changed member */);
1263               tcp0->checksum = ip_csum_fold(sum0);
1264             }
1265           else
1266             {
1267               old_port0 = udp0->src_port;
1268               udp0->src_port = s0->out2in.port;
1269               udp0->checksum = 0;
1270             }
1271
1272           /* Accounting */
1273           nat44_session_update_counters (s0, now,
1274                                          vlib_buffer_length_in_chain (vm, b0));
1275           /* Per-user LRU list maintenance */
1276           nat44_session_update_lru (sm, s0, thread_index);
1277         trace00:
1278
1279           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1280                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1281             {
1282               snat_in2out_trace_t *t =
1283                  vlib_add_trace (vm, node, b0, sizeof (*t));
1284               t->is_slow_path = is_slow_path;
1285               t->sw_if_index = sw_if_index0;
1286               t->next_index = next0;
1287                   t->session_index = ~0;
1288               if (s0)
1289                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1290             }
1291
1292           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1293
1294           if (is_output_feature)
1295             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1296
1297           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1298                  iph_offset1);
1299
1300           udp1 = ip4_next_header (ip1);
1301           tcp1 = (tcp_header_t *) udp1;
1302           icmp1 = (icmp46_header_t *) udp1;
1303
1304           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1305           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1306                                    sw_if_index1);
1307
1308           if (PREDICT_FALSE(ip1->ttl == 1))
1309             {
1310               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1311               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1312                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1313                                            0);
1314               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1315               goto trace01;
1316             }
1317
1318           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1319
1320           /* Next configured feature, probably ip4-lookup */
1321           if (is_slow_path)
1322             {
1323               if (PREDICT_FALSE (proto1 == ~0))
1324                 {
1325                   if (nat_in2out_sm_unknown_proto (sm, b1, ip1, rx_fib_index1))
1326                     {
1327                       next1 = SNAT_IN2OUT_NEXT_DROP;
1328                       b1->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
1329                     }
1330                   goto trace01;
1331                 }
1332
1333               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1334                 {
1335                   next1 = icmp_in2out_slow_path
1336                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1337                      next1, now, thread_index, &s1);
1338                   goto trace01;
1339                 }
1340             }
1341           else
1342             {
1343               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1344                 {
1345                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1346                   goto trace01;
1347                 }
1348
1349               if (ip4_is_fragment (ip1))
1350                 {
1351                   next1 = SNAT_IN2OUT_NEXT_REASS;
1352                   goto trace01;
1353                 }
1354             }
1355
1356           key1.addr = ip1->src_address;
1357           key1.port = udp1->src_port;
1358           key1.protocol = proto1;
1359           key1.fib_index = rx_fib_index1;
1360
1361           kv1.key = key1.as_u64;
1362
1363             if (PREDICT_FALSE(clib_bihash_search_8_8 (
1364                 &sm->per_thread_data[thread_index].in2out, &kv1, &value1) != 0))
1365             {
1366               if (is_slow_path)
1367                 {
1368                   if (is_output_feature)
1369                     {
1370                       if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
1371                           ip1, proto1, udp1->src_port, udp1->dst_port, thread_index, sw_if_index1)))
1372                         goto trace01;
1373                     }
1374                   else
1375                     {
1376                       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
1377                           ip1, proto1, rx_fib_index1, thread_index)))
1378                         goto trace01;
1379                     }
1380
1381                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1382                                      &s1, node, next1, thread_index);
1383                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1384                     goto trace01;
1385                 }
1386               else
1387                 {
1388                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1389                   goto trace01;
1390                 }
1391             }
1392           else
1393             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1394                                     value1.value);
1395
1396           b1->flags |= VNET_BUFFER_F_IS_NATED;
1397
1398           old_addr1 = ip1->src_address.as_u32;
1399           ip1->src_address = s1->out2in.addr;
1400           new_addr1 = ip1->src_address.as_u32;
1401           if (!is_output_feature)
1402             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1403
1404           sum1 = ip1->checksum;
1405           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1406                                  ip4_header_t,
1407                                  src_address /* changed member */);
1408           ip1->checksum = ip_csum_fold (sum1);
1409
1410           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1411             {
1412               old_port1 = tcp1->src_port;
1413               tcp1->src_port = s1->out2in.port;
1414               new_port1 = tcp1->src_port;
1415
1416               sum1 = tcp1->checksum;
1417               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1418                                      ip4_header_t,
1419                                      dst_address /* changed member */);
1420               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1421                                      ip4_header_t /* cheat */,
1422                                      length /* changed member */);
1423               tcp1->checksum = ip_csum_fold(sum1);
1424             }
1425           else
1426             {
1427               old_port1 = udp1->src_port;
1428               udp1->src_port = s1->out2in.port;
1429               udp1->checksum = 0;
1430             }
1431
1432           /* Accounting */
1433           nat44_session_update_counters (s1, now,
1434                                          vlib_buffer_length_in_chain (vm, b1));
1435           /* Per-user LRU list maintenance */
1436           nat44_session_update_lru (sm, s1, thread_index);
1437         trace01:
1438
1439           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1440                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1441             {
1442               snat_in2out_trace_t *t =
1443                  vlib_add_trace (vm, node, b1, sizeof (*t));
1444               t->sw_if_index = sw_if_index1;
1445               t->next_index = next1;
1446               t->session_index = ~0;
1447               if (s1)
1448                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1449             }
1450
1451           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1452
1453           /* verify speculative enqueues, maybe switch current next frame */
1454           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1455                                            to_next, n_left_to_next,
1456                                            bi0, bi1, next0, next1);
1457         }
1458
1459       while (n_left_from > 0 && n_left_to_next > 0)
1460         {
1461           u32 bi0;
1462           vlib_buffer_t * b0;
1463           u32 next0;
1464           u32 sw_if_index0;
1465           ip4_header_t * ip0;
1466           ip_csum_t sum0;
1467           u32 new_addr0, old_addr0;
1468           u16 old_port0, new_port0;
1469           udp_header_t * udp0;
1470           tcp_header_t * tcp0;
1471           icmp46_header_t * icmp0;
1472           snat_session_key_t key0;
1473           u32 rx_fib_index0;
1474           u32 proto0;
1475           snat_session_t * s0 = 0;
1476           clib_bihash_kv_8_8_t kv0, value0;
1477           u32 iph_offset0 = 0;
1478
1479           /* speculatively enqueue b0 to the current next frame */
1480           bi0 = from[0];
1481           to_next[0] = bi0;
1482           from += 1;
1483           to_next += 1;
1484           n_left_from -= 1;
1485           n_left_to_next -= 1;
1486
1487           b0 = vlib_get_buffer (vm, bi0);
1488           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1489
1490           if (is_output_feature)
1491             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1492
1493           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1494                  iph_offset0);
1495
1496           udp0 = ip4_next_header (ip0);
1497           tcp0 = (tcp_header_t *) udp0;
1498           icmp0 = (icmp46_header_t *) udp0;
1499
1500           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1501           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1502                                    sw_if_index0);
1503
1504           if (PREDICT_FALSE(ip0->ttl == 1))
1505             {
1506               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1507               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1508                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1509                                            0);
1510               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1511               goto trace0;
1512             }
1513
1514           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1515
1516           /* Next configured feature, probably ip4-lookup */
1517           if (is_slow_path)
1518             {
1519               if (PREDICT_FALSE (proto0 == ~0))
1520                 {
1521                   if (nat_in2out_sm_unknown_proto (sm, b0, ip0, rx_fib_index0))
1522                     {
1523                       next0 = SNAT_IN2OUT_NEXT_DROP;
1524                       b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
1525                     }
1526                   goto trace0;
1527                 }
1528
1529               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1530                 {
1531                   next0 = icmp_in2out_slow_path
1532                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1533                      next0, now, thread_index, &s0);
1534                   goto trace0;
1535                 }
1536             }
1537           else
1538             {
1539               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1540                 {
1541                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1542                   goto trace0;
1543                 }
1544
1545               if (ip4_is_fragment (ip0))
1546                 {
1547                   next0 = SNAT_IN2OUT_NEXT_REASS;
1548                   goto trace0;
1549                 }
1550             }
1551
1552           key0.addr = ip0->src_address;
1553           key0.port = udp0->src_port;
1554           key0.protocol = proto0;
1555           key0.fib_index = rx_fib_index0;
1556
1557           kv0.key = key0.as_u64;
1558
1559           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out,
1560                                       &kv0, &value0))
1561             {
1562               if (is_slow_path)
1563                 {
1564                   if (is_output_feature)
1565                     {
1566                       if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
1567                           ip0, proto0, udp0->src_port, udp0->dst_port, thread_index, sw_if_index0)))
1568                         goto trace0;
1569                     }
1570                   else
1571                     {
1572                       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1573                           ip0, proto0, rx_fib_index0, thread_index)))
1574                         goto trace0;
1575                     }
1576
1577                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1578                                      &s0, node, next0, thread_index);
1579
1580                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1581                     goto trace0;
1582                 }
1583               else
1584                 {
1585                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1586                   goto trace0;
1587                 }
1588             }
1589           else
1590           s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1591                                   value0.value);
1592
1593           b0->flags |= VNET_BUFFER_F_IS_NATED;
1594
1595           old_addr0 = ip0->src_address.as_u32;
1596           ip0->src_address = s0->out2in.addr;
1597           new_addr0 = ip0->src_address.as_u32;
1598           if (!is_output_feature)
1599             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1600
1601           sum0 = ip0->checksum;
1602           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1603                                  ip4_header_t,
1604                                  src_address /* changed member */);
1605           ip0->checksum = ip_csum_fold (sum0);
1606
1607           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1608             {
1609               old_port0 = tcp0->src_port;
1610               tcp0->src_port = s0->out2in.port;
1611               new_port0 = tcp0->src_port;
1612
1613               sum0 = tcp0->checksum;
1614               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1615                                      ip4_header_t,
1616                                      dst_address /* changed member */);
1617               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1618                                      ip4_header_t /* cheat */,
1619                                      length /* changed member */);
1620               tcp0->checksum = ip_csum_fold(sum0);
1621             }
1622           else
1623             {
1624               old_port0 = udp0->src_port;
1625               udp0->src_port = s0->out2in.port;
1626               udp0->checksum = 0;
1627             }
1628
1629           /* Accounting */
1630           nat44_session_update_counters (s0, now,
1631                                          vlib_buffer_length_in_chain (vm, b0));
1632           /* Per-user LRU list maintenance */
1633           nat44_session_update_lru (sm, s0, thread_index);
1634
1635         trace0:
1636           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1637                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1638             {
1639               snat_in2out_trace_t *t =
1640                  vlib_add_trace (vm, node, b0, sizeof (*t));
1641               t->is_slow_path = is_slow_path;
1642               t->sw_if_index = sw_if_index0;
1643               t->next_index = next0;
1644                   t->session_index = ~0;
1645               if (s0)
1646                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1647             }
1648
1649           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1650
1651           /* verify speculative enqueue, maybe switch current next frame */
1652           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1653                                            to_next, n_left_to_next,
1654                                            bi0, next0);
1655         }
1656
1657       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1658     }
1659
1660   vlib_node_increment_counter (vm, stats_node_index,
1661                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1662                                pkts_processed);
1663   return frame->n_vectors;
1664 }
1665
1666 static uword
1667 snat_in2out_fast_path_fn (vlib_main_t * vm,
1668                           vlib_node_runtime_t * node,
1669                           vlib_frame_t * frame)
1670 {
1671   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0);
1672 }
1673
/*
 * Registration of the "nat44-in2out" graph node.  Its dispatch function is
 * snat_in2out_fast_path_fn and its trace records are rendered by
 * format_snat_in2out_trace.
 */
VLIB_REGISTER_NODE (snat_in2out_node) = {
  .function = snat_in2out_fast_path_fn,
  .name = "nat44-in2out",
  .vector_size = sizeof (u32),
  .format_trace = format_snat_in2out_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  /* error counters shared by all in2out nodes in this file */
  .n_errors = ARRAY_LEN(snat_in2out_error_strings),
  .error_strings = snat_in2out_error_strings,

  .runtime_data_bytes = sizeof (snat_runtime_t),

  .n_next_nodes = SNAT_IN2OUT_N_NEXT,

  /* edit / add dispositions here */
  .next_nodes = {
    [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
    [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
    [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
    [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1699
1700 static uword
1701 snat_in2out_output_fast_path_fn (vlib_main_t * vm,
1702                                  vlib_node_runtime_t * node,
1703                                  vlib_frame_t * frame)
1704 {
1705   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1);
1706 }
1707
/*
 * Registration of the "nat44-in2out-output" graph node, dispatched through
 * snat_in2out_output_fast_path_fn.  Compared with "nat44-in2out", the
 * LOOKUP disposition leads to "interface-output" and the slow path is
 * "nat44-in2out-output-slowpath".
 */
VLIB_REGISTER_NODE (snat_in2out_output_node) = {
  .function = snat_in2out_output_fast_path_fn,
  .name = "nat44-in2out-output",
  .vector_size = sizeof (u32),
  .format_trace = format_snat_in2out_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = ARRAY_LEN(snat_in2out_error_strings),
  .error_strings = snat_in2out_error_strings,

  .runtime_data_bytes = sizeof (snat_runtime_t),

  .n_next_nodes = SNAT_IN2OUT_N_NEXT,

  /* edit / add dispositions here */
  .next_nodes = {
    [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
    [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
    [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
    [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
                              snat_in2out_output_fast_path_fn);
1734
1735 static uword
1736 snat_in2out_slow_path_fn (vlib_main_t * vm,
1737                           vlib_node_runtime_t * node,
1738                           vlib_frame_t * frame)
1739 {
1740   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0);
1741 }
1742
/*
 * Registration of the "nat44-in2out-slowpath" graph node, dispatched
 * through snat_in2out_slow_path_fn.  Its next-node table is the same as
 * the one of "nat44-in2out".
 */
VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
  .function = snat_in2out_slow_path_fn,
  .name = "nat44-in2out-slowpath",
  .vector_size = sizeof (u32),
  .format_trace = format_snat_in2out_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = ARRAY_LEN(snat_in2out_error_strings),
  .error_strings = snat_in2out_error_strings,

  .runtime_data_bytes = sizeof (snat_runtime_t),

  .n_next_nodes = SNAT_IN2OUT_N_NEXT,

  /* edit / add dispositions here */
  .next_nodes = {
    [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
    [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
    [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
    [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
                              snat_in2out_slow_path_fn);
1769
1770 static uword
1771 snat_in2out_output_slow_path_fn (vlib_main_t * vm,
1772                                  vlib_node_runtime_t * node,
1773                                  vlib_frame_t * frame)
1774 {
1775   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1);
1776 }
1777
/*
 * Registration of the "nat44-in2out-output-slowpath" graph node,
 * dispatched through snat_in2out_output_slow_path_fn.  Its next-node table
 * is the same as the one of "nat44-in2out-output".
 */
VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
  .function = snat_in2out_output_slow_path_fn,
  .name = "nat44-in2out-output-slowpath",
  .vector_size = sizeof (u32),
  .format_trace = format_snat_in2out_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = ARRAY_LEN(snat_in2out_error_strings),
  .error_strings = snat_in2out_error_strings,

  .runtime_data_bytes = sizeof (snat_runtime_t),

  .n_next_nodes = SNAT_IN2OUT_N_NEXT,

  /* edit / add dispositions here */
  .next_nodes = {
    [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
    [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
    [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
    [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
                              snat_in2out_output_slow_path_fn);
1804
1805 extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local;
1806
1807 static inline uword
1808 nat44_hairpinning_fn_inline (vlib_main_t * vm,
1809                              vlib_node_runtime_t * node,
1810                              vlib_frame_t * frame,
1811                              int is_ed)
1812 {
1813   u32 n_left_from, * from, * to_next, stats_node_index;
1814   snat_in2out_next_t next_index;
1815   u32 pkts_processed = 0;
1816   snat_main_t * sm = &snat_main;
1817   vnet_feature_main_t *fm = &feature_main;
1818   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1819   vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index];
1820
1821   stats_node_index = is_ed ? nat44_ed_hairpinning_node.index :
1822     nat44_hairpinning_node.index;
1823   from = vlib_frame_vector_args (frame);
1824   n_left_from = frame->n_vectors;
1825   next_index = node->cached_next_index;
1826
1827   while (n_left_from > 0)
1828     {
1829       u32 n_left_to_next;
1830
1831       vlib_get_next_frame (vm, node, next_index,
1832                            to_next, n_left_to_next);
1833
1834       while (n_left_from > 0 && n_left_to_next > 0)
1835         {
1836           u32 bi0;
1837           vlib_buffer_t * b0;
1838           u32 next0;
1839           ip4_header_t * ip0;
1840           u32 proto0;
1841           udp_header_t * udp0;
1842           tcp_header_t * tcp0;
1843
1844           /* speculatively enqueue b0 to the current next frame */
1845           bi0 = from[0];
1846           to_next[0] = bi0;
1847           from += 1;
1848           to_next += 1;
1849           n_left_from -= 1;
1850           n_left_to_next -= 1;
1851
1852           b0 = vlib_get_buffer (vm, bi0);
1853           ip0 = vlib_buffer_get_current (b0);
1854           udp0 = ip4_next_header (ip0);
1855           tcp0 = (tcp_header_t *) udp0;
1856
1857           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1858
1859           vnet_get_config_data (&cm->config_main, &b0->current_config_index,
1860                                 &next0, 0);
1861
1862           if (snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0, is_ed))
1863             next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1864
1865           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1866
1867           /* verify speculative enqueue, maybe switch current next frame */
1868           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1869                                            to_next, n_left_to_next,
1870                                            bi0, next0);
1871          }
1872
1873       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1874     }
1875
1876   vlib_node_increment_counter (vm, stats_node_index,
1877                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1878                                pkts_processed);
1879   return frame->n_vectors;
1880 }
1881
1882 static uword
1883 nat44_hairpinning_fn (vlib_main_t * vm,
1884                       vlib_node_runtime_t * node,
1885                       vlib_frame_t * frame)
1886 {
1887   return nat44_hairpinning_fn_inline (vm, node, frame, 0);
1888 }
1889
/*
 * Registration of the "nat44-hairpinning" graph node, dispatched through
 * nat44_hairpinning_fn.  No trace formatter is registered for it.
 */
VLIB_REGISTER_NODE (nat44_hairpinning_node) = {
  .function = nat44_hairpinning_fn,
  .name = "nat44-hairpinning",
  .vector_size = sizeof (u32),
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN(snat_in2out_error_strings),
  .error_strings = snat_in2out_error_strings,
  /* NOTE(review): only two next nodes are declared; this presumes
   * SNAT_IN2OUT_NEXT_DROP and SNAT_IN2OUT_NEXT_LOOKUP are the values 0 and
   * 1 - confirm against the enum definition. */
  .n_next_nodes = 2,
  .next_nodes = {
    [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
    [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (nat44_hairpinning_node,
                              nat44_hairpinning_fn);
1906
1907 static uword
1908 nat44_ed_hairpinning_fn (vlib_main_t * vm,
1909                          vlib_node_runtime_t * node,
1910                          vlib_frame_t * frame)
1911 {
1912   return nat44_hairpinning_fn_inline (vm, node, frame, 1);
1913 }
1914
/*
 * Registration of the "nat44-ed-hairpinning" graph node, dispatched
 * through nat44_ed_hairpinning_fn.  No trace formatter is registered for
 * it.
 */
VLIB_REGISTER_NODE (nat44_ed_hairpinning_node) = {
  .function = nat44_ed_hairpinning_fn,
  .name = "nat44-ed-hairpinning",
  .vector_size = sizeof (u32),
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN(snat_in2out_error_strings),
  .error_strings = snat_in2out_error_strings,
  /* NOTE(review): only two next nodes are declared; this presumes
   * SNAT_IN2OUT_NEXT_DROP and SNAT_IN2OUT_NEXT_LOOKUP are the values 0 and
   * 1 - confirm against the enum definition. */
  .n_next_nodes = 2,
  .next_nodes = {
    [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
    [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_hairpinning_node,
                              nat44_ed_hairpinning_fn);
1931
1932 static inline void
1933 nat44_reass_hairpinning (snat_main_t *sm,
1934                          vlib_buffer_t * b0,
1935                          ip4_header_t * ip0,
1936                          u16 sport,
1937                          u16 dport,
1938                          u32 proto0)
1939 {
1940   snat_session_key_t key0, sm0;
1941   snat_session_t * s0;
1942   clib_bihash_kv_8_8_t kv0, value0;
1943   ip_csum_t sum0;
1944   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
1945   u16 new_dst_port0, old_dst_port0;
1946   udp_header_t * udp0;
1947   tcp_header_t * tcp0;
1948
1949   key0.addr = ip0->dst_address;
1950   key0.port = dport;
1951   key0.protocol = proto0;
1952   key0.fib_index = sm->outside_fib_index;
1953   kv0.key = key0.as_u64;
1954
1955   udp0 = ip4_next_header (ip0);
1956
1957   /* Check if destination is static mappings */
1958   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
1959     {
1960       new_dst_addr0 = sm0.addr.as_u32;
1961       new_dst_port0 = sm0.port;
1962       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1963     }
1964   /* or active sessions */
1965   else
1966     {
1967       if (sm->num_workers > 1)
1968         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
1969       else
1970         ti = sm->num_workers;
1971
1972       if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
1973         {
1974           si = value0.value;
1975           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
1976           new_dst_addr0 = s0->in2out.addr.as_u32;
1977           new_dst_port0 = s0->in2out.port;
1978           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1979         }
1980     }
1981
1982   /* Destination is behind the same NAT, use internal address and port */
1983   if (new_dst_addr0)
1984     {
1985       old_dst_addr0 = ip0->dst_address.as_u32;
1986       ip0->dst_address.as_u32 = new_dst_addr0;
1987       sum0 = ip0->checksum;
1988       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
1989                              ip4_header_t, dst_address);
1990       ip0->checksum = ip_csum_fold (sum0);
1991
1992       old_dst_port0 = dport;
1993       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0 &&
1994                        ip4_is_first_fragment (ip0)))
1995         {
1996           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1997             {
1998               tcp0 = ip4_next_header (ip0);
1999               tcp0->dst = new_dst_port0;
2000               sum0 = tcp0->checksum;
2001               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2002                                      ip4_header_t, dst_address);
2003               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
2004                                      ip4_header_t /* cheat */, length);
2005               tcp0->checksum = ip_csum_fold(sum0);
2006             }
2007           else
2008             {
2009               udp0->dst_port = new_dst_port0;
2010               udp0->checksum = 0;
2011             }
2012         }
2013       else
2014         {
2015           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2016             {
2017               tcp0 = ip4_next_header (ip0);
2018               sum0 = tcp0->checksum;
2019               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2020                                      ip4_header_t, dst_address);
2021               tcp0->checksum = ip_csum_fold(sum0);
2022             }
2023         }
2024     }
2025 }
2026
/*
 * Dispatch function registered below for the "nat44-in2out-reass" node.
 *
 * For every buffer it finds or creates a reassembly record (keyed on IP
 * source, destination, fragment id and protocol), resolves the NAT session
 * from the first fragment (in2out hash lookup, falling back to slow_path)
 * and rewrites the IP source address - plus the L4 source port on first
 * fragments - from the session's out2in key.  Non-first fragments that
 * arrive before a session is known are handed to
 * nat_ip4_reass_add_fragment() and not enqueued.
 *
 * Returns frame->n_vectors.
 */
static uword
nat44_in2out_reass_node_fn (vlib_main_t * vm,
                            vlib_node_runtime_t * node,
                            vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next;
  snat_in2out_next_t next_index;
  u32 pkts_processed = 0;
  snat_main_t *sm = &snat_main;
  f64 now = vlib_time_now (vm);
  u32 thread_index = vlib_get_thread_index ();
  snat_main_per_thread_data_t *per_thread_data =
    &sm->per_thread_data[thread_index];
  /* vectors of buffer indices filled by the reassembly helpers */
  u32 *fragments_to_drop = 0;
  u32 *fragments_to_loopback = 0;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
       {
          u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
          vlib_buffer_t *b0;
          u32 next0;
          /* set when the fragment is parked instead of being forwarded */
          u8 cached0 = 0;
          ip4_header_t *ip0;
          nat_reass_ip4_t *reass0;
          udp_header_t * udp0;
          tcp_header_t * tcp0;
          snat_session_key_t key0;
          clib_bihash_kv_8_8_t kv0, value0;
          snat_session_t * s0 = 0;
          u16 old_port0, new_port0;
          ip_csum_t sum0;

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          next0 = SNAT_IN2OUT_NEXT_LOOKUP;

          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
          rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
                                                               sw_if_index0);

          /* presumably the argument 0 selects the IPv4 setting - TODO confirm */
          if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
            {
              next0 = SNAT_IN2OUT_NEXT_DROP;
              b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
              goto trace0;
            }

          ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
          /* udp0/tcp0 alias the same bytes; only meaningful on first fragments */
          udp0 = ip4_next_header (ip0);
          tcp0 = (tcp_header_t *) udp0;
          proto0 = ip_proto_to_snat_proto (ip0->protocol);

          reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
                                                 ip0->dst_address,
                                                 ip0->fragment_id,
                                                 ip0->protocol,
                                                 1,
                                                 &fragments_to_drop);

          if (PREDICT_FALSE (!reass0))
            {
              next0 = SNAT_IN2OUT_NEXT_DROP;
              b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_REASS];
              nat_log_notice ("maximum reassemblies exceeded");
              goto trace0;
            }

          if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
            {
              /* first fragment: look the session up by inside addr/port */
              key0.addr = ip0->src_address;
              key0.port = udp0->src_port;
              key0.protocol = proto0;
              key0.fib_index = rx_fib_index0;
              kv0.key = key0.as_u64;

              if (clib_bihash_search_8_8 (&per_thread_data->in2out, &kv0, &value0))
                {
                  /* no session yet: forward untranslated or create one */
                  if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
                      ip0, proto0, rx_fib_index0, thread_index)))
                    goto trace0;

                  next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
                                     &s0, node, next0, thread_index);

                  if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
                    goto trace0;

                  reass0->sess_index = s0 - per_thread_data->sessions;
                }
              else
                {
                  s0 = pool_elt_at_index (per_thread_data->sessions,
                                          value0.value);
                  reass0->sess_index = value0.value;
                }
              /* presumably collects previously parked fragments of this
               * reassembly into fragments_to_loopback - TODO confirm */
              nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
            }
          else
            {
              /* non-first fragment with no session recorded yet: park it */
              if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
                {
                  if (nat_ip4_reass_add_fragment (reass0, bi0))
                    {
                      b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_FRAG];
                      nat_log_notice ("maximum fragments per reassembly exceeded");
                      next0 = SNAT_IN2OUT_NEXT_DROP;
                      goto trace0;
                    }
                  cached0 = 1;
                  goto trace0;
                }
              s0 = pool_elt_at_index (per_thread_data->sessions,
                                      reass0->sess_index);
            }

          /* rewrite the source address and patch the IP header checksum */
          old_addr0 = ip0->src_address.as_u32;
          ip0->src_address = s0->out2in.addr;
          new_addr0 = ip0->src_address.as_u32;
          vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;

          sum0 = ip0->checksum;
          sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
                                 ip4_header_t,
                                 src_address /* changed member */);
          ip0->checksum = ip_csum_fold (sum0);

          /* the L4 header is rewritten on the first fragment only */
          if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
            {
              if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
                {
                  old_port0 = tcp0->src_port;
                  tcp0->src_port = s0->out2in.port;
                  new_port0 = tcp0->src_port;

                  /* NOTE(review): the value being replaced is the source
                   * address although dst_address is named as the member. */
                  sum0 = tcp0->checksum;
                  sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
                                         ip4_header_t,
                                         dst_address /* changed member */);
                  sum0 = ip_csum_update (sum0, old_port0, new_port0,
                                         ip4_header_t /* cheat */,
                                         length /* changed member */);
                  tcp0->checksum = ip_csum_fold(sum0);
                }
              else
                {
                  /* non-TCP: rewrite the port and clear the UDP checksum */
                  old_port0 = udp0->src_port;
                  udp0->src_port = s0->out2in.port;
                  udp0->checksum = 0;
                }
            }

          /* Hairpinning */
          nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port,
                                   s0->ext_host_port, proto0);

          /* Accounting */
          nat44_session_update_counters (s0, now,
                                         vlib_buffer_length_in_chain (vm, b0));
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s0, thread_index);

        trace0:
          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              nat44_in2out_reass_trace_t *t =
                 vlib_add_trace (vm, node, b0, sizeof (*t));
              t->cached = cached0;
              t->sw_if_index = sw_if_index0;
              t->next_index = next0;
            }

          if (cached0)
            {
              /* parked fragment: undo the speculative enqueue */
              n_left_to_next++;
              to_next--;
            }
          else
            {
              pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;

              /* verify speculative enqueue, maybe switch current next frame */
              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                               to_next, n_left_to_next,
                                               bi0, next0);
            }

          /* input exhausted: refill the frame's vector from the loopback
           * list so those fragments run through this same loop */
          if (n_left_from == 0 && vec_len (fragments_to_loopback))
            {
              from = vlib_frame_vector_args (frame);
              u32 len = vec_len (fragments_to_loopback);
              if (len <= VLIB_FRAME_SIZE)
                {
                  clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
                  n_left_from = len;
                  vec_reset_length (fragments_to_loopback);
                }
              else
                {
                  /* take the last VLIB_FRAME_SIZE entries, keep the rest */
                  clib_memcpy (from,
                               fragments_to_loopback + (len - VLIB_FRAME_SIZE),
                               sizeof (u32) * VLIB_FRAME_SIZE);
                  n_left_from = VLIB_FRAME_SIZE;
                  _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
                }
            }
       }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  vlib_node_increment_counter (vm, nat44_in2out_reass_node.index,
                               SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
                               pkts_processed);

  /* hand everything collected in fragments_to_drop to the drop next node */
  nat_send_all_to_node (vm, fragments_to_drop, node,
                        &node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT],
                        SNAT_IN2OUT_NEXT_DROP);

  vec_free (fragments_to_drop);
  vec_free (fragments_to_loopback);
  return frame->n_vectors;
}
2267
/*
 * Registration of the "nat44-in2out-reass" graph node, dispatched through
 * nat44_in2out_reass_node_fn; trace records are rendered by
 * format_nat44_in2out_reass_trace.
 */
VLIB_REGISTER_NODE (nat44_in2out_reass_node) = {
  .function = nat44_in2out_reass_node_fn,
  .name = "nat44-in2out-reass",
  .vector_size = sizeof (u32),
  .format_trace = format_nat44_in2out_reass_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = ARRAY_LEN(snat_in2out_error_strings),
  .error_strings = snat_in2out_error_strings,

  .n_next_nodes = SNAT_IN2OUT_N_NEXT,
  .next_nodes = {
    [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
    [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
    [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
    [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (nat44_in2out_reass_node,
                              nat44_in2out_reass_node_fn);
2290
2291 /*******************************/
2292 /*** endpoint-dependent mode ***/
2293 /*******************************/
2294
2295 static_always_inline int
2296 icmp_get_ed_key(ip4_header_t *ip0, nat_ed_ses_key_t *p_key0)
2297 {
2298   icmp46_header_t *icmp0;
2299   nat_ed_ses_key_t key0;
2300   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2301   ip4_header_t *inner_ip0 = 0;
2302   void *l4_header = 0;
2303   icmp46_header_t *inner_icmp0;
2304
2305   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2306   echo0 = (icmp_echo_header_t *)(icmp0+1);
2307
2308   if (!icmp_is_error_message (icmp0))
2309     {
2310       key0.proto = IP_PROTOCOL_ICMP;
2311       key0.l_addr = ip0->src_address;
2312       key0.r_addr = ip0->dst_address;
2313       key0.l_port = echo0->identifier;
2314       key0.r_port = 0;
2315     }
2316   else
2317     {
2318       inner_ip0 = (ip4_header_t *)(echo0+1);
2319       l4_header = ip4_next_header (inner_ip0);
2320       key0.proto = inner_ip0->protocol;
2321       key0.r_addr = inner_ip0->src_address;
2322       key0.l_addr = inner_ip0->dst_address;
2323       switch (ip_proto_to_snat_proto (inner_ip0->protocol))
2324         {
2325         case SNAT_PROTOCOL_ICMP:
2326           inner_icmp0 = (icmp46_header_t*)l4_header;
2327           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2328           key0.r_port = 0;
2329           key0.l_port = inner_echo0->identifier;
2330           break;
2331         case SNAT_PROTOCOL_UDP:
2332         case SNAT_PROTOCOL_TCP:
2333           key0.l_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2334           key0.r_port = ((tcp_udp_header_t*)l4_header)->src_port;
2335           break;
2336         default:
2337           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
2338         }
2339     }
2340   *p_key0 = key0;
2341   return 0;
2342 }
2343
2344 static u32
2345 slow_path_ed (snat_main_t *sm,
2346               vlib_buffer_t *b,
2347               u32 rx_fib_index,
2348               clib_bihash_kv_16_8_t *kv,
2349               snat_session_t ** sessionp,
2350               vlib_node_runtime_t * node,
2351               u32 next,
2352               u32 thread_index)
2353 {
2354   snat_session_t *s;
2355   snat_user_t *u;
2356   snat_session_key_t key0, key1;
2357   u8 lb = 0, is_sm = 0;
2358   u32 address_index = ~0;
2359   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2360   nat_ed_ses_key_t *key = (nat_ed_ses_key_t *) kv->key;
2361   u32 proto = ip_proto_to_snat_proto (key->proto);
2362
2363   if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index)))
2364     {
2365       b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
2366       nat_ipfix_logging_max_sessions(sm->max_translations);
2367       nat_log_notice ("maximum sessions exceeded");
2368       return SNAT_IN2OUT_NEXT_DROP;
2369     }
2370
2371   key0.addr = key->l_addr;
2372   key0.port = key->l_port;
2373   key1.protocol = key0.protocol = proto;
2374   key0.fib_index = rx_fib_index;
2375   key1.fib_index = sm->outside_fib_index;
2376   /* First try to match static mapping by local address and port */
2377   if (snat_static_mapping_match (sm, key0, &key1, 0, 0, 0, &lb))
2378     {
2379       /* Try to create dynamic translation */
2380       if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index,
2381                                                thread_index, &key1,
2382                                                &address_index,
2383                                                sm->port_per_thread,
2384                                                tsm->snat_thread_index))
2385         {
2386           nat_log_notice ("addresses exhausted");
2387           b->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
2388           return SNAT_IN2OUT_NEXT_DROP;
2389         }
2390     }
2391   else
2392     is_sm = 1;
2393
2394   u = nat_user_get_or_create (sm, &key->l_addr, rx_fib_index, thread_index);
2395   if (!u)
2396     {
2397       nat_log_warn ("create NAT user failed");
2398       return SNAT_IN2OUT_NEXT_DROP;
2399     }
2400
2401   s = nat_session_alloc_or_recycle (sm, u, thread_index);
2402   if (!s)
2403     {
2404       nat_log_warn ("create NAT session failed");
2405       return SNAT_IN2OUT_NEXT_DROP;
2406     }
2407
2408   user_session_increment (sm, u, is_sm);
2409   if (is_sm)
2410     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
2411   if (lb)
2412     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
2413   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
2414   s->outside_address_index = address_index;
2415   s->ext_host_addr = key->r_addr;
2416   s->ext_host_port = key->r_port;
2417   s->in2out = key0;
2418   s->out2in = key1;
2419   s->out2in.protocol = key0.protocol;
2420
2421   /* Add to lookup tables */
2422   kv->value = s - tsm->sessions;
2423   if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, kv, 1))
2424     nat_log_notice ("in2out-ed key add failed");
2425
2426   make_ed_kv (kv, &key1.addr, &key->r_addr, key->proto, key1.fib_index,
2427               key1.port, key->r_port);
2428   kv->value = s - tsm->sessions;
2429   if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, kv, 1))
2430     nat_log_notice ("out2in-ed key add failed");
2431
2432   *sessionp = s;
2433
2434   /* log NAT event */
2435   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
2436                                       s->out2in.addr.as_u32,
2437                                       s->in2out.protocol,
2438                                       s->in2out.port,
2439                                       s->out2in.port,
2440                                       s->in2out.fib_index);
2441   return next;
2442 }
2443
2444 static_always_inline int
2445 nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
2446                         u32 sw_if_index, ip4_header_t * ip, u32 proto,
2447                         u32 rx_fib_index, u32 thread_index)
2448 {
2449   udp_header_t *udp = ip4_next_header (ip);
2450   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2451   clib_bihash_kv_16_8_t kv, value;
2452   snat_session_key_t key0, key1;
2453
2454   make_ed_kv (&kv, &ip->dst_address, &ip->src_address, ip->protocol,
2455               sm->outside_fib_index, udp->dst_port, udp->src_port);
2456
2457   /* NAT packet aimed at external address if */
2458   /* has active sessions */
2459   if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
2460     {
2461       key0.addr = ip->dst_address;
2462       key0.port = udp->dst_port;
2463       key0.protocol = proto;
2464       key0.fib_index = sm->outside_fib_index;
2465       /* or is static mappings */
2466       if (!snat_static_mapping_match(sm, key0, &key1, 1, 0, 0, 0))
2467         return 0;
2468     }
2469   else
2470     return 0;
2471
2472   if (sm->forwarding_enabled)
2473     return 1;
2474
2475   return snat_not_translate_fast(sm, node, sw_if_index, ip, proto, rx_fib_index);
2476 }
2477
2478 static_always_inline int
2479 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
2480                                       u32 thread_index, f64 now,
2481                                       vlib_main_t * vm, vlib_buffer_t * b)
2482 {
2483   nat_ed_ses_key_t key;
2484   clib_bihash_kv_16_8_t kv, value;
2485   udp_header_t *udp;
2486   snat_session_t *s = 0;
2487   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2488
2489   if (!sm->forwarding_enabled)
2490     return 0;
2491
2492   if (ip->protocol == IP_PROTOCOL_ICMP)
2493     {
2494       key.as_u64[0] = key.as_u64[1] = 0;
2495       if (icmp_get_ed_key (ip, &key))
2496         return 0;
2497       key.fib_index = 0;
2498       kv.key[0] = key.as_u64[0];
2499       kv.key[1] = key.as_u64[1];
2500     }
2501   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
2502     {
2503       udp = ip4_next_header(ip);
2504       make_ed_kv (&kv, &ip->src_address, &ip->dst_address, ip->protocol, 0,
2505                   udp->src_port, udp->dst_port);
2506     }
2507   else
2508     {
2509       make_ed_kv (&kv, &ip->src_address, &ip->dst_address, ip->protocol, 0, 0,
2510                   0);
2511     }
2512
2513   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
2514     {
2515       s = pool_elt_at_index (tsm->sessions, value.value);
2516       if (is_fwd_bypass_session (s))
2517         {
2518           if (ip->protocol == IP_PROTOCOL_TCP)
2519             {
2520               tcp_header_t *tcp = ip4_next_header(ip);
2521               if (nat44_set_tcp_session_state_i2o (sm, s, tcp, thread_index))
2522                 return 1;
2523             }
2524           /* Per-user LRU list maintenance */
2525           nat44_session_update_lru (sm, s, thread_index);
2526           /* Accounting */
2527           nat44_session_update_counters (s, now,
2528                                          vlib_buffer_length_in_chain (vm, b));
2529           return 1;
2530         }
2531       else
2532         return 0;
2533     }
2534
2535   return 0;
2536 }
2537
2538 static_always_inline int
2539 nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip,
2540                                        u8 proto, u16 src_port, u16 dst_port,
2541                                        u32 thread_index, u32 sw_if_index)
2542 {
2543   clib_bihash_kv_16_8_t kv, value;
2544   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2545   snat_interface_t *i;
2546   snat_session_t *s;
2547
2548   /* src NAT check */
2549   make_ed_kv (&kv, &ip->src_address, &ip->dst_address, proto,
2550               sm->outside_fib_index, src_port, dst_port);
2551   if (!clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
2552     return 1;
2553
2554   /* dst NAT check */
2555   make_ed_kv (&kv, &ip->dst_address, &ip->src_address, proto,
2556               sm->inside_fib_index, dst_port, src_port);
2557   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
2558   {
2559     s = pool_elt_at_index (tsm->sessions, value.value);
2560     if (is_fwd_bypass_session (s))
2561       return 0;
2562
2563     /* hairpinning */
2564     pool_foreach (i, sm->output_feature_interfaces,
2565     ({
2566       if ((nat_interface_is_inside(i)) && (sw_if_index == i->sw_if_index))
2567         return 0;
2568     }));
2569     return 1;
2570   }
2571
2572   return 0;
2573 }
2574
2575 u32
2576 icmp_match_in2out_ed(snat_main_t *sm, vlib_node_runtime_t *node,
2577                      u32 thread_index, vlib_buffer_t *b, ip4_header_t *ip,
2578                      u8 *p_proto, snat_session_key_t *p_value,
2579                      u8 *p_dont_translate, void *d, void *e)
2580 {
2581   icmp46_header_t *icmp;
2582   u32 sw_if_index;
2583   u32 rx_fib_index;
2584   nat_ed_ses_key_t key;
2585   snat_session_t *s = 0;
2586   u8 dont_translate = 0;
2587   clib_bihash_kv_16_8_t kv, value;
2588   u32 next = ~0;
2589   int err;
2590   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2591
2592   icmp = (icmp46_header_t *) ip4_next_header (ip);
2593   sw_if_index = vnet_buffer(b)->sw_if_index[VLIB_RX];
2594   rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
2595
2596   key.as_u64[0] = key.as_u64[1] = 0;
2597   err = icmp_get_ed_key (ip, &key);
2598   if (err != 0)
2599     {
2600       b->error = node->errors[err];
2601       next = SNAT_IN2OUT_NEXT_DROP;
2602       goto out;
2603     }
2604   key.fib_index = rx_fib_index;
2605
2606   kv.key[0] = key.as_u64[0];
2607   kv.key[1] = key.as_u64[1];
2608
2609   if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
2610     {
2611       if (vnet_buffer(b)->sw_if_index[VLIB_TX] != ~0)
2612         {
2613           if (PREDICT_FALSE(nat44_ed_not_translate_output_feature(sm, ip,
2614               key.proto, key.l_port, key.r_port, thread_index, sw_if_index)))
2615             {
2616               dont_translate = 1;
2617               goto out;
2618             }
2619         }
2620       else
2621         {
2622           if (PREDICT_FALSE(nat44_ed_not_translate(sm, node, sw_if_index,
2623               ip, SNAT_PROTOCOL_ICMP, rx_fib_index, thread_index)))
2624             {
2625               dont_translate = 1;
2626               goto out;
2627             }
2628         }
2629
2630       if (PREDICT_FALSE(icmp_is_error_message (icmp)))
2631         {
2632           b->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2633           next = SNAT_IN2OUT_NEXT_DROP;
2634           goto out;
2635         }
2636
2637       next = slow_path_ed (sm, b, rx_fib_index, &kv, &s, node, next,
2638                            thread_index);
2639
2640       if (PREDICT_FALSE (next == SNAT_IN2OUT_NEXT_DROP))
2641         goto out;
2642     }
2643   else
2644     {
2645       if (PREDICT_FALSE(icmp->type != ICMP4_echo_request &&
2646                         icmp->type != ICMP4_echo_reply &&
2647                         !icmp_is_error_message (icmp)))
2648         {
2649           b->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2650           next = SNAT_IN2OUT_NEXT_DROP;
2651           goto out;
2652         }
2653
2654       s = pool_elt_at_index (tsm->sessions, value.value);
2655     }
2656
2657   *p_proto = ip_proto_to_snat_proto (key.proto);
2658 out:
2659   if (s)
2660     *p_value = s->out2in;
2661   *p_dont_translate = dont_translate;
2662   if (d)
2663     *(snat_session_t**)d = s;
2664   return next;
2665 }
2666
2667 static inline void
2668 nat44_ed_hairpinning_unknown_proto (snat_main_t *sm,
2669                                     vlib_buffer_t * b,
2670                                     ip4_header_t * ip)
2671 {
2672   u32 old_addr, new_addr = 0, ti = 0;
2673   clib_bihash_kv_8_8_t kv, value;
2674   clib_bihash_kv_16_8_t s_kv, s_value;
2675   snat_static_mapping_t *m;
2676   ip_csum_t sum;
2677   snat_session_t *s;
2678   snat_main_per_thread_data_t *tsm;
2679
2680   if (sm->num_workers > 1)
2681     ti = sm->worker_out2in_cb (ip, sm->outside_fib_index);
2682   else
2683     ti = sm->num_workers;
2684   tsm = &sm->per_thread_data[ti];
2685
2686   old_addr = ip->dst_address.as_u32;
2687   make_ed_kv (&s_kv, &ip->dst_address, &ip->src_address, ip->protocol,
2688               sm->outside_fib_index, 0, 0);
2689   if (clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
2690     {
2691       make_sm_kv (&kv, &ip->dst_address, 0, sm->outside_fib_index, 0);
2692       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
2693         return;
2694
2695       m = pool_elt_at_index (sm->static_mappings, value.value);
2696       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
2697         vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
2698       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
2699     }
2700   else
2701     {
2702       s = pool_elt_at_index (sm->per_thread_data[ti].sessions, s_value.value);
2703       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
2704         vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
2705       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
2706     }
2707   sum = ip->checksum;
2708   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
2709   ip->checksum = ip_csum_fold (sum);
2710 }
2711
2712 static snat_session_t *
2713 nat44_ed_in2out_unknown_proto (snat_main_t *sm,
2714                                vlib_buffer_t * b,
2715                                ip4_header_t * ip,
2716                                u32 rx_fib_index,
2717                                u32 thread_index,
2718                                f64 now,
2719                                vlib_main_t * vm,
2720                                vlib_node_runtime_t * node)
2721 {
2722   clib_bihash_kv_8_8_t kv, value;
2723   clib_bihash_kv_16_8_t s_kv, s_value;
2724   snat_static_mapping_t *m;
2725   u32 old_addr, new_addr = 0;
2726   ip_csum_t sum;
2727   snat_user_t *u;
2728   dlist_elt_t *head, *elt;
2729   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2730   u32 elt_index, head_index, ses_index;
2731   snat_session_t * s;
2732   u32 address_index = ~0;
2733   int i;
2734   u8 is_sm = 0;
2735
2736   old_addr = ip->src_address.as_u32;
2737
2738   make_ed_kv (&s_kv, &ip->src_address, &ip->dst_address, ip->protocol,
2739               rx_fib_index, 0, 0);
2740
2741   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &s_kv, &s_value))
2742     {
2743       s = pool_elt_at_index (tsm->sessions, s_value.value);
2744       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
2745     }
2746   else
2747     {
2748       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
2749         {
2750           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
2751           nat_ipfix_logging_max_sessions(sm->max_translations);
2752           nat_log_notice ("maximum sessions exceeded");
2753           return 0;
2754         }
2755
2756       u = nat_user_get_or_create (sm, &ip->src_address, rx_fib_index,
2757                                   thread_index);
2758       if (!u)
2759         {
2760           nat_log_warn ("create NAT user failed");
2761           return 0;
2762         }
2763
2764       make_sm_kv (&kv, &ip->src_address, 0, rx_fib_index, 0);
2765
2766       /* Try to find static mapping first */
2767       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
2768         {
2769           m = pool_elt_at_index (sm->static_mappings, value.value);
2770           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
2771           is_sm = 1;
2772           goto create_ses;
2773         }
2774       /* Fallback to 3-tuple key */
2775       else
2776         {
2777           /* Choose same out address as for TCP/UDP session to same destination */
2778           head_index = u->sessions_per_user_list_head_index;
2779           head = pool_elt_at_index (tsm->list_pool, head_index);
2780           elt_index = head->next;
2781           if (PREDICT_FALSE (elt_index == ~0))
2782             ses_index = ~0;
2783           else
2784             {
2785               elt = pool_elt_at_index (tsm->list_pool, elt_index);
2786               ses_index = elt->value;
2787             }
2788
2789           while (ses_index != ~0)
2790             {
2791               s =  pool_elt_at_index (tsm->sessions, ses_index);
2792               elt_index = elt->next;
2793               elt = pool_elt_at_index (tsm->list_pool, elt_index);
2794               ses_index = elt->value;
2795
2796               if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
2797                 {
2798                   new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
2799                   address_index = s->outside_address_index;
2800
2801                   make_ed_kv (&s_kv, &s->out2in.addr, &ip->dst_address,
2802                               ip->protocol, sm->outside_fib_index, 0, 0);
2803                   if (clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
2804                     goto create_ses;
2805
2806                   break;
2807                 }
2808             }
2809
2810           for (i = 0; i < vec_len (sm->addresses); i++)
2811             {
2812               make_ed_kv (&s_kv, &sm->addresses[i].addr, &ip->dst_address,
2813                           ip->protocol, sm->outside_fib_index, 0, 0);
2814               if (clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
2815                 {
2816                   new_addr = ip->src_address.as_u32 =
2817                     sm->addresses[i].addr.as_u32;
2818                   address_index = i;
2819                   goto create_ses;
2820                 }
2821             }
2822           return 0;
2823         }
2824
2825 create_ses:
2826       s = nat_session_alloc_or_recycle (sm, u, thread_index);
2827       if (!s)
2828         {
2829           nat_log_warn ("create NAT session failed");
2830           return 0;
2831         }
2832
2833       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
2834       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
2835       s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
2836       s->outside_address_index = address_index;
2837       s->out2in.addr.as_u32 = new_addr;
2838       s->out2in.fib_index = sm->outside_fib_index;
2839       s->in2out.addr.as_u32 = old_addr;
2840       s->in2out.fib_index = rx_fib_index;
2841       s->in2out.port = s->out2in.port = ip->protocol;
2842       if (is_sm)
2843         s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
2844       user_session_increment (sm, u, is_sm);
2845
2846       /* Add to lookup tables */
2847       make_ed_kv (&s_kv, &s->in2out.addr, &ip->dst_address, ip->protocol,
2848                   rx_fib_index, 0, 0);
2849       s_kv.value = s - tsm->sessions;
2850       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1))
2851         nat_log_notice ("in2out key add failed");
2852
2853       make_ed_kv (&s_kv, &s->out2in.addr, &ip->dst_address, ip->protocol,
2854                   sm->outside_fib_index, 0, 0);
2855       s_kv.value = s - tsm->sessions;
2856       if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &s_kv, 1))
2857         nat_log_notice ("out2in key add failed");
2858   }
2859
2860   /* Update IP checksum */
2861   sum = ip->checksum;
2862   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
2863   ip->checksum = ip_csum_fold (sum);
2864
2865   /* Accounting */
2866   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b));
2867   /* Per-user LRU list maintenance */
2868   nat44_session_update_lru (sm, s, thread_index);
2869
2870   /* Hairpinning */
2871   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
2872     nat44_ed_hairpinning_unknown_proto(sm, b, ip);
2873
2874   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
2875     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2876
2877   return s;
2878 }
2879
2880 static inline uword
2881 nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
2882                                 vlib_node_runtime_t * node,
2883                                 vlib_frame_t * frame, int is_slow_path,
2884                                 int is_output_feature)
2885 {
2886   u32 n_left_from, *from, *to_next, pkts_processed = 0, stats_node_index;
2887   snat_in2out_next_t next_index;
2888   snat_main_t *sm = &snat_main;
2889   f64 now = vlib_time_now (vm);
2890   u32 thread_index = vlib_get_thread_index ();
2891   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2892
2893   stats_node_index = is_slow_path ? nat44_ed_in2out_slowpath_node.index :
2894     nat44_ed_in2out_node.index;
2895
2896   from = vlib_frame_vector_args (frame);
2897   n_left_from = frame->n_vectors;
2898   next_index = node->cached_next_index;
2899
2900   while (n_left_from > 0)
2901     {
2902       u32 n_left_to_next;
2903
2904       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2905
2906       while (n_left_from >= 4 && n_left_to_next >= 2)
2907         {
2908           u32 bi0, bi1;
2909           vlib_buffer_t *b0, *b1;
2910           u32 next0, sw_if_index0, rx_fib_index0, iph_offset0 = 0, proto0,
2911               new_addr0, old_addr0;
2912           u32 next1, sw_if_index1, rx_fib_index1, iph_offset1 = 0, proto1,
2913               new_addr1, old_addr1;
2914           u16 old_port0, new_port0, old_port1, new_port1;
2915           ip4_header_t *ip0, *ip1;
2916           udp_header_t *udp0, *udp1;
2917           tcp_header_t *tcp0, *tcp1;
2918           icmp46_header_t *icmp0, *icmp1;
2919           snat_session_t *s0 = 0, *s1 = 0;
2920           clib_bihash_kv_16_8_t kv0, value0, kv1, value1;
2921           ip_csum_t sum0, sum1;
2922
2923           /* Prefetch next iteration. */
2924           {
2925             vlib_buffer_t * p2, * p3;
2926
2927             p2 = vlib_get_buffer (vm, from[2]);
2928             p3 = vlib_get_buffer (vm, from[3]);
2929
2930             vlib_prefetch_buffer_header (p2, LOAD);
2931             vlib_prefetch_buffer_header (p3, LOAD);
2932
2933             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2934             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2935           }
2936
2937           /* speculatively enqueue b0 and b1 to the current next frame */
2938           to_next[0] = bi0 = from[0];
2939           to_next[1] = bi1 = from[1];
2940           from += 2;
2941           to_next += 2;
2942           n_left_from -= 2;
2943           n_left_to_next -= 2;
2944
2945           b0 = vlib_get_buffer (vm, bi0);
2946           b1 = vlib_get_buffer (vm, bi1);
2947
2948           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2949
2950           if (is_output_feature)
2951             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
2952
2953           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
2954                  iph_offset0);
2955
2956           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2957           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2958                                                                sw_if_index0);
2959
2960           if (PREDICT_FALSE(ip0->ttl == 1))
2961             {
2962               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2963               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2964                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2965                                            0);
2966               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2967               goto trace00;
2968             }
2969
2970           udp0 = ip4_next_header (ip0);
2971           tcp0 = (tcp_header_t *) udp0;
2972           icmp0 = (icmp46_header_t *) udp0;
2973           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2974
2975           if (is_slow_path)
2976             {
2977               if (PREDICT_FALSE (proto0 == ~0))
2978                 {
2979                   s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0,
2980                                                       rx_fib_index0,
2981                                                       thread_index, now, vm,
2982                                                       node);
2983                   if (!s0)
2984                     next0 = SNAT_IN2OUT_NEXT_DROP;
2985                   goto trace00;
2986                 }
2987
2988               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2989                 {
2990                   next0 = icmp_in2out_slow_path
2991                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
2992                      next0, now, thread_index, &s0);
2993                   goto trace00;
2994                 }
2995             }
2996           else
2997             {
2998                if (is_output_feature)
2999                 {
3000                   if (PREDICT_FALSE(nat_not_translate_output_feature_fwd(
3001                       sm, ip0, thread_index, now, vm, b0)))
3002                     goto trace00;
3003                 }
3004
3005               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
3006                 {
3007                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3008                   goto trace00;
3009                 }
3010
3011               if (ip4_is_fragment (ip0))
3012                 {
3013                   b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
3014                   next0 = SNAT_IN2OUT_NEXT_DROP;
3015                   goto trace00;
3016                 }
3017             }
3018
3019           make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address, ip0->protocol,
3020                       rx_fib_index0, udp0->src_port, udp0->dst_port);
3021
3022           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
3023             {
3024               if (is_slow_path)
3025                 {
3026                   if (is_output_feature)
3027                     {
3028                       if (PREDICT_FALSE(nat44_ed_not_translate_output_feature(
3029                           sm, ip0, ip0->protocol, udp0->src_port,
3030                           udp0->dst_port, thread_index, sw_if_index0)))
3031                         goto trace00;
3032                     }
3033                   else
3034                     {
3035                       if (PREDICT_FALSE(nat44_ed_not_translate(sm, node,
3036                           sw_if_index0, ip0, proto0, rx_fib_index0,
3037                           thread_index)))
3038                         goto trace00;
3039                     }
3040
3041                   next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node,
3042                                         next0, thread_index);
3043
3044                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
3045                     goto trace00;
3046                 }
3047               else
3048                 {
3049                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3050                   goto trace00;
3051                 }
3052             }
3053           else
3054             {
3055               s0 = pool_elt_at_index (tsm->sessions, value0.value);
3056             }
3057
3058           b0->flags |= VNET_BUFFER_F_IS_NATED;
3059
3060           if (!is_output_feature)
3061             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
3062
3063           old_addr0 = ip0->src_address.as_u32;
3064           new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
3065           sum0 = ip0->checksum;
3066           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
3067                                  src_address);
3068           if (PREDICT_FALSE (is_twice_nat_session (s0)))
3069             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
3070                                    s0->ext_host_addr.as_u32, ip4_header_t,
3071                                    dst_address);
3072           ip0->checksum = ip_csum_fold (sum0);
3073
3074           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
3075             {
3076               old_port0 = tcp0->src_port;
3077               new_port0 = tcp0->src_port = s0->out2in.port;
3078
3079               sum0 = tcp0->checksum;
3080               sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
3081                                      dst_address);
3082               sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
3083                                      length);
3084               if (PREDICT_FALSE (is_twice_nat_session (s0)))
3085                 {
3086                   sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
3087                                          s0->ext_host_addr.as_u32,
3088                                          ip4_header_t, dst_address);
3089                   sum0 = ip_csum_update (sum0, tcp0->dst_port,
3090                                          s0->ext_host_port, ip4_header_t,
3091                                          length);
3092                   tcp0->dst_port = s0->ext_host_port;
3093                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
3094                 }
3095               tcp0->checksum = ip_csum_fold(sum0);
3096               if (nat44_set_tcp_session_state_i2o (sm, s0, tcp0, thread_index))
3097                 goto trace00;
3098             }
3099           else
3100             {
3101               udp0->src_port = s0->out2in.port;
3102               udp0->checksum = 0;
3103               if (PREDICT_FALSE (is_twice_nat_session (s0)))
3104                 {
3105                   udp0->dst_port = s0->ext_host_port;
3106                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
3107                 }
3108             }
3109
3110           /* Accounting */
3111           nat44_session_update_counters (s0, now,
3112                                          vlib_buffer_length_in_chain (vm, b0));
3113           /* Per-user LRU list maintenance */
3114           nat44_session_update_lru (sm, s0, thread_index);
3115
3116         trace00:
3117           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3118                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3119             {
3120               snat_in2out_trace_t *t =
3121                 vlib_add_trace (vm, node, b0, sizeof (*t));
3122               t->is_slow_path = is_slow_path;
3123               t->sw_if_index = sw_if_index0;
3124               t->next_index = next0;
3125               t->session_index = ~0;
3126               if (s0)
3127                 t->session_index = s0 - tsm->sessions;
3128             }
3129
3130           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3131
3132
3133           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
3134
3135           if (is_output_feature)
3136             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
3137
3138           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
3139                  iph_offset1);
3140
3141           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
3142           rx_fib_index1 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3143                                                                sw_if_index1);
3144
3145           if (PREDICT_FALSE(ip1->ttl == 1))
3146             {
3147               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3148               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
3149                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3150                                            0);
3151               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3152               goto trace01;
3153             }
3154
3155           udp1 = ip4_next_header (ip1);
3156           tcp1 = (tcp_header_t *) udp1;
3157           icmp1 = (icmp46_header_t *) udp1;
3158           proto1 = ip_proto_to_snat_proto (ip1->protocol);
3159
3160           if (is_slow_path)
3161             {
3162               if (PREDICT_FALSE (proto1 == ~0))
3163                 {
3164                   s1 = nat44_ed_in2out_unknown_proto (sm, b1, ip1,
3165                                                       rx_fib_index1,
3166                                                       thread_index, now, vm,
3167                                                       node);
3168                   if (!s1)
3169                     next1 = SNAT_IN2OUT_NEXT_DROP;
3170                   goto trace01;
3171                 }
3172
3173               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
3174                 {
3175                   next1 = icmp_in2out_slow_path
3176                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
3177                      next1, now, thread_index, &s1);
3178                   goto trace01;
3179                 }
3180             }
3181           else
3182             {
3183                if (is_output_feature)
3184                 {
3185                   if (PREDICT_FALSE(nat_not_translate_output_feature_fwd(
3186                       sm, ip1, thread_index, now, vm, b1)))
3187                     goto trace01;
3188                 }
3189
3190               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
3191                 {
3192                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3193                   goto trace01;
3194                 }
3195
3196               if (ip4_is_fragment (ip1))
3197                 {
3198                   b1->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
3199                   next1 = SNAT_IN2OUT_NEXT_DROP;
3200                   goto trace01;
3201                 }
3202             }
3203
3204           make_ed_kv (&kv1, &ip1->src_address, &ip1->dst_address, ip1->protocol,
3205                       rx_fib_index1, udp1->src_port, udp1->dst_port);
3206
3207           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv1, &value1))
3208             {
3209               if (is_slow_path)
3210                 {
3211                   if (is_output_feature)
3212                     {
3213                       if (PREDICT_FALSE(nat44_ed_not_translate_output_feature(
3214                           sm, ip1, ip1->protocol, udp1->src_port,
3215                           udp1->dst_port, thread_index, sw_if_index1)))
3216                         goto trace01;
3217                     }
3218                   else
3219                     {
3220                       if (PREDICT_FALSE(nat44_ed_not_translate(sm, node,
3221                           sw_if_index1, ip1, proto1, rx_fib_index1,
3222                           thread_index)))
3223                         goto trace01;
3224                     }
3225
3226                   next1 = slow_path_ed (sm, b1, rx_fib_index1, &kv1, &s1, node,
3227                                         next1, thread_index);
3228
3229                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
3230                     goto trace01;
3231                 }
3232               else
3233                 {
3234                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3235                   goto trace01;
3236                 }
3237             }
3238           else
3239             {
3240               s1 = pool_elt_at_index (tsm->sessions, value1.value);
3241             }
3242
3243           b1->flags |= VNET_BUFFER_F_IS_NATED;
3244
3245           if (!is_output_feature)
3246             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
3247
3248           old_addr1 = ip1->src_address.as_u32;
3249           new_addr1 = ip1->src_address.as_u32 = s1->out2in.addr.as_u32;
3250           sum1 = ip1->checksum;
3251           sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
3252                                  src_address);
3253           if (PREDICT_FALSE (is_twice_nat_session (s1)))
3254             sum1 = ip_csum_update (sum1, ip1->dst_address.as_u32,
3255                                    s1->ext_host_addr.as_u32, ip4_header_t,
3256                                    dst_address);
3257           ip1->checksum = ip_csum_fold (sum1);
3258
3259           if (PREDICT_TRUE (proto1 == SNAT_PROTOCOL_TCP))
3260             {
3261               old_port1 = tcp1->src_port;
3262               new_port1 = tcp1->src_port = s1->out2in.port;
3263
3264               sum1 = tcp1->checksum;
3265               sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
3266                                      dst_address);
3267               sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t,
3268                                      length);
3269               if (PREDICT_FALSE (is_twice_nat_session (s1)))
3270                 {
3271                   sum1 = ip_csum_update (sum1, ip1->dst_address.as_u32,
3272                                          s1->ext_host_addr.as_u32,
3273                                          ip4_header_t, dst_address);
3274                   sum1 = ip_csum_update (sum1, tcp1->dst_port,
3275                                          s1->ext_host_port, ip4_header_t,
3276                                          length);
3277                   tcp1->dst_port = s1->ext_host_port;
3278                   ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32;
3279                 }
3280               tcp1->checksum = ip_csum_fold(sum1);
3281               if (nat44_set_tcp_session_state_i2o (sm, s1, tcp1, thread_index))
3282                 goto trace01;
3283             }
3284           else
3285             {
3286               udp1->src_port = s1->out2in.port;
3287               udp1->checksum = 0;
3288               if (PREDICT_FALSE (is_twice_nat_session (s1)))
3289                 {
3290                   udp1->dst_port = s1->ext_host_port;
3291                   ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32;
3292                 }
3293             }
3294
3295           /* Accounting */
3296           nat44_session_update_counters (s1, now,
3297                                          vlib_buffer_length_in_chain (vm, b1));
3298           /* Per-user LRU list maintenance */
3299           nat44_session_update_lru (sm, s1, thread_index);
3300
3301         trace01:
3302           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3303                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
3304             {
3305               snat_in2out_trace_t *t =
3306                 vlib_add_trace (vm, node, b1, sizeof (*t));
3307               t->is_slow_path = is_slow_path;
3308               t->sw_if_index = sw_if_index1;
3309               t->next_index = next1;
3310               t->session_index = ~0;
3311               if (s1)
3312                 t->session_index = s1 - tsm->sessions;
3313             }
3314
3315           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
3316
3317           /* verify speculative enqueues, maybe switch current next frame */
3318           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
3319                                            to_next, n_left_to_next,
3320                                            bi0, bi1, next0, next1);
3321         }
3322
3323       while (n_left_from > 0 && n_left_to_next > 0)
3324         {
3325           u32 bi0;
3326           vlib_buffer_t *b0;
3327           u32 next0, sw_if_index0, rx_fib_index0, iph_offset0 = 0, proto0,
3328               new_addr0, old_addr0;
3329           u16 old_port0, new_port0;
3330           ip4_header_t *ip0;
3331           udp_header_t *udp0;
3332           tcp_header_t *tcp0;
3333           icmp46_header_t * icmp0;
3334           snat_session_t *s0 = 0;
3335           clib_bihash_kv_16_8_t kv0, value0;
3336           ip_csum_t sum0;
3337
3338           /* speculatively enqueue b0 to the current next frame */
3339           bi0 = from[0];
3340           to_next[0] = bi0;
3341           from += 1;
3342           to_next += 1;
3343           n_left_from -= 1;
3344           n_left_to_next -= 1;
3345
3346           b0 = vlib_get_buffer (vm, bi0);
3347           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3348
3349           if (is_output_feature)
3350             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
3351
3352           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
3353                  iph_offset0);
3354
3355           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3356           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3357                                                                sw_if_index0);
3358
3359           if (PREDICT_FALSE(ip0->ttl == 1))
3360             {
3361               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3362               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3363                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3364                                            0);
3365               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3366               goto trace0;
3367             }
3368
3369           udp0 = ip4_next_header (ip0);
3370           tcp0 = (tcp_header_t *) udp0;
3371           icmp0 = (icmp46_header_t *) udp0;
3372           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3373
3374           if (is_slow_path)
3375             {
3376               if (PREDICT_FALSE (proto0 == ~0))
3377                 {
3378                   s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0,
3379                                                       rx_fib_index0,
3380                                                       thread_index, now, vm,
3381                                                       node);
3382                   if (!s0)
3383                     next0 = SNAT_IN2OUT_NEXT_DROP;
3384                   goto trace0;
3385                 }
3386
3387               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3388                 {
3389                   next0 = icmp_in2out_slow_path
3390                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
3391                      next0, now, thread_index, &s0);
3392                   goto trace0;
3393                 }
3394             }
3395           else
3396             {
3397                if (is_output_feature)
3398                 {
3399                   if (PREDICT_FALSE(nat_not_translate_output_feature_fwd(
3400                       sm, ip0, thread_index, now, vm, b0)))
3401                     goto trace0;
3402                 }
3403
3404               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
3405                 {
3406                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3407                   goto trace0;
3408                 }
3409
3410               if (ip4_is_fragment (ip0))
3411                 {
3412                   b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
3413                   next0 = SNAT_IN2OUT_NEXT_DROP;
3414                   goto trace0;
3415                 }
3416             }
3417
3418           make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address, ip0->protocol,
3419                       rx_fib_index0, udp0->src_port, udp0->dst_port);
3420
3421           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
3422             {
3423               if (is_slow_path)
3424                 {
3425                   if (is_output_feature)
3426                     {
3427                       if (PREDICT_FALSE(nat44_ed_not_translate_output_feature(
3428                           sm, ip0, ip0->protocol, udp0->src_port,
3429                           udp0->dst_port, thread_index, sw_if_index0)))
3430                         goto trace0;
3431                     }
3432                   else
3433                     {
3434                       if (PREDICT_FALSE(nat44_ed_not_translate(sm, node,
3435                           sw_if_index0, ip0, proto0, rx_fib_index0,
3436                           thread_index)))
3437                         goto trace0;
3438                     }
3439
3440                   next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node,
3441                                         next0, thread_index);
3442
3443                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
3444                     goto trace0;
3445                 }
3446               else
3447                 {
3448                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3449                   goto trace0;
3450                 }
3451             }
3452           else
3453             {
3454               s0 = pool_elt_at_index (tsm->sessions, value0.value);
3455             }
3456
3457           b0->flags |= VNET_BUFFER_F_IS_NATED;
3458
3459           if (!is_output_feature)
3460             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
3461
3462           old_addr0 = ip0->src_address.as_u32;
3463           new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
3464           sum0 = ip0->checksum;
3465           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
3466                                  src_address);
3467           if (PREDICT_FALSE (is_twice_nat_session (s0)))
3468             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
3469                                    s0->ext_host_addr.as_u32, ip4_header_t,
3470                                    dst_address);
3471           ip0->checksum = ip_csum_fold (sum0);
3472
3473           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
3474             {
3475               old_port0 = tcp0->src_port;
3476               new_port0 = tcp0->src_port = s0->out2in.port;
3477
3478               sum0 = tcp0->checksum;
3479               sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
3480                                      dst_address);
3481               sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
3482                                      length);
3483               if (PREDICT_FALSE (is_twice_nat_session (s0)))
3484                 {
3485                   sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
3486                                          s0->ext_host_addr.as_u32,
3487                                          ip4_header_t, dst_address);
3488                   sum0 = ip_csum_update (sum0, tcp0->dst_port,
3489                                          s0->ext_host_port, ip4_header_t,
3490                                          length);
3491                   tcp0->dst_port = s0->ext_host_port;
3492                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
3493                 }
3494               tcp0->checksum = ip_csum_fold(sum0);
3495               if (nat44_set_tcp_session_state_i2o (sm, s0, tcp0, thread_index))
3496                 goto trace0;
3497             }
3498           else
3499             {
3500               udp0->src_port = s0->out2in.port;
3501               udp0->checksum = 0;
3502               if (PREDICT_FALSE (is_twice_nat_session (s0)))
3503                 {
3504                   udp0->dst_port = s0->ext_host_port;
3505                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
3506                 }
3507             }
3508
3509           /* Accounting */
3510           nat44_session_update_counters (s0, now,
3511                                          vlib_buffer_length_in_chain (vm, b0));
3512           /* Per-user LRU list maintenance */
3513           nat44_session_update_lru (sm, s0, thread_index);
3514
3515         trace0:
3516           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3517                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3518             {
3519               snat_in2out_trace_t *t =
3520                 vlib_add_trace (vm, node, b0, sizeof (*t));
3521               t->is_slow_path = is_slow_path;
3522               t->sw_if_index = sw_if_index0;
3523               t->next_index = next0;
3524               t->session_index = ~0;
3525               if (s0)
3526                 t->session_index = s0 - tsm->sessions;
3527             }
3528
3529           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3530
3531           /* verify speculative enqueue, maybe switch current next frame */
3532           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3533                                            to_next, n_left_to_next,
3534                                            bi0, next0);
3535         }
3536
3537       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3538     }
3539
3540   vlib_node_increment_counter (vm, stats_node_index,
3541                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3542                                pkts_processed);
3543   return frame->n_vectors;
3544 }
3545
3546 static uword
3547 nat44_ed_in2out_fast_path_fn (vlib_main_t * vm,
3548                               vlib_node_runtime_t * node,
3549                               vlib_frame_t * frame)
3550 {
3551   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 0, 0);
3552 }
3553
3554 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
3555   .function = nat44_ed_in2out_fast_path_fn,
3556   .name = "nat44-ed-in2out",
3557   .vector_size = sizeof (u32),
3558   .format_trace = format_snat_in2out_trace,
3559   .type = VLIB_NODE_TYPE_INTERNAL,
3560
3561   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3562   .error_strings = snat_in2out_error_strings,
3563
3564   .runtime_data_bytes = sizeof (snat_runtime_t),
3565
3566   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3567
3568   /* edit / add dispositions here */
3569   .next_nodes = {
3570     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3571     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3572     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-ed-in2out-slowpath",
3573     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3574     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
3575   },
3576 };
3577
3578 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_in2out_node, nat44_ed_in2out_fast_path_fn);
3579
3580 static uword
3581 nat44_ed_in2out_output_fast_path_fn (vlib_main_t * vm,
3582                                      vlib_node_runtime_t * node,
3583                                      vlib_frame_t * frame)
3584 {
3585   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 0, 1);
3586 }
3587
3588 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
3589   .function = nat44_ed_in2out_output_fast_path_fn,
3590   .name = "nat44-ed-in2out-output",
3591   .vector_size = sizeof (u32),
3592   .format_trace = format_snat_in2out_trace,
3593   .type = VLIB_NODE_TYPE_INTERNAL,
3594
3595   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3596   .error_strings = snat_in2out_error_strings,
3597
3598   .runtime_data_bytes = sizeof (snat_runtime_t),
3599
3600   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3601
3602   /* edit / add dispositions here */
3603   .next_nodes = {
3604     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3605     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
3606     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
3607     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3608     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
3609   },
3610 };
3611
3612 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_in2out_output_node,
3613                               nat44_ed_in2out_output_fast_path_fn);
3614
3615 static uword
3616 nat44_ed_in2out_slow_path_fn (vlib_main_t * vm,
3617                               vlib_node_runtime_t * node,
3618                               vlib_frame_t * frame)
3619 {
3620   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 1, 0);
3621 }
3622
3623 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
3624   .function = nat44_ed_in2out_slow_path_fn,
3625   .name = "nat44-ed-in2out-slowpath",
3626   .vector_size = sizeof (u32),
3627   .format_trace = format_snat_in2out_trace,
3628   .type = VLIB_NODE_TYPE_INTERNAL,
3629
3630   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3631   .error_strings = snat_in2out_error_strings,
3632
3633   .runtime_data_bytes = sizeof (snat_runtime_t),
3634
3635   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3636
3637   /* edit / add dispositions here */
3638   .next_nodes = {
3639     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3640     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3641     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-ed-in2out-slowpath",
3642     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3643     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
3644   },
3645 };
3646
3647 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_in2out_slowpath_node,
3648                               nat44_ed_in2out_slow_path_fn);
3649
3650 static uword
3651 nat44_ed_in2out_output_slow_path_fn (vlib_main_t * vm,
3652                                      vlib_node_runtime_t * node,
3653                                      vlib_frame_t * frame)
3654 {
3655   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 1, 1);
3656 }
3657
3658 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
3659   .function = nat44_ed_in2out_output_slow_path_fn,
3660   .name = "nat44-ed-in2out-output-slowpath",
3661   .vector_size = sizeof (u32),
3662   .format_trace = format_snat_in2out_trace,
3663   .type = VLIB_NODE_TYPE_INTERNAL,
3664
3665   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3666   .error_strings = snat_in2out_error_strings,
3667
3668   .runtime_data_bytes = sizeof (snat_runtime_t),
3669
3670   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3671
3672   /* edit / add dispositions here */
3673   .next_nodes = {
3674     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3675     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
3676     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
3677     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3678     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
3679   },
3680 };
3681
3682 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_in2out_output_slowpath_node,
3683                               nat44_ed_in2out_output_slow_path_fn);
3684
3685 /**************************/
3686 /*** deterministic mode ***/
3687 /**************************/
3688 static uword
3689 snat_det_in2out_node_fn (vlib_main_t * vm,
3690                          vlib_node_runtime_t * node,
3691                          vlib_frame_t * frame)
3692 {
3693   u32 n_left_from, * from, * to_next;
3694   snat_in2out_next_t next_index;
3695   u32 pkts_processed = 0;
3696   snat_main_t * sm = &snat_main;
3697   u32 now = (u32) vlib_time_now (vm);
3698   u32 thread_index = vlib_get_thread_index ();
3699
3700   from = vlib_frame_vector_args (frame);
3701   n_left_from = frame->n_vectors;
3702   next_index = node->cached_next_index;
3703
3704   while (n_left_from > 0)
3705     {
3706       u32 n_left_to_next;
3707
3708       vlib_get_next_frame (vm, node, next_index,
3709                            to_next, n_left_to_next);
3710
3711       while (n_left_from >= 4 && n_left_to_next >= 2)
3712         {
3713           u32 bi0, bi1;
3714           vlib_buffer_t * b0, * b1;
3715           u32 next0, next1;
3716           u32 sw_if_index0, sw_if_index1;
3717           ip4_header_t * ip0, * ip1;
3718           ip_csum_t sum0, sum1;
3719           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
3720           u16 old_port0, new_port0, lo_port0, i0;
3721           u16 old_port1, new_port1, lo_port1, i1;
3722           udp_header_t * udp0, * udp1;
3723           tcp_header_t * tcp0, * tcp1;
3724           u32 proto0, proto1;
3725           snat_det_out_key_t key0, key1;
3726           snat_det_map_t * dm0, * dm1;
3727           snat_det_session_t * ses0 = 0, * ses1 = 0;
3728           u32 rx_fib_index0, rx_fib_index1;
3729           icmp46_header_t * icmp0, * icmp1;
3730
3731           /* Prefetch next iteration. */
3732           {
3733             vlib_buffer_t * p2, * p3;
3734
3735             p2 = vlib_get_buffer (vm, from[2]);
3736             p3 = vlib_get_buffer (vm, from[3]);
3737
3738             vlib_prefetch_buffer_header (p2, LOAD);
3739             vlib_prefetch_buffer_header (p3, LOAD);
3740
3741             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
3742             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
3743           }
3744
3745           /* speculatively enqueue b0 and b1 to the current next frame */
3746           to_next[0] = bi0 = from[0];
3747           to_next[1] = bi1 = from[1];
3748           from += 2;
3749           to_next += 2;
3750           n_left_from -= 2;
3751           n_left_to_next -= 2;
3752
3753           b0 = vlib_get_buffer (vm, bi0);
3754           b1 = vlib_get_buffer (vm, bi1);
3755
3756           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3757           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
3758
3759           ip0 = vlib_buffer_get_current (b0);
3760           udp0 = ip4_next_header (ip0);
3761           tcp0 = (tcp_header_t *) udp0;
3762
3763           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3764
3765           if (PREDICT_FALSE(ip0->ttl == 1))
3766             {
3767               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3768               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3769                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3770                                            0);
3771               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3772               goto trace0;
3773             }
3774
3775           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3776
3777           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
3778             {
3779               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3780               icmp0 = (icmp46_header_t *) udp0;
3781
3782               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
3783                                   rx_fib_index0, node, next0, thread_index,
3784                                   &ses0, &dm0);
3785               goto trace0;
3786             }
3787
3788           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
3789           if (PREDICT_FALSE(!dm0))
3790             {
3791               nat_log_info ("no match for internal host %U",
3792                             format_ip4_address, &ip0->src_address);
3793               next0 = SNAT_IN2OUT_NEXT_DROP;
3794               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3795               goto trace0;
3796             }
3797
3798           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
3799
3800           key0.ext_host_addr = ip0->dst_address;
3801           key0.ext_host_port = tcp0->dst;
3802
3803           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
3804           if (PREDICT_FALSE(!ses0))
3805             {
3806               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
3807                 {
3808                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
3809                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
3810
3811                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
3812                     continue;
3813
3814                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
3815                   break;
3816                 }
3817               if (PREDICT_FALSE(!ses0))
3818                 {
3819                   /* too many sessions for user, send ICMP error packet */
3820
3821                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3822                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
3823                                                ICMP4_destination_unreachable_destination_unreachable_host,
3824                                                0);
3825                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3826                   goto trace0;
3827                 }
3828             }
3829
3830           new_port0 = ses0->out.out_port;
3831
3832           old_addr0.as_u32 = ip0->src_address.as_u32;
3833           ip0->src_address.as_u32 = new_addr0.as_u32;
3834           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
3835
3836           sum0 = ip0->checksum;
3837           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3838                                  ip4_header_t,
3839                                  src_address /* changed member */);
3840           ip0->checksum = ip_csum_fold (sum0);
3841
3842           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3843             {
3844               if (tcp0->flags & TCP_FLAG_SYN)
3845                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
3846               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
3847                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
3848               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
3849                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
3850               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
3851                 snat_det_ses_close(dm0, ses0);
3852               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
3853                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
3854               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
3855                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
3856
3857               old_port0 = tcp0->src;
3858               tcp0->src = new_port0;
3859
3860               sum0 = tcp0->checksum;
3861               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3862                                      ip4_header_t,
3863                                      dst_address /* changed member */);
3864               sum0 = ip_csum_update (sum0, old_port0, new_port0,
3865                                      ip4_header_t /* cheat */,
3866                                      length /* changed member */);
3867               tcp0->checksum = ip_csum_fold(sum0);
3868             }
3869           else
3870             {
3871               ses0->state = SNAT_SESSION_UDP_ACTIVE;
3872               old_port0 = udp0->src_port;
3873               udp0->src_port = new_port0;
3874               udp0->checksum = 0;
3875             }
3876
3877           switch(ses0->state)
3878             {
3879             case SNAT_SESSION_UDP_ACTIVE:
3880                 ses0->expire = now + sm->udp_timeout;
3881                 break;
3882             case SNAT_SESSION_TCP_SYN_SENT:
3883             case SNAT_SESSION_TCP_FIN_WAIT:
3884             case SNAT_SESSION_TCP_CLOSE_WAIT:
3885             case SNAT_SESSION_TCP_LAST_ACK:
3886                 ses0->expire = now + sm->tcp_transitory_timeout;
3887                 break;
3888             case SNAT_SESSION_TCP_ESTABLISHED:
3889                 ses0->expire = now + sm->tcp_established_timeout;
3890                 break;
3891             }
3892
3893         trace0:
3894           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3895                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3896             {
3897               snat_in2out_trace_t *t =
3898                  vlib_add_trace (vm, node, b0, sizeof (*t));
3899               t->is_slow_path = 0;
3900               t->sw_if_index = sw_if_index0;
3901               t->next_index = next0;
3902               t->session_index = ~0;
3903               if (ses0)
3904                 t->session_index = ses0 - dm0->sessions;
3905             }
3906
3907           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3908
3909           ip1 = vlib_buffer_get_current (b1);
3910           udp1 = ip4_next_header (ip1);
3911           tcp1 = (tcp_header_t *) udp1;
3912
3913           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
3914
3915           if (PREDICT_FALSE(ip1->ttl == 1))
3916             {
3917               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3918               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
3919                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3920                                            0);
3921               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3922               goto trace1;
3923             }
3924
3925           proto1 = ip_proto_to_snat_proto (ip1->protocol);
3926
3927           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
3928             {
3929               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
3930               icmp1 = (icmp46_header_t *) udp1;
3931
3932               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
3933                                   rx_fib_index1, node, next1, thread_index,
3934                                   &ses1, &dm1);
3935               goto trace1;
3936             }
3937
3938           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
3939           if (PREDICT_FALSE(!dm1))
3940             {
3941               nat_log_info ("no match for internal host %U",
3942                             format_ip4_address, &ip0->src_address);
3943               next1 = SNAT_IN2OUT_NEXT_DROP;
3944               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3945               goto trace1;
3946             }
3947
3948           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
3949
3950           key1.ext_host_addr = ip1->dst_address;
3951           key1.ext_host_port = tcp1->dst;
3952
3953           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
3954           if (PREDICT_FALSE(!ses1))
3955             {
3956               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
3957                 {
3958                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
3959                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
3960
3961                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
3962                     continue;
3963
3964                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
3965                   break;
3966                 }
3967               if (PREDICT_FALSE(!ses1))
3968                 {
3969                   /* too many sessions for user, send ICMP error packet */
3970
3971                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3972                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
3973                                                ICMP4_destination_unreachable_destination_unreachable_host,
3974                                                0);
3975                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3976                   goto trace1;
3977                 }
3978             }
3979
3980           new_port1 = ses1->out.out_port;
3981
3982           old_addr1.as_u32 = ip1->src_address.as_u32;
3983           ip1->src_address.as_u32 = new_addr1.as_u32;
3984           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
3985
3986           sum1 = ip1->checksum;
3987           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
3988                                  ip4_header_t,
3989                                  src_address /* changed member */);
3990           ip1->checksum = ip_csum_fold (sum1);
3991
3992           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
3993             {
3994               if (tcp1->flags & TCP_FLAG_SYN)
3995                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
3996               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
3997                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
3998               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
3999                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
4000               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
4001                 snat_det_ses_close(dm1, ses1);
4002               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
4003                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
4004               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
4005                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
4006
4007               old_port1 = tcp1->src;
4008               tcp1->src = new_port1;
4009
4010               sum1 = tcp1->checksum;
4011               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
4012                                      ip4_header_t,
4013                                      dst_address /* changed member */);
4014               sum1 = ip_csum_update (sum1, old_port1, new_port1,
4015                                      ip4_header_t /* cheat */,
4016                                      length /* changed member */);
4017               tcp1->checksum = ip_csum_fold(sum1);
4018             }
4019           else
4020             {
4021               ses1->state = SNAT_SESSION_UDP_ACTIVE;
4022               old_port1 = udp1->src_port;
4023               udp1->src_port = new_port1;
4024               udp1->checksum = 0;
4025             }
4026
4027           switch(ses1->state)
4028             {
4029             case SNAT_SESSION_UDP_ACTIVE:
4030                 ses1->expire = now + sm->udp_timeout;
4031                 break;
4032             case SNAT_SESSION_TCP_SYN_SENT:
4033             case SNAT_SESSION_TCP_FIN_WAIT:
4034             case SNAT_SESSION_TCP_CLOSE_WAIT:
4035             case SNAT_SESSION_TCP_LAST_ACK:
4036                 ses1->expire = now + sm->tcp_transitory_timeout;
4037                 break;
4038             case SNAT_SESSION_TCP_ESTABLISHED:
4039                 ses1->expire = now + sm->tcp_established_timeout;
4040                 break;
4041             }
4042
4043         trace1:
4044           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
4045                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
4046             {
4047               snat_in2out_trace_t *t =
4048                  vlib_add_trace (vm, node, b1, sizeof (*t));
4049               t->is_slow_path = 0;
4050               t->sw_if_index = sw_if_index1;
4051               t->next_index = next1;
4052               t->session_index = ~0;
4053               if (ses1)
4054                 t->session_index = ses1 - dm1->sessions;
4055             }
4056
4057           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
4058
4059           /* verify speculative enqueues, maybe switch current next frame */
4060           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
4061                                            to_next, n_left_to_next,
4062                                            bi0, bi1, next0, next1);
4063          }
4064
4065       while (n_left_from > 0 && n_left_to_next > 0)
4066         {
4067           u32 bi0;
4068           vlib_buffer_t * b0;
4069           u32 next0;
4070           u32 sw_if_index0;
4071           ip4_header_t * ip0;
4072           ip_csum_t sum0;
4073           ip4_address_t new_addr0, old_addr0;
4074           u16 old_port0, new_port0, lo_port0, i0;
4075           udp_header_t * udp0;
4076           tcp_header_t * tcp0;
4077           u32 proto0;
4078           snat_det_out_key_t key0;
4079           snat_det_map_t * dm0;
4080           snat_det_session_t * ses0 = 0;
4081           u32 rx_fib_index0;
4082           icmp46_header_t * icmp0;
4083
4084           /* speculatively enqueue b0 to the current next frame */
4085           bi0 = from[0];
4086           to_next[0] = bi0;
4087           from += 1;
4088           to_next += 1;
4089           n_left_from -= 1;
4090           n_left_to_next -= 1;
4091
4092           b0 = vlib_get_buffer (vm, bi0);
4093           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
4094
4095           ip0 = vlib_buffer_get_current (b0);
4096           udp0 = ip4_next_header (ip0);
4097           tcp0 = (tcp_header_t *) udp0;
4098
4099           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
4100
4101           if (PREDICT_FALSE(ip0->ttl == 1))
4102             {
4103               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
4104               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
4105                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
4106                                            0);
4107               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
4108               goto trace00;
4109             }
4110
4111           proto0 = ip_proto_to_snat_proto (ip0->protocol);
4112
4113           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
4114             {
4115               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
4116               icmp0 = (icmp46_header_t *) udp0;
4117
4118               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
4119                                   rx_fib_index0, node, next0, thread_index,
4120                                   &ses0, &dm0);
4121               goto trace00;
4122             }
4123
4124           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
4125           if (PREDICT_FALSE(!dm0))
4126             {
4127               nat_log_info ("no match for internal host %U",
4128                             format_ip4_address, &ip0->src_address);
4129               next0 = SNAT_IN2OUT_NEXT_DROP;
4130               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
4131               goto trace00;
4132             }
4133
4134           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
4135
4136           key0.ext_host_addr = ip0->dst_address;
4137           key0.ext_host_port = tcp0->dst;
4138
4139           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
4140           if (PREDICT_FALSE(!ses0))
4141             {
4142               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
4143                 {
4144                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
4145                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
4146
4147                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
4148                     continue;
4149
4150                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
4151                   break;
4152                 }
4153               if (PREDICT_FALSE(!ses0))
4154                 {
4155                   /* too many sessions for user, send ICMP error packet */
4156
4157                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
4158                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
4159                                                ICMP4_destination_unreachable_destination_unreachable_host,
4160                                                0);
4161                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
4162                   goto trace00;
4163                 }
4164             }
4165
4166           new_port0 = ses0->out.out_port;
4167
4168           old_addr0.as_u32 = ip0->src_address.as_u32;
4169           ip0->src_address.as_u32 = new_addr0.as_u32;
4170           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
4171
4172           sum0 = ip0->checksum;
4173           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
4174                                  ip4_header_t,
4175                                  src_address /* changed member */);
4176           ip0->checksum = ip_csum_fold (sum0);
4177
4178           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
4179             {
4180               if (tcp0->flags & TCP_FLAG_SYN)
4181                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
4182               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
4183                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
4184               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
4185                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
4186               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
4187                 snat_det_ses_close(dm0, ses0);
4188               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
4189                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
4190               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
4191                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
4192
4193               old_port0 = tcp0->src;
4194               tcp0->src = new_port0;
4195
4196               sum0 = tcp0->checksum;
4197               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
4198                                      ip4_header_t,
4199                                      dst_address /* changed member */);
4200               sum0 = ip_csum_update (sum0, old_port0, new_port0,
4201                                      ip4_header_t /* cheat */,
4202                                      length /* changed member */);
4203               tcp0->checksum = ip_csum_fold(sum0);
4204             }
4205           else
4206             {
4207               ses0->state = SNAT_SESSION_UDP_ACTIVE;
4208               old_port0 = udp0->src_port;
4209               udp0->src_port = new_port0;
4210               udp0->checksum = 0;
4211             }
4212
4213           switch(ses0->state)
4214             {
4215             case SNAT_SESSION_UDP_ACTIVE:
4216                 ses0->expire = now + sm->udp_timeout;
4217                 break;
4218             case SNAT_SESSION_TCP_SYN_SENT:
4219             case SNAT_SESSION_TCP_FIN_WAIT:
4220             case SNAT_SESSION_TCP_CLOSE_WAIT:
4221             case SNAT_SESSION_TCP_LAST_ACK:
4222                 ses0->expire = now + sm->tcp_transitory_timeout;
4223                 break;
4224             case SNAT_SESSION_TCP_ESTABLISHED:
4225                 ses0->expire = now + sm->tcp_established_timeout;
4226                 break;
4227             }
4228
4229         trace00:
4230           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
4231                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
4232             {
4233               snat_in2out_trace_t *t =
4234                  vlib_add_trace (vm, node, b0, sizeof (*t));
4235               t->is_slow_path = 0;
4236               t->sw_if_index = sw_if_index0;
4237               t->next_index = next0;
4238               t->session_index = ~0;
4239               if (ses0)
4240                 t->session_index = ses0 - dm0->sessions;
4241             }
4242
4243           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
4244
4245           /* verify speculative enqueue, maybe switch current next frame */
4246           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
4247                                            to_next, n_left_to_next,
4248                                            bi0, next0);
4249         }
4250
4251       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
4252     }
4253
4254   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
4255                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
4256                                pkts_processed);
4257   return frame->n_vectors;
4258 }
4259
4260 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
4261   .function = snat_det_in2out_node_fn,
4262   .name = "nat44-det-in2out",
4263   .vector_size = sizeof (u32),
4264   .format_trace = format_snat_in2out_trace,
4265   .type = VLIB_NODE_TYPE_INTERNAL,
4266
4267   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
4268   .error_strings = snat_in2out_error_strings,
4269
4270   .runtime_data_bytes = sizeof (snat_runtime_t),
4271
4272   .n_next_nodes = 3,
4273
4274   /* edit / add dispositions here */
4275   .next_nodes = {
4276     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
4277     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
4278     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
4279   },
4280 };
4281
4282 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
4283
4284 /**
4285  * Get address and port values to be used for ICMP packet translation
4286  * and create session if needed
4287  *
4288  * @param[in,out] sm             NAT main
4289  * @param[in,out] node           NAT node runtime
4290  * @param[in] thread_index       thread index
4291  * @param[in,out] b0             buffer containing packet to be translated
4292  * @param[out] p_proto           protocol used for matching
4293  * @param[out] p_value           address and port after NAT translation
4294  * @param[out] p_dont_translate  if packet should not be translated
4295  * @param d                      optional parameter
4296  * @param e                      optional parameter
4297  */
4298 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
4299                           u32 thread_index, vlib_buffer_t *b0,
4300                           ip4_header_t *ip0, u8 *p_proto,
4301                           snat_session_key_t *p_value,
4302                           u8 *p_dont_translate, void *d, void *e)
4303 {
4304   icmp46_header_t *icmp0;
4305   u32 sw_if_index0;
4306   u32 rx_fib_index0;
4307   u8 protocol;
4308   snat_det_out_key_t key0;
4309   u8 dont_translate = 0;
4310   u32 next0 = ~0;
4311   icmp_echo_header_t *echo0, *inner_echo0 = 0;
4312   ip4_header_t *inner_ip0;
4313   void *l4_header = 0;
4314   icmp46_header_t *inner_icmp0;
4315   snat_det_map_t * dm0 = 0;
4316   ip4_address_t new_addr0;
4317   u16 lo_port0, i0;
4318   snat_det_session_t * ses0 = 0;
4319   ip4_address_t in_addr;
4320   u16 in_port;
4321
4322   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
4323   echo0 = (icmp_echo_header_t *)(icmp0+1);
4324   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
4325   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
4326
4327   if (!icmp_is_error_message (icmp0))
4328     {
4329       protocol = SNAT_PROTOCOL_ICMP;
4330       in_addr = ip0->src_address;
4331       in_port = echo0->identifier;
4332     }
4333   else
4334     {
4335       inner_ip0 = (ip4_header_t *)(echo0+1);
4336       l4_header = ip4_next_header (inner_ip0);
4337       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
4338       in_addr = inner_ip0->dst_address;
4339       switch (protocol)
4340         {
4341         case SNAT_PROTOCOL_ICMP:
4342           inner_icmp0 = (icmp46_header_t*)l4_header;
4343           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
4344           in_port = inner_echo0->identifier;
4345           break;
4346         case SNAT_PROTOCOL_UDP:
4347         case SNAT_PROTOCOL_TCP:
4348           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
4349           break;
4350         default:
4351           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
4352           next0 = SNAT_IN2OUT_NEXT_DROP;
4353           goto out;
4354         }
4355     }
4356
4357   dm0 = snat_det_map_by_user(sm, &in_addr);
4358   if (PREDICT_FALSE(!dm0))
4359     {
4360       nat_log_info ("no match for internal host %U",
4361                     format_ip4_address, &in_addr);
4362       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
4363           IP_PROTOCOL_ICMP, rx_fib_index0)))
4364         {
4365           dont_translate = 1;
4366           goto out;
4367         }
4368       next0 = SNAT_IN2OUT_NEXT_DROP;
4369       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
4370       goto out;
4371     }
4372
4373   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
4374
4375   key0.ext_host_addr = ip0->dst_address;
4376   key0.ext_host_port = 0;
4377
4378   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
4379   if (PREDICT_FALSE(!ses0))
4380     {
4381       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
4382           IP_PROTOCOL_ICMP, rx_fib_index0)))
4383         {
4384           dont_translate = 1;
4385           goto out;
4386         }
4387       if (icmp0->type != ICMP4_echo_request)
4388         {
4389           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
4390           next0 = SNAT_IN2OUT_NEXT_DROP;
4391           goto out;
4392         }
4393       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
4394         {
4395           key0.out_port = clib_host_to_net_u16 (lo_port0 +
4396             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
4397
4398           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
4399             continue;
4400
4401           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
4402           break;
4403         }
4404       if (PREDICT_FALSE(!ses0))
4405         {
4406           next0 = SNAT_IN2OUT_NEXT_DROP;
4407           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
4408           goto out;
4409         }
4410     }
4411
4412   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
4413                     !icmp_is_error_message (icmp0)))
4414     {
4415       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
4416       next0 = SNAT_IN2OUT_NEXT_DROP;
4417       goto out;
4418     }
4419
4420   u32 now = (u32) vlib_time_now (sm->vlib_main);
4421
4422   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
4423   ses0->expire = now + sm->icmp_timeout;
4424
4425 out:
4426   *p_proto = protocol;
4427   if (ses0)
4428     {
4429       p_value->addr = new_addr0;
4430       p_value->fib_index = sm->outside_fib_index;
4431       p_value->port = ses0->out.out_port;
4432     }
4433   *p_dont_translate = dont_translate;
4434   if (d)
4435     *(snat_det_session_t**)d = ses0;
4436   if (e)
4437     *(snat_det_map_t**)e = dm0;
4438   return next0;
4439 }
4440
4441 /**********************/
4442 /*** worker handoff ***/
4443 /**********************/
4444 static inline uword
4445 snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
4446                                       vlib_node_runtime_t * node,
4447                                       vlib_frame_t * frame,
4448                                       u8 is_output)
4449 {
4450   snat_main_t *sm = &snat_main;
4451   vlib_thread_main_t *tm = vlib_get_thread_main ();
4452   u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0;
4453   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
4454   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
4455     = 0;
4456   vlib_frame_queue_elt_t *hf = 0;
4457   vlib_frame_queue_t *fq;
4458   vlib_frame_t *f = 0;
4459   int i;
4460   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
4461   u32 next_worker_index = 0;
4462   u32 current_worker_index = ~0;
4463   u32 thread_index = vlib_get_thread_index ();
4464   u32 fq_index;
4465   u32 to_node_index;
4466   vlib_frame_t *d = 0;
4467
4468   ASSERT (vec_len (sm->workers));
4469
4470   if (is_output)
4471     {
4472       fq_index = sm->fq_in2out_output_index;
4473       to_node_index = sm->in2out_output_node_index;
4474     }
4475   else
4476     {
4477       fq_index = sm->fq_in2out_index;
4478       to_node_index = sm->in2out_node_index;
4479     }
4480
4481   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
4482     {
4483       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
4484
4485       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
4486                                tm->n_vlib_mains - 1,
4487                                (vlib_frame_queue_t *) (~0));
4488     }
4489
4490   from = vlib_frame_vector_args (frame);
4491   n_left_from = frame->n_vectors;
4492
4493   while (n_left_from > 0)
4494     {
4495       u32 bi0;
4496       vlib_buffer_t *b0;
4497       u32 sw_if_index0;
4498       u32 rx_fib_index0;
4499       ip4_header_t * ip0;
4500       u8 do_handoff;
4501
4502       bi0 = from[0];
4503       from += 1;
4504       n_left_from -= 1;
4505
4506       b0 = vlib_get_buffer (vm, bi0);
4507
4508       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
4509       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
4510
4511       ip0 = vlib_buffer_get_current (b0);
4512
4513       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
4514
4515       if (PREDICT_FALSE (next_worker_index != thread_index))
4516         {
4517           do_handoff = 1;
4518
4519           if (next_worker_index != current_worker_index)
4520             {
4521               fq = is_vlib_frame_queue_congested (
4522                 fq_index, next_worker_index, NAT_FQ_NELTS - 2,
4523                 congested_handoff_queue_by_worker_index);
4524
4525               if (fq)
4526                 {
4527                   /* if this is 1st frame */
4528                   if (!d)
4529                     {
4530                       d = vlib_get_frame_to_node (vm, sm->error_node_index);
4531                       to_next_drop = vlib_frame_vector_args (d);
4532                     }
4533
4534                   to_next_drop[0] = bi0;
4535                   to_next_drop += 1;
4536                   d->n_vectors++;
4537                   b0->error = node->errors[SNAT_IN2OUT_ERROR_FQ_CONGESTED];
4538                   goto trace0;
4539                 }
4540
4541               if (hf)
4542                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
4543
4544               hf = vlib_get_worker_handoff_queue_elt (fq_index,
4545                                                       next_worker_index,
4546                                                       handoff_queue_elt_by_worker_index);
4547
4548               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
4549               to_next_worker = &hf->buffer_index[hf->n_vectors];
4550               current_worker_index = next_worker_index;
4551             }
4552
4553           /* enqueue to correct worker thread */
4554           to_next_worker[0] = bi0;
4555           to_next_worker++;
4556           n_left_to_next_worker--;
4557
4558           if (n_left_to_next_worker == 0)
4559             {
4560               hf->n_vectors = VLIB_FRAME_SIZE;
4561               vlib_put_frame_queue_elt (hf);
4562               current_worker_index = ~0;
4563               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
4564               hf = 0;
4565             }
4566         }
4567       else
4568         {
4569           do_handoff = 0;
4570           /* if this is 1st frame */
4571           if (!f)
4572             {
4573               f = vlib_get_frame_to_node (vm, to_node_index);
4574               to_next = vlib_frame_vector_args (f);
4575             }
4576
4577           to_next[0] = bi0;
4578           to_next += 1;
4579           f->n_vectors++;
4580         }
4581
4582 trace0:
4583       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
4584                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
4585         {
4586           snat_in2out_worker_handoff_trace_t *t =
4587             vlib_add_trace (vm, node, b0, sizeof (*t));
4588           t->next_worker_index = next_worker_index;
4589           t->do_handoff = do_handoff;
4590         }
4591     }
4592
4593   if (f)
4594     vlib_put_frame_to_node (vm, to_node_index, f);
4595
4596   if (d)
4597     vlib_put_frame_to_node (vm, sm->error_node_index, d);
4598
4599   if (hf)
4600     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
4601
4602   /* Ship frames to the worker nodes */
4603   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
4604     {
4605       if (handoff_queue_elt_by_worker_index[i])
4606         {
4607           hf = handoff_queue_elt_by_worker_index[i];
4608           /*
4609            * It works better to let the handoff node
4610            * rate-adapt, always ship the handoff queue element.
4611            */
4612           if (1 || hf->n_vectors == hf->last_n_vectors)
4613             {
4614               vlib_put_frame_queue_elt (hf);
4615               handoff_queue_elt_by_worker_index[i] = 0;
4616             }
4617           else
4618             hf->last_n_vectors = hf->n_vectors;
4619         }
4620       congested_handoff_queue_by_worker_index[i] =
4621         (vlib_frame_queue_t *) (~0);
4622     }
4623   hf = 0;
4624   current_worker_index = ~0;
4625   return frame->n_vectors;
4626 }
4627
4628 static uword
4629 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
4630                                vlib_node_runtime_t * node,
4631                                vlib_frame_t * frame)
4632 {
4633   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
4634 }
4635
4636 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
4637   .function = snat_in2out_worker_handoff_fn,
4638   .name = "nat44-in2out-worker-handoff",
4639   .vector_size = sizeof (u32),
4640   .format_trace = format_snat_in2out_worker_handoff_trace,
4641   .type = VLIB_NODE_TYPE_INTERNAL,
4642
4643   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
4644   .error_strings = snat_in2out_error_strings,
4645
4646   .n_next_nodes = 1,
4647
4648   .next_nodes = {
4649     [0] = "error-drop",
4650   },
4651 };
4652
4653 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
4654                               snat_in2out_worker_handoff_fn);
4655
4656 static uword
4657 snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
4658                                       vlib_node_runtime_t * node,
4659                                       vlib_frame_t * frame)
4660 {
4661   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
4662 }
4663
4664 VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
4665   .function = snat_in2out_output_worker_handoff_fn,
4666   .name = "nat44-in2out-output-worker-handoff",
4667   .vector_size = sizeof (u32),
4668   .format_trace = format_snat_in2out_worker_handoff_trace,
4669   .type = VLIB_NODE_TYPE_INTERNAL,
4670
4671   .n_next_nodes = 1,
4672
4673   .next_nodes = {
4674     [0] = "error-drop",
4675   },
4676 };
4677
4678 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
4679                               snat_in2out_output_worker_handoff_fn);
4680
4681 static_always_inline int
4682 is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
4683 {
4684   snat_address_t * ap;
4685   clib_bihash_kv_8_8_t kv, value;
4686   snat_session_key_t m_key;
4687
4688   vec_foreach (ap, sm->addresses)
4689     {
4690       if (ap->addr.as_u32 == dst_addr->as_u32)
4691         return 1;
4692     }
4693
4694   m_key.addr.as_u32 = dst_addr->as_u32;
4695   m_key.fib_index = sm->outside_fib_index;
4696   m_key.port = 0;
4697   m_key.protocol = 0;
4698   kv.key = m_key.as_u64;
4699   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
4700     return 1;
4701
4702   return 0;
4703 }
4704
4705 static inline uword
4706 snat_hairpin_dst_fn_inline (vlib_main_t * vm,
4707                             vlib_node_runtime_t * node,
4708                             vlib_frame_t * frame,
4709                             int is_ed)
4710 {
4711   u32 n_left_from, * from, * to_next, stats_node_index;
4712   snat_in2out_next_t next_index;
4713   u32 pkts_processed = 0;
4714   snat_main_t * sm = &snat_main;
4715
4716   stats_node_index = is_ed ? nat44_ed_hairpin_dst_node.index :
4717     snat_hairpin_dst_node.index;
4718
4719   from = vlib_frame_vector_args (frame);
4720   n_left_from = frame->n_vectors;
4721   next_index = node->cached_next_index;
4722
4723   while (n_left_from > 0)
4724     {
4725       u32 n_left_to_next;
4726
4727       vlib_get_next_frame (vm, node, next_index,
4728                            to_next, n_left_to_next);
4729
4730       while (n_left_from > 0 && n_left_to_next > 0)
4731         {
4732           u32 bi0;
4733           vlib_buffer_t * b0;
4734           u32 next0;
4735           ip4_header_t * ip0;
4736           u32 proto0;
4737
4738           /* speculatively enqueue b0 to the current next frame */
4739           bi0 = from[0];
4740           to_next[0] = bi0;
4741           from += 1;
4742           to_next += 1;
4743           n_left_from -= 1;
4744           n_left_to_next -= 1;
4745
4746           b0 = vlib_get_buffer (vm, bi0);
4747           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
4748           ip0 = vlib_buffer_get_current (b0);
4749
4750           proto0 = ip_proto_to_snat_proto (ip0->protocol);
4751
4752           vnet_buffer (b0)->snat.flags = 0;
4753           if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
4754             {
4755               if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
4756                 {
4757                   udp_header_t * udp0 = ip4_next_header (ip0);
4758                   tcp_header_t * tcp0 = (tcp_header_t *) udp0;
4759
4760                   snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0, is_ed);
4761                 }
4762               else if (proto0 == SNAT_PROTOCOL_ICMP)
4763                 {
4764                   icmp46_header_t * icmp0 = ip4_next_header (ip0);
4765
4766                   snat_icmp_hairpinning (sm, b0, ip0, icmp0, is_ed);
4767                 }
4768               else
4769                 {
4770                   if (is_ed)
4771                     nat44_ed_hairpinning_unknown_proto (sm, b0, ip0);
4772                   else
4773                     nat_hairpinning_sm_unknown_proto (sm, b0, ip0);
4774                 }
4775
4776               vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
4777             }
4778
4779           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
4780
4781           /* verify speculative enqueue, maybe switch current next frame */
4782           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
4783                                            to_next, n_left_to_next,
4784                                            bi0, next0);
4785          }
4786
4787       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
4788     }
4789
4790   vlib_node_increment_counter (vm, stats_node_index,
4791                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
4792                                pkts_processed);
4793   return frame->n_vectors;
4794 }
4795
4796 static uword
4797 snat_hairpin_dst_fn (vlib_main_t * vm,
4798                      vlib_node_runtime_t * node,
4799                      vlib_frame_t * frame)
4800 {
4801   return snat_hairpin_dst_fn_inline (vm, node, frame, 0);
4802 }
4803
4804 VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
4805   .function = snat_hairpin_dst_fn,
4806   .name = "nat44-hairpin-dst",
4807   .vector_size = sizeof (u32),
4808   .type = VLIB_NODE_TYPE_INTERNAL,
4809   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
4810   .error_strings = snat_in2out_error_strings,
4811   .n_next_nodes = 2,
4812   .next_nodes = {
4813     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
4814     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
4815   },
4816 };
4817
4818 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node,
4819                               snat_hairpin_dst_fn);
4820
4821 static uword
4822 nat44_ed_hairpin_dst_fn (vlib_main_t * vm,
4823                          vlib_node_runtime_t * node,
4824                          vlib_frame_t * frame)
4825 {
4826   return snat_hairpin_dst_fn_inline (vm, node, frame, 1);
4827 }
4828
4829 VLIB_REGISTER_NODE (nat44_ed_hairpin_dst_node) = {
4830   .function = nat44_ed_hairpin_dst_fn,
4831   .name = "nat44-ed-hairpin-dst",
4832   .vector_size = sizeof (u32),
4833   .type = VLIB_NODE_TYPE_INTERNAL,
4834   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
4835   .error_strings = snat_in2out_error_strings,
4836   .n_next_nodes = 2,
4837   .next_nodes = {
4838     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
4839     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
4840   },
4841 };
4842
4843 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_hairpin_dst_node,
4844                               nat44_ed_hairpin_dst_fn);
4845
4846 static inline uword
4847 snat_hairpin_src_fn_inline (vlib_main_t * vm,
4848                             vlib_node_runtime_t * node,
4849                             vlib_frame_t * frame,
4850                             int is_ed)
4851 {
4852   u32 n_left_from, * from, * to_next, stats_node_index;
4853   snat_in2out_next_t next_index;
4854   u32 pkts_processed = 0;
4855   snat_main_t *sm = &snat_main;
4856
4857   stats_node_index = is_ed ? nat44_ed_hairpin_src_node.index :
4858     snat_hairpin_src_node.index;
4859
4860   from = vlib_frame_vector_args (frame);
4861   n_left_from = frame->n_vectors;
4862   next_index = node->cached_next_index;
4863
4864   while (n_left_from > 0)
4865     {
4866       u32 n_left_to_next;
4867
4868       vlib_get_next_frame (vm, node, next_index,
4869                            to_next, n_left_to_next);
4870
4871       while (n_left_from > 0 && n_left_to_next > 0)
4872         {
4873           u32 bi0;
4874           vlib_buffer_t * b0;
4875           u32 next0;
4876           snat_interface_t *i;
4877           u32 sw_if_index0;
4878
4879           /* speculatively enqueue b0 to the current next frame */
4880           bi0 = from[0];
4881           to_next[0] = bi0;
4882           from += 1;
4883           to_next += 1;
4884           n_left_from -= 1;
4885           n_left_to_next -= 1;
4886
4887           b0 = vlib_get_buffer (vm, bi0);
4888           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
4889           next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT;
4890
4891           pool_foreach (i, sm->output_feature_interfaces,
4892           ({
4893             /* Only packets from NAT inside interface */
4894             if ((nat_interface_is_inside(i)) && (sw_if_index0 == i->sw_if_index))
4895               {
4896                 if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) &
4897                                     SNAT_FLAG_HAIRPINNING))
4898                   {
4899                     if (PREDICT_TRUE (sm->num_workers > 1))
4900                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
4901                     else
4902                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
4903                   }
4904                 break;
4905               }
4906           }));
4907
4908           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
4909
4910           /* verify speculative enqueue, maybe switch current next frame */
4911           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
4912                                            to_next, n_left_to_next,
4913                                            bi0, next0);
4914          }
4915
4916       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
4917     }
4918
4919   vlib_node_increment_counter (vm, stats_node_index,
4920                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
4921                                pkts_processed);
4922   return frame->n_vectors;
4923 }
4924
4925 static uword
4926 snat_hairpin_src_fn (vlib_main_t * vm,
4927                      vlib_node_runtime_t * node,
4928                      vlib_frame_t * frame)
4929 {
4930   return snat_hairpin_src_fn_inline (vm, node, frame, 0);
4931 }
4932
4933 VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
4934   .function = snat_hairpin_src_fn,
4935   .name = "nat44-hairpin-src",
4936   .vector_size = sizeof (u32),
4937   .type = VLIB_NODE_TYPE_INTERNAL,
4938   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
4939   .error_strings = snat_in2out_error_strings,
4940   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
4941   .next_nodes = {
4942      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
4943      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-in2out-output",
4944      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
4945      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
4946   },
4947 };
4948
4949 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node,
4950                               snat_hairpin_src_fn);
4951
4952 static uword
4953 nat44_ed_hairpin_src_fn (vlib_main_t * vm,
4954                          vlib_node_runtime_t * node,
4955                          vlib_frame_t * frame)
4956 {
4957   return snat_hairpin_src_fn_inline (vm, node, frame, 1);
4958 }
4959
4960 VLIB_REGISTER_NODE (nat44_ed_hairpin_src_node) = {
4961   .function = nat44_ed_hairpin_src_fn,
4962   .name = "nat44-ed-hairpin-src",
4963   .vector_size = sizeof (u32),
4964   .type = VLIB_NODE_TYPE_INTERNAL,
4965   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
4966   .error_strings = snat_in2out_error_strings,
4967   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
4968   .next_nodes = {
4969      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
4970      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-ed-in2out-output",
4971      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
4972      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
4973   },
4974 };
4975
4976 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_hairpin_src_node,
4977                               nat44_ed_hairpin_src_fn);
4978
4979 static uword
4980 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
4981                                 vlib_node_runtime_t * node,
4982                                 vlib_frame_t * frame)
4983 {
4984   u32 n_left_from, * from, * to_next;
4985   snat_in2out_next_t next_index;
4986   u32 pkts_processed = 0;
4987   snat_main_t * sm = &snat_main;
4988   u32 stats_node_index;
4989
4990   stats_node_index = snat_in2out_fast_node.index;
4991
4992   from = vlib_frame_vector_args (frame);
4993   n_left_from = frame->n_vectors;
4994   next_index = node->cached_next_index;
4995
4996   while (n_left_from > 0)
4997     {
4998       u32 n_left_to_next;
4999
5000       vlib_get_next_frame (vm, node, next_index,
5001                            to_next, n_left_to_next);
5002
5003       while (n_left_from > 0 && n_left_to_next > 0)
5004         {
5005           u32 bi0;
5006           vlib_buffer_t * b0;
5007           u32 next0;
5008           u32 sw_if_index0;
5009           ip4_header_t * ip0;
5010           ip_csum_t sum0;
5011           u32 new_addr0, old_addr0;
5012           u16 old_port0, new_port0;
5013           udp_header_t * udp0;
5014           tcp_header_t * tcp0;
5015           icmp46_header_t * icmp0;
5016           snat_session_key_t key0, sm0;
5017           u32 proto0;
5018           u32 rx_fib_index0;
5019
5020           /* speculatively enqueue b0 to the current next frame */
5021           bi0 = from[0];
5022           to_next[0] = bi0;
5023           from += 1;
5024           to_next += 1;
5025           n_left_from -= 1;
5026           n_left_to_next -= 1;
5027
5028           b0 = vlib_get_buffer (vm, bi0);
5029           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
5030
5031           ip0 = vlib_buffer_get_current (b0);
5032           udp0 = ip4_next_header (ip0);
5033           tcp0 = (tcp_header_t *) udp0;
5034           icmp0 = (icmp46_header_t *) udp0;
5035
5036           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
5037           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
5038
5039           if (PREDICT_FALSE(ip0->ttl == 1))
5040             {
5041               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
5042               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
5043                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
5044                                            0);
5045               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
5046               goto trace0;
5047             }
5048
5049           proto0 = ip_proto_to_snat_proto (ip0->protocol);
5050
5051           if (PREDICT_FALSE (proto0 == ~0))
5052               goto trace0;
5053
5054           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
5055             {
5056               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
5057                                   rx_fib_index0, node, next0, ~0, 0, 0);
5058               goto trace0;
5059             }
5060
5061           key0.addr = ip0->src_address;
5062           key0.protocol = proto0;
5063           key0.port = udp0->src_port;
5064           key0.fib_index = rx_fib_index0;
5065
5066           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0, 0, 0))
5067             {
5068               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
5069               next0= SNAT_IN2OUT_NEXT_DROP;
5070               goto trace0;
5071             }
5072
5073           new_addr0 = sm0.addr.as_u32;
5074           new_port0 = sm0.port;
5075           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
5076           old_addr0 = ip0->src_address.as_u32;
5077           ip0->src_address.as_u32 = new_addr0;
5078
5079           sum0 = ip0->checksum;
5080           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
5081                                  ip4_header_t,
5082                                  src_address /* changed member */);
5083           ip0->checksum = ip_csum_fold (sum0);
5084
5085           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
5086             {
5087               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
5088                 {
5089                   old_port0 = tcp0->src_port;
5090                   tcp0->src_port = new_port0;
5091
5092                   sum0 = tcp0->checksum;
5093                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
5094                                          ip4_header_t,
5095                                          dst_address /* changed member */);
5096                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
5097                                          ip4_header_t /* cheat */,
5098                                          length /* changed member */);
5099                   tcp0->checksum = ip_csum_fold(sum0);
5100                 }
5101               else
5102                 {
5103                   old_port0 = udp0->src_port;
5104                   udp0->src_port = new_port0;
5105                   udp0->checksum = 0;
5106                 }
5107             }
5108           else
5109             {
5110               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
5111                 {
5112                   sum0 = tcp0->checksum;
5113                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
5114                                          ip4_header_t,
5115                                          dst_address /* changed member */);
5116                   tcp0->checksum = ip_csum_fold(sum0);
5117                 }
5118             }
5119
5120           /* Hairpinning */
5121           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0, 0);
5122
5123         trace0:
5124           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
5125                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
5126             {
5127               snat_in2out_trace_t *t =
5128                  vlib_add_trace (vm, node, b0, sizeof (*t));
5129               t->sw_if_index = sw_if_index0;
5130               t->next_index = next0;
5131             }
5132
5133           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
5134
5135           /* verify speculative enqueue, maybe switch current next frame */
5136           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
5137                                            to_next, n_left_to_next,
5138                                            bi0, next0);
5139         }
5140
5141       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
5142     }
5143
5144   vlib_node_increment_counter (vm, stats_node_index,
5145                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
5146                                pkts_processed);
5147   return frame->n_vectors;
5148 }
5149
5150
5151 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
5152   .function = snat_in2out_fast_static_map_fn,
5153   .name = "nat44-in2out-fast",
5154   .vector_size = sizeof (u32),
5155   .format_trace = format_snat_in2out_fast_trace,
5156   .type = VLIB_NODE_TYPE_INTERNAL,
5157
5158   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
5159   .error_strings = snat_in2out_error_strings,
5160
5161   .runtime_data_bytes = sizeof (snat_runtime_t),
5162
5163   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
5164
5165   /* edit / add dispositions here */
5166   .next_nodes = {
5167     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
5168     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
5169     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
5170     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
5171     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
5172   },
5173 };
5174
5175 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);