NAT44: endpoint dependent mode (VPP-1273)
[vpp.git] / src / plugins / nat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <nat/nat.h>
25 #include <nat/nat_ipfix_logging.h>
26 #include <nat/nat_det.h>
27 #include <nat/nat_reass.h>
28 #include <nat/nat_inlines.h>
29
30 #include <vppinfra/hash.h>
31 #include <vppinfra/error.h>
32 #include <vppinfra/elog.h>
33
34 typedef struct {
35   u32 sw_if_index;
36   u32 next_index;
37   u32 session_index;
38   u32 is_slow_path;
39 } snat_in2out_trace_t;
40
41 typedef struct {
42   u32 next_worker_index;
43   u8 do_handoff;
44 } snat_in2out_worker_handoff_trace_t;
45
46 /* packet trace format function */
47 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
48 {
49   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
50   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
51   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
52   char * tag;
53
54   tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
55
56   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
57               t->sw_if_index, t->next_index, t->session_index);
58
59   return s;
60 }
61
62 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
63 {
64   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
65   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
66   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
67
68   s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
69               t->sw_if_index, t->next_index);
70
71   return s;
72 }
73
74 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
75 {
76   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
77   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
78   snat_in2out_worker_handoff_trace_t * t =
79     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
80   char * m;
81
82   m = t->do_handoff ? "next worker" : "same worker";
83   s = format (s, "NAT44_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
84
85   return s;
86 }
87
88 typedef struct {
89   u32 sw_if_index;
90   u32 next_index;
91   u8 cached;
92 } nat44_in2out_reass_trace_t;
93
94 static u8 * format_nat44_in2out_reass_trace (u8 * s, va_list * args)
95 {
96   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
97   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
98   nat44_in2out_reass_trace_t * t = va_arg (*args, nat44_in2out_reass_trace_t *);
99
100   s = format (s, "NAT44_IN2OUT_REASS: sw_if_index %d, next index %d, status %s",
101               t->sw_if_index, t->next_index,
102               t->cached ? "cached" : "translated");
103
104   return s;
105 }
106
107 vlib_node_registration_t snat_in2out_node;
108 vlib_node_registration_t snat_in2out_slowpath_node;
109 vlib_node_registration_t snat_in2out_fast_node;
110 vlib_node_registration_t snat_in2out_worker_handoff_node;
111 vlib_node_registration_t snat_det_in2out_node;
112 vlib_node_registration_t snat_in2out_output_node;
113 vlib_node_registration_t snat_in2out_output_slowpath_node;
114 vlib_node_registration_t snat_in2out_output_worker_handoff_node;
115 vlib_node_registration_t snat_hairpin_dst_node;
116 vlib_node_registration_t snat_hairpin_src_node;
117 vlib_node_registration_t nat44_hairpinning_node;
118 vlib_node_registration_t nat44_in2out_reass_node;
119 vlib_node_registration_t nat44_ed_in2out_node;
120 vlib_node_registration_t nat44_ed_in2out_slowpath_node;
121 vlib_node_registration_t nat44_ed_in2out_output_node;
122 vlib_node_registration_t nat44_ed_in2out_output_slowpath_node;
123 vlib_node_registration_t nat44_ed_hairpin_dst_node;
124 vlib_node_registration_t nat44_ed_hairpin_src_node;
125 vlib_node_registration_t nat44_ed_hairpinning_node;
126
/*
 * X-macro list of in2out errors: each entry is _(SYMBOL, "description").
 * It is expanded once to build snat_in2out_error_t and once to build
 * snat_in2out_error_strings, so both stay in the same order.
 */
#define foreach_snat_in2out_error                                   \
  _ (UNSUPPORTED_PROTOCOL, "Unsupported protocol")                  \
  _ (IN2OUT_PACKETS, "Good in2out packets processed")               \
  _ (OUT_OF_PORTS, "Out of ports")                                  \
  _ (BAD_OUTSIDE_FIB, "Outside VRF ID not found")                   \
  _ (BAD_ICMP_TYPE, "unsupported ICMP type")                        \
  _ (NO_TRANSLATION, "No translation")                              \
  _ (MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")            \
  _ (DROP_FRAGMENT, "Drop fragment")                                \
  _ (MAX_REASS, "Maximum reassemblies exceeded")                    \
  _ (MAX_FRAG, "Maximum fragments per reassembly exceeded")         \
  _ (FQ_CONGESTED, "Handoff frame queue congested")
139
140 typedef enum {
141 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
142   foreach_snat_in2out_error
143 #undef _
144   SNAT_IN2OUT_N_ERROR,
145 } snat_in2out_error_t;
146
147 static char * snat_in2out_error_strings[] = {
148 #define _(sym,string) string,
149   foreach_snat_in2out_error
150 #undef _
151 };
152
/* Next-node indices returned by the in2out processing helpers. */
typedef enum
{
  SNAT_IN2OUT_NEXT_LOOKUP,
  SNAT_IN2OUT_NEXT_DROP,        /* returned by the helpers on every drop path */
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_NEXT_REASS,
  SNAT_IN2OUT_N_NEXT,           /* number of next indices */
} snat_in2out_next_t;
161
/* Next-node indices with the SNAT_HAIRPIN_SRC prefix. */
typedef enum
{
  SNAT_HAIRPIN_SRC_NEXT_DROP,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
  SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
  SNAT_HAIRPIN_SRC_N_NEXT,      /* number of next indices */
} snat_hairpin_next_t;
169
170 /**
171  * @brief Check if packet should be translated
172  *
173  * Packets aimed at outside interface and external address with active session
174  * should be translated.
175  *
176  * @param sm            NAT main
177  * @param rt            NAT runtime data
178  * @param sw_if_index0  index of the inside interface
179  * @param ip0           IPv4 header
180  * @param proto0        NAT protocol
181  * @param rx_fib_index0 RX FIB index
182  *
183  * @returns 0 if packet should be translated otherwise 1
184  */
185 static inline int
186 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
187                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
188                          u32 rx_fib_index0)
189 {
190   if (sm->out2in_dpo)
191     return 0;
192
193   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
194   fib_prefix_t pfx = {
195     .fp_proto = FIB_PROTOCOL_IP4,
196     .fp_len = 32,
197     .fp_addr = {
198         .ip4.as_u32 = ip0->dst_address.as_u32,
199     },
200   };
201
202   /* Don't NAT packet aimed at the intfc address */
203   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
204                                       ip0->dst_address.as_u32)))
205     return 1;
206
207   fei = fib_table_lookup (rx_fib_index0, &pfx);
208   if (FIB_NODE_INDEX_INVALID != fei)
209     {
210       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
211       if (sw_if_index == ~0)
212         {
213           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
214           if (FIB_NODE_INDEX_INVALID != fei)
215             sw_if_index = fib_entry_get_resolving_interface (fei);
216         }
217       snat_interface_t *i;
218       pool_foreach (i, sm->interfaces,
219       ({
220         /* NAT packet aimed at outside interface */
221         if ((nat_interface_is_outside(i)) && (sw_if_index == i->sw_if_index))
222           return 0;
223       }));
224     }
225
226   return 1;
227 }
228
229 static inline int
230 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
231                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
232                     u32 rx_fib_index0, u32 thread_index)
233 {
234   udp_header_t * udp0 = ip4_next_header (ip0);
235   snat_session_key_t key0, sm0;
236   clib_bihash_kv_8_8_t kv0, value0;
237
238   key0.addr = ip0->dst_address;
239   key0.port = udp0->dst_port;
240   key0.protocol = proto0;
241   key0.fib_index = sm->outside_fib_index;
242   kv0.key = key0.as_u64;
243
244   /* NAT packet aimed at external address if */
245   /* has active sessions */
246   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
247                               &value0))
248     {
249       /* or is static mappings */
250       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
251         return 0;
252     }
253   else
254     return 0;
255
256   if (sm->forwarding_enabled)
257     return 1;
258
259   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
260                                  rx_fib_index0);
261 }
262
263 static inline int
264 nat_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip0,
265                                   u32 proto0, u16 src_port, u16 dst_port,
266                                   u32 thread_index, u32 sw_if_index)
267 {
268   snat_session_key_t key0;
269   clib_bihash_kv_8_8_t kv0, value0;
270   snat_interface_t *i;
271
272   /* src NAT check */
273   key0.addr = ip0->src_address;
274   key0.port = src_port;
275   key0.protocol = proto0;
276   key0.fib_index = sm->outside_fib_index;
277   kv0.key = key0.as_u64;
278
279   if (!clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
280                                &value0))
281     return 1;
282
283   /* dst NAT check */
284   key0.addr = ip0->dst_address;
285   key0.port = dst_port;
286   key0.protocol = proto0;
287   key0.fib_index = sm->inside_fib_index;
288   kv0.key = key0.as_u64;
289   if (!clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
290                                &value0))
291   {
292     /* hairpinning */
293     pool_foreach (i, sm->output_feature_interfaces,
294     ({
295       if ((nat_interface_is_inside(i)) && (sw_if_index == i->sw_if_index))
296         return 0;
297     }));
298     return 1;
299   }
300
301   return 0;
302 }
303
304 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
305                       ip4_header_t * ip0,
306                       u32 rx_fib_index0,
307                       snat_session_key_t * key0,
308                       snat_session_t ** sessionp,
309                       vlib_node_runtime_t * node,
310                       u32 next0,
311                       u32 thread_index)
312 {
313   snat_user_t *u;
314   snat_session_t *s;
315   clib_bihash_kv_8_8_t kv0;
316   snat_session_key_t key1;
317   u32 address_index = ~0;
318   u32 outside_fib_index;
319   uword * p;
320   udp_header_t * udp0 = ip4_next_header (ip0);
321   u8 is_sm = 0;
322
323   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
324     {
325       b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
326       nat_ipfix_logging_max_sessions(sm->max_translations);
327       nat_log_notice ("maximum sessions exceeded");
328       return SNAT_IN2OUT_NEXT_DROP;
329     }
330
331   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
332   if (! p)
333     {
334       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
335       return SNAT_IN2OUT_NEXT_DROP;
336     }
337   outside_fib_index = p[0];
338
339   key1.protocol = key0->protocol;
340
341   u = nat_user_get_or_create (sm, &ip0->src_address, rx_fib_index0,
342                               thread_index);
343   if (!u)
344     {
345       nat_log_warn ("create NAT user failed");
346       return SNAT_IN2OUT_NEXT_DROP;
347     }
348
349   /* First try to match static mapping by local address and port */
350   if (snat_static_mapping_match (sm, *key0, &key1, 0, 0, 0, 0))
351     {
352       /* Try to create dynamic translation */
353       if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index0,
354                                                thread_index, &key1,
355                                                &address_index,
356                                                sm->port_per_thread,
357                                                sm->per_thread_data[thread_index].snat_thread_index))
358         {
359           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
360           return SNAT_IN2OUT_NEXT_DROP;
361         }
362     }
363   else
364     is_sm = 1;
365
366   s = nat_session_alloc_or_recycle (sm, u, thread_index);
367   if (!s)
368     {
369       nat_log_warn ("create NAT session failed");
370       return SNAT_IN2OUT_NEXT_DROP;
371     }
372
373   if (is_sm)
374     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
375   user_session_increment (sm, u, is_sm);
376   s->outside_address_index = address_index;
377   s->in2out = *key0;
378   s->out2in = key1;
379   s->out2in.protocol = key0->protocol;
380   s->out2in.fib_index = outside_fib_index;
381   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
382   s->ext_host_port = udp0->dst_port;
383   *sessionp = s;
384
385   /* Add to translation hashes */
386   kv0.key = s->in2out.as_u64;
387   kv0.value = s - sm->per_thread_data[thread_index].sessions;
388   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
389                                1 /* is_add */))
390       nat_log_notice ("in2out key add failed");
391
392   kv0.key = s->out2in.as_u64;
393   kv0.value = s - sm->per_thread_data[thread_index].sessions;
394
395   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
396                                1 /* is_add */))
397       nat_log_notice ("out2in key add failed");
398
399   /* log NAT event */
400   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
401                                       s->out2in.addr.as_u32,
402                                       s->in2out.protocol,
403                                       s->in2out.port,
404                                       s->out2in.port,
405                                       s->in2out.fib_index);
406   return next0;
407 }
408
409 static_always_inline
410 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
411                                  snat_session_key_t *p_key0)
412 {
413   icmp46_header_t *icmp0;
414   snat_session_key_t key0;
415   icmp_echo_header_t *echo0, *inner_echo0 = 0;
416   ip4_header_t *inner_ip0 = 0;
417   void *l4_header = 0;
418   icmp46_header_t *inner_icmp0;
419
420   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
421   echo0 = (icmp_echo_header_t *)(icmp0+1);
422
423   if (!icmp_is_error_message (icmp0))
424     {
425       key0.protocol = SNAT_PROTOCOL_ICMP;
426       key0.addr = ip0->src_address;
427       key0.port = echo0->identifier;
428     }
429   else
430     {
431       inner_ip0 = (ip4_header_t *)(echo0+1);
432       l4_header = ip4_next_header (inner_ip0);
433       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
434       key0.addr = inner_ip0->dst_address;
435       switch (key0.protocol)
436         {
437         case SNAT_PROTOCOL_ICMP:
438           inner_icmp0 = (icmp46_header_t*)l4_header;
439           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
440           key0.port = inner_echo0->identifier;
441           break;
442         case SNAT_PROTOCOL_UDP:
443         case SNAT_PROTOCOL_TCP:
444           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
445           break;
446         default:
447           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
448         }
449     }
450   *p_key0 = key0;
451   return -1; /* success */
452 }
453
454 /**
455  * Get address and port values to be used for ICMP packet translation
456  * and create session if needed
457  *
458  * @param[in,out] sm             NAT main
459  * @param[in,out] node           NAT node runtime
460  * @param[in] thread_index       thread index
461  * @param[in,out] b0             buffer containing packet to be translated
462  * @param[out] p_proto           protocol used for matching
463  * @param[out] p_value           address and port after NAT translation
464  * @param[out] p_dont_translate  if packet should not be translated
465  * @param d                      optional parameter
466  * @param e                      optional parameter
467  */
468 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
469                            u32 thread_index, vlib_buffer_t *b0,
470                            ip4_header_t *ip0, u8 *p_proto,
471                            snat_session_key_t *p_value,
472                            u8 *p_dont_translate, void *d, void *e)
473 {
474   icmp46_header_t *icmp0;
475   u32 sw_if_index0;
476   u32 rx_fib_index0;
477   snat_session_key_t key0;
478   snat_session_t *s0 = 0;
479   u8 dont_translate = 0;
480   clib_bihash_kv_8_8_t kv0, value0;
481   u32 next0 = ~0;
482   int err;
483
484   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
485   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
486   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
487
488   err = icmp_get_key (ip0, &key0);
489   if (err != -1)
490     {
491       b0->error = node->errors[err];
492       next0 = SNAT_IN2OUT_NEXT_DROP;
493       goto out;
494     }
495   key0.fib_index = rx_fib_index0;
496
497   kv0.key = key0.as_u64;
498
499   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
500                               &value0))
501     {
502       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] != ~0)
503         {
504           if (PREDICT_FALSE(nat_not_translate_output_feature(sm, ip0,
505               key0.protocol, key0.port, key0.port, thread_index, sw_if_index0)))
506             {
507               dont_translate = 1;
508               goto out;
509             }
510         }
511       else
512         {
513           if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
514               ip0, SNAT_PROTOCOL_ICMP, rx_fib_index0, thread_index)))
515             {
516               dont_translate = 1;
517               goto out;
518             }
519         }
520
521       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
522         {
523           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
524           next0 = SNAT_IN2OUT_NEXT_DROP;
525           goto out;
526         }
527
528       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
529                          &s0, node, next0, thread_index);
530
531       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
532         goto out;
533     }
534   else
535     {
536       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
537                         icmp0->type != ICMP4_echo_reply &&
538                         !icmp_is_error_message (icmp0)))
539         {
540           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
541           next0 = SNAT_IN2OUT_NEXT_DROP;
542           goto out;
543         }
544
545       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
546                               value0.value);
547     }
548
549 out:
550   *p_proto = key0.protocol;
551   if (s0)
552     *p_value = s0->out2in;
553   *p_dont_translate = dont_translate;
554   if (d)
555     *(snat_session_t**)d = s0;
556   return next0;
557 }
558
559 /**
560  * Get address and port values to be used for ICMP packet translation
561  *
562  * @param[in] sm                 NAT main
563  * @param[in,out] node           NAT node runtime
564  * @param[in] thread_index       thread index
565  * @param[in,out] b0             buffer containing packet to be translated
566  * @param[out] p_proto           protocol used for matching
567  * @param[out] p_value           address and port after NAT translation
568  * @param[out] p_dont_translate  if packet should not be translated
569  * @param d                      optional parameter
570  * @param e                      optional parameter
571  */
572 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
573                            u32 thread_index, vlib_buffer_t *b0,
574                            ip4_header_t *ip0, u8 *p_proto,
575                            snat_session_key_t *p_value,
576                            u8 *p_dont_translate, void *d, void *e)
577 {
578   icmp46_header_t *icmp0;
579   u32 sw_if_index0;
580   u32 rx_fib_index0;
581   snat_session_key_t key0;
582   snat_session_key_t sm0;
583   u8 dont_translate = 0;
584   u8 is_addr_only;
585   u32 next0 = ~0;
586   int err;
587
588   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
589   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
590   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
591
592   err = icmp_get_key (ip0, &key0);
593   if (err != -1)
594     {
595       b0->error = node->errors[err];
596       next0 = SNAT_IN2OUT_NEXT_DROP;
597       goto out2;
598     }
599   key0.fib_index = rx_fib_index0;
600
601   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only, 0, 0))
602     {
603       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
604           IP_PROTOCOL_ICMP, rx_fib_index0)))
605         {
606           dont_translate = 1;
607           goto out;
608         }
609
610       if (icmp_is_error_message (icmp0))
611         {
612           next0 = SNAT_IN2OUT_NEXT_DROP;
613           goto out;
614         }
615
616       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
617       next0 = SNAT_IN2OUT_NEXT_DROP;
618       goto out;
619     }
620
621   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
622                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
623                     !icmp_is_error_message (icmp0)))
624     {
625       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
626       next0 = SNAT_IN2OUT_NEXT_DROP;
627       goto out;
628     }
629
630 out:
631   *p_value = sm0;
632 out2:
633   *p_proto = key0.protocol;
634   *p_dont_translate = dont_translate;
635   return next0;
636 }
637
638 static inline u32 icmp_in2out (snat_main_t *sm,
639                                vlib_buffer_t * b0,
640                                ip4_header_t * ip0,
641                                icmp46_header_t * icmp0,
642                                u32 sw_if_index0,
643                                u32 rx_fib_index0,
644                                vlib_node_runtime_t * node,
645                                u32 next0,
646                                u32 thread_index,
647                                void *d,
648                                void *e)
649 {
650   snat_session_key_t sm0;
651   u8 protocol;
652   icmp_echo_header_t *echo0, *inner_echo0 = 0;
653   ip4_header_t *inner_ip0;
654   void *l4_header = 0;
655   icmp46_header_t *inner_icmp0;
656   u8 dont_translate;
657   u32 new_addr0, old_addr0;
658   u16 old_id0, new_id0;
659   ip_csum_t sum0;
660   u16 checksum0;
661   u32 next0_tmp;
662
663   echo0 = (icmp_echo_header_t *)(icmp0+1);
664
665   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0, ip0,
666                                        &protocol, &sm0, &dont_translate, d, e);
667   if (next0_tmp != ~0)
668     next0 = next0_tmp;
669   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
670     goto out;
671
672   sum0 = ip_incremental_checksum (0, icmp0,
673                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
674   checksum0 = ~ip_csum_fold (sum0);
675   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
676     {
677       next0 = SNAT_IN2OUT_NEXT_DROP;
678       goto out;
679     }
680
681   old_addr0 = ip0->src_address.as_u32;
682   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
683   if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
684     vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
685
686   sum0 = ip0->checksum;
687   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
688                          src_address /* changed member */);
689   ip0->checksum = ip_csum_fold (sum0);
690
691   if (icmp0->checksum == 0)
692     icmp0->checksum = 0xffff;
693
694   if (!icmp_is_error_message (icmp0))
695     {
696       new_id0 = sm0.port;
697       if (PREDICT_FALSE(new_id0 != echo0->identifier))
698         {
699           old_id0 = echo0->identifier;
700           new_id0 = sm0.port;
701           echo0->identifier = new_id0;
702
703           sum0 = icmp0->checksum;
704           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
705                                  identifier);
706           icmp0->checksum = ip_csum_fold (sum0);
707         }
708     }
709   else
710     {
711       inner_ip0 = (ip4_header_t *)(echo0+1);
712       l4_header = ip4_next_header (inner_ip0);
713
714       if (!ip4_header_checksum_is_valid (inner_ip0))
715         {
716           next0 = SNAT_IN2OUT_NEXT_DROP;
717           goto out;
718         }
719
720       old_addr0 = inner_ip0->dst_address.as_u32;
721       inner_ip0->dst_address = sm0.addr;
722       new_addr0 = inner_ip0->dst_address.as_u32;
723
724       sum0 = icmp0->checksum;
725       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
726                              dst_address /* changed member */);
727       icmp0->checksum = ip_csum_fold (sum0);
728
729       switch (protocol)
730         {
731           case SNAT_PROTOCOL_ICMP:
732             inner_icmp0 = (icmp46_header_t*)l4_header;
733             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
734
735             old_id0 = inner_echo0->identifier;
736             new_id0 = sm0.port;
737             inner_echo0->identifier = new_id0;
738
739             sum0 = icmp0->checksum;
740             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
741                                    identifier);
742             icmp0->checksum = ip_csum_fold (sum0);
743             break;
744           case SNAT_PROTOCOL_UDP:
745           case SNAT_PROTOCOL_TCP:
746             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
747             new_id0 = sm0.port;
748             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
749
750             sum0 = icmp0->checksum;
751             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
752                                    dst_port);
753             icmp0->checksum = ip_csum_fold (sum0);
754             break;
755           default:
756             ASSERT(0);
757         }
758     }
759
760 out:
761   return next0;
762 }
763
764 /**
765  * @brief Hairpinning
766  *
767  * Hairpinning allows two endpoints on the internal side of the NAT to
768  * communicate even if they only use each other's external IP addresses
769  * and ports.
770  *
771  * @param sm     NAT main.
772  * @param b0     Vlib buffer.
773  * @param ip0    IP header.
774  * @param udp0   UDP header.
775  * @param tcp0   TCP header.
776  * @param proto0 NAT protocol.
777  */
778 static inline int
779 snat_hairpinning (snat_main_t *sm,
780                   vlib_buffer_t * b0,
781                   ip4_header_t * ip0,
782                   udp_header_t * udp0,
783                   tcp_header_t * tcp0,
784                   u32 proto0,
785                   int is_ed)
786 {
787   snat_session_key_t key0, sm0;
788   snat_session_t * s0;
789   clib_bihash_kv_8_8_t kv0, value0;
790   ip_csum_t sum0;
791   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
792   u16 new_dst_port0, old_dst_port0;
793   int rv;
794
795   key0.addr = ip0->dst_address;
796   key0.port = udp0->dst_port;
797   key0.protocol = proto0;
798   key0.fib_index = sm->outside_fib_index;
799   kv0.key = key0.as_u64;
800
801   /* Check if destination is static mappings */
802   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
803     {
804       new_dst_addr0 = sm0.addr.as_u32;
805       new_dst_port0 = sm0.port;
806       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
807     }
808   /* or active session */
809   else
810     {
811       if (sm->num_workers > 1)
812         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
813       else
814         ti = sm->num_workers;
815
816       if (is_ed)
817         {
818           clib_bihash_kv_16_8_t ed_kv, ed_value;
819           make_ed_kv (&ed_kv, &ip0->dst_address, &ip0->src_address,
820                       ip0->protocol, sm->outside_fib_index, udp0->dst_port,
821                       udp0->src_port);
822           rv = clib_bihash_search_16_8 (&sm->per_thread_data[ti].out2in_ed,
823                                         &ed_kv, &ed_value);
824           si = ed_value.value;
825         }
826       else
827         {
828           rv = clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0,
829                                        &value0);
830           si = value0.value;
831         }
832       if (rv)
833         return 0;
834
835       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
836       new_dst_addr0 = s0->in2out.addr.as_u32;
837       new_dst_port0 = s0->in2out.port;
838       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
839     }
840
841   /* Destination is behind the same NAT, use internal address and port */
842   if (new_dst_addr0)
843     {
844       old_dst_addr0 = ip0->dst_address.as_u32;
845       ip0->dst_address.as_u32 = new_dst_addr0;
846       sum0 = ip0->checksum;
847       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
848                              ip4_header_t, dst_address);
849       ip0->checksum = ip_csum_fold (sum0);
850
851       old_dst_port0 = tcp0->dst;
852       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
853         {
854           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
855             {
856               tcp0->dst = new_dst_port0;
857               sum0 = tcp0->checksum;
858               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
859                                      ip4_header_t, dst_address);
860               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
861                                      ip4_header_t /* cheat */, length);
862               tcp0->checksum = ip_csum_fold(sum0);
863             }
864           else
865             {
866               udp0->dst_port = new_dst_port0;
867               udp0->checksum = 0;
868             }
869         }
870       else
871         {
872           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
873             {
874               sum0 = tcp0->checksum;
875               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
876                                      ip4_header_t, dst_address);
877               tcp0->checksum = ip_csum_fold(sum0);
878             }
879         }
880       return 1;
881     }
882   return 0;
883 }
884
885 static inline void
886 snat_icmp_hairpinning (snat_main_t *sm,
887                        vlib_buffer_t * b0,
888                        ip4_header_t * ip0,
889                        icmp46_header_t * icmp0,
890                        int is_ed)
891 {
892   snat_session_key_t key0, sm0;
893   clib_bihash_kv_8_8_t kv0, value0;
894   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
895   ip_csum_t sum0;
896   snat_session_t *s0;
897   int rv;
898
899   if (!icmp_is_error_message (icmp0))
900     {
901       icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
902       u16 icmp_id0 = echo0->identifier;
903       key0.addr = ip0->dst_address;
904       key0.port = icmp_id0;
905       key0.protocol = SNAT_PROTOCOL_ICMP;
906       key0.fib_index = sm->outside_fib_index;
907       kv0.key = key0.as_u64;
908
909       if (sm->num_workers > 1)
910         ti = (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread;
911       else
912         ti = sm->num_workers;
913
914       /* Check if destination is in active sessions */
915       if (is_ed)
916         {
917           clib_bihash_kv_16_8_t ed_kv, ed_value;
918           make_ed_kv (&ed_kv, &ip0->dst_address, &ip0->src_address,
919                       IP_PROTOCOL_ICMP, sm->outside_fib_index, icmp_id0, 0);
920           rv = clib_bihash_search_16_8 (&sm->per_thread_data[ti].out2in_ed,
921                                         &ed_kv, &ed_value);
922           si = ed_value.value;
923         }
924       else
925         {
926           rv = clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0,
927                                        &value0);
928           si = value0.value;
929         }
930       if (rv)
931         {
932           /* or static mappings */
933           if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
934             {
935               new_dst_addr0 = sm0.addr.as_u32;
936               vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
937             }
938         }
939       else
940         {
941           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
942           new_dst_addr0 = s0->in2out.addr.as_u32;
943           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
944           echo0->identifier = s0->in2out.port;
945           sum0 = icmp0->checksum;
946           sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
947                                  icmp_echo_header_t, identifier);
948           icmp0->checksum = ip_csum_fold (sum0);
949         }
950
951       /* Destination is behind the same NAT, use internal address and port */
952       if (new_dst_addr0)
953         {
954           old_dst_addr0 = ip0->dst_address.as_u32;
955           ip0->dst_address.as_u32 = new_dst_addr0;
956           sum0 = ip0->checksum;
957           sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
958                                  ip4_header_t, dst_address);
959           ip0->checksum = ip_csum_fold (sum0);
960         }
961     }
962
963 }
964
965 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
966                                          vlib_buffer_t * b0,
967                                          ip4_header_t * ip0,
968                                          icmp46_header_t * icmp0,
969                                          u32 sw_if_index0,
970                                          u32 rx_fib_index0,
971                                          vlib_node_runtime_t * node,
972                                          u32 next0,
973                                          f64 now,
974                                          u32 thread_index,
975                                          snat_session_t ** p_s0)
976 {
977   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
978                       next0, thread_index, p_s0, 0);
979   snat_session_t * s0 = *p_s0;
980   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
981     {
982       /* Hairpinning */
983       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == 0)
984         snat_icmp_hairpinning(sm, b0, ip0, icmp0, sm->endpoint_dependent);
985       /* Accounting */
986       nat44_session_update_counters (s0, now,
987                                      vlib_buffer_length_in_chain (sm->vlib_main, b0));
988       /* Per-user LRU list maintenance */
989       nat44_session_update_lru (sm, s0, thread_index);
990     }
991   return next0;
992 }
993
994 static inline void
995 nat_hairpinning_sm_unknown_proto (snat_main_t * sm,
996                                   vlib_buffer_t * b,
997                                   ip4_header_t * ip)
998 {
999   clib_bihash_kv_8_8_t kv, value;
1000   snat_static_mapping_t *m;
1001   u32 old_addr, new_addr;
1002   ip_csum_t sum;
1003
1004   make_sm_kv (&kv, &ip->dst_address, 0, sm->outside_fib_index, 0);
1005   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1006     return;
1007
1008   m = pool_elt_at_index (sm->static_mappings, value.value);
1009
1010   old_addr = ip->dst_address.as_u32;
1011   new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
1012   sum = ip->checksum;
1013   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
1014   ip->checksum = ip_csum_fold (sum);
1015
1016   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1017     vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
1018 }
1019
1020 static int
1021 nat_in2out_sm_unknown_proto (snat_main_t *sm,
1022                              vlib_buffer_t * b,
1023                              ip4_header_t * ip,
1024                              u32 rx_fib_index)
1025 {
1026   clib_bihash_kv_8_8_t kv, value;
1027   snat_static_mapping_t *m;
1028   snat_session_key_t m_key;
1029   u32 old_addr, new_addr;
1030   ip_csum_t sum;
1031
1032   m_key.addr = ip->src_address;
1033   m_key.port = 0;
1034   m_key.protocol = 0;
1035   m_key.fib_index = rx_fib_index;
1036   kv.key = m_key.as_u64;
1037   if (clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
1038     return 1;
1039
1040   m = pool_elt_at_index (sm->static_mappings, value.value);
1041
1042   old_addr = ip->src_address.as_u32;
1043   new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
1044   sum = ip->checksum;
1045   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1046   ip->checksum = ip_csum_fold (sum);
1047
1048
1049   /* Hairpinning */
1050   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1051     {
1052       nat_hairpinning_sm_unknown_proto (sm, b, ip);
1053       vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1054     }
1055
1056   return 0;
1057 }
1058
1059 static inline uword
1060 snat_in2out_node_fn_inline (vlib_main_t * vm,
1061                             vlib_node_runtime_t * node,
1062                             vlib_frame_t * frame, int is_slow_path,
1063                             int is_output_feature)
1064 {
1065   u32 n_left_from, * from, * to_next;
1066   snat_in2out_next_t next_index;
1067   u32 pkts_processed = 0;
1068   snat_main_t * sm = &snat_main;
1069   f64 now = vlib_time_now (vm);
1070   u32 stats_node_index;
1071   u32 thread_index = vlib_get_thread_index ();
1072
1073   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
1074     snat_in2out_node.index;
1075
1076   from = vlib_frame_vector_args (frame);
1077   n_left_from = frame->n_vectors;
1078   next_index = node->cached_next_index;
1079
1080   while (n_left_from > 0)
1081     {
1082       u32 n_left_to_next;
1083
1084       vlib_get_next_frame (vm, node, next_index,
1085                            to_next, n_left_to_next);
1086
1087       while (n_left_from >= 4 && n_left_to_next >= 2)
1088         {
1089           u32 bi0, bi1;
1090           vlib_buffer_t * b0, * b1;
1091           u32 next0, next1;
1092           u32 sw_if_index0, sw_if_index1;
1093           ip4_header_t * ip0, * ip1;
1094           ip_csum_t sum0, sum1;
1095           u32 new_addr0, old_addr0, new_addr1, old_addr1;
1096           u16 old_port0, new_port0, old_port1, new_port1;
1097           udp_header_t * udp0, * udp1;
1098           tcp_header_t * tcp0, * tcp1;
1099           icmp46_header_t * icmp0, * icmp1;
1100           snat_session_key_t key0, key1;
1101           u32 rx_fib_index0, rx_fib_index1;
1102           u32 proto0, proto1;
1103           snat_session_t * s0 = 0, * s1 = 0;
1104           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1105           u32 iph_offset0 = 0, iph_offset1 = 0;
1106
1107           /* Prefetch next iteration. */
1108           {
1109             vlib_buffer_t * p2, * p3;
1110
1111             p2 = vlib_get_buffer (vm, from[2]);
1112             p3 = vlib_get_buffer (vm, from[3]);
1113
1114             vlib_prefetch_buffer_header (p2, LOAD);
1115             vlib_prefetch_buffer_header (p3, LOAD);
1116
1117             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1118             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1119           }
1120
1121           /* speculatively enqueue b0 and b1 to the current next frame */
1122           to_next[0] = bi0 = from[0];
1123           to_next[1] = bi1 = from[1];
1124           from += 2;
1125           to_next += 2;
1126           n_left_from -= 2;
1127           n_left_to_next -= 2;
1128
1129           b0 = vlib_get_buffer (vm, bi0);
1130           b1 = vlib_get_buffer (vm, bi1);
1131
1132           if (is_output_feature)
1133             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1134
1135           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1136                  iph_offset0);
1137
1138           udp0 = ip4_next_header (ip0);
1139           tcp0 = (tcp_header_t *) udp0;
1140           icmp0 = (icmp46_header_t *) udp0;
1141
1142           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1143           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1144                                    sw_if_index0);
1145
1146           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1147
1148           if (PREDICT_FALSE(ip0->ttl == 1))
1149             {
1150               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1151               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1152                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1153                                            0);
1154               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1155               goto trace00;
1156             }
1157
1158           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1159
1160           /* Next configured feature, probably ip4-lookup */
1161           if (is_slow_path)
1162             {
1163               if (PREDICT_FALSE (proto0 == ~0))
1164                 {
1165                   if (nat_in2out_sm_unknown_proto (sm, b0, ip0, rx_fib_index0))
1166                     {
1167                       next0 = SNAT_IN2OUT_NEXT_DROP;
1168                       b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
1169                     }
1170                   goto trace00;
1171                 }
1172
1173               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1174                 {
1175                   next0 = icmp_in2out_slow_path
1176                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
1177                      node, next0, now, thread_index, &s0);
1178                   goto trace00;
1179                 }
1180             }
1181           else
1182             {
1183               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1184                 {
1185                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1186                   goto trace00;
1187                 }
1188
1189               if (ip4_is_fragment (ip0))
1190                 {
1191                   next0 = SNAT_IN2OUT_NEXT_REASS;
1192                   goto trace00;
1193                 }
1194             }
1195
1196           key0.addr = ip0->src_address;
1197           key0.port = udp0->src_port;
1198           key0.protocol = proto0;
1199           key0.fib_index = rx_fib_index0;
1200
1201           kv0.key = key0.as_u64;
1202
1203           if (PREDICT_FALSE (clib_bihash_search_8_8 (
1204               &sm->per_thread_data[thread_index].in2out, &kv0, &value0) != 0))
1205             {
1206               if (is_slow_path)
1207                 {
1208                   if (is_output_feature)
1209                     {
1210                       if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
1211                           ip0, proto0, udp0->src_port, udp0->dst_port, thread_index, sw_if_index0)))
1212                         goto trace00;
1213                     }
1214                   else
1215                     {
1216                       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1217                           ip0, proto0, rx_fib_index0, thread_index)))
1218                         goto trace00;
1219                     }
1220
1221                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1222                                      &s0, node, next0, thread_index);
1223                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1224                     goto trace00;
1225                 }
1226               else
1227                 {
1228                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1229                   goto trace00;
1230                 }
1231             }
1232           else
1233             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1234                                     value0.value);
1235
1236           b0->flags |= VNET_BUFFER_F_IS_NATED;
1237
1238           old_addr0 = ip0->src_address.as_u32;
1239           ip0->src_address = s0->out2in.addr;
1240           new_addr0 = ip0->src_address.as_u32;
1241           if (!is_output_feature)
1242             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1243
1244           sum0 = ip0->checksum;
1245           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1246                                  ip4_header_t,
1247                                  src_address /* changed member */);
1248           ip0->checksum = ip_csum_fold (sum0);
1249
1250           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1251             {
1252               old_port0 = tcp0->src_port;
1253               tcp0->src_port = s0->out2in.port;
1254               new_port0 = tcp0->src_port;
1255
1256               sum0 = tcp0->checksum;
1257               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1258                                      ip4_header_t,
1259                                      dst_address /* changed member */);
1260               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1261                                      ip4_header_t /* cheat */,
1262                                      length /* changed member */);
1263               tcp0->checksum = ip_csum_fold(sum0);
1264             }
1265           else
1266             {
1267               old_port0 = udp0->src_port;
1268               udp0->src_port = s0->out2in.port;
1269               udp0->checksum = 0;
1270             }
1271
1272           /* Accounting */
1273           nat44_session_update_counters (s0, now,
1274                                          vlib_buffer_length_in_chain (vm, b0));
1275           /* Per-user LRU list maintenance */
1276           nat44_session_update_lru (sm, s0, thread_index);
1277         trace00:
1278
1279           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1280                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1281             {
1282               snat_in2out_trace_t *t =
1283                  vlib_add_trace (vm, node, b0, sizeof (*t));
1284               t->is_slow_path = is_slow_path;
1285               t->sw_if_index = sw_if_index0;
1286               t->next_index = next0;
1287                   t->session_index = ~0;
1288               if (s0)
1289                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1290             }
1291
1292           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1293
1294           if (is_output_feature)
1295             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1296
1297           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1298                  iph_offset1);
1299
1300           udp1 = ip4_next_header (ip1);
1301           tcp1 = (tcp_header_t *) udp1;
1302           icmp1 = (icmp46_header_t *) udp1;
1303
1304           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1305           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1306                                    sw_if_index1);
1307
1308           if (PREDICT_FALSE(ip1->ttl == 1))
1309             {
1310               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1311               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1312                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1313                                            0);
1314               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1315               goto trace01;
1316             }
1317
1318           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1319
1320           /* Next configured feature, probably ip4-lookup */
1321           if (is_slow_path)
1322             {
1323               if (PREDICT_FALSE (proto1 == ~0))
1324                 {
1325                   if (nat_in2out_sm_unknown_proto (sm, b1, ip1, rx_fib_index1))
1326                     {
1327                       next1 = SNAT_IN2OUT_NEXT_DROP;
1328                       b1->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
1329                     }
1330                   goto trace01;
1331                 }
1332
1333               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1334                 {
1335                   next1 = icmp_in2out_slow_path
1336                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1337                      next1, now, thread_index, &s1);
1338                   goto trace01;
1339                 }
1340             }
1341           else
1342             {
1343               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1344                 {
1345                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1346                   goto trace01;
1347                 }
1348
1349               if (ip4_is_fragment (ip1))
1350                 {
1351                   next1 = SNAT_IN2OUT_NEXT_REASS;
1352                   goto trace01;
1353                 }
1354             }
1355
1356           key1.addr = ip1->src_address;
1357           key1.port = udp1->src_port;
1358           key1.protocol = proto1;
1359           key1.fib_index = rx_fib_index1;
1360
1361           kv1.key = key1.as_u64;
1362
1363             if (PREDICT_FALSE(clib_bihash_search_8_8 (
1364                 &sm->per_thread_data[thread_index].in2out, &kv1, &value1) != 0))
1365             {
1366               if (is_slow_path)
1367                 {
1368                   if (is_output_feature)
1369                     {
1370                       if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
1371                           ip1, proto1, udp1->src_port, udp1->dst_port, thread_index, sw_if_index1)))
1372                         goto trace01;
1373                     }
1374                   else
1375                     {
1376                       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
1377                           ip1, proto1, rx_fib_index1, thread_index)))
1378                         goto trace01;
1379                     }
1380
1381                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1382                                      &s1, node, next1, thread_index);
1383                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1384                     goto trace01;
1385                 }
1386               else
1387                 {
1388                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1389                   goto trace01;
1390                 }
1391             }
1392           else
1393             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1394                                     value1.value);
1395
1396           b1->flags |= VNET_BUFFER_F_IS_NATED;
1397
1398           old_addr1 = ip1->src_address.as_u32;
1399           ip1->src_address = s1->out2in.addr;
1400           new_addr1 = ip1->src_address.as_u32;
1401           if (!is_output_feature)
1402             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1403
1404           sum1 = ip1->checksum;
1405           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1406                                  ip4_header_t,
1407                                  src_address /* changed member */);
1408           ip1->checksum = ip_csum_fold (sum1);
1409
1410           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1411             {
1412               old_port1 = tcp1->src_port;
1413               tcp1->src_port = s1->out2in.port;
1414               new_port1 = tcp1->src_port;
1415
1416               sum1 = tcp1->checksum;
1417               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1418                                      ip4_header_t,
1419                                      dst_address /* changed member */);
1420               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1421                                      ip4_header_t /* cheat */,
1422                                      length /* changed member */);
1423               tcp1->checksum = ip_csum_fold(sum1);
1424             }
1425           else
1426             {
1427               old_port1 = udp1->src_port;
1428               udp1->src_port = s1->out2in.port;
1429               udp1->checksum = 0;
1430             }
1431
1432           /* Accounting */
1433           nat44_session_update_counters (s1, now,
1434                                          vlib_buffer_length_in_chain (vm, b1));
1435           /* Per-user LRU list maintenance */
1436           nat44_session_update_lru (sm, s1, thread_index);
1437         trace01:
1438
1439           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1440                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1441             {
1442               snat_in2out_trace_t *t =
1443                  vlib_add_trace (vm, node, b1, sizeof (*t));
1444               t->sw_if_index = sw_if_index1;
1445               t->next_index = next1;
1446               t->session_index = ~0;
1447               if (s1)
1448                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1449             }
1450
1451           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1452
1453           /* verify speculative enqueues, maybe switch current next frame */
1454           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1455                                            to_next, n_left_to_next,
1456                                            bi0, bi1, next0, next1);
1457         }
1458
1459       while (n_left_from > 0 && n_left_to_next > 0)
1460         {
1461           u32 bi0;
1462           vlib_buffer_t * b0;
1463           u32 next0;
1464           u32 sw_if_index0;
1465           ip4_header_t * ip0;
1466           ip_csum_t sum0;
1467           u32 new_addr0, old_addr0;
1468           u16 old_port0, new_port0;
1469           udp_header_t * udp0;
1470           tcp_header_t * tcp0;
1471           icmp46_header_t * icmp0;
1472           snat_session_key_t key0;
1473           u32 rx_fib_index0;
1474           u32 proto0;
1475           snat_session_t * s0 = 0;
1476           clib_bihash_kv_8_8_t kv0, value0;
1477           u32 iph_offset0 = 0;
1478
1479           /* speculatively enqueue b0 to the current next frame */
1480           bi0 = from[0];
1481           to_next[0] = bi0;
1482           from += 1;
1483           to_next += 1;
1484           n_left_from -= 1;
1485           n_left_to_next -= 1;
1486
1487           b0 = vlib_get_buffer (vm, bi0);
1488           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1489
1490           if (is_output_feature)
1491             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1492
1493           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1494                  iph_offset0);
1495
1496           udp0 = ip4_next_header (ip0);
1497           tcp0 = (tcp_header_t *) udp0;
1498           icmp0 = (icmp46_header_t *) udp0;
1499
1500           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1501           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1502                                    sw_if_index0);
1503
1504           if (PREDICT_FALSE(ip0->ttl == 1))
1505             {
1506               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1507               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1508                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1509                                            0);
1510               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1511               goto trace0;
1512             }
1513
1514           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1515
1516           /* Next configured feature, probably ip4-lookup */
1517           if (is_slow_path)
1518             {
1519               if (PREDICT_FALSE (proto0 == ~0))
1520                 {
1521                   if (nat_in2out_sm_unknown_proto (sm, b0, ip0, rx_fib_index0))
1522                     {
1523                       next0 = SNAT_IN2OUT_NEXT_DROP;
1524                       b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
1525                     }
1526                   goto trace0;
1527                 }
1528
1529               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1530                 {
1531                   next0 = icmp_in2out_slow_path
1532                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1533                      next0, now, thread_index, &s0);
1534                   goto trace0;
1535                 }
1536             }
1537           else
1538             {
1539               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1540                 {
1541                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1542                   goto trace0;
1543                 }
1544
1545               if (ip4_is_fragment (ip0))
1546                 {
1547                   next0 = SNAT_IN2OUT_NEXT_REASS;
1548                   goto trace0;
1549                 }
1550             }
1551
1552           key0.addr = ip0->src_address;
1553           key0.port = udp0->src_port;
1554           key0.protocol = proto0;
1555           key0.fib_index = rx_fib_index0;
1556
1557           kv0.key = key0.as_u64;
1558
1559           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out,
1560                                       &kv0, &value0))
1561             {
1562               if (is_slow_path)
1563                 {
1564                   if (is_output_feature)
1565                     {
1566                       if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
1567                           ip0, proto0, udp0->src_port, udp0->dst_port, thread_index, sw_if_index0)))
1568                         goto trace0;
1569                     }
1570                   else
1571                     {
1572                       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1573                           ip0, proto0, rx_fib_index0, thread_index)))
1574                         goto trace0;
1575                     }
1576
1577                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1578                                      &s0, node, next0, thread_index);
1579
1580                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1581                     goto trace0;
1582                 }
1583               else
1584                 {
1585                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1586                   goto trace0;
1587                 }
1588             }
1589           else
1590           s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1591                                   value0.value);
1592
1593           b0->flags |= VNET_BUFFER_F_IS_NATED;
1594
1595           old_addr0 = ip0->src_address.as_u32;
1596           ip0->src_address = s0->out2in.addr;
1597           new_addr0 = ip0->src_address.as_u32;
1598           if (!is_output_feature)
1599             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1600
1601           sum0 = ip0->checksum;
1602           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1603                                  ip4_header_t,
1604                                  src_address /* changed member */);
1605           ip0->checksum = ip_csum_fold (sum0);
1606
1607           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1608             {
1609               old_port0 = tcp0->src_port;
1610               tcp0->src_port = s0->out2in.port;
1611               new_port0 = tcp0->src_port;
1612
1613               sum0 = tcp0->checksum;
1614               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1615                                      ip4_header_t,
1616                                      dst_address /* changed member */);
1617               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1618                                      ip4_header_t /* cheat */,
1619                                      length /* changed member */);
1620               tcp0->checksum = ip_csum_fold(sum0);
1621             }
1622           else
1623             {
1624               old_port0 = udp0->src_port;
1625               udp0->src_port = s0->out2in.port;
1626               udp0->checksum = 0;
1627             }
1628
1629           /* Accounting */
1630           nat44_session_update_counters (s0, now,
1631                                          vlib_buffer_length_in_chain (vm, b0));
1632           /* Per-user LRU list maintenance */
1633           nat44_session_update_lru (sm, s0, thread_index);
1634
1635         trace0:
1636           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1637                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1638             {
1639               snat_in2out_trace_t *t =
1640                  vlib_add_trace (vm, node, b0, sizeof (*t));
1641               t->is_slow_path = is_slow_path;
1642               t->sw_if_index = sw_if_index0;
1643               t->next_index = next0;
1644                   t->session_index = ~0;
1645               if (s0)
1646                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1647             }
1648
1649           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1650
1651           /* verify speculative enqueue, maybe switch current next frame */
1652           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1653                                            to_next, n_left_to_next,
1654                                            bi0, next0);
1655         }
1656
1657       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1658     }
1659
1660   vlib_node_increment_counter (vm, stats_node_index,
1661                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1662                                pkts_processed);
1663   return frame->n_vectors;
1664 }
1665
1666 static uword
1667 snat_in2out_fast_path_fn (vlib_main_t * vm,
1668                           vlib_node_runtime_t * node,
1669                           vlib_frame_t * frame)
1670 {
1671   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0);
1672 }
1673
1674 VLIB_REGISTER_NODE (snat_in2out_node) = {
1675   .function = snat_in2out_fast_path_fn,
1676   .name = "nat44-in2out",
1677   .vector_size = sizeof (u32),
1678   .format_trace = format_snat_in2out_trace,
1679   .type = VLIB_NODE_TYPE_INTERNAL,
1680
1681   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1682   .error_strings = snat_in2out_error_strings,
1683
1684   .runtime_data_bytes = sizeof (snat_runtime_t),
1685
1686   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1687
1688   /* edit / add dispositions here */
1689   .next_nodes = {
1690     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1691     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1692     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
1693     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1694     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
1695   },
1696 };
1697
1698 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1699
1700 static uword
1701 snat_in2out_output_fast_path_fn (vlib_main_t * vm,
1702                                  vlib_node_runtime_t * node,
1703                                  vlib_frame_t * frame)
1704 {
1705   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1);
1706 }
1707
1708 VLIB_REGISTER_NODE (snat_in2out_output_node) = {
1709   .function = snat_in2out_output_fast_path_fn,
1710   .name = "nat44-in2out-output",
1711   .vector_size = sizeof (u32),
1712   .format_trace = format_snat_in2out_trace,
1713   .type = VLIB_NODE_TYPE_INTERNAL,
1714
1715   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1716   .error_strings = snat_in2out_error_strings,
1717
1718   .runtime_data_bytes = sizeof (snat_runtime_t),
1719
1720   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1721
1722   /* edit / add dispositions here */
1723   .next_nodes = {
1724     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1725     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
1726     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
1727     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1728     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
1729   },
1730 };
1731
1732 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
1733                               snat_in2out_output_fast_path_fn);
1734
1735 static uword
1736 snat_in2out_slow_path_fn (vlib_main_t * vm,
1737                           vlib_node_runtime_t * node,
1738                           vlib_frame_t * frame)
1739 {
1740   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0);
1741 }
1742
1743 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1744   .function = snat_in2out_slow_path_fn,
1745   .name = "nat44-in2out-slowpath",
1746   .vector_size = sizeof (u32),
1747   .format_trace = format_snat_in2out_trace,
1748   .type = VLIB_NODE_TYPE_INTERNAL,
1749
1750   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1751   .error_strings = snat_in2out_error_strings,
1752
1753   .runtime_data_bytes = sizeof (snat_runtime_t),
1754
1755   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1756
1757   /* edit / add dispositions here */
1758   .next_nodes = {
1759     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1760     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1761     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
1762     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1763     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
1764   },
1765 };
1766
1767 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
1768                               snat_in2out_slow_path_fn);
1769
1770 static uword
1771 snat_in2out_output_slow_path_fn (vlib_main_t * vm,
1772                                  vlib_node_runtime_t * node,
1773                                  vlib_frame_t * frame)
1774 {
1775   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1);
1776 }
1777
1778 VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
1779   .function = snat_in2out_output_slow_path_fn,
1780   .name = "nat44-in2out-output-slowpath",
1781   .vector_size = sizeof (u32),
1782   .format_trace = format_snat_in2out_trace,
1783   .type = VLIB_NODE_TYPE_INTERNAL,
1784
1785   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1786   .error_strings = snat_in2out_error_strings,
1787
1788   .runtime_data_bytes = sizeof (snat_runtime_t),
1789
1790   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1791
1792   /* edit / add dispositions here */
1793   .next_nodes = {
1794     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1795     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
1796     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
1797     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1798     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
1799   },
1800 };
1801
1802 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
1803                               snat_in2out_output_slow_path_fn);
1804
1805 extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local;
1806
1807 static inline uword
1808 nat44_hairpinning_fn_inline (vlib_main_t * vm,
1809                              vlib_node_runtime_t * node,
1810                              vlib_frame_t * frame,
1811                              int is_ed)
1812 {
1813   u32 n_left_from, * from, * to_next, stats_node_index;
1814   snat_in2out_next_t next_index;
1815   u32 pkts_processed = 0;
1816   snat_main_t * sm = &snat_main;
1817   vnet_feature_main_t *fm = &feature_main;
1818   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1819   vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index];
1820
1821   stats_node_index = is_ed ? nat44_ed_hairpinning_node.index :
1822     nat44_hairpinning_node.index;
1823   from = vlib_frame_vector_args (frame);
1824   n_left_from = frame->n_vectors;
1825   next_index = node->cached_next_index;
1826
1827   while (n_left_from > 0)
1828     {
1829       u32 n_left_to_next;
1830
1831       vlib_get_next_frame (vm, node, next_index,
1832                            to_next, n_left_to_next);
1833
1834       while (n_left_from > 0 && n_left_to_next > 0)
1835         {
1836           u32 bi0;
1837           vlib_buffer_t * b0;
1838           u32 next0;
1839           ip4_header_t * ip0;
1840           u32 proto0;
1841           udp_header_t * udp0;
1842           tcp_header_t * tcp0;
1843
1844           /* speculatively enqueue b0 to the current next frame */
1845           bi0 = from[0];
1846           to_next[0] = bi0;
1847           from += 1;
1848           to_next += 1;
1849           n_left_from -= 1;
1850           n_left_to_next -= 1;
1851
1852           b0 = vlib_get_buffer (vm, bi0);
1853           ip0 = vlib_buffer_get_current (b0);
1854           udp0 = ip4_next_header (ip0);
1855           tcp0 = (tcp_header_t *) udp0;
1856
1857           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1858
1859           vnet_get_config_data (&cm->config_main, &b0->current_config_index,
1860                                 &next0, 0);
1861
1862           if (snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0, is_ed))
1863             next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1864
1865           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1866
1867           /* verify speculative enqueue, maybe switch current next frame */
1868           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1869                                            to_next, n_left_to_next,
1870                                            bi0, next0);
1871          }
1872
1873       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1874     }
1875
1876   vlib_node_increment_counter (vm, stats_node_index,
1877                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1878                                pkts_processed);
1879   return frame->n_vectors;
1880 }
1881
1882 static uword
1883 nat44_hairpinning_fn (vlib_main_t * vm,
1884                       vlib_node_runtime_t * node,
1885                       vlib_frame_t * frame)
1886 {
1887   return nat44_hairpinning_fn_inline (vm, node, frame, 0);
1888 }
1889
1890 VLIB_REGISTER_NODE (nat44_hairpinning_node) = {
1891   .function = nat44_hairpinning_fn,
1892   .name = "nat44-hairpinning",
1893   .vector_size = sizeof (u32),
1894   .type = VLIB_NODE_TYPE_INTERNAL,
1895   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1896   .error_strings = snat_in2out_error_strings,
1897   .n_next_nodes = 2,
1898   .next_nodes = {
1899     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1900     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1901   },
1902 };
1903
1904 VLIB_NODE_FUNCTION_MULTIARCH (nat44_hairpinning_node,
1905                               nat44_hairpinning_fn);
1906
1907 static uword
1908 nat44_ed_hairpinning_fn (vlib_main_t * vm,
1909                          vlib_node_runtime_t * node,
1910                          vlib_frame_t * frame)
1911 {
1912   return nat44_hairpinning_fn_inline (vm, node, frame, 1);
1913 }
1914
1915 VLIB_REGISTER_NODE (nat44_ed_hairpinning_node) = {
1916   .function = nat44_ed_hairpinning_fn,
1917   .name = "nat44-ed-hairpinning",
1918   .vector_size = sizeof (u32),
1919   .type = VLIB_NODE_TYPE_INTERNAL,
1920   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1921   .error_strings = snat_in2out_error_strings,
1922   .n_next_nodes = 2,
1923   .next_nodes = {
1924     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1925     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1926   },
1927 };
1928
1929 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_hairpinning_node,
1930                               nat44_ed_hairpinning_fn);
1931
1932 static inline void
1933 nat44_reass_hairpinning (snat_main_t *sm,
1934                          vlib_buffer_t * b0,
1935                          ip4_header_t * ip0,
1936                          u16 sport,
1937                          u16 dport,
1938                          u32 proto0)
1939 {
1940   snat_session_key_t key0, sm0;
1941   snat_session_t * s0;
1942   clib_bihash_kv_8_8_t kv0, value0;
1943   ip_csum_t sum0;
1944   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
1945   u16 new_dst_port0, old_dst_port0;
1946   udp_header_t * udp0;
1947   tcp_header_t * tcp0;
1948
1949   key0.addr = ip0->dst_address;
1950   key0.port = dport;
1951   key0.protocol = proto0;
1952   key0.fib_index = sm->outside_fib_index;
1953   kv0.key = key0.as_u64;
1954
1955   udp0 = ip4_next_header (ip0);
1956
1957   /* Check if destination is static mappings */
1958   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
1959     {
1960       new_dst_addr0 = sm0.addr.as_u32;
1961       new_dst_port0 = sm0.port;
1962       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1963     }
1964   /* or active sessions */
1965   else
1966     {
1967       if (sm->num_workers > 1)
1968         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
1969       else
1970         ti = sm->num_workers;
1971
1972       if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
1973         {
1974           si = value0.value;
1975           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
1976           new_dst_addr0 = s0->in2out.addr.as_u32;
1977           new_dst_port0 = s0->in2out.port;
1978           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1979         }
1980     }
1981
1982   /* Destination is behind the same NAT, use internal address and port */
1983   if (new_dst_addr0)
1984     {
1985       old_dst_addr0 = ip0->dst_address.as_u32;
1986       ip0->dst_address.as_u32 = new_dst_addr0;
1987       sum0 = ip0->checksum;
1988       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
1989                              ip4_header_t, dst_address);
1990       ip0->checksum = ip_csum_fold (sum0);
1991
1992       old_dst_port0 = dport;
1993       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0 &&
1994                        ip4_is_first_fragment (ip0)))
1995         {
1996           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1997             {
1998               tcp0 = ip4_next_header (ip0);
1999               tcp0->dst = new_dst_port0;
2000               sum0 = tcp0->checksum;
2001               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2002                                      ip4_header_t, dst_address);
2003               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
2004                                      ip4_header_t /* cheat */, length);
2005               tcp0->checksum = ip_csum_fold(sum0);
2006             }
2007           else
2008             {
2009               udp0->dst_port = new_dst_port0;
2010               udp0->checksum = 0;
2011             }
2012         }
2013       else
2014         {
2015           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2016             {
2017               tcp0 = ip4_next_header (ip0);
2018               sum0 = tcp0->checksum;
2019               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2020                                      ip4_header_t, dst_address);
2021               tcp0->checksum = ip_csum_fold(sum0);
2022             }
2023         }
2024     }
2025 }
2026
2027 static uword
2028 nat44_in2out_reass_node_fn (vlib_main_t * vm,
2029                             vlib_node_runtime_t * node,
2030                             vlib_frame_t * frame)
2031 {
2032   u32 n_left_from, *from, *to_next;
2033   snat_in2out_next_t next_index;
2034   u32 pkts_processed = 0;
2035   snat_main_t *sm = &snat_main;
2036   f64 now = vlib_time_now (vm);
2037   u32 thread_index = vlib_get_thread_index ();
2038   snat_main_per_thread_data_t *per_thread_data =
2039     &sm->per_thread_data[thread_index];
2040   u32 *fragments_to_drop = 0;
2041   u32 *fragments_to_loopback = 0;
2042
2043   from = vlib_frame_vector_args (frame);
2044   n_left_from = frame->n_vectors;
2045   next_index = node->cached_next_index;
2046
2047   while (n_left_from > 0)
2048     {
2049       u32 n_left_to_next;
2050
2051       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2052
2053       while (n_left_from > 0 && n_left_to_next > 0)
2054        {
2055           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
2056           vlib_buffer_t *b0;
2057           u32 next0;
2058           u8 cached0 = 0;
2059           ip4_header_t *ip0;
2060           nat_reass_ip4_t *reass0;
2061           udp_header_t * udp0;
2062           tcp_header_t * tcp0;
2063           snat_session_key_t key0;
2064           clib_bihash_kv_8_8_t kv0, value0;
2065           snat_session_t * s0 = 0;
2066           u16 old_port0, new_port0;
2067           ip_csum_t sum0;
2068
2069           /* speculatively enqueue b0 to the current next frame */
2070           bi0 = from[0];
2071           to_next[0] = bi0;
2072           from += 1;
2073           to_next += 1;
2074           n_left_from -= 1;
2075           n_left_to_next -= 1;
2076
2077           b0 = vlib_get_buffer (vm, bi0);
2078           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2079
2080           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2081           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2082                                                                sw_if_index0);
2083
2084           if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
2085             {
2086               next0 = SNAT_IN2OUT_NEXT_DROP;
2087               b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
2088               goto trace0;
2089             }
2090
2091           ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
2092           udp0 = ip4_next_header (ip0);
2093           tcp0 = (tcp_header_t *) udp0;
2094           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2095
2096           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
2097                                                  ip0->dst_address,
2098                                                  ip0->fragment_id,
2099                                                  ip0->protocol,
2100                                                  1,
2101                                                  &fragments_to_drop);
2102
2103           if (PREDICT_FALSE (!reass0))
2104             {
2105               next0 = SNAT_IN2OUT_NEXT_DROP;
2106               b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_REASS];
2107               nat_log_notice ("maximum reassemblies exceeded");
2108               goto trace0;
2109             }
2110
2111           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
2112             {
2113               key0.addr = ip0->src_address;
2114               key0.port = udp0->src_port;
2115               key0.protocol = proto0;
2116               key0.fib_index = rx_fib_index0;
2117               kv0.key = key0.as_u64;
2118
2119               if (clib_bihash_search_8_8 (&per_thread_data->in2out, &kv0, &value0))
2120                 {
2121                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
2122                       ip0, proto0, rx_fib_index0, thread_index)))
2123                     goto trace0;
2124
2125                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
2126                                      &s0, node, next0, thread_index);
2127
2128                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
2129                     goto trace0;
2130
2131                   reass0->sess_index = s0 - per_thread_data->sessions;
2132                 }
2133               else
2134                 {
2135                   s0 = pool_elt_at_index (per_thread_data->sessions,
2136                                           value0.value);
2137                   reass0->sess_index = value0.value;
2138                 }
2139               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
2140             }
2141           else
2142             {
2143               if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
2144                 {
2145                   if (nat_ip4_reass_add_fragment (reass0, bi0))
2146                     {
2147                       b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_FRAG];
2148                       nat_log_notice ("maximum fragments per reassembly exceeded");
2149                       next0 = SNAT_IN2OUT_NEXT_DROP;
2150                       goto trace0;
2151                     }
2152                   cached0 = 1;
2153                   goto trace0;
2154                 }
2155               s0 = pool_elt_at_index (per_thread_data->sessions,
2156                                       reass0->sess_index);
2157             }
2158
2159           old_addr0 = ip0->src_address.as_u32;
2160           ip0->src_address = s0->out2in.addr;
2161           new_addr0 = ip0->src_address.as_u32;
2162           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
2163
2164           sum0 = ip0->checksum;
2165           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2166                                  ip4_header_t,
2167                                  src_address /* changed member */);
2168           ip0->checksum = ip_csum_fold (sum0);
2169
2170           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
2171             {
2172               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2173                 {
2174                   old_port0 = tcp0->src_port;
2175                   tcp0->src_port = s0->out2in.port;
2176                   new_port0 = tcp0->src_port;
2177
2178                   sum0 = tcp0->checksum;
2179                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2180                                          ip4_header_t,
2181                                          dst_address /* changed member */);
2182                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2183                                          ip4_header_t /* cheat */,
2184                                          length /* changed member */);
2185                   tcp0->checksum = ip_csum_fold(sum0);
2186                 }
2187               else
2188                 {
2189                   old_port0 = udp0->src_port;
2190                   udp0->src_port = s0->out2in.port;
2191                   udp0->checksum = 0;
2192                 }
2193             }
2194
2195           /* Hairpinning */
2196           nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port,
2197                                    s0->ext_host_port, proto0);
2198
2199           /* Accounting */
2200           nat44_session_update_counters (s0, now,
2201                                          vlib_buffer_length_in_chain (vm, b0));
2202           /* Per-user LRU list maintenance */
2203           nat44_session_update_lru (sm, s0, thread_index);
2204
2205         trace0:
2206           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2207                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2208             {
2209               nat44_in2out_reass_trace_t *t =
2210                  vlib_add_trace (vm, node, b0, sizeof (*t));
2211               t->cached = cached0;
2212               t->sw_if_index = sw_if_index0;
2213               t->next_index = next0;
2214             }
2215
2216           if (cached0)
2217             {
2218               n_left_to_next++;
2219               to_next--;
2220             }
2221           else
2222             {
2223               pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2224
2225               /* verify speculative enqueue, maybe switch current next frame */
2226               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2227                                                to_next, n_left_to_next,
2228                                                bi0, next0);
2229             }
2230
2231           if (n_left_from == 0 && vec_len (fragments_to_loopback))
2232             {
2233               from = vlib_frame_vector_args (frame);
2234               u32 len = vec_len (fragments_to_loopback);
2235               if (len <= VLIB_FRAME_SIZE)
2236                 {
2237                   clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
2238                   n_left_from = len;
2239                   vec_reset_length (fragments_to_loopback);
2240                 }
2241               else
2242                 {
2243                   clib_memcpy (from,
2244                                fragments_to_loopback + (len - VLIB_FRAME_SIZE),
2245                                sizeof (u32) * VLIB_FRAME_SIZE);
2246                   n_left_from = VLIB_FRAME_SIZE;
2247                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
2248                 }
2249             }
2250        }
2251
2252       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2253     }
2254
2255   vlib_node_increment_counter (vm, nat44_in2out_reass_node.index,
2256                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2257                                pkts_processed);
2258
2259   nat_send_all_to_node (vm, fragments_to_drop, node,
2260                         &node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT],
2261                         SNAT_IN2OUT_NEXT_DROP);
2262
2263   vec_free (fragments_to_drop);
2264   vec_free (fragments_to_loopback);
2265   return frame->n_vectors;
2266 }
2267
2268 VLIB_REGISTER_NODE (nat44_in2out_reass_node) = {
2269   .function = nat44_in2out_reass_node_fn,
2270   .name = "nat44-in2out-reass",
2271   .vector_size = sizeof (u32),
2272   .format_trace = format_nat44_in2out_reass_trace,
2273   .type = VLIB_NODE_TYPE_INTERNAL,
2274
2275   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2276   .error_strings = snat_in2out_error_strings,
2277
2278   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2279   .next_nodes = {
2280     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2281     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2282     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2283     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2284     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2285   },
2286 };
2287
2288 VLIB_NODE_FUNCTION_MULTIARCH (nat44_in2out_reass_node,
2289                               nat44_in2out_reass_node_fn);
2290
2291 /*******************************/
2292 /*** endpoint-dependent mode ***/
2293 /*******************************/
2294
2295 static_always_inline int
2296 icmp_get_ed_key(ip4_header_t *ip0, nat_ed_ses_key_t *p_key0)
2297 {
2298   icmp46_header_t *icmp0;
2299   nat_ed_ses_key_t key0;
2300   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2301   ip4_header_t *inner_ip0 = 0;
2302   void *l4_header = 0;
2303   icmp46_header_t *inner_icmp0;
2304
2305   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2306   echo0 = (icmp_echo_header_t *)(icmp0+1);
2307
2308   if (!icmp_is_error_message (icmp0))
2309     {
2310       key0.proto = IP_PROTOCOL_ICMP;
2311       key0.l_addr = ip0->src_address;
2312       key0.r_addr = ip0->dst_address;
2313       key0.l_port = echo0->identifier;
2314       key0.r_port = 0;
2315     }
2316   else
2317     {
2318       inner_ip0 = (ip4_header_t *)(echo0+1);
2319       l4_header = ip4_next_header (inner_ip0);
2320       key0.proto = inner_ip0->protocol;
2321       key0.r_addr = inner_ip0->src_address;
2322       key0.l_addr = inner_ip0->dst_address;
2323       switch (ip_proto_to_snat_proto (inner_ip0->protocol))
2324         {
2325         case SNAT_PROTOCOL_ICMP:
2326           inner_icmp0 = (icmp46_header_t*)l4_header;
2327           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2328           key0.r_port = 0;
2329           key0.l_port = inner_echo0->identifier;
2330           break;
2331         case SNAT_PROTOCOL_UDP:
2332         case SNAT_PROTOCOL_TCP:
2333           key0.l_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2334           key0.r_port = ((tcp_udp_header_t*)l4_header)->src_port;
2335           break;
2336         default:
2337           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
2338         }
2339     }
2340   *p_key0 = key0;
2341   return 0;
2342 }
2343
2344 static u32
2345 slow_path_ed (snat_main_t *sm,
2346               vlib_buffer_t *b,
2347               u32 rx_fib_index,
2348               clib_bihash_kv_16_8_t *kv,
2349               snat_session_t ** sessionp,
2350               vlib_node_runtime_t * node,
2351               u32 next,
2352               u32 thread_index)
2353 {
2354   snat_session_t *s;
2355   snat_user_t *u;
2356   snat_session_key_t key0, key1;
2357   u8 lb = 0, is_sm = 0;
2358   u32 address_index = ~0;
2359   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2360   nat_ed_ses_key_t *key = (nat_ed_ses_key_t *) kv->key;
2361   u32 proto = ip_proto_to_snat_proto (key->proto);
2362
2363   if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index)))
2364     {
2365       b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
2366       nat_ipfix_logging_max_sessions(sm->max_translations);
2367       nat_log_notice ("maximum sessions exceeded");
2368       return SNAT_IN2OUT_NEXT_DROP;
2369     }
2370
2371   key0.addr = key->l_addr;
2372   key0.port = key->l_port;
2373   key1.protocol = key0.protocol = proto;
2374   key0.fib_index = rx_fib_index;
2375   key1.fib_index = sm->outside_fib_index;
2376   /* First try to match static mapping by local address and port */
2377   if (snat_static_mapping_match (sm, key0, &key1, 0, 0, 0, &lb))
2378     {
2379       /* Try to create dynamic translation */
2380       if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index,
2381                                                thread_index, &key1,
2382                                                &address_index,
2383                                                sm->port_per_thread,
2384                                                tsm->snat_thread_index))
2385         {
2386           nat_log_notice ("addresses exhausted");
2387           b->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
2388           return SNAT_IN2OUT_NEXT_DROP;
2389         }
2390     }
2391   else
2392     is_sm = 1;
2393
2394   u = nat_user_get_or_create (sm, &key->l_addr, rx_fib_index, thread_index);
2395   if (!u)
2396     {
2397       nat_log_warn ("create NAT user failed");
2398       return SNAT_IN2OUT_NEXT_DROP;
2399     }
2400
2401   s = nat_session_alloc_or_recycle (sm, u, thread_index);
2402   if (!s)
2403     {
2404       nat_log_warn ("create NAT session failed");
2405       return SNAT_IN2OUT_NEXT_DROP;
2406     }
2407
2408   user_session_increment (sm, u, is_sm);
2409   if (is_sm)
2410     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
2411   if (lb)
2412     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
2413   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
2414   s->outside_address_index = address_index;
2415   s->ext_host_addr = key->r_addr;
2416   s->ext_host_port = key->r_port;
2417   s->in2out = key0;
2418   s->out2in = key1;
2419   s->out2in.protocol = key0.protocol;
2420
2421   /* Add to lookup tables */
2422   kv->value = s - tsm->sessions;
2423   if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, kv, 1))
2424     nat_log_notice ("in2out-ed key add failed");
2425
2426   make_ed_kv (kv, &key1.addr, &key->r_addr, key->proto, key1.fib_index,
2427               key1.port, key->r_port);
2428   kv->value = s - tsm->sessions;
2429   if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, kv, 1))
2430     nat_log_notice ("out2in-ed key add failed");
2431
2432   *sessionp = s;
2433
2434   /* log NAT event */
2435   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
2436                                       s->out2in.addr.as_u32,
2437                                       s->in2out.protocol,
2438                                       s->in2out.port,
2439                                       s->out2in.port,
2440                                       s->in2out.fib_index);
2441   return next;
2442 }
2443
2444 static_always_inline int
2445 nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
2446                         u32 sw_if_index, ip4_header_t * ip, u32 proto,
2447                         u32 rx_fib_index, u32 thread_index)
2448 {
2449   udp_header_t *udp = ip4_next_header (ip);
2450   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2451   clib_bihash_kv_16_8_t kv, value;
2452   snat_session_key_t key0, key1;
2453
2454   make_ed_kv (&kv, &ip->dst_address, &ip->src_address, ip->protocol,
2455               sm->outside_fib_index, udp->dst_port, udp->src_port);
2456
2457   /* NAT packet aimed at external address if */
2458   /* has active sessions */
2459   if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
2460     {
2461       key0.addr = ip->dst_address;
2462       key0.port = udp->dst_port;
2463       key0.protocol = proto;
2464       key0.fib_index = sm->outside_fib_index;
2465       /* or is static mappings */
2466       if (!snat_static_mapping_match(sm, key0, &key1, 1, 0, 0, 0))
2467         return 0;
2468     }
2469   else
2470     return 0;
2471
2472   if (sm->forwarding_enabled)
2473     return 1;
2474
2475   return snat_not_translate_fast(sm, node, sw_if_index, ip, proto, rx_fib_index);
2476 }
2477
2478 static_always_inline int
2479 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
2480                                       u32 thread_index, f64 now,
2481                                       vlib_main_t * vm, vlib_buffer_t * b)
2482 {
2483   nat_ed_ses_key_t key;
2484   clib_bihash_kv_16_8_t kv, value;
2485   udp_header_t *udp;
2486   snat_session_t *s = 0;
2487   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2488
2489   if (!sm->forwarding_enabled)
2490     return 0;
2491
2492   if (ip->protocol == IP_PROTOCOL_ICMP)
2493     {
2494       key.as_u64[0] = key.as_u64[1] = 0;
2495       if (icmp_get_ed_key (ip, &key))
2496         return 0;
2497       key.fib_index = 0;
2498       kv.key[0] = key.as_u64[0];
2499       kv.key[1] = key.as_u64[1];
2500     }
2501   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
2502     {
2503       udp = ip4_next_header(ip);
2504       make_ed_kv (&kv, &ip->src_address, &ip->dst_address, ip->protocol, 0,
2505                   udp->src_port, udp->dst_port);
2506     }
2507   else
2508     {
2509       make_ed_kv (&kv, &ip->src_address, &ip->dst_address, ip->protocol, 0, 0,
2510                   0);
2511     }
2512
2513   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
2514     {
2515       s = pool_elt_at_index (tsm->sessions, value.value);
2516       if (is_fwd_bypass_session (s))
2517         {
2518           if (ip->protocol == IP_PROTOCOL_TCP)
2519             {
2520               tcp_header_t *tcp = ip4_next_header(ip);
2521               if (nat44_set_tcp_session_state_i2o (sm, s, tcp, thread_index))
2522                 return 1;
2523             }
2524           /* Per-user LRU list maintenance */
2525           nat44_session_update_lru (sm, s, thread_index);
2526           /* Accounting */
2527           nat44_session_update_counters (s, now,
2528                                          vlib_buffer_length_in_chain (vm, b));
2529           return 1;
2530         }
2531       else
2532         return 0;
2533     }
2534
2535   return 0;
2536 }
2537
2538 static_always_inline int
2539 nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip,
2540                                        u8 proto, u16 src_port, u16 dst_port,
2541                                        u32 thread_index, u32 sw_if_index)
2542 {
2543   clib_bihash_kv_16_8_t kv, value;
2544   snat_main_per_thread_data_t *tsm = tsm = &sm->per_thread_data[thread_index];
2545   snat_interface_t *i;
2546
2547   /* src NAT check */
2548   make_ed_kv (&kv, &ip->src_address, &ip->dst_address, proto,
2549               sm->outside_fib_index, src_port, dst_port);
2550   if (!clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
2551     return 1;
2552
2553   /* dst NAT check */
2554   make_ed_kv (&kv, &ip->dst_address, &ip->src_address, proto,
2555               sm->inside_fib_index, dst_port, src_port);
2556   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
2557   {
2558     /* hairpinning */
2559     pool_foreach (i, sm->output_feature_interfaces,
2560     ({
2561       if ((nat_interface_is_inside(i)) && (sw_if_index == i->sw_if_index))
2562         return 0;
2563     }));
2564     return 1;
2565   }
2566
2567   return 0;
2568 }
2569
2570 u32
2571 icmp_match_in2out_ed(snat_main_t *sm, vlib_node_runtime_t *node,
2572                      u32 thread_index, vlib_buffer_t *b, ip4_header_t *ip,
2573                      u8 *p_proto, snat_session_key_t *p_value,
2574                      u8 *p_dont_translate, void *d, void *e)
2575 {
2576   icmp46_header_t *icmp;
2577   u32 sw_if_index;
2578   u32 rx_fib_index;
2579   nat_ed_ses_key_t key;
2580   snat_session_t *s = 0;
2581   u8 dont_translate = 0;
2582   clib_bihash_kv_16_8_t kv, value;
2583   u32 next = ~0;
2584   int err;
2585   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2586
2587   icmp = (icmp46_header_t *) ip4_next_header (ip);
2588   sw_if_index = vnet_buffer(b)->sw_if_index[VLIB_RX];
2589   rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
2590
2591   key.as_u64[0] = key.as_u64[1] = 0;
2592   err = icmp_get_ed_key (ip, &key);
2593   if (err != 0)
2594     {
2595       b->error = node->errors[err];
2596       next = SNAT_IN2OUT_NEXT_DROP;
2597       goto out;
2598     }
2599   key.fib_index = rx_fib_index;
2600
2601   kv.key[0] = key.as_u64[0];
2602   kv.key[1] = key.as_u64[1];
2603
2604   if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
2605     {
2606       if (vnet_buffer(b)->sw_if_index[VLIB_TX] != ~0)
2607         {
2608           if (PREDICT_FALSE(nat44_ed_not_translate_output_feature(sm, ip,
2609               key.proto, key.l_port, key.r_port, thread_index, sw_if_index)))
2610             {
2611               dont_translate = 1;
2612               goto out;
2613             }
2614         }
2615       else
2616         {
2617           if (PREDICT_FALSE(nat44_ed_not_translate(sm, node, sw_if_index,
2618               ip, SNAT_PROTOCOL_ICMP, rx_fib_index, thread_index)))
2619             {
2620               dont_translate = 1;
2621               goto out;
2622             }
2623         }
2624
2625       if (PREDICT_FALSE(icmp_is_error_message (icmp)))
2626         {
2627           b->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2628           next = SNAT_IN2OUT_NEXT_DROP;
2629           goto out;
2630         }
2631
2632       next = slow_path_ed (sm, b, rx_fib_index, &kv, &s, node, next,
2633                            thread_index);
2634
2635       if (PREDICT_FALSE (next == SNAT_IN2OUT_NEXT_DROP))
2636         goto out;
2637     }
2638   else
2639     {
2640       if (PREDICT_FALSE(icmp->type != ICMP4_echo_request &&
2641                         icmp->type != ICMP4_echo_reply &&
2642                         !icmp_is_error_message (icmp)))
2643         {
2644           b->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2645           next = SNAT_IN2OUT_NEXT_DROP;
2646           goto out;
2647         }
2648
2649       s = pool_elt_at_index (tsm->sessions, value.value);
2650     }
2651
2652   *p_proto = ip_proto_to_snat_proto (key.proto);
2653 out:
2654   if (s)
2655     *p_value = s->out2in;
2656   *p_dont_translate = dont_translate;
2657   if (d)
2658     *(snat_session_t**)d = s;
2659   return next;
2660 }
2661
2662 static inline void
2663 nat44_ed_hairpinning_unknown_proto (snat_main_t *sm,
2664                                     vlib_buffer_t * b,
2665                                     ip4_header_t * ip)
2666 {
2667   u32 old_addr, new_addr = 0, ti = 0;
2668   clib_bihash_kv_8_8_t kv, value;
2669   clib_bihash_kv_16_8_t s_kv, s_value;
2670   snat_static_mapping_t *m;
2671   ip_csum_t sum;
2672   snat_session_t *s;
2673   snat_main_per_thread_data_t *tsm;
2674
2675   if (sm->num_workers > 1)
2676     ti = sm->worker_out2in_cb (ip, sm->outside_fib_index);
2677   else
2678     ti = sm->num_workers;
2679   tsm = &sm->per_thread_data[ti];
2680
2681   old_addr = ip->dst_address.as_u32;
2682   make_ed_kv (&s_kv, &ip->dst_address, &ip->src_address, ip->protocol,
2683               sm->outside_fib_index, 0, 0);
2684   if (clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
2685     {
2686       make_sm_kv (&kv, &ip->dst_address, 0, sm->outside_fib_index, 0);
2687       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
2688         return;
2689
2690       m = pool_elt_at_index (sm->static_mappings, value.value);
2691       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
2692         vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
2693       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
2694     }
2695   else
2696     {
2697       s = pool_elt_at_index (sm->per_thread_data[ti].sessions, s_value.value);
2698       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
2699         vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
2700       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
2701     }
2702   sum = ip->checksum;
2703   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
2704   ip->checksum = ip_csum_fold (sum);
2705 }
2706
2707 static snat_session_t *
2708 nat44_ed_in2out_unknown_proto (snat_main_t *sm,
2709                                vlib_buffer_t * b,
2710                                ip4_header_t * ip,
2711                                u32 rx_fib_index,
2712                                u32 thread_index,
2713                                f64 now,
2714                                vlib_main_t * vm,
2715                                vlib_node_runtime_t * node)
2716 {
2717   clib_bihash_kv_8_8_t kv, value;
2718   clib_bihash_kv_16_8_t s_kv, s_value;
2719   snat_static_mapping_t *m;
2720   u32 old_addr, new_addr = 0;
2721   ip_csum_t sum;
2722   snat_user_t *u;
2723   dlist_elt_t *head, *elt;
2724   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2725   u32 elt_index, head_index, ses_index;
2726   snat_session_t * s;
2727   u32 address_index = ~0;
2728   int i;
2729   u8 is_sm = 0;
2730
2731   old_addr = ip->src_address.as_u32;
2732
2733   make_ed_kv (&s_kv, &ip->src_address, &ip->dst_address, ip->protocol,
2734               rx_fib_index, 0, 0);
2735
2736   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &s_kv, &s_value))
2737     {
2738       s = pool_elt_at_index (tsm->sessions, s_value.value);
2739       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
2740     }
2741   else
2742     {
2743       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
2744         {
2745           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
2746           nat_ipfix_logging_max_sessions(sm->max_translations);
2747           nat_log_notice ("maximum sessions exceeded");
2748           return 0;
2749         }
2750
2751       u = nat_user_get_or_create (sm, &ip->src_address, rx_fib_index,
2752                                   thread_index);
2753       if (!u)
2754         {
2755           nat_log_warn ("create NAT user failed");
2756           return 0;
2757         }
2758
2759       make_sm_kv (&kv, &ip->src_address, 0, rx_fib_index, 0);
2760
2761       /* Try to find static mapping first */
2762       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
2763         {
2764           m = pool_elt_at_index (sm->static_mappings, value.value);
2765           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
2766           is_sm = 1;
2767           goto create_ses;
2768         }
2769       /* Fallback to 3-tuple key */
2770       else
2771         {
2772           /* Choose same out address as for TCP/UDP session to same destination */
2773           head_index = u->sessions_per_user_list_head_index;
2774           head = pool_elt_at_index (tsm->list_pool, head_index);
2775           elt_index = head->next;
2776           if (PREDICT_FALSE (elt_index == ~0))
2777             ses_index = ~0;
2778           else
2779             {
2780               elt = pool_elt_at_index (tsm->list_pool, elt_index);
2781               ses_index = elt->value;
2782             }
2783
2784           while (ses_index != ~0)
2785             {
2786               s =  pool_elt_at_index (tsm->sessions, ses_index);
2787               elt_index = elt->next;
2788               elt = pool_elt_at_index (tsm->list_pool, elt_index);
2789               ses_index = elt->value;
2790
2791               if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
2792                 {
2793                   new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
2794                   address_index = s->outside_address_index;
2795
2796                   make_ed_kv (&s_kv, &s->out2in.addr, &ip->dst_address,
2797                               ip->protocol, sm->outside_fib_index, 0, 0);
2798                   if (clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
2799                     goto create_ses;
2800
2801                   break;
2802                 }
2803             }
2804
2805           for (i = 0; i < vec_len (sm->addresses); i++)
2806             {
2807               make_ed_kv (&s_kv, &sm->addresses[i].addr, &ip->dst_address,
2808                           ip->protocol, sm->outside_fib_index, 0, 0);
2809               if (clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
2810                 {
2811                   new_addr = ip->src_address.as_u32 =
2812                     sm->addresses[i].addr.as_u32;
2813                   address_index = i;
2814                   goto create_ses;
2815                 }
2816             }
2817           return 0;
2818         }
2819
2820 create_ses:
2821       s = nat_session_alloc_or_recycle (sm, u, thread_index);
2822       if (!s)
2823         {
2824           nat_log_warn ("create NAT session failed");
2825           return 0;
2826         }
2827
2828       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
2829       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
2830       s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
2831       s->outside_address_index = address_index;
2832       s->out2in.addr.as_u32 = new_addr;
2833       s->out2in.fib_index = sm->outside_fib_index;
2834       s->in2out.addr.as_u32 = old_addr;
2835       s->in2out.fib_index = rx_fib_index;
2836       s->in2out.port = s->out2in.port = ip->protocol;
2837       if (is_sm)
2838         s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
2839       user_session_increment (sm, u, is_sm);
2840
2841       /* Add to lookup tables */
2842       make_ed_kv (&s_kv, &s->in2out.addr, &ip->dst_address, ip->protocol,
2843                   rx_fib_index, 0, 0);
2844       s_kv.value = s - tsm->sessions;
2845       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1))
2846         nat_log_notice ("in2out key add failed");
2847
2848       make_ed_kv (&s_kv, &s->out2in.addr, &ip->dst_address, ip->protocol,
2849                   sm->outside_fib_index, 0, 0);
2850       s_kv.value = s - tsm->sessions;
2851       if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &s_kv, 1))
2852         nat_log_notice ("out2in key add failed");
2853   }
2854
2855   /* Update IP checksum */
2856   sum = ip->checksum;
2857   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
2858   ip->checksum = ip_csum_fold (sum);
2859
2860   /* Accounting */
2861   nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b));
2862   /* Per-user LRU list maintenance */
2863   nat44_session_update_lru (sm, s, thread_index);
2864
2865   /* Hairpinning */
2866   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
2867     nat44_ed_hairpinning_unknown_proto(sm, b, ip);
2868
2869   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
2870     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2871
2872   return s;
2873 }
2874
2875 static inline uword
2876 nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
2877                                 vlib_node_runtime_t * node,
2878                                 vlib_frame_t * frame, int is_slow_path,
2879                                 int is_output_feature)
2880 {
2881   u32 n_left_from, *from, *to_next, pkts_processed = 0, stats_node_index;
2882   snat_in2out_next_t next_index;
2883   snat_main_t *sm = &snat_main;
2884   f64 now = vlib_time_now (vm);
2885   u32 thread_index = vlib_get_thread_index ();
2886   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2887
2888   stats_node_index = is_slow_path ? nat44_ed_in2out_slowpath_node.index :
2889     nat44_ed_in2out_node.index;
2890
2891   from = vlib_frame_vector_args (frame);
2892   n_left_from = frame->n_vectors;
2893   next_index = node->cached_next_index;
2894
2895   while (n_left_from > 0)
2896     {
2897       u32 n_left_to_next;
2898
2899       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2900
2901       while (n_left_from >= 4 && n_left_to_next >= 2)
2902         {
2903           u32 bi0, bi1;
2904           vlib_buffer_t *b0, *b1;
2905           u32 next0, sw_if_index0, rx_fib_index0, iph_offset0 = 0, proto0,
2906               new_addr0, old_addr0;
2907           u32 next1, sw_if_index1, rx_fib_index1, iph_offset1 = 0, proto1,
2908               new_addr1, old_addr1;
2909           u16 old_port0, new_port0, old_port1, new_port1;
2910           ip4_header_t *ip0, *ip1;
2911           udp_header_t *udp0, *udp1;
2912           tcp_header_t *tcp0, *tcp1;
2913           icmp46_header_t *icmp0, *icmp1;
2914           snat_session_t *s0 = 0, *s1 = 0;
2915           clib_bihash_kv_16_8_t kv0, value0, kv1, value1;
2916           ip_csum_t sum0, sum1;
2917
2918           /* Prefetch next iteration. */
2919           {
2920             vlib_buffer_t * p2, * p3;
2921
2922             p2 = vlib_get_buffer (vm, from[2]);
2923             p3 = vlib_get_buffer (vm, from[3]);
2924
2925             vlib_prefetch_buffer_header (p2, LOAD);
2926             vlib_prefetch_buffer_header (p3, LOAD);
2927
2928             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2929             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2930           }
2931
2932           /* speculatively enqueue b0 and b1 to the current next frame */
2933           to_next[0] = bi0 = from[0];
2934           to_next[1] = bi1 = from[1];
2935           from += 2;
2936           to_next += 2;
2937           n_left_from -= 2;
2938           n_left_to_next -= 2;
2939
2940           b0 = vlib_get_buffer (vm, bi0);
2941           b1 = vlib_get_buffer (vm, bi1);
2942
2943           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2944
2945           if (is_output_feature)
2946             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
2947
2948           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
2949                  iph_offset0);
2950
2951           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2952           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2953                                                                sw_if_index0);
2954
2955           if (PREDICT_FALSE(ip0->ttl == 1))
2956             {
2957               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2958               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2959                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2960                                            0);
2961               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2962               goto trace00;
2963             }
2964
2965           udp0 = ip4_next_header (ip0);
2966           tcp0 = (tcp_header_t *) udp0;
2967           icmp0 = (icmp46_header_t *) udp0;
2968           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2969
2970           if (is_slow_path)
2971             {
2972               if (PREDICT_FALSE (proto0 == ~0))
2973                 {
2974                   s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0,
2975                                                       rx_fib_index0,
2976                                                       thread_index, now, vm,
2977                                                       node);
2978                   if (!s0)
2979                     next0 = SNAT_IN2OUT_NEXT_DROP;
2980                   goto trace00;
2981                 }
2982
2983               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2984                 {
2985                   next0 = icmp_in2out_slow_path
2986                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
2987                      next0, now, thread_index, &s0);
2988                   goto trace00;
2989                 }
2990             }
2991           else
2992             {
2993                if (is_output_feature)
2994                 {
2995                   if (PREDICT_FALSE(nat_not_translate_output_feature_fwd(
2996                       sm, ip0, thread_index, now, vm, b0)))
2997                     goto trace00;
2998                 }
2999
3000               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
3001                 {
3002                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3003                   goto trace00;
3004                 }
3005
3006               if (ip4_is_fragment (ip0))
3007                 {
3008                   b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
3009                   next0 = SNAT_IN2OUT_NEXT_DROP;
3010                   goto trace00;
3011                 }
3012             }
3013
3014           make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address, ip0->protocol,
3015                       rx_fib_index0, udp0->src_port, udp0->dst_port);
3016
3017           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
3018             {
3019               if (is_slow_path)
3020                 {
3021                   if (is_output_feature)
3022                     {
3023                       if (PREDICT_FALSE(nat44_ed_not_translate_output_feature(
3024                           sm, ip0, ip0->protocol, udp0->src_port,
3025                           udp0->dst_port, thread_index, sw_if_index0)))
3026                         goto trace00;
3027                     }
3028                   else
3029                     {
3030                       if (PREDICT_FALSE(nat44_ed_not_translate(sm, node,
3031                           sw_if_index0, ip0, proto0, rx_fib_index0,
3032                           thread_index)))
3033                         goto trace00;
3034                     }
3035
3036                   next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node,
3037                                         next0, thread_index);
3038
3039                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
3040                     goto trace00;
3041                 }
3042               else
3043                 {
3044                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3045                   goto trace00;
3046                 }
3047             }
3048           else
3049             {
3050               s0 = pool_elt_at_index (tsm->sessions, value0.value);
3051             }
3052
3053           b0->flags |= VNET_BUFFER_F_IS_NATED;
3054
3055           if (!is_output_feature)
3056             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
3057
3058           old_addr0 = ip0->src_address.as_u32;
3059           new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
3060           sum0 = ip0->checksum;
3061           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
3062                                  src_address);
3063           if (PREDICT_FALSE (is_twice_nat_session (s0)))
3064             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
3065                                    s0->ext_host_addr.as_u32, ip4_header_t,
3066                                    dst_address);
3067           ip0->checksum = ip_csum_fold (sum0);
3068
3069           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
3070             {
3071               old_port0 = tcp0->src_port;
3072               new_port0 = tcp0->src_port = s0->out2in.port;
3073
3074               sum0 = tcp0->checksum;
3075               sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
3076                                      dst_address);
3077               sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
3078                                      length);
3079               if (PREDICT_FALSE (is_twice_nat_session (s0)))
3080                 {
3081                   sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
3082                                          s0->ext_host_addr.as_u32,
3083                                          ip4_header_t, dst_address);
3084                   sum0 = ip_csum_update (sum0, tcp0->dst_port,
3085                                          s0->ext_host_port, ip4_header_t,
3086                                          length);
3087                   tcp0->dst_port = s0->ext_host_port;
3088                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
3089                 }
3090               tcp0->checksum = ip_csum_fold(sum0);
3091               if (nat44_set_tcp_session_state_i2o (sm, s0, tcp0, thread_index))
3092                 goto trace00;
3093             }
3094           else
3095             {
3096               udp0->src_port = s0->out2in.port;
3097               udp0->checksum = 0;
3098               if (PREDICT_FALSE (is_twice_nat_session (s0)))
3099                 {
3100                   udp0->dst_port = s0->ext_host_port;
3101                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
3102                 }
3103             }
3104
3105           /* Accounting */
3106           nat44_session_update_counters (s0, now,
3107                                          vlib_buffer_length_in_chain (vm, b0));
3108           /* Per-user LRU list maintenance */
3109           nat44_session_update_lru (sm, s0, thread_index);
3110
3111         trace00:
3112           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3113                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3114             {
3115               snat_in2out_trace_t *t =
3116                 vlib_add_trace (vm, node, b0, sizeof (*t));
3117               t->is_slow_path = is_slow_path;
3118               t->sw_if_index = sw_if_index0;
3119               t->next_index = next0;
3120               t->session_index = ~0;
3121               if (s0)
3122                 t->session_index = s0 - tsm->sessions;
3123             }
3124
3125           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3126
3127
3128           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
3129
3130           if (is_output_feature)
3131             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
3132
3133           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
3134                  iph_offset1);
3135
3136           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
3137           rx_fib_index1 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3138                                                                sw_if_index1);
3139
3140           if (PREDICT_FALSE(ip1->ttl == 1))
3141             {
3142               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3143               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
3144                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3145                                            0);
3146               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3147               goto trace01;
3148             }
3149
3150           udp1 = ip4_next_header (ip1);
3151           tcp1 = (tcp_header_t *) udp1;
3152           icmp1 = (icmp46_header_t *) udp1;
3153           proto1 = ip_proto_to_snat_proto (ip1->protocol);
3154
3155           if (is_slow_path)
3156             {
3157               if (PREDICT_FALSE (proto1 == ~0))
3158                 {
3159                   s1 = nat44_ed_in2out_unknown_proto (sm, b1, ip1,
3160                                                       rx_fib_index1,
3161                                                       thread_index, now, vm,
3162                                                       node);
3163                   if (!s1)
3164                     next1 = SNAT_IN2OUT_NEXT_DROP;
3165                   goto trace01;
3166                 }
3167
3168               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
3169                 {
3170                   next1 = icmp_in2out_slow_path
3171                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
3172                      next1, now, thread_index, &s1);
3173                   goto trace01;
3174                 }
3175             }
3176           else
3177             {
3178                if (is_output_feature)
3179                 {
3180                   if (PREDICT_FALSE(nat_not_translate_output_feature_fwd(
3181                       sm, ip1, thread_index, now, vm, b1)))
3182                     goto trace01;
3183                 }
3184
3185               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
3186                 {
3187                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3188                   goto trace01;
3189                 }
3190
3191               if (ip4_is_fragment (ip1))
3192                 {
3193                   b1->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
3194                   next1 = SNAT_IN2OUT_NEXT_DROP;
3195                   goto trace01;
3196                 }
3197             }
3198
3199           make_ed_kv (&kv1, &ip1->src_address, &ip1->dst_address, ip1->protocol,
3200                       rx_fib_index1, udp1->src_port, udp1->dst_port);
3201
3202           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv1, &value1))
3203             {
3204               if (is_slow_path)
3205                 {
3206                   if (is_output_feature)
3207                     {
3208                       if (PREDICT_FALSE(nat44_ed_not_translate_output_feature(
3209                           sm, ip1, ip1->protocol, udp1->src_port,
3210                           udp1->dst_port, thread_index, sw_if_index1)))
3211                         goto trace01;
3212                     }
3213                   else
3214                     {
3215                       if (PREDICT_FALSE(nat44_ed_not_translate(sm, node,
3216                           sw_if_index1, ip1, proto1, rx_fib_index1,
3217                           thread_index)))
3218                         goto trace01;
3219                     }
3220
3221                   next1 = slow_path_ed (sm, b1, rx_fib_index1, &kv1, &s1, node,
3222                                         next1, thread_index);
3223
3224                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
3225                     goto trace01;
3226                 }
3227               else
3228                 {
3229                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3230                   goto trace01;
3231                 }
3232             }
3233           else
3234             {
3235               s1 = pool_elt_at_index (tsm->sessions, value1.value);
3236             }
3237
3238           b1->flags |= VNET_BUFFER_F_IS_NATED;
3239
3240           if (!is_output_feature)
3241             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
3242
3243           old_addr1 = ip1->src_address.as_u32;
3244           new_addr1 = ip1->src_address.as_u32 = s1->out2in.addr.as_u32;
3245           sum1 = ip1->checksum;
3246           sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
3247                                  src_address);
3248           if (PREDICT_FALSE (is_twice_nat_session (s1)))
3249             sum1 = ip_csum_update (sum1, ip1->dst_address.as_u32,
3250                                    s1->ext_host_addr.as_u32, ip4_header_t,
3251                                    dst_address);
3252           ip1->checksum = ip_csum_fold (sum1);
3253
3254           if (PREDICT_TRUE (proto1 == SNAT_PROTOCOL_TCP))
3255             {
3256               old_port1 = tcp1->src_port;
3257               new_port1 = tcp1->src_port = s1->out2in.port;
3258
3259               sum1 = tcp1->checksum;
3260               sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
3261                                      dst_address);
3262               sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t,
3263                                      length);
3264               if (PREDICT_FALSE (is_twice_nat_session (s1)))
3265                 {
3266                   sum1 = ip_csum_update (sum1, ip1->dst_address.as_u32,
3267                                          s1->ext_host_addr.as_u32,
3268                                          ip4_header_t, dst_address);
3269                   sum1 = ip_csum_update (sum1, tcp1->dst_port,
3270                                          s1->ext_host_port, ip4_header_t,
3271                                          length);
3272                   tcp1->dst_port = s1->ext_host_port;
3273                   ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32;
3274                 }
3275               tcp1->checksum = ip_csum_fold(sum1);
3276               if (nat44_set_tcp_session_state_i2o (sm, s1, tcp1, thread_index))
3277                 goto trace01;
3278             }
3279           else
3280             {
3281               udp1->src_port = s1->out2in.port;
3282               udp1->checksum = 0;
3283               if (PREDICT_FALSE (is_twice_nat_session (s1)))
3284                 {
3285                   udp1->dst_port = s1->ext_host_port;
3286                   ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32;
3287                 }
3288             }
3289
3290           /* Accounting */
3291           nat44_session_update_counters (s1, now,
3292                                          vlib_buffer_length_in_chain (vm, b1));
3293           /* Per-user LRU list maintenance */
3294           nat44_session_update_lru (sm, s1, thread_index);
3295
3296         trace01:
3297           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3298                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
3299             {
3300               snat_in2out_trace_t *t =
3301                 vlib_add_trace (vm, node, b1, sizeof (*t));
3302               t->is_slow_path = is_slow_path;
3303               t->sw_if_index = sw_if_index1;
3304               t->next_index = next1;
3305               t->session_index = ~0;
3306               if (s1)
3307                 t->session_index = s1 - tsm->sessions;
3308             }
3309
3310           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
3311
3312           /* verify speculative enqueues, maybe switch current next frame */
3313           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
3314                                            to_next, n_left_to_next,
3315                                            bi0, bi1, next0, next1);
3316         }
3317
3318       while (n_left_from > 0 && n_left_to_next > 0)
3319         {
3320           u32 bi0;
3321           vlib_buffer_t *b0;
3322           u32 next0, sw_if_index0, rx_fib_index0, iph_offset0 = 0, proto0,
3323               new_addr0, old_addr0;
3324           u16 old_port0, new_port0;
3325           ip4_header_t *ip0;
3326           udp_header_t *udp0;
3327           tcp_header_t *tcp0;
3328           icmp46_header_t * icmp0;
3329           snat_session_t *s0 = 0;
3330           clib_bihash_kv_16_8_t kv0, value0;
3331           ip_csum_t sum0;
3332
3333           /* speculatively enqueue b0 to the current next frame */
3334           bi0 = from[0];
3335           to_next[0] = bi0;
3336           from += 1;
3337           to_next += 1;
3338           n_left_from -= 1;
3339           n_left_to_next -= 1;
3340
3341           b0 = vlib_get_buffer (vm, bi0);
3342           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3343
3344           if (is_output_feature)
3345             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
3346
3347           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
3348                  iph_offset0);
3349
3350           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3351           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3352                                                                sw_if_index0);
3353
3354           if (PREDICT_FALSE(ip0->ttl == 1))
3355             {
3356               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3357               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3358                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3359                                            0);
3360               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3361               goto trace0;
3362             }
3363
3364           udp0 = ip4_next_header (ip0);
3365           tcp0 = (tcp_header_t *) udp0;
3366           icmp0 = (icmp46_header_t *) udp0;
3367           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3368
3369           if (is_slow_path)
3370             {
3371               if (PREDICT_FALSE (proto0 == ~0))
3372                 {
3373                   s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0,
3374                                                       rx_fib_index0,
3375                                                       thread_index, now, vm,
3376                                                       node);
3377                   if (!s0)
3378                     next0 = SNAT_IN2OUT_NEXT_DROP;
3379                   goto trace0;
3380                 }
3381
3382               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3383                 {
3384                   next0 = icmp_in2out_slow_path
3385                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
3386                      next0, now, thread_index, &s0);
3387                   goto trace0;
3388                 }
3389             }
3390           else
3391             {
3392                if (is_output_feature)
3393                 {
3394                   if (PREDICT_FALSE(nat_not_translate_output_feature_fwd(
3395                       sm, ip0, thread_index, now, vm, b0)))
3396                     goto trace0;
3397                 }
3398
3399               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
3400                 {
3401                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3402                   goto trace0;
3403                 }
3404
3405               if (ip4_is_fragment (ip0))
3406                 {
3407                   b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
3408                   next0 = SNAT_IN2OUT_NEXT_DROP;
3409                   goto trace0;
3410                 }
3411             }
3412
3413           make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address, ip0->protocol,
3414                       rx_fib_index0, udp0->src_port, udp0->dst_port);
3415
3416           if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
3417             {
3418               if (is_slow_path)
3419                 {
3420                   if (is_output_feature)
3421                     {
3422                       if (PREDICT_FALSE(nat44_ed_not_translate_output_feature(
3423                           sm, ip0, ip0->protocol, udp0->src_port,
3424                           udp0->dst_port, thread_index, sw_if_index0)))
3425                         goto trace0;
3426                     }
3427                   else
3428                     {
3429                       if (PREDICT_FALSE(nat44_ed_not_translate(sm, node,
3430                           sw_if_index0, ip0, proto0, rx_fib_index0,
3431                           thread_index)))
3432                         goto trace0;
3433                     }
3434
3435                   next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node,
3436                                         next0, thread_index);
3437
3438                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
3439                     goto trace0;
3440                 }
3441               else
3442                 {
3443                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
3444                   goto trace0;
3445                 }
3446             }
3447           else
3448             {
3449               s0 = pool_elt_at_index (tsm->sessions, value0.value);
3450             }
3451
3452           b0->flags |= VNET_BUFFER_F_IS_NATED;
3453
3454           if (!is_output_feature)
3455             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
3456
3457           old_addr0 = ip0->src_address.as_u32;
3458           new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32;
3459           sum0 = ip0->checksum;
3460           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
3461                                  src_address);
3462           if (PREDICT_FALSE (is_twice_nat_session (s0)))
3463             sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
3464                                    s0->ext_host_addr.as_u32, ip4_header_t,
3465                                    dst_address);
3466           ip0->checksum = ip_csum_fold (sum0);
3467
3468           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
3469             {
3470               old_port0 = tcp0->src_port;
3471               new_port0 = tcp0->src_port = s0->out2in.port;
3472
3473               sum0 = tcp0->checksum;
3474               sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
3475                                      dst_address);
3476               sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
3477                                      length);
3478               if (PREDICT_FALSE (is_twice_nat_session (s0)))
3479                 {
3480                   sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
3481                                          s0->ext_host_addr.as_u32,
3482                                          ip4_header_t, dst_address);
3483                   sum0 = ip_csum_update (sum0, tcp0->dst_port,
3484                                          s0->ext_host_port, ip4_header_t,
3485                                          length);
3486                   tcp0->dst_port = s0->ext_host_port;
3487                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
3488                 }
3489               tcp0->checksum = ip_csum_fold(sum0);
3490               if (nat44_set_tcp_session_state_i2o (sm, s0, tcp0, thread_index))
3491                 goto trace0;
3492             }
3493           else
3494             {
3495               udp0->src_port = s0->out2in.port;
3496               udp0->checksum = 0;
3497               if (PREDICT_FALSE (is_twice_nat_session (s0)))
3498                 {
3499                   udp0->dst_port = s0->ext_host_port;
3500                   ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
3501                 }
3502             }
3503
3504           /* Accounting */
3505           nat44_session_update_counters (s0, now,
3506                                          vlib_buffer_length_in_chain (vm, b0));
3507           /* Per-user LRU list maintenance */
3508           nat44_session_update_lru (sm, s0, thread_index);
3509
3510         trace0:
3511           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3512                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3513             {
3514               snat_in2out_trace_t *t =
3515                 vlib_add_trace (vm, node, b0, sizeof (*t));
3516               t->is_slow_path = is_slow_path;
3517               t->sw_if_index = sw_if_index0;
3518               t->next_index = next0;
3519               t->session_index = ~0;
3520               if (s0)
3521                 t->session_index = s0 - tsm->sessions;
3522             }
3523
3524           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3525
3526           /* verify speculative enqueue, maybe switch current next frame */
3527           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3528                                            to_next, n_left_to_next,
3529                                            bi0, next0);
3530         }
3531
3532       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3533     }
3534
3535   vlib_node_increment_counter (vm, stats_node_index,
3536                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3537                                pkts_processed);
3538   return frame->n_vectors;
3539 }
3540
3541 static uword
3542 nat44_ed_in2out_fast_path_fn (vlib_main_t * vm,
3543                               vlib_node_runtime_t * node,
3544                               vlib_frame_t * frame)
3545 {
3546   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 0, 0);
3547 }
3548
3549 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
3550   .function = nat44_ed_in2out_fast_path_fn,
3551   .name = "nat44-ed-in2out",
3552   .vector_size = sizeof (u32),
3553   .format_trace = format_snat_in2out_trace,
3554   .type = VLIB_NODE_TYPE_INTERNAL,
3555
3556   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3557   .error_strings = snat_in2out_error_strings,
3558
3559   .runtime_data_bytes = sizeof (snat_runtime_t),
3560
3561   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3562
3563   /* edit / add dispositions here */
3564   .next_nodes = {
3565     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3566     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3567     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-ed-in2out-slowpath",
3568     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3569     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
3570   },
3571 };
3572
3573 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_in2out_node, nat44_ed_in2out_fast_path_fn);
3574
3575 static uword
3576 nat44_ed_in2out_output_fast_path_fn (vlib_main_t * vm,
3577                                      vlib_node_runtime_t * node,
3578                                      vlib_frame_t * frame)
3579 {
3580   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 0, 1);
3581 }
3582
3583 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
3584   .function = nat44_ed_in2out_output_fast_path_fn,
3585   .name = "nat44-ed-in2out-output",
3586   .vector_size = sizeof (u32),
3587   .format_trace = format_snat_in2out_trace,
3588   .type = VLIB_NODE_TYPE_INTERNAL,
3589
3590   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3591   .error_strings = snat_in2out_error_strings,
3592
3593   .runtime_data_bytes = sizeof (snat_runtime_t),
3594
3595   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3596
3597   /* edit / add dispositions here */
3598   .next_nodes = {
3599     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3600     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
3601     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
3602     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3603     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
3604   },
3605 };
3606
3607 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_in2out_output_node,
3608                               nat44_ed_in2out_output_fast_path_fn);
3609
3610 static uword
3611 nat44_ed_in2out_slow_path_fn (vlib_main_t * vm,
3612                               vlib_node_runtime_t * node,
3613                               vlib_frame_t * frame)
3614 {
3615   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 1, 0);
3616 }
3617
3618 VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
3619   .function = nat44_ed_in2out_slow_path_fn,
3620   .name = "nat44-ed-in2out-slowpath",
3621   .vector_size = sizeof (u32),
3622   .format_trace = format_snat_in2out_trace,
3623   .type = VLIB_NODE_TYPE_INTERNAL,
3624
3625   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3626   .error_strings = snat_in2out_error_strings,
3627
3628   .runtime_data_bytes = sizeof (snat_runtime_t),
3629
3630   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3631
3632   /* edit / add dispositions here */
3633   .next_nodes = {
3634     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3635     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3636     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-ed-in2out-slowpath",
3637     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3638     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
3639   },
3640 };
3641
3642 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_in2out_slowpath_node,
3643                               nat44_ed_in2out_slow_path_fn);
3644
3645 static uword
3646 nat44_ed_in2out_output_slow_path_fn (vlib_main_t * vm,
3647                                      vlib_node_runtime_t * node,
3648                                      vlib_frame_t * frame)
3649 {
3650   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 1, 1);
3651 }
3652
3653 VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
3654   .function = nat44_ed_in2out_output_slow_path_fn,
3655   .name = "nat44-ed-in2out-output-slowpath",
3656   .vector_size = sizeof (u32),
3657   .format_trace = format_snat_in2out_trace,
3658   .type = VLIB_NODE_TYPE_INTERNAL,
3659
3660   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3661   .error_strings = snat_in2out_error_strings,
3662
3663   .runtime_data_bytes = sizeof (snat_runtime_t),
3664
3665   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3666
3667   /* edit / add dispositions here */
3668   .next_nodes = {
3669     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3670     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
3671     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
3672     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3673     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
3674   },
3675 };
3676
3677 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_in2out_output_slowpath_node,
3678                               nat44_ed_in2out_output_slow_path_fn);
3679
3680 /**************************/
3681 /*** deterministic mode ***/
3682 /**************************/
3683 static uword
3684 snat_det_in2out_node_fn (vlib_main_t * vm,
3685                          vlib_node_runtime_t * node,
3686                          vlib_frame_t * frame)
3687 {
3688   u32 n_left_from, * from, * to_next;
3689   snat_in2out_next_t next_index;
3690   u32 pkts_processed = 0;
3691   snat_main_t * sm = &snat_main;
3692   u32 now = (u32) vlib_time_now (vm);
3693   u32 thread_index = vlib_get_thread_index ();
3694
3695   from = vlib_frame_vector_args (frame);
3696   n_left_from = frame->n_vectors;
3697   next_index = node->cached_next_index;
3698
3699   while (n_left_from > 0)
3700     {
3701       u32 n_left_to_next;
3702
3703       vlib_get_next_frame (vm, node, next_index,
3704                            to_next, n_left_to_next);
3705
3706       while (n_left_from >= 4 && n_left_to_next >= 2)
3707         {
3708           u32 bi0, bi1;
3709           vlib_buffer_t * b0, * b1;
3710           u32 next0, next1;
3711           u32 sw_if_index0, sw_if_index1;
3712           ip4_header_t * ip0, * ip1;
3713           ip_csum_t sum0, sum1;
3714           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
3715           u16 old_port0, new_port0, lo_port0, i0;
3716           u16 old_port1, new_port1, lo_port1, i1;
3717           udp_header_t * udp0, * udp1;
3718           tcp_header_t * tcp0, * tcp1;
3719           u32 proto0, proto1;
3720           snat_det_out_key_t key0, key1;
3721           snat_det_map_t * dm0, * dm1;
3722           snat_det_session_t * ses0 = 0, * ses1 = 0;
3723           u32 rx_fib_index0, rx_fib_index1;
3724           icmp46_header_t * icmp0, * icmp1;
3725
3726           /* Prefetch next iteration. */
3727           {
3728             vlib_buffer_t * p2, * p3;
3729
3730             p2 = vlib_get_buffer (vm, from[2]);
3731             p3 = vlib_get_buffer (vm, from[3]);
3732
3733             vlib_prefetch_buffer_header (p2, LOAD);
3734             vlib_prefetch_buffer_header (p3, LOAD);
3735
3736             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
3737             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
3738           }
3739
3740           /* speculatively enqueue b0 and b1 to the current next frame */
3741           to_next[0] = bi0 = from[0];
3742           to_next[1] = bi1 = from[1];
3743           from += 2;
3744           to_next += 2;
3745           n_left_from -= 2;
3746           n_left_to_next -= 2;
3747
3748           b0 = vlib_get_buffer (vm, bi0);
3749           b1 = vlib_get_buffer (vm, bi1);
3750
3751           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3752           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
3753
3754           ip0 = vlib_buffer_get_current (b0);
3755           udp0 = ip4_next_header (ip0);
3756           tcp0 = (tcp_header_t *) udp0;
3757
3758           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3759
3760           if (PREDICT_FALSE(ip0->ttl == 1))
3761             {
3762               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3763               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3764                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3765                                            0);
3766               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3767               goto trace0;
3768             }
3769
3770           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3771
3772           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
3773             {
3774               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3775               icmp0 = (icmp46_header_t *) udp0;
3776
3777               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
3778                                   rx_fib_index0, node, next0, thread_index,
3779                                   &ses0, &dm0);
3780               goto trace0;
3781             }
3782
3783           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
3784           if (PREDICT_FALSE(!dm0))
3785             {
3786               nat_log_info ("no match for internal host %U",
3787                             format_ip4_address, &ip0->src_address);
3788               next0 = SNAT_IN2OUT_NEXT_DROP;
3789               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3790               goto trace0;
3791             }
3792
3793           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
3794
3795           key0.ext_host_addr = ip0->dst_address;
3796           key0.ext_host_port = tcp0->dst;
3797
3798           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
3799           if (PREDICT_FALSE(!ses0))
3800             {
3801               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
3802                 {
3803                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
3804                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
3805
3806                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
3807                     continue;
3808
3809                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
3810                   break;
3811                 }
3812               if (PREDICT_FALSE(!ses0))
3813                 {
3814                   /* too many sessions for user, send ICMP error packet */
3815
3816                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3817                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
3818                                                ICMP4_destination_unreachable_destination_unreachable_host,
3819                                                0);
3820                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3821                   goto trace0;
3822                 }
3823             }
3824
3825           new_port0 = ses0->out.out_port;
3826
3827           old_addr0.as_u32 = ip0->src_address.as_u32;
3828           ip0->src_address.as_u32 = new_addr0.as_u32;
3829           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
3830
3831           sum0 = ip0->checksum;
3832           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3833                                  ip4_header_t,
3834                                  src_address /* changed member */);
3835           ip0->checksum = ip_csum_fold (sum0);
3836
3837           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3838             {
3839               if (tcp0->flags & TCP_FLAG_SYN)
3840                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
3841               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
3842                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
3843               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
3844                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
3845               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
3846                 snat_det_ses_close(dm0, ses0);
3847               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
3848                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
3849               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
3850                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
3851
3852               old_port0 = tcp0->src;
3853               tcp0->src = new_port0;
3854
3855               sum0 = tcp0->checksum;
3856               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3857                                      ip4_header_t,
3858                                      dst_address /* changed member */);
3859               sum0 = ip_csum_update (sum0, old_port0, new_port0,
3860                                      ip4_header_t /* cheat */,
3861                                      length /* changed member */);
3862               tcp0->checksum = ip_csum_fold(sum0);
3863             }
3864           else
3865             {
3866               ses0->state = SNAT_SESSION_UDP_ACTIVE;
3867               old_port0 = udp0->src_port;
3868               udp0->src_port = new_port0;
3869               udp0->checksum = 0;
3870             }
3871
3872           switch(ses0->state)
3873             {
3874             case SNAT_SESSION_UDP_ACTIVE:
3875                 ses0->expire = now + sm->udp_timeout;
3876                 break;
3877             case SNAT_SESSION_TCP_SYN_SENT:
3878             case SNAT_SESSION_TCP_FIN_WAIT:
3879             case SNAT_SESSION_TCP_CLOSE_WAIT:
3880             case SNAT_SESSION_TCP_LAST_ACK:
3881                 ses0->expire = now + sm->tcp_transitory_timeout;
3882                 break;
3883             case SNAT_SESSION_TCP_ESTABLISHED:
3884                 ses0->expire = now + sm->tcp_established_timeout;
3885                 break;
3886             }
3887
3888         trace0:
3889           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3890                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3891             {
3892               snat_in2out_trace_t *t =
3893                  vlib_add_trace (vm, node, b0, sizeof (*t));
3894               t->is_slow_path = 0;
3895               t->sw_if_index = sw_if_index0;
3896               t->next_index = next0;
3897               t->session_index = ~0;
3898               if (ses0)
3899                 t->session_index = ses0 - dm0->sessions;
3900             }
3901
3902           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3903
3904           ip1 = vlib_buffer_get_current (b1);
3905           udp1 = ip4_next_header (ip1);
3906           tcp1 = (tcp_header_t *) udp1;
3907
3908           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
3909
3910           if (PREDICT_FALSE(ip1->ttl == 1))
3911             {
3912               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3913               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
3914                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3915                                            0);
3916               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3917               goto trace1;
3918             }
3919
3920           proto1 = ip_proto_to_snat_proto (ip1->protocol);
3921
3922           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
3923             {
3924               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
3925               icmp1 = (icmp46_header_t *) udp1;
3926
3927               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
3928                                   rx_fib_index1, node, next1, thread_index,
3929                                   &ses1, &dm1);
3930               goto trace1;
3931             }
3932
3933           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
3934           if (PREDICT_FALSE(!dm1))
3935             {
3936               nat_log_info ("no match for internal host %U",
3937                             format_ip4_address, &ip0->src_address);
3938               next1 = SNAT_IN2OUT_NEXT_DROP;
3939               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3940               goto trace1;
3941             }
3942
3943           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
3944
3945           key1.ext_host_addr = ip1->dst_address;
3946           key1.ext_host_port = tcp1->dst;
3947
3948           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
3949           if (PREDICT_FALSE(!ses1))
3950             {
3951               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
3952                 {
3953                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
3954                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
3955
3956                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
3957                     continue;
3958
3959                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
3960                   break;
3961                 }
3962               if (PREDICT_FALSE(!ses1))
3963                 {
3964                   /* too many sessions for user, send ICMP error packet */
3965
3966                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3967                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
3968                                                ICMP4_destination_unreachable_destination_unreachable_host,
3969                                                0);
3970                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3971                   goto trace1;
3972                 }
3973             }
3974
3975           new_port1 = ses1->out.out_port;
3976
3977           old_addr1.as_u32 = ip1->src_address.as_u32;
3978           ip1->src_address.as_u32 = new_addr1.as_u32;
3979           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
3980
3981           sum1 = ip1->checksum;
3982           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
3983                                  ip4_header_t,
3984                                  src_address /* changed member */);
3985           ip1->checksum = ip_csum_fold (sum1);
3986
3987           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
3988             {
3989               if (tcp1->flags & TCP_FLAG_SYN)
3990                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
3991               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
3992                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
3993               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
3994                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
3995               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
3996                 snat_det_ses_close(dm1, ses1);
3997               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
3998                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
3999               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
4000                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
4001
4002               old_port1 = tcp1->src;
4003               tcp1->src = new_port1;
4004
4005               sum1 = tcp1->checksum;
4006               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
4007                                      ip4_header_t,
4008                                      dst_address /* changed member */);
4009               sum1 = ip_csum_update (sum1, old_port1, new_port1,
4010                                      ip4_header_t /* cheat */,
4011                                      length /* changed member */);
4012               tcp1->checksum = ip_csum_fold(sum1);
4013             }
4014           else
4015             {
4016               ses1->state = SNAT_SESSION_UDP_ACTIVE;
4017               old_port1 = udp1->src_port;
4018               udp1->src_port = new_port1;
4019               udp1->checksum = 0;
4020             }
4021
4022           switch(ses1->state)
4023             {
4024             case SNAT_SESSION_UDP_ACTIVE:
4025                 ses1->expire = now + sm->udp_timeout;
4026                 break;
4027             case SNAT_SESSION_TCP_SYN_SENT:
4028             case SNAT_SESSION_TCP_FIN_WAIT:
4029             case SNAT_SESSION_TCP_CLOSE_WAIT:
4030             case SNAT_SESSION_TCP_LAST_ACK:
4031                 ses1->expire = now + sm->tcp_transitory_timeout;
4032                 break;
4033             case SNAT_SESSION_TCP_ESTABLISHED:
4034                 ses1->expire = now + sm->tcp_established_timeout;
4035                 break;
4036             }
4037
4038         trace1:
4039           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
4040                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
4041             {
4042               snat_in2out_trace_t *t =
4043                  vlib_add_trace (vm, node, b1, sizeof (*t));
4044               t->is_slow_path = 0;
4045               t->sw_if_index = sw_if_index1;
4046               t->next_index = next1;
4047               t->session_index = ~0;
4048               if (ses1)
4049                 t->session_index = ses1 - dm1->sessions;
4050             }
4051
4052           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
4053
4054           /* verify speculative enqueues, maybe switch current next frame */
4055           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
4056                                            to_next, n_left_to_next,
4057                                            bi0, bi1, next0, next1);
4058          }
4059
4060       while (n_left_from > 0 && n_left_to_next > 0)
4061         {
4062           u32 bi0;
4063           vlib_buffer_t * b0;
4064           u32 next0;
4065           u32 sw_if_index0;
4066           ip4_header_t * ip0;
4067           ip_csum_t sum0;
4068           ip4_address_t new_addr0, old_addr0;
4069           u16 old_port0, new_port0, lo_port0, i0;
4070           udp_header_t * udp0;
4071           tcp_header_t * tcp0;
4072           u32 proto0;
4073           snat_det_out_key_t key0;
4074           snat_det_map_t * dm0;
4075           snat_det_session_t * ses0 = 0;
4076           u32 rx_fib_index0;
4077           icmp46_header_t * icmp0;
4078
4079           /* speculatively enqueue b0 to the current next frame */
4080           bi0 = from[0];
4081           to_next[0] = bi0;
4082           from += 1;
4083           to_next += 1;
4084           n_left_from -= 1;
4085           n_left_to_next -= 1;
4086
4087           b0 = vlib_get_buffer (vm, bi0);
4088           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
4089
4090           ip0 = vlib_buffer_get_current (b0);
4091           udp0 = ip4_next_header (ip0);
4092           tcp0 = (tcp_header_t *) udp0;
4093
4094           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
4095
4096           if (PREDICT_FALSE(ip0->ttl == 1))
4097             {
4098               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
4099               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
4100                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
4101                                            0);
4102               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
4103               goto trace00;
4104             }
4105
4106           proto0 = ip_proto_to_snat_proto (ip0->protocol);
4107
4108           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
4109             {
4110               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
4111               icmp0 = (icmp46_header_t *) udp0;
4112
4113               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
4114                                   rx_fib_index0, node, next0, thread_index,
4115                                   &ses0, &dm0);
4116               goto trace00;
4117             }
4118
4119           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
4120           if (PREDICT_FALSE(!dm0))
4121             {
4122               nat_log_info ("no match for internal host %U",
4123                             format_ip4_address, &ip0->src_address);
4124               next0 = SNAT_IN2OUT_NEXT_DROP;
4125               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
4126               goto trace00;
4127             }
4128
4129           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
4130
4131           key0.ext_host_addr = ip0->dst_address;
4132           key0.ext_host_port = tcp0->dst;
4133
4134           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
4135           if (PREDICT_FALSE(!ses0))
4136             {
4137               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
4138                 {
4139                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
4140                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
4141
4142                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
4143                     continue;
4144
4145                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
4146                   break;
4147                 }
4148               if (PREDICT_FALSE(!ses0))
4149                 {
4150                   /* too many sessions for user, send ICMP error packet */
4151
4152                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
4153                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
4154                                                ICMP4_destination_unreachable_destination_unreachable_host,
4155                                                0);
4156                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
4157                   goto trace00;
4158                 }
4159             }
4160
4161           new_port0 = ses0->out.out_port;
4162
4163           old_addr0.as_u32 = ip0->src_address.as_u32;
4164           ip0->src_address.as_u32 = new_addr0.as_u32;
4165           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
4166
4167           sum0 = ip0->checksum;
4168           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
4169                                  ip4_header_t,
4170                                  src_address /* changed member */);
4171           ip0->checksum = ip_csum_fold (sum0);
4172
4173           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
4174             {
4175               if (tcp0->flags & TCP_FLAG_SYN)
4176                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
4177               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
4178                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
4179               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
4180                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
4181               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
4182                 snat_det_ses_close(dm0, ses0);
4183               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
4184                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
4185               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
4186                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
4187
4188               old_port0 = tcp0->src;
4189               tcp0->src = new_port0;
4190
4191               sum0 = tcp0->checksum;
4192               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
4193                                      ip4_header_t,
4194                                      dst_address /* changed member */);
4195               sum0 = ip_csum_update (sum0, old_port0, new_port0,
4196                                      ip4_header_t /* cheat */,
4197                                      length /* changed member */);
4198               tcp0->checksum = ip_csum_fold(sum0);
4199             }
4200           else
4201             {
4202               ses0->state = SNAT_SESSION_UDP_ACTIVE;
4203               old_port0 = udp0->src_port;
4204               udp0->src_port = new_port0;
4205               udp0->checksum = 0;
4206             }
4207
4208           switch(ses0->state)
4209             {
4210             case SNAT_SESSION_UDP_ACTIVE:
4211                 ses0->expire = now + sm->udp_timeout;
4212                 break;
4213             case SNAT_SESSION_TCP_SYN_SENT:
4214             case SNAT_SESSION_TCP_FIN_WAIT:
4215             case SNAT_SESSION_TCP_CLOSE_WAIT:
4216             case SNAT_SESSION_TCP_LAST_ACK:
4217                 ses0->expire = now + sm->tcp_transitory_timeout;
4218                 break;
4219             case SNAT_SESSION_TCP_ESTABLISHED:
4220                 ses0->expire = now + sm->tcp_established_timeout;
4221                 break;
4222             }
4223
4224         trace00:
4225           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
4226                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
4227             {
4228               snat_in2out_trace_t *t =
4229                  vlib_add_trace (vm, node, b0, sizeof (*t));
4230               t->is_slow_path = 0;
4231               t->sw_if_index = sw_if_index0;
4232               t->next_index = next0;
4233               t->session_index = ~0;
4234               if (ses0)
4235                 t->session_index = ses0 - dm0->sessions;
4236             }
4237
4238           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
4239
4240           /* verify speculative enqueue, maybe switch current next frame */
4241           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
4242                                            to_next, n_left_to_next,
4243                                            bi0, next0);
4244         }
4245
4246       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
4247     }
4248
4249   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
4250                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
4251                                pkts_processed);
4252   return frame->n_vectors;
4253 }
4254
4255 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
4256   .function = snat_det_in2out_node_fn,
4257   .name = "nat44-det-in2out",
4258   .vector_size = sizeof (u32),
4259   .format_trace = format_snat_in2out_trace,
4260   .type = VLIB_NODE_TYPE_INTERNAL,
4261
4262   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
4263   .error_strings = snat_in2out_error_strings,
4264
4265   .runtime_data_bytes = sizeof (snat_runtime_t),
4266
4267   .n_next_nodes = 3,
4268
4269   /* edit / add dispositions here */
4270   .next_nodes = {
4271     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
4272     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
4273     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
4274   },
4275 };
4276
4277 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
4278
4279 /**
4280  * Get address and port values to be used for ICMP packet translation
4281  * and create session if needed (deterministic NAT, in2out direction)
4282  *
      * Non-error ICMP messages are matched on the outer source address and the
      * echo identifier. ICMP error messages are matched on the destination
      * address and destination port (or echo identifier) of the embedded
      * packet. A new session is only created for echo requests.
      *
4283  * @param[in,out] sm             NAT main
4284  * @param[in,out] node           NAT node runtime
4285  * @param[in] thread_index       thread index (not referenced in this function)
4286  * @param[in,out] b0             buffer containing packet to be translated
      * @param[in] ip0                IPv4 header of the packet to be translated
4287  * @param[out] p_proto           protocol used for matching
4288  * @param[out] p_value           address and port after NAT translation
      *                               (written only when a session was found or created)
4289  * @param[out] p_dont_translate  if packet should not be translated
4290  * @param d                      optional; if non-NULL receives the
      *                               snat_det_session_t pointer (may be NULL)
4291  * @param e                      optional; if non-NULL receives the
      *                               snat_det_map_t pointer (may be NULL)
      *
      * @return SNAT_IN2OUT_NEXT_DROP when the packet has to be dropped
      *         (b0->error is set in that case), ~0 otherwise
4292  */
4293 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
4294                           u32 thread_index, vlib_buffer_t *b0,
4295                           ip4_header_t *ip0, u8 *p_proto,
4296                           snat_session_key_t *p_value,
4297                           u8 *p_dont_translate, void *d, void *e)
4298 {
4299   icmp46_header_t *icmp0;
4300   u32 sw_if_index0;
4301   u32 rx_fib_index0;
4302   u8 protocol;
4303   snat_det_out_key_t key0;
4304   u8 dont_translate = 0;
4305   u32 next0 = ~0;
4306   icmp_echo_header_t *echo0, *inner_echo0 = 0;
4307   ip4_header_t *inner_ip0;
4308   void *l4_header = 0;
4309   icmp46_header_t *inner_icmp0;
4310   snat_det_map_t * dm0 = 0;
4311   ip4_address_t new_addr0;
4312   u16 lo_port0, i0;
4313   snat_det_session_t * ses0 = 0;
4314   ip4_address_t in_addr;
4315   u16 in_port;
4316
4317   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
4318   echo0 = (icmp_echo_header_t *)(icmp0+1);
4319   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
4320   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
4321
       /* Pick the inside address/port the session lookup is keyed on. */
4322   if (!icmp_is_error_message (icmp0))
4323     {
           /* Non-error ICMP: outer source address + echo identifier. */
4324       protocol = SNAT_PROTOCOL_ICMP;
4325       in_addr = ip0->src_address;
4326       in_port = echo0->identifier;
4327     }
4328   else
4329     {
           /* ICMP error: use the packet embedded after the ICMP header. */
4330       inner_ip0 = (ip4_header_t *)(echo0+1);
4331       l4_header = ip4_next_header (inner_ip0);
4332       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
4333       in_addr = inner_ip0->dst_address;
4334       switch (protocol)
4335         {
4336         case SNAT_PROTOCOL_ICMP:
4337           inner_icmp0 = (icmp46_header_t*)l4_header;
4338           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
4339           in_port = inner_echo0->identifier;
4340           break;
4341         case SNAT_PROTOCOL_UDP:
4342         case SNAT_PROTOCOL_TCP:
4343           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
4344           break;
4345         default:
4346           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
4347           next0 = SNAT_IN2OUT_NEXT_DROP;
4348           goto out;
4349         }
4350     }
4351
       /* Find the deterministic mapping covering the inside address. */
4352   dm0 = snat_det_map_by_user(sm, &in_addr);
4353   if (PREDICT_FALSE(!dm0))
4354     {
4355       nat_log_info ("no match for internal host %U",
4356                     format_ip4_address, &in_addr);
           /* No mapping: either pass the packet untranslated or drop it. */
4357       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
4358           IP_PROTOCOL_ICMP, rx_fib_index0)))
4359         {
4360           dont_translate = 1;
4361           goto out;
4362         }
4363       next0 = SNAT_IN2OUT_NEXT_DROP;
4364       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
4365       goto out;
4366     }
4367
4368   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
4369
       /* ICMP sessions are keyed on the external host address with port 0. */
4370   key0.ext_host_addr = ip0->dst_address;
4371   key0.ext_host_port = 0;
4372
4373   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
4374   if (PREDICT_FALSE(!ses0))
4375     {
4376       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
4377           IP_PROTOCOL_ICMP, rx_fib_index0)))
4378         {
4379           dont_translate = 1;
4380           goto out;
4381         }
           /* Only an echo request is allowed to open a new session. */
4382       if (icmp0->type != ICMP4_echo_request)
4383         {
4384           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
4385           next0 = SNAT_IN2OUT_NEXT_DROP;
4386           goto out;
4387         }
           /*
            * Probe at most ports_per_host candidate outside ports, starting at
            * an offset derived from the echo identifier, and take the first one
            * for which no session exists yet.
            */
4388       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
4389         {
4390           key0.out_port = clib_host_to_net_u16 (lo_port0 +
4391             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
4392
4393           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
4394             continue;
4395
4396           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
4397           break;
4398         }
4399       if (PREDICT_FALSE(!ses0))
4400         {
4401           next0 = SNAT_IN2OUT_NEXT_DROP;
4402           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
4403           goto out;
4404         }
4405     }
4406
       /* With a session in hand, accept only echo requests and ICMP errors. */
4407   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
4408                     !icmp_is_error_message (icmp0)))
4409     {
4410       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
4411       next0 = SNAT_IN2OUT_NEXT_DROP;
4412       goto out;
4413     }
4414
4415   u32 now = (u32) vlib_time_now (sm->vlib_main);
4416
       /* Refresh the session state and its expiry time. */
4417   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
4418   ses0->expire = now + sm->icmp_timeout;
4419
4420 out:
       /* Publish results; p_value is left untouched when there is no session. */
4421   *p_proto = protocol;
4422   if (ses0)
4423     {
4424       p_value->addr = new_addr0;
4425       p_value->fib_index = sm->outside_fib_index;
4426       p_value->port = ses0->out.out_port;
4427     }
4428   *p_dont_translate = dont_translate;
4429   if (d)
4430     *(snat_det_session_t**)d = ses0;
4431   if (e)
4432     *(snat_det_map_t**)e = dm0;
4433   return next0;
4434 }
4435
4436 /**********************/
4437 /*** worker handoff ***/
4438 /**********************/
     /*
      * Shared body of the in2out worker handoff nodes.
      *
      * For every buffer in the frame the configured callback
      * sm->worker_in2out_cb selects a thread index. Buffers selected for the
      * current thread are put into a frame for the in2out node (or the
      * in2out-output node when is_output is set). Other buffers are appended to
      * a frame-queue element of the selected thread. When the queue check for
      * that thread returns a queue, the buffer is instead sent to
      * sm->error_node_index with SNAT_IN2OUT_ERROR_FQ_CONGESTED.
      *
      * vm         vlib main of the calling thread
      * node       node runtime (error table and trace flag are read from it)
      * frame      frame holding the buffer indices to process
      * is_output  non-zero selects the in2out-output queue/node pair
      *
      * Returns frame->n_vectors.
      */
4439 static inline uword
4440 snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
4441                                       vlib_node_runtime_t * node,
4442                                       vlib_frame_t * frame,
4443                                       u8 is_output)
4444 {
4445   snat_main_t *sm = &snat_main;
4446   vlib_thread_main_t *tm = vlib_get_thread_main ();
4447   u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0;
4448   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
4449   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
4450     = 0;
4451   vlib_frame_queue_elt_t *hf = 0;
4452   vlib_frame_queue_t *fq;
4453   vlib_frame_t *f = 0;
4454   int i;
4455   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
4456   u32 next_worker_index = 0;
4457   u32 current_worker_index = ~0;
4458   u32 thread_index = vlib_get_thread_index ();
4459   u32 fq_index;
4460   u32 to_node_index;
4461   vlib_frame_t *d = 0;
4462
4463   ASSERT (vec_len (sm->workers));
4464
       /* Select the frame queue and the local target node for this flavour. */
4465   if (is_output)
4466     {
4467       fq_index = sm->fq_in2out_output_index;
4468       to_node_index = sm->in2out_output_node_index;
4469     }
4470   else
4471     {
4472       fq_index = sm->fq_in2out_index;
4473       to_node_index = sm->in2out_node_index;
4474     }
4475
       /* First call on this thread: size both per-thread vectors with one slot
          per vlib main. */
4476   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
4477     {
4478       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
4479
4480       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
4481                                tm->n_vlib_mains - 1,
4482                                (vlib_frame_queue_t *) (~0));
4483     }
4484
4485   from = vlib_frame_vector_args (frame);
4486   n_left_from = frame->n_vectors;
4487
4488   while (n_left_from > 0)
4489     {
4490       u32 bi0;
4491       vlib_buffer_t *b0;
4492       u32 sw_if_index0;
4493       u32 rx_fib_index0;
4494       ip4_header_t * ip0;
4495       u8 do_handoff;
4496
4497       bi0 = from[0];
4498       from += 1;
4499       n_left_from -= 1;
4500
4501       b0 = vlib_get_buffer (vm, bi0);
4502
4503       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
4504       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
4505
4506       ip0 = vlib_buffer_get_current (b0);
4507
           /* Ask the configured callback which thread should handle the packet. */
4508       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
4509
4510       if (PREDICT_FALSE (next_worker_index != thread_index))
4511         {
4512           do_handoff = 1;
4513
               /* Target differs from the previous buffer's: switch queue element. */
4514           if (next_worker_index != current_worker_index)
4515             {
4516               fq = is_vlib_frame_queue_congested (
4517                 fq_index, next_worker_index, NAT_FQ_NELTS - 2,
4518                 congested_handoff_queue_by_worker_index);
4519
                   /* A returned queue is treated as congested: drop the buffer. */
4520               if (fq)
4521                 {
4522                   /* if this is 1st frame */
4523                   if (!d)
4524                     {
4525                       d = vlib_get_frame_to_node (vm, sm->error_node_index);
4526                       to_next_drop = vlib_frame_vector_args (d);
4527                     }
4528
4529                   to_next_drop[0] = bi0;
4530                   to_next_drop += 1;
4531                   d->n_vectors++;
4532                   b0->error = node->errors[SNAT_IN2OUT_ERROR_FQ_CONGESTED];
4533                   goto trace0;
4534                 }
4535
                   /* Record the fill level of the element being left. */
4536               if (hf)
4537                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
4538
4539               hf = vlib_get_worker_handoff_queue_elt (fq_index,
4540                                                       next_worker_index,
4541                                                       handoff_queue_elt_by_worker_index);
4542
4543               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
4544               to_next_worker = &hf->buffer_index[hf->n_vectors];
4545               current_worker_index = next_worker_index;
4546             }
4547
4548           /* enqueue to correct worker thread */
4549           to_next_worker[0] = bi0;
4550           to_next_worker++;
4551           n_left_to_next_worker--;
4552
               /* Element full: ship it now and forget the cached pointer. */
4553           if (n_left_to_next_worker == 0)
4554             {
4555               hf->n_vectors = VLIB_FRAME_SIZE;
4556               vlib_put_frame_queue_elt (hf);
4557               current_worker_index = ~0;
4558               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
4559               hf = 0;
4560             }
4561         }
4562       else
4563         {
               /* Packet stays on this thread: append to the frame for to_node_index. */
4564           do_handoff = 0;
4565           /* if this is 1st frame */
4566           if (!f)
4567             {
4568               f = vlib_get_frame_to_node (vm, to_node_index);
4569               to_next = vlib_frame_vector_args (f);
4570             }
4571
4572           to_next[0] = bi0;
4573           to_next += 1;
4574           f->n_vectors++;
4575         }
4576
4577 trace0:
4578       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
4579                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
4580         {
4581           snat_in2out_worker_handoff_trace_t *t =
4582             vlib_add_trace (vm, node, b0, sizeof (*t));
4583           t->next_worker_index = next_worker_index;
4584           t->do_handoff = do_handoff;
4585         }
4586     }
4587
       /* Hand over the locally kept buffers and the dropped buffers. */
4588   if (f)
4589     vlib_put_frame_to_node (vm, to_node_index, f);
4590
4591   if (d)
4592     vlib_put_frame_to_node (vm, sm->error_node_index, d);
4593
       /* Record the fill level of the element that was still being filled. */
4594   if (hf)
4595     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
4596
4597   /* Ship frames to the worker nodes */
4598   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
4599     {
4600       if (handoff_queue_elt_by_worker_index[i])
4601         {
4602           hf = handoff_queue_elt_by_worker_index[i];
4603           /*
4604            * It works better to let the handoff node
4605            * rate-adapt, always ship the handoff queue element.
4606            */
               /* The leading "1 ||" makes this condition always true, so the
                  else branch below is never taken. */
4607           if (1 || hf->n_vectors == hf->last_n_vectors)
4608             {
4609               vlib_put_frame_queue_elt (hf);
4610               handoff_queue_elt_by_worker_index[i] = 0;
4611             }
4612           else
4613             hf->last_n_vectors = hf->n_vectors;
4614         }
           /* Reset the cached congestion result for slot i. */
4615       congested_handoff_queue_by_worker_index[i] =
4616         (vlib_frame_queue_t *) (~0);
4617     }
       /* NOTE(review): hf and current_worker_index are automatic locals, so
          these two stores are not observable after the return. */
4618   hf = 0;
4619   current_worker_index = ~0;
4620   return frame->n_vectors;
4621 }
4622
4623 static uword
4624 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
4625                                vlib_node_runtime_t * node,
4626                                vlib_frame_t * frame)
4627 {
4628   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
4629 }
4630
4631 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
4632   .function = snat_in2out_worker_handoff_fn,
4633   .name = "nat44-in2out-worker-handoff",
4634   .vector_size = sizeof (u32),
4635   .format_trace = format_snat_in2out_worker_handoff_trace,
4636   .type = VLIB_NODE_TYPE_INTERNAL,
4637
4638   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
4639   .error_strings = snat_in2out_error_strings,
4640
4641   .n_next_nodes = 1,
4642
4643   .next_nodes = {
4644     [0] = "error-drop",
4645   },
4646 };
4647
4648 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
4649                               snat_in2out_worker_handoff_fn);
4650
4651 static uword
4652 snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
4653                                       vlib_node_runtime_t * node,
4654                                       vlib_frame_t * frame)
4655 {
4656   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
4657 }
4658
4659 VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
4660   .function = snat_in2out_output_worker_handoff_fn,
4661   .name = "nat44-in2out-output-worker-handoff",
4662   .vector_size = sizeof (u32),
4663   .format_trace = format_snat_in2out_worker_handoff_trace,
4664   .type = VLIB_NODE_TYPE_INTERNAL,
4665
4666   .n_next_nodes = 1,
4667
4668   .next_nodes = {
4669     [0] = "error-drop",
4670   },
4671 };
4672
4673 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
4674                               snat_in2out_output_worker_handoff_fn);
4675
4676 static_always_inline int
4677 is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
4678 {
4679   snat_address_t * ap;
4680   clib_bihash_kv_8_8_t kv, value;
4681   snat_session_key_t m_key;
4682
4683   vec_foreach (ap, sm->addresses)
4684     {
4685       if (ap->addr.as_u32 == dst_addr->as_u32)
4686         return 1;
4687     }
4688
4689   m_key.addr.as_u32 = dst_addr->as_u32;
4690   m_key.fib_index = sm->outside_fib_index;
4691   m_key.port = 0;
4692   m_key.protocol = 0;
4693   kv.key = m_key.as_u64;
4694   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
4695     return 1;
4696
4697   return 0;
4698 }
4699
4700 static inline uword
4701 snat_hairpin_dst_fn_inline (vlib_main_t * vm,
4702                             vlib_node_runtime_t * node,
4703                             vlib_frame_t * frame,
4704                             int is_ed)
4705 {
4706   u32 n_left_from, * from, * to_next, stats_node_index;
4707   snat_in2out_next_t next_index;
4708   u32 pkts_processed = 0;
4709   snat_main_t * sm = &snat_main;
4710
4711   stats_node_index = is_ed ? nat44_ed_hairpin_dst_node.index :
4712     snat_hairpin_dst_node.index;
4713
4714   from = vlib_frame_vector_args (frame);
4715   n_left_from = frame->n_vectors;
4716   next_index = node->cached_next_index;
4717
4718   while (n_left_from > 0)
4719     {
4720       u32 n_left_to_next;
4721
4722       vlib_get_next_frame (vm, node, next_index,
4723                            to_next, n_left_to_next);
4724
4725       while (n_left_from > 0 && n_left_to_next > 0)
4726         {
4727           u32 bi0;
4728           vlib_buffer_t * b0;
4729           u32 next0;
4730           ip4_header_t * ip0;
4731           u32 proto0;
4732
4733           /* speculatively enqueue b0 to the current next frame */
4734           bi0 = from[0];
4735           to_next[0] = bi0;
4736           from += 1;
4737           to_next += 1;
4738           n_left_from -= 1;
4739           n_left_to_next -= 1;
4740
4741           b0 = vlib_get_buffer (vm, bi0);
4742           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
4743           ip0 = vlib_buffer_get_current (b0);
4744
4745           proto0 = ip_proto_to_snat_proto (ip0->protocol);
4746
4747           vnet_buffer (b0)->snat.flags = 0;
4748           if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
4749             {
4750               if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
4751                 {
4752                   udp_header_t * udp0 = ip4_next_header (ip0);
4753                   tcp_header_t * tcp0 = (tcp_header_t *) udp0;
4754
4755                   snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0, is_ed);
4756                 }
4757               else if (proto0 == SNAT_PROTOCOL_ICMP)
4758                 {
4759                   icmp46_header_t * icmp0 = ip4_next_header (ip0);
4760
4761                   snat_icmp_hairpinning (sm, b0, ip0, icmp0, is_ed);
4762                 }
4763               else
4764                 {
4765                   if (is_ed)
4766                     nat44_ed_hairpinning_unknown_proto (sm, b0, ip0);
4767                   else
4768                     nat_hairpinning_sm_unknown_proto (sm, b0, ip0);
4769                 }
4770
4771               vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
4772             }
4773
4774           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
4775
4776           /* verify speculative enqueue, maybe switch current next frame */
4777           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
4778                                            to_next, n_left_to_next,
4779                                            bi0, next0);
4780          }
4781
4782       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
4783     }
4784
4785   vlib_node_increment_counter (vm, stats_node_index,
4786                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
4787                                pkts_processed);
4788   return frame->n_vectors;
4789 }
4790
4791 static uword
4792 snat_hairpin_dst_fn (vlib_main_t * vm,
4793                      vlib_node_runtime_t * node,
4794                      vlib_frame_t * frame)
4795 {
4796   return snat_hairpin_dst_fn_inline (vm, node, frame, 0);
4797 }
4798
4799 VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
4800   .function = snat_hairpin_dst_fn,
4801   .name = "nat44-hairpin-dst",
4802   .vector_size = sizeof (u32),
4803   .type = VLIB_NODE_TYPE_INTERNAL,
4804   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
4805   .error_strings = snat_in2out_error_strings,
4806   .n_next_nodes = 2,
4807   .next_nodes = {
4808     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
4809     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
4810   },
4811 };
4812
4813 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node,
4814                               snat_hairpin_dst_fn);
4815
4816 static uword
4817 nat44_ed_hairpin_dst_fn (vlib_main_t * vm,
4818                          vlib_node_runtime_t * node,
4819                          vlib_frame_t * frame)
4820 {
4821   return snat_hairpin_dst_fn_inline (vm, node, frame, 1);
4822 }
4823
4824 VLIB_REGISTER_NODE (nat44_ed_hairpin_dst_node) = {
4825   .function = nat44_ed_hairpin_dst_fn,
4826   .name = "nat44-ed-hairpin-dst",
4827   .vector_size = sizeof (u32),
4828   .type = VLIB_NODE_TYPE_INTERNAL,
4829   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
4830   .error_strings = snat_in2out_error_strings,
4831   .n_next_nodes = 2,
4832   .next_nodes = {
4833     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
4834     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
4835   },
4836 };
4837
4838 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_hairpin_dst_node,
4839                               nat44_ed_hairpin_dst_fn);
4840
4841 static inline uword
4842 snat_hairpin_src_fn_inline (vlib_main_t * vm,
4843                             vlib_node_runtime_t * node,
4844                             vlib_frame_t * frame,
4845                             int is_ed)
4846 {
4847   u32 n_left_from, * from, * to_next, stats_node_index;
4848   snat_in2out_next_t next_index;
4849   u32 pkts_processed = 0;
4850   snat_main_t *sm = &snat_main;
4851
4852   stats_node_index = is_ed ? nat44_ed_hairpin_src_node.index :
4853     snat_hairpin_src_node.index;
4854
4855   from = vlib_frame_vector_args (frame);
4856   n_left_from = frame->n_vectors;
4857   next_index = node->cached_next_index;
4858
4859   while (n_left_from > 0)
4860     {
4861       u32 n_left_to_next;
4862
4863       vlib_get_next_frame (vm, node, next_index,
4864                            to_next, n_left_to_next);
4865
4866       while (n_left_from > 0 && n_left_to_next > 0)
4867         {
4868           u32 bi0;
4869           vlib_buffer_t * b0;
4870           u32 next0;
4871           snat_interface_t *i;
4872           u32 sw_if_index0;
4873
4874           /* speculatively enqueue b0 to the current next frame */
4875           bi0 = from[0];
4876           to_next[0] = bi0;
4877           from += 1;
4878           to_next += 1;
4879           n_left_from -= 1;
4880           n_left_to_next -= 1;
4881
4882           b0 = vlib_get_buffer (vm, bi0);
4883           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
4884           next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT;
4885
4886           pool_foreach (i, sm->output_feature_interfaces,
4887           ({
4888             /* Only packets from NAT inside interface */
4889             if ((nat_interface_is_inside(i)) && (sw_if_index0 == i->sw_if_index))
4890               {
4891                 if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) &
4892                                     SNAT_FLAG_HAIRPINNING))
4893                   {
4894                     if (PREDICT_TRUE (sm->num_workers > 1))
4895                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
4896                     else
4897                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
4898                   }
4899                 break;
4900               }
4901           }));
4902
4903           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
4904
4905           /* verify speculative enqueue, maybe switch current next frame */
4906           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
4907                                            to_next, n_left_to_next,
4908                                            bi0, next0);
4909          }
4910
4911       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
4912     }
4913
4914   vlib_node_increment_counter (vm, stats_node_index,
4915                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
4916                                pkts_processed);
4917   return frame->n_vectors;
4918 }
4919
4920 static uword
4921 snat_hairpin_src_fn (vlib_main_t * vm,
4922                      vlib_node_runtime_t * node,
4923                      vlib_frame_t * frame)
4924 {
4925   return snat_hairpin_src_fn_inline (vm, node, frame, 0);
4926 }
4927
4928 VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
4929   .function = snat_hairpin_src_fn,
4930   .name = "nat44-hairpin-src",
4931   .vector_size = sizeof (u32),
4932   .type = VLIB_NODE_TYPE_INTERNAL,
4933   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
4934   .error_strings = snat_in2out_error_strings,
4935   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
4936   .next_nodes = {
4937      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
4938      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-in2out-output",
4939      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
4940      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
4941   },
4942 };
4943
4944 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node,
4945                               snat_hairpin_src_fn);
4946
4947 static uword
4948 nat44_ed_hairpin_src_fn (vlib_main_t * vm,
4949                          vlib_node_runtime_t * node,
4950                          vlib_frame_t * frame)
4951 {
4952   return snat_hairpin_src_fn_inline (vm, node, frame, 1);
4953 }
4954
4955 VLIB_REGISTER_NODE (nat44_ed_hairpin_src_node) = {
4956   .function = nat44_ed_hairpin_src_fn,
4957   .name = "nat44-ed-hairpin-src",
4958   .vector_size = sizeof (u32),
4959   .type = VLIB_NODE_TYPE_INTERNAL,
4960   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
4961   .error_strings = snat_in2out_error_strings,
4962   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
4963   .next_nodes = {
4964      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
4965      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-ed-in2out-output",
4966      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
4967      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
4968   },
4969 };
4970
4971 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_hairpin_src_node,
4972                               nat44_ed_hairpin_src_fn);
4973
4974 static uword
4975 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
4976                                 vlib_node_runtime_t * node,
4977                                 vlib_frame_t * frame)
4978 {
4979   u32 n_left_from, * from, * to_next;
4980   snat_in2out_next_t next_index;
4981   u32 pkts_processed = 0;
4982   snat_main_t * sm = &snat_main;
4983   u32 stats_node_index;
4984
4985   stats_node_index = snat_in2out_fast_node.index;
4986
4987   from = vlib_frame_vector_args (frame);
4988   n_left_from = frame->n_vectors;
4989   next_index = node->cached_next_index;
4990
4991   while (n_left_from > 0)
4992     {
4993       u32 n_left_to_next;
4994
4995       vlib_get_next_frame (vm, node, next_index,
4996                            to_next, n_left_to_next);
4997
4998       while (n_left_from > 0 && n_left_to_next > 0)
4999         {
5000           u32 bi0;
5001           vlib_buffer_t * b0;
5002           u32 next0;
5003           u32 sw_if_index0;
5004           ip4_header_t * ip0;
5005           ip_csum_t sum0;
5006           u32 new_addr0, old_addr0;
5007           u16 old_port0, new_port0;
5008           udp_header_t * udp0;
5009           tcp_header_t * tcp0;
5010           icmp46_header_t * icmp0;
5011           snat_session_key_t key0, sm0;
5012           u32 proto0;
5013           u32 rx_fib_index0;
5014
5015           /* speculatively enqueue b0 to the current next frame */
5016           bi0 = from[0];
5017           to_next[0] = bi0;
5018           from += 1;
5019           to_next += 1;
5020           n_left_from -= 1;
5021           n_left_to_next -= 1;
5022
5023           b0 = vlib_get_buffer (vm, bi0);
5024           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
5025
5026           ip0 = vlib_buffer_get_current (b0);
5027           udp0 = ip4_next_header (ip0);
5028           tcp0 = (tcp_header_t *) udp0;
5029           icmp0 = (icmp46_header_t *) udp0;
5030
5031           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
5032           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
5033
5034           if (PREDICT_FALSE(ip0->ttl == 1))
5035             {
5036               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
5037               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
5038                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
5039                                            0);
5040               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
5041               goto trace0;
5042             }
5043
5044           proto0 = ip_proto_to_snat_proto (ip0->protocol);
5045
5046           if (PREDICT_FALSE (proto0 == ~0))
5047               goto trace0;
5048
5049           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
5050             {
5051               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
5052                                   rx_fib_index0, node, next0, ~0, 0, 0);
5053               goto trace0;
5054             }
5055
5056           key0.addr = ip0->src_address;
5057           key0.protocol = proto0;
5058           key0.port = udp0->src_port;
5059           key0.fib_index = rx_fib_index0;
5060
5061           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0, 0, 0))
5062             {
5063               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
5064               next0= SNAT_IN2OUT_NEXT_DROP;
5065               goto trace0;
5066             }
5067
5068           new_addr0 = sm0.addr.as_u32;
5069           new_port0 = sm0.port;
5070           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
5071           old_addr0 = ip0->src_address.as_u32;
5072           ip0->src_address.as_u32 = new_addr0;
5073
5074           sum0 = ip0->checksum;
5075           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
5076                                  ip4_header_t,
5077                                  src_address /* changed member */);
5078           ip0->checksum = ip_csum_fold (sum0);
5079
5080           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
5081             {
5082               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
5083                 {
5084                   old_port0 = tcp0->src_port;
5085                   tcp0->src_port = new_port0;
5086
5087                   sum0 = tcp0->checksum;
5088                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
5089                                          ip4_header_t,
5090                                          dst_address /* changed member */);
5091                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
5092                                          ip4_header_t /* cheat */,
5093                                          length /* changed member */);
5094                   tcp0->checksum = ip_csum_fold(sum0);
5095                 }
5096               else
5097                 {
5098                   old_port0 = udp0->src_port;
5099                   udp0->src_port = new_port0;
5100                   udp0->checksum = 0;
5101                 }
5102             }
5103           else
5104             {
5105               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
5106                 {
5107                   sum0 = tcp0->checksum;
5108                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
5109                                          ip4_header_t,
5110                                          dst_address /* changed member */);
5111                   tcp0->checksum = ip_csum_fold(sum0);
5112                 }
5113             }
5114
5115           /* Hairpinning */
5116           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0, 0);
5117
5118         trace0:
5119           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
5120                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
5121             {
5122               snat_in2out_trace_t *t =
5123                  vlib_add_trace (vm, node, b0, sizeof (*t));
5124               t->sw_if_index = sw_if_index0;
5125               t->next_index = next0;
5126             }
5127
5128           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
5129
5130           /* verify speculative enqueue, maybe switch current next frame */
5131           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
5132                                            to_next, n_left_to_next,
5133                                            bi0, next0);
5134         }
5135
5136       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
5137     }
5138
5139   vlib_node_increment_counter (vm, stats_node_index,
5140                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
5141                                pkts_processed);
5142   return frame->n_vectors;
5143 }
5144
5145
5146 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
5147   .function = snat_in2out_fast_static_map_fn,
5148   .name = "nat44-in2out-fast",
5149   .vector_size = sizeof (u32),
5150   .format_trace = format_snat_in2out_fast_trace,
5151   .type = VLIB_NODE_TYPE_INTERNAL,
5152
5153   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
5154   .error_strings = snat_in2out_error_strings,
5155
5156   .runtime_data_bytes = sizeof (snat_runtime_t),
5157
5158   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
5159
5160   /* edit / add dispositions here */
5161   .next_nodes = {
5162     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
5163     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
5164     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
5165     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
5166     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
5167   },
5168 };
5169
5170 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);