Translate matching packets using NAT (VPP-1069)
[vpp.git] / src / plugins / nat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <nat/nat.h>
25 #include <nat/nat_ipfix_logging.h>
26 #include <nat/nat_det.h>
27 #include <nat/nat_reass.h>
28
29 #include <vppinfra/hash.h>
30 #include <vppinfra/error.h>
31 #include <vppinfra/elog.h>
32
33 typedef struct {
34   u32 sw_if_index;
35   u32 next_index;
36   u32 session_index;
37   u32 is_slow_path;
38 } snat_in2out_trace_t;
39
40 typedef struct {
41   u32 next_worker_index;
42   u8 do_handoff;
43 } snat_in2out_worker_handoff_trace_t;
44
45 /* packet trace format function */
46 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
47 {
48   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
49   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
50   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
51   char * tag;
52
53   tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
54
55   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
56               t->sw_if_index, t->next_index, t->session_index);
57
58   return s;
59 }
60
61 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
62 {
63   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
64   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
65   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
66
67   s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
68               t->sw_if_index, t->next_index);
69
70   return s;
71 }
72
73 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
74 {
75   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
76   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
77   snat_in2out_worker_handoff_trace_t * t =
78     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
79   char * m;
80
81   m = t->do_handoff ? "next worker" : "same worker";
82   s = format (s, "NAT44_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
83
84   return s;
85 }
86
87 typedef struct {
88   u32 sw_if_index;
89   u32 next_index;
90   u8 cached;
91 } nat44_in2out_reass_trace_t;
92
93 static u8 * format_nat44_in2out_reass_trace (u8 * s, va_list * args)
94 {
95   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
96   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
97   nat44_in2out_reass_trace_t * t = va_arg (*args, nat44_in2out_reass_trace_t *);
98
99   s = format (s, "NAT44_IN2OUT_REASS: sw_if_index %d, next index %d, status %s",
100               t->sw_if_index, t->next_index,
101               t->cached ? "cached" : "translated");
102
103   return s;
104 }
105
106 vlib_node_registration_t snat_in2out_node;
107 vlib_node_registration_t snat_in2out_slowpath_node;
108 vlib_node_registration_t snat_in2out_fast_node;
109 vlib_node_registration_t snat_in2out_worker_handoff_node;
110 vlib_node_registration_t snat_det_in2out_node;
111 vlib_node_registration_t snat_in2out_output_node;
112 vlib_node_registration_t snat_in2out_output_slowpath_node;
113 vlib_node_registration_t snat_in2out_output_worker_handoff_node;
114 vlib_node_registration_t snat_hairpin_dst_node;
115 vlib_node_registration_t snat_hairpin_src_node;
116 vlib_node_registration_t nat44_hairpinning_node;
117 vlib_node_registration_t nat44_in2out_reass_node;
118
119
/*
 * X-macro listing every in2out error counter as (symbol, description).
 * It is expanded twice below: once into the snat_in2out_error_t enum and
 * once into the matching string table, so the order here fixes both.
 */
#define foreach_snat_in2out_error                               \
  _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")               \
  _(IN2OUT_PACKETS, "Good in2out packets processed")            \
  _(OUT_OF_PORTS, "Out of ports")                               \
  _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")                \
  _(BAD_ICMP_TYPE, "unsupported ICMP type")                     \
  _(NO_TRANSLATION, "No translation")                           \
  _(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")         \
  _(DROP_FRAGMENT, "Drop fragment")                             \
  _(MAX_REASS, "Maximum reassemblies exceeded")                 \
  _(MAX_FRAG, "Maximum fragments per reassembly exceeded")
131
132 typedef enum {
133 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
134   foreach_snat_in2out_error
135 #undef _
136   SNAT_IN2OUT_N_ERROR,
137 } snat_in2out_error_t;
138
139 static char * snat_in2out_error_strings[] = {
140 #define _(sym,string) string,
141   foreach_snat_in2out_error
142 #undef _
143 };
144
/* Next-node indices used by the in2out nodes. */
typedef enum
{
  SNAT_IN2OUT_NEXT_LOOKUP,
  SNAT_IN2OUT_NEXT_DROP,
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_NEXT_REASS,
  SNAT_IN2OUT_N_NEXT,           /* number of next nodes */
} snat_in2out_next_t;
153
/* Next-node indices used by the hairpin source node. */
typedef enum
{
  SNAT_HAIRPIN_SRC_NEXT_DROP,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
  SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
  SNAT_HAIRPIN_SRC_N_NEXT,      /* number of next nodes */
} snat_hairpin_next_t;
161
162 /**
163  * @brief Check if packet should be translated
164  *
165  * Packets aimed at outside interface and external addresss with active session
166  * should be translated.
167  *
168  * @param sm            NAT main
169  * @param rt            NAT runtime data
170  * @param sw_if_index0  index of the inside interface
171  * @param ip0           IPv4 header
172  * @param proto0        NAT protocol
173  * @param rx_fib_index0 RX FIB index
174  *
175  * @returns 0 if packet should be translated otherwise 1
176  */
177 static inline int
178 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
179                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
180                          u32 rx_fib_index0)
181 {
182   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
183   fib_prefix_t pfx = {
184     .fp_proto = FIB_PROTOCOL_IP4,
185     .fp_len = 32,
186     .fp_addr = {
187         .ip4.as_u32 = ip0->dst_address.as_u32,
188     },
189   };
190
191   /* Don't NAT packet aimed at the intfc address */
192   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
193                                       ip0->dst_address.as_u32)))
194     return 1;
195
196   fei = fib_table_lookup (rx_fib_index0, &pfx);
197   if (FIB_NODE_INDEX_INVALID != fei)
198     {
199       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
200       if (sw_if_index == ~0)
201         {
202           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
203           if (FIB_NODE_INDEX_INVALID != fei)
204             sw_if_index = fib_entry_get_resolving_interface (fei);
205         }
206       snat_interface_t *i;
207       pool_foreach (i, sm->interfaces,
208       ({
209         /* NAT packet aimed at outside interface */
210         if ((nat_interface_is_outside(i)) && (sw_if_index == i->sw_if_index))
211           return 0;
212       }));
213     }
214
215   return 1;
216 }
217
218 static inline int
219 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
220                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
221                     u32 rx_fib_index0, u32 thread_index)
222 {
223   udp_header_t * udp0 = ip4_next_header (ip0);
224   snat_session_key_t key0, sm0;
225   clib_bihash_kv_8_8_t kv0, value0;
226
227   key0.addr = ip0->dst_address;
228   key0.port = udp0->dst_port;
229   key0.protocol = proto0;
230   key0.fib_index = sm->outside_fib_index;
231   kv0.key = key0.as_u64;
232
233   /* NAT packet aimed at external address if */
234   /* has active sessions */
235   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
236                               &value0))
237     {
238       /* or is static mappings */
239       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
240         return 0;
241     }
242   else
243     return 0;
244
245   if (sm->forwarding_enabled)
246     return 1;
247
248   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
249                                  rx_fib_index0);
250 }
251
252 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
253                       ip4_header_t * ip0,
254                       u32 rx_fib_index0,
255                       snat_session_key_t * key0,
256                       snat_session_t ** sessionp,
257                       vlib_node_runtime_t * node,
258                       u32 next0,
259                       u32 thread_index)
260 {
261   snat_user_t *u;
262   snat_session_t *s;
263   clib_bihash_kv_8_8_t kv0;
264   snat_session_key_t key1;
265   u32 address_index = ~0;
266   u32 outside_fib_index;
267   uword * p;
268   udp_header_t * udp0 = ip4_next_header (ip0);
269
270   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
271     {
272       b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
273       return SNAT_IN2OUT_NEXT_DROP;
274     }
275
276   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
277   if (! p)
278     {
279       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
280       return SNAT_IN2OUT_NEXT_DROP;
281     }
282   outside_fib_index = p[0];
283
284   key1.protocol = key0->protocol;
285
286   u = nat_user_get_or_create (sm, &ip0->src_address, rx_fib_index0,
287                               thread_index);
288   if (!u)
289     {
290       clib_warning ("create NAT user failed");
291       return SNAT_IN2OUT_NEXT_DROP;
292     }
293
294   s = nat_session_alloc_or_recycle (sm, u, thread_index);
295   if (!s)
296     {
297       clib_warning ("create NAT session failed");
298       return SNAT_IN2OUT_NEXT_DROP;
299     }
300
301   /* First try to match static mapping by local address and port */
302   if (snat_static_mapping_match (sm, *key0, &key1, 0, 0, 0))
303     {
304       /* Try to create dynamic translation */
305       if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index0,
306                                                thread_index, &key1,
307                                                &address_index,
308                                                sm->port_per_thread,
309                                                sm->per_thread_data[thread_index].snat_thread_index))
310         {
311           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
312           return SNAT_IN2OUT_NEXT_DROP;
313         }
314       u->nsessions++;
315     }
316   else
317     {
318       u->nstaticsessions++;
319       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
320     }
321
322   s->outside_address_index = address_index;
323   s->in2out = *key0;
324   s->out2in = key1;
325   s->out2in.protocol = key0->protocol;
326   s->out2in.fib_index = outside_fib_index;
327   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
328   s->ext_host_port = udp0->dst_port;
329   *sessionp = s;
330
331   /* Add to translation hashes */
332   kv0.key = s->in2out.as_u64;
333   kv0.value = s - sm->per_thread_data[thread_index].sessions;
334   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
335                                1 /* is_add */))
336       clib_warning ("in2out key add failed");
337
338   kv0.key = s->out2in.as_u64;
339   kv0.value = s - sm->per_thread_data[thread_index].sessions;
340
341   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
342                                1 /* is_add */))
343       clib_warning ("out2in key add failed");
344
345   /* log NAT event */
346   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
347                                       s->out2in.addr.as_u32,
348                                       s->in2out.protocol,
349                                       s->in2out.port,
350                                       s->out2in.port,
351                                       s->in2out.fib_index);
352   return next0;
353 }
354
355 static_always_inline
356 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
357                                  snat_session_key_t *p_key0)
358 {
359   icmp46_header_t *icmp0;
360   snat_session_key_t key0;
361   icmp_echo_header_t *echo0, *inner_echo0 = 0;
362   ip4_header_t *inner_ip0 = 0;
363   void *l4_header = 0;
364   icmp46_header_t *inner_icmp0;
365
366   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
367   echo0 = (icmp_echo_header_t *)(icmp0+1);
368
369   if (!icmp_is_error_message (icmp0))
370     {
371       key0.protocol = SNAT_PROTOCOL_ICMP;
372       key0.addr = ip0->src_address;
373       key0.port = echo0->identifier;
374     }
375   else
376     {
377       inner_ip0 = (ip4_header_t *)(echo0+1);
378       l4_header = ip4_next_header (inner_ip0);
379       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
380       key0.addr = inner_ip0->dst_address;
381       switch (key0.protocol)
382         {
383         case SNAT_PROTOCOL_ICMP:
384           inner_icmp0 = (icmp46_header_t*)l4_header;
385           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
386           key0.port = inner_echo0->identifier;
387           break;
388         case SNAT_PROTOCOL_UDP:
389         case SNAT_PROTOCOL_TCP:
390           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
391           break;
392         default:
393           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
394         }
395     }
396   *p_key0 = key0;
397   return -1; /* success */
398 }
399
400 /**
401  * Get address and port values to be used for ICMP packet translation
402  * and create session if needed
403  *
404  * @param[in,out] sm             NAT main
405  * @param[in,out] node           NAT node runtime
406  * @param[in] thread_index       thread index
407  * @param[in,out] b0             buffer containing packet to be translated
408  * @param[out] p_proto           protocol used for matching
409  * @param[out] p_value           address and port after NAT translation
410  * @param[out] p_dont_translate  if packet should not be translated
411  * @param d                      optional parameter
412  * @param e                      optional parameter
413  */
414 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
415                            u32 thread_index, vlib_buffer_t *b0,
416                            ip4_header_t *ip0, u8 *p_proto,
417                            snat_session_key_t *p_value,
418                            u8 *p_dont_translate, void *d, void *e)
419 {
420   icmp46_header_t *icmp0;
421   u32 sw_if_index0;
422   u32 rx_fib_index0;
423   snat_session_key_t key0;
424   snat_session_t *s0 = 0;
425   u8 dont_translate = 0;
426   clib_bihash_kv_8_8_t kv0, value0;
427   u32 next0 = ~0;
428   int err;
429
430   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
431   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
432   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
433
434   err = icmp_get_key (ip0, &key0);
435   if (err != -1)
436     {
437       b0->error = node->errors[err];
438       next0 = SNAT_IN2OUT_NEXT_DROP;
439       goto out;
440     }
441   key0.fib_index = rx_fib_index0;
442
443   kv0.key = key0.as_u64;
444
445   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
446                               &value0))
447     {
448       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
449           IP_PROTOCOL_ICMP, rx_fib_index0, thread_index) &&
450           vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0))
451         {
452           dont_translate = 1;
453           goto out;
454         }
455
456       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
457         {
458           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
459           next0 = SNAT_IN2OUT_NEXT_DROP;
460           goto out;
461         }
462
463       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
464                          &s0, node, next0, thread_index);
465
466       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
467         goto out;
468     }
469   else
470     {
471       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
472                         icmp0->type != ICMP4_echo_reply &&
473                         !icmp_is_error_message (icmp0)))
474         {
475           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
476           next0 = SNAT_IN2OUT_NEXT_DROP;
477           goto out;
478         }
479
480       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
481                               value0.value);
482     }
483
484 out:
485   *p_proto = key0.protocol;
486   if (s0)
487     *p_value = s0->out2in;
488   *p_dont_translate = dont_translate;
489   if (d)
490     *(snat_session_t**)d = s0;
491   return next0;
492 }
493
494 /**
495  * Get address and port values to be used for ICMP packet translation
496  *
497  * @param[in] sm                 NAT main
498  * @param[in,out] node           NAT node runtime
499  * @param[in] thread_index       thread index
500  * @param[in,out] b0             buffer containing packet to be translated
501  * @param[out] p_proto           protocol used for matching
502  * @param[out] p_value           address and port after NAT translation
503  * @param[out] p_dont_translate  if packet should not be translated
504  * @param d                      optional parameter
505  * @param e                      optional parameter
506  */
507 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
508                            u32 thread_index, vlib_buffer_t *b0,
509                            ip4_header_t *ip0, u8 *p_proto,
510                            snat_session_key_t *p_value,
511                            u8 *p_dont_translate, void *d, void *e)
512 {
513   icmp46_header_t *icmp0;
514   u32 sw_if_index0;
515   u32 rx_fib_index0;
516   snat_session_key_t key0;
517   snat_session_key_t sm0;
518   u8 dont_translate = 0;
519   u8 is_addr_only;
520   u32 next0 = ~0;
521   int err;
522
523   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
524   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
525   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
526
527   err = icmp_get_key (ip0, &key0);
528   if (err != -1)
529     {
530       b0->error = node->errors[err];
531       next0 = SNAT_IN2OUT_NEXT_DROP;
532       goto out2;
533     }
534   key0.fib_index = rx_fib_index0;
535
536   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only, 0))
537     {
538       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
539           IP_PROTOCOL_ICMP, rx_fib_index0)))
540         {
541           dont_translate = 1;
542           goto out;
543         }
544
545       if (icmp_is_error_message (icmp0))
546         {
547           next0 = SNAT_IN2OUT_NEXT_DROP;
548           goto out;
549         }
550
551       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
552       next0 = SNAT_IN2OUT_NEXT_DROP;
553       goto out;
554     }
555
556   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
557                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
558                     !icmp_is_error_message (icmp0)))
559     {
560       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
561       next0 = SNAT_IN2OUT_NEXT_DROP;
562       goto out;
563     }
564
565 out:
566   *p_value = sm0;
567 out2:
568   *p_proto = key0.protocol;
569   *p_dont_translate = dont_translate;
570   return next0;
571 }
572
573 static inline u32 icmp_in2out (snat_main_t *sm,
574                                vlib_buffer_t * b0,
575                                ip4_header_t * ip0,
576                                icmp46_header_t * icmp0,
577                                u32 sw_if_index0,
578                                u32 rx_fib_index0,
579                                vlib_node_runtime_t * node,
580                                u32 next0,
581                                u32 thread_index,
582                                void *d,
583                                void *e)
584 {
585   snat_session_key_t sm0;
586   u8 protocol;
587   icmp_echo_header_t *echo0, *inner_echo0 = 0;
588   ip4_header_t *inner_ip0;
589   void *l4_header = 0;
590   icmp46_header_t *inner_icmp0;
591   u8 dont_translate;
592   u32 new_addr0, old_addr0;
593   u16 old_id0, new_id0;
594   ip_csum_t sum0;
595   u16 checksum0;
596   u32 next0_tmp;
597
598   echo0 = (icmp_echo_header_t *)(icmp0+1);
599
600   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0, ip0,
601                                        &protocol, &sm0, &dont_translate, d, e);
602   if (next0_tmp != ~0)
603     next0 = next0_tmp;
604   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
605     goto out;
606
607   sum0 = ip_incremental_checksum (0, icmp0,
608                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
609   checksum0 = ~ip_csum_fold (sum0);
610   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
611     {
612       next0 = SNAT_IN2OUT_NEXT_DROP;
613       goto out;
614     }
615
616   old_addr0 = ip0->src_address.as_u32;
617   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
618   if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
619     vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
620
621   sum0 = ip0->checksum;
622   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
623                          src_address /* changed member */);
624   ip0->checksum = ip_csum_fold (sum0);
625
626   if (!icmp_is_error_message (icmp0))
627     {
628       new_id0 = sm0.port;
629       if (PREDICT_FALSE(new_id0 != echo0->identifier))
630         {
631           old_id0 = echo0->identifier;
632           new_id0 = sm0.port;
633           echo0->identifier = new_id0;
634
635           sum0 = icmp0->checksum;
636           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
637                                  identifier);
638           icmp0->checksum = ip_csum_fold (sum0);
639         }
640     }
641   else
642     {
643       inner_ip0 = (ip4_header_t *)(echo0+1);
644       l4_header = ip4_next_header (inner_ip0);
645
646       if (!ip4_header_checksum_is_valid (inner_ip0))
647         {
648           next0 = SNAT_IN2OUT_NEXT_DROP;
649           goto out;
650         }
651
652       old_addr0 = inner_ip0->dst_address.as_u32;
653       inner_ip0->dst_address = sm0.addr;
654       new_addr0 = inner_ip0->dst_address.as_u32;
655
656       sum0 = icmp0->checksum;
657       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
658                              dst_address /* changed member */);
659       icmp0->checksum = ip_csum_fold (sum0);
660
661       switch (protocol)
662         {
663           case SNAT_PROTOCOL_ICMP:
664             inner_icmp0 = (icmp46_header_t*)l4_header;
665             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
666
667             old_id0 = inner_echo0->identifier;
668             new_id0 = sm0.port;
669             inner_echo0->identifier = new_id0;
670
671             sum0 = icmp0->checksum;
672             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
673                                    identifier);
674             icmp0->checksum = ip_csum_fold (sum0);
675             break;
676           case SNAT_PROTOCOL_UDP:
677           case SNAT_PROTOCOL_TCP:
678             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
679             new_id0 = sm0.port;
680             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
681
682             sum0 = icmp0->checksum;
683             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
684                                    dst_port);
685             icmp0->checksum = ip_csum_fold (sum0);
686             break;
687           default:
688             ASSERT(0);
689         }
690     }
691
692 out:
693   return next0;
694 }
695
696 /**
697  * @brief Hairpinning
698  *
699  * Hairpinning allows two endpoints on the internal side of the NAT to
700  * communicate even if they only use each other's external IP addresses
701  * and ports.
702  *
703  * @param sm     NAT main.
704  * @param b0     Vlib buffer.
705  * @param ip0    IP header.
706  * @param udp0   UDP header.
707  * @param tcp0   TCP header.
708  * @param proto0 NAT protocol.
709  */
710 static inline int
711 snat_hairpinning (snat_main_t *sm,
712                   vlib_buffer_t * b0,
713                   ip4_header_t * ip0,
714                   udp_header_t * udp0,
715                   tcp_header_t * tcp0,
716                   u32 proto0)
717 {
718   snat_session_key_t key0, sm0;
719   snat_session_t * s0;
720   clib_bihash_kv_8_8_t kv0, value0;
721   ip_csum_t sum0;
722   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
723   u16 new_dst_port0, old_dst_port0;
724
725   key0.addr = ip0->dst_address;
726   key0.port = udp0->dst_port;
727   key0.protocol = proto0;
728   key0.fib_index = sm->outside_fib_index;
729   kv0.key = key0.as_u64;
730
731   /* Check if destination is static mappings */
732   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
733     {
734       new_dst_addr0 = sm0.addr.as_u32;
735       new_dst_port0 = sm0.port;
736       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
737     }
738   /* or active session */
739   else
740     {
741       if (sm->num_workers > 1)
742         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
743       else
744         ti = sm->num_workers;
745
746       if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
747         {
748           si = value0.value;
749
750           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
751           new_dst_addr0 = s0->in2out.addr.as_u32;
752           new_dst_port0 = s0->in2out.port;
753           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
754         }
755     }
756
757   /* Destination is behind the same NAT, use internal address and port */
758   if (new_dst_addr0)
759     {
760       old_dst_addr0 = ip0->dst_address.as_u32;
761       ip0->dst_address.as_u32 = new_dst_addr0;
762       sum0 = ip0->checksum;
763       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
764                              ip4_header_t, dst_address);
765       ip0->checksum = ip_csum_fold (sum0);
766
767       old_dst_port0 = tcp0->dst;
768       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
769         {
770           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
771             {
772               tcp0->dst = new_dst_port0;
773               sum0 = tcp0->checksum;
774               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
775                                      ip4_header_t, dst_address);
776               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
777                                      ip4_header_t /* cheat */, length);
778               tcp0->checksum = ip_csum_fold(sum0);
779             }
780           else
781             {
782               udp0->dst_port = new_dst_port0;
783               udp0->checksum = 0;
784             }
785         }
786       else
787         {
788           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
789             {
790               sum0 = tcp0->checksum;
791               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
792                                      ip4_header_t, dst_address);
793               tcp0->checksum = ip_csum_fold(sum0);
794             }
795         }
796       return 1;
797     }
798   return 0;
799 }
800
801 static inline void
802 snat_icmp_hairpinning (snat_main_t *sm,
803                        vlib_buffer_t * b0,
804                        ip4_header_t * ip0,
805                        icmp46_header_t * icmp0)
806 {
807   snat_session_key_t key0, sm0;
808   clib_bihash_kv_8_8_t kv0, value0;
809   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
810   ip_csum_t sum0;
811   snat_session_t *s0;
812
813   if (!icmp_is_error_message (icmp0))
814     {
815       icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
816       u16 icmp_id0 = echo0->identifier;
817       key0.addr = ip0->dst_address;
818       key0.port = icmp_id0;
819       key0.protocol = SNAT_PROTOCOL_ICMP;
820       key0.fib_index = sm->outside_fib_index;
821       kv0.key = key0.as_u64;
822
823       if (sm->num_workers > 1)
824         ti = (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread;
825       else
826         ti = sm->num_workers;
827
828       /* Check if destination is in active sessions */
829       if (clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0,
830                                   &value0))
831         {
832           /* or static mappings */
833           if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
834             {
835               new_dst_addr0 = sm0.addr.as_u32;
836               vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
837             }
838         }
839       else
840         {
841           si = value0.value;
842
843           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
844           new_dst_addr0 = s0->in2out.addr.as_u32;
845           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
846           echo0->identifier = s0->in2out.port;
847           sum0 = icmp0->checksum;
848           sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
849                                  icmp_echo_header_t, identifier);
850           icmp0->checksum = ip_csum_fold (sum0);
851         }
852
853       /* Destination is behind the same NAT, use internal address and port */
854       if (new_dst_addr0)
855         {
856           old_dst_addr0 = ip0->dst_address.as_u32;
857           ip0->dst_address.as_u32 = new_dst_addr0;
858           sum0 = ip0->checksum;
859           sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
860                                  ip4_header_t, dst_address);
861           ip0->checksum = ip_csum_fold (sum0);
862         }
863     }
864
865 }
866
867 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
868                                          vlib_buffer_t * b0,
869                                          ip4_header_t * ip0,
870                                          icmp46_header_t * icmp0,
871                                          u32 sw_if_index0,
872                                          u32 rx_fib_index0,
873                                          vlib_node_runtime_t * node,
874                                          u32 next0,
875                                          f64 now,
876                                          u32 thread_index,
877                                          snat_session_t ** p_s0)
878 {
879   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
880                       next0, thread_index, p_s0, 0);
881   snat_session_t * s0 = *p_s0;
882   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
883     {
884       /* Hairpinning */
885       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == 0)
886         snat_icmp_hairpinning(sm, b0, ip0, icmp0);
887       /* Accounting */
888       s0->last_heard = now;
889       s0->total_pkts++;
890       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
891       /* Per-user LRU list maintenance */
892       clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
893                          s0->per_user_index);
894       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
895                           s0->per_user_list_head_index,
896                           s0->per_user_index);
897     }
898   return next0;
899 }
900 static inline void
901 snat_hairpinning_unknown_proto (snat_main_t *sm,
902                                 vlib_buffer_t * b,
903                                 ip4_header_t * ip)
904 {
905   u32 old_addr, new_addr = 0, ti = 0;
906   clib_bihash_kv_8_8_t kv, value;
907   clib_bihash_kv_16_8_t s_kv, s_value;
908   nat_ed_ses_key_t key;
909   snat_session_key_t m_key;
910   snat_static_mapping_t *m;
911   ip_csum_t sum;
912   snat_session_t *s;
913
914   old_addr = ip->dst_address.as_u32;
915   key.l_addr.as_u32 = ip->dst_address.as_u32;
916   key.r_addr.as_u32 = ip->src_address.as_u32;
917   key.fib_index = sm->outside_fib_index;
918   key.proto = ip->protocol;
919   key.r_port = 0;
920   key.l_port = 0;
921   s_kv.key[0] = key.as_u64[0];
922   s_kv.key[1] = key.as_u64[1];
923   if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
924     {
925       m_key.addr = ip->dst_address;
926       m_key.fib_index = sm->outside_fib_index;
927       m_key.port = 0;
928       m_key.protocol = 0;
929       kv.key = m_key.as_u64;
930       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
931         return;
932
933       m = pool_elt_at_index (sm->static_mappings, value.value);
934       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
935         vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
936       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
937     }
938   else
939     {
940       if (sm->num_workers > 1)
941         ti = sm->worker_out2in_cb (ip, sm->outside_fib_index);
942       else
943         ti = sm->num_workers;
944
945       s = pool_elt_at_index (sm->per_thread_data[ti].sessions, s_value.value);
946       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
947         vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
948       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
949     }
950   sum = ip->checksum;
951   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
952   ip->checksum = ip_csum_fold (sum);
953 }
954
955 static snat_session_t *
956 snat_in2out_unknown_proto (snat_main_t *sm,
957                            vlib_buffer_t * b,
958                            ip4_header_t * ip,
959                            u32 rx_fib_index,
960                            u32 thread_index,
961                            f64 now,
962                            vlib_main_t * vm,
963                            vlib_node_runtime_t * node)
964 {
965   clib_bihash_kv_8_8_t kv, value;
966   clib_bihash_kv_16_8_t s_kv, s_value;
967   snat_static_mapping_t *m;
968   snat_session_key_t m_key;
969   u32 old_addr, new_addr = 0;
970   ip_csum_t sum;
971   snat_user_t *u;
972   dlist_elt_t *head, *elt;
973   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
974   u32 elt_index, head_index, ses_index;
975   snat_session_t * s;
976   nat_ed_ses_key_t key;
977   u32 address_index = ~0;
978   int i;
979   u8 is_sm = 0;
980
981   old_addr = ip->src_address.as_u32;
982
983   key.l_addr = ip->src_address;
984   key.r_addr = ip->dst_address;
985   key.fib_index = rx_fib_index;
986   key.proto = ip->protocol;
987   key.l_port = 0;
988   key.l_port = 0;
989   s_kv.key[0] = key.as_u64[0];
990   s_kv.key[1] = key.as_u64[1];
991
992   if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
993     {
994       s = pool_elt_at_index (tsm->sessions, s_value.value);
995       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
996     }
997   else
998     {
999       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
1000         {
1001           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
1002           return 0;
1003         }
1004
1005       u = nat_user_get_or_create (sm, &ip->src_address, rx_fib_index,
1006                                   thread_index);
1007       if (!u)
1008         {
1009           clib_warning ("create NAT user failed");
1010           return 0;
1011         }
1012
1013       m_key.addr = ip->src_address;
1014       m_key.port = 0;
1015       m_key.protocol = 0;
1016       m_key.fib_index = rx_fib_index;
1017       kv.key = m_key.as_u64;
1018
1019       /* Try to find static mapping first */
1020       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
1021         {
1022           m = pool_elt_at_index (sm->static_mappings, value.value);
1023           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
1024           is_sm = 1;
1025           goto create_ses;
1026         }
1027       /* Fallback to 3-tuple key */
1028       else
1029         {
1030           /* Choose same out address as for TCP/UDP session to same destination */
1031           if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1032             {
1033               head_index = u->sessions_per_user_list_head_index;
1034               head = pool_elt_at_index (tsm->list_pool, head_index);
1035               elt_index = head->next;
1036               elt = pool_elt_at_index (tsm->list_pool, elt_index);
1037               ses_index = elt->value;
1038               while (ses_index != ~0)
1039                 {
1040                   s =  pool_elt_at_index (tsm->sessions, ses_index);
1041                   elt_index = elt->next;
1042                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1043                   ses_index = elt->value;
1044
1045                   if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
1046                     {
1047                       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1048                       address_index = s->outside_address_index;
1049
1050                       key.fib_index = sm->outside_fib_index;
1051                       key.l_addr.as_u32 = new_addr;
1052                       s_kv.key[0] = key.as_u64[0];
1053                       s_kv.key[1] = key.as_u64[1];
1054                       if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1055                         break;
1056
1057                       goto create_ses;
1058                     }
1059                 }
1060             }
1061           key.fib_index = sm->outside_fib_index;
1062           for (i = 0; i < vec_len (sm->addresses); i++)
1063             {
1064               key.l_addr.as_u32 = sm->addresses[i].addr.as_u32;
1065               s_kv.key[0] = key.as_u64[0];
1066               s_kv.key[1] = key.as_u64[1];
1067               if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1068                 {
1069                   new_addr = ip->src_address.as_u32 = key.l_addr.as_u32;
1070                   address_index = i;
1071                   goto create_ses;
1072                 }
1073             }
1074           return 0;
1075         }
1076
1077 create_ses:
1078       s = nat_session_alloc_or_recycle (sm, u, thread_index);
1079       if (!s)
1080         {
1081           clib_warning ("create NAT session failed");
1082           return 0;
1083         }
1084
1085       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1086       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
1087       s->outside_address_index = address_index;
1088       s->out2in.addr.as_u32 = new_addr;
1089       s->out2in.fib_index = sm->outside_fib_index;
1090       s->in2out.addr.as_u32 = old_addr;
1091       s->in2out.fib_index = rx_fib_index;
1092       s->in2out.port = s->out2in.port = ip->protocol;
1093       if (is_sm)
1094         {
1095           u->nstaticsessions++;
1096           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1097         }
1098       else
1099         {
1100           u->nsessions++;
1101         }
1102
1103       /* Add to lookup tables */
1104       key.l_addr.as_u32 = old_addr;
1105       key.r_addr = ip->dst_address;
1106       key.proto = ip->protocol;
1107       key.fib_index = rx_fib_index;
1108       s_kv.key[0] = key.as_u64[0];
1109       s_kv.key[1] = key.as_u64[1];
1110       s_kv.value = s - tsm->sessions;
1111       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1112         clib_warning ("in2out key add failed");
1113
1114       key.l_addr.as_u32 = new_addr;
1115       key.fib_index = sm->outside_fib_index;
1116       s_kv.key[0] = key.as_u64[0];
1117       s_kv.key[1] = key.as_u64[1];
1118       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1119         clib_warning ("out2in key add failed");
1120   }
1121
1122   /* Update IP checksum */
1123   sum = ip->checksum;
1124   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1125   ip->checksum = ip_csum_fold (sum);
1126
1127   /* Accounting */
1128   s->last_heard = now;
1129   s->total_pkts++;
1130   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1131   /* Per-user LRU list maintenance */
1132   clib_dlist_remove (tsm->list_pool, s->per_user_index);
1133   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1134                       s->per_user_index);
1135
1136   /* Hairpinning */
1137   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1138     snat_hairpinning_unknown_proto(sm, b, ip);
1139
1140   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1141     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1142
1143   return s;
1144 }
1145
1146 static snat_session_t *
1147 snat_in2out_lb (snat_main_t *sm,
1148                 vlib_buffer_t * b,
1149                 ip4_header_t * ip,
1150                 u32 rx_fib_index,
1151                 u32 thread_index,
1152                 f64 now,
1153                 vlib_main_t * vm,
1154                 vlib_node_runtime_t * node)
1155 {
1156   nat_ed_ses_key_t key;
1157   clib_bihash_kv_16_8_t s_kv, s_value;
1158   udp_header_t *udp = ip4_next_header (ip);
1159   tcp_header_t *tcp = (tcp_header_t *) udp;
1160   snat_session_t *s = 0;
1161   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1162   u32 old_addr, new_addr;
1163   u16 new_port, old_port;
1164   ip_csum_t sum;
1165   u32 proto = ip_proto_to_snat_proto (ip->protocol);
1166   snat_session_key_t e_key, l_key;
1167   snat_user_t *u;
1168
1169   old_addr = ip->src_address.as_u32;
1170
1171   key.l_addr = ip->src_address;
1172   key.r_addr = ip->dst_address;
1173   key.fib_index = rx_fib_index;
1174   key.proto = ip->protocol;
1175   key.r_port = udp->dst_port;
1176   key.l_port = udp->src_port;
1177   s_kv.key[0] = key.as_u64[0];
1178   s_kv.key[1] = key.as_u64[1];
1179
1180   if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
1181     {
1182       s = pool_elt_at_index (tsm->sessions, s_value.value);
1183     }
1184   else
1185     {
1186       if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index)))
1187         {
1188           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
1189           return 0;
1190         }
1191
1192       l_key.addr = ip->src_address;
1193       l_key.port = udp->src_port;
1194       l_key.protocol = proto;
1195       l_key.fib_index = rx_fib_index;
1196       if (snat_static_mapping_match(sm, l_key, &e_key, 0, 0, 0))
1197         return 0;
1198
1199       u = nat_user_get_or_create (sm, &ip->src_address, rx_fib_index,
1200                                   thread_index);
1201       if (!u)
1202         {
1203           clib_warning ("create NAT user failed");
1204           return 0;
1205         }
1206
1207       s = nat_session_alloc_or_recycle (sm, u, thread_index);
1208       if (!s)
1209         {
1210           clib_warning ("create NAT session failed");
1211           return 0;
1212         }
1213
1214       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1215       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1216       s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
1217       s->outside_address_index = ~0;
1218       s->in2out = l_key;
1219       s->out2in = e_key;
1220       u->nstaticsessions++;
1221
1222       /* Add to lookup tables */
1223       s_kv.value = s - tsm->sessions;
1224       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1225         clib_warning ("in2out-ed key add failed");
1226
1227       key.l_addr = e_key.addr;
1228       key.fib_index = e_key.fib_index;
1229       key.l_port = e_key.port;
1230       s_kv.key[0] = key.as_u64[0];
1231       s_kv.key[1] = key.as_u64[1];
1232       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1233         clib_warning ("out2in-ed key add failed");
1234     }
1235
1236   new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1237
1238   /* Update IP checksum */
1239   sum = ip->checksum;
1240   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1241   if (is_twice_nat_session (s))
1242     sum = ip_csum_update (sum, ip->dst_address.as_u32,
1243                           s->ext_host_addr.as_u32, ip4_header_t, dst_address);
1244   ip->checksum = ip_csum_fold (sum);
1245
1246   if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
1247     {
1248       old_port = tcp->src_port;
1249       tcp->src_port = s->out2in.port;
1250       new_port = tcp->src_port;
1251
1252       sum = tcp->checksum;
1253       sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1254       sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
1255       if (is_twice_nat_session (s))
1256         {
1257           sum = ip_csum_update (sum, ip->dst_address.as_u32,
1258                                 s->ext_host_addr.as_u32, ip4_header_t,
1259                                 dst_address);
1260           sum = ip_csum_update (sum, tcp->dst_port, s->ext_host_port,
1261                                 ip4_header_t, length);
1262           tcp->dst_port = s->ext_host_port;
1263           ip->dst_address.as_u32 = s->ext_host_addr.as_u32;
1264         }
1265       tcp->checksum = ip_csum_fold(sum);
1266     }
1267   else
1268     {
1269       udp->src_port = s->out2in.port;
1270       if (is_twice_nat_session (s))
1271         {
1272           udp->dst_port = s->ext_host_port;
1273           ip->dst_address.as_u32 = s->ext_host_addr.as_u32;
1274         }
1275       udp->checksum = 0;
1276     }
1277
1278   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1279     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1280
1281   /* Accounting */
1282   s->last_heard = now;
1283   s->total_pkts++;
1284   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1285   /* Per-user LRU list maintenance */
1286   clib_dlist_remove (tsm->list_pool, s->per_user_index);
1287   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1288                       s->per_user_index);
1289   return s;
1290 }
1291
1292 static inline uword
1293 snat_in2out_node_fn_inline (vlib_main_t * vm,
1294                             vlib_node_runtime_t * node,
1295                             vlib_frame_t * frame, int is_slow_path,
1296                             int is_output_feature)
1297 {
1298   u32 n_left_from, * from, * to_next;
1299   snat_in2out_next_t next_index;
1300   u32 pkts_processed = 0;
1301   snat_main_t * sm = &snat_main;
1302   f64 now = vlib_time_now (vm);
1303   u32 stats_node_index;
1304   u32 thread_index = vlib_get_thread_index ();
1305
1306   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
1307     snat_in2out_node.index;
1308
1309   from = vlib_frame_vector_args (frame);
1310   n_left_from = frame->n_vectors;
1311   next_index = node->cached_next_index;
1312
1313   while (n_left_from > 0)
1314     {
1315       u32 n_left_to_next;
1316
1317       vlib_get_next_frame (vm, node, next_index,
1318                            to_next, n_left_to_next);
1319
1320       while (n_left_from >= 4 && n_left_to_next >= 2)
1321         {
1322           u32 bi0, bi1;
1323           vlib_buffer_t * b0, * b1;
1324           u32 next0, next1;
1325           u32 sw_if_index0, sw_if_index1;
1326           ip4_header_t * ip0, * ip1;
1327           ip_csum_t sum0, sum1;
1328           u32 new_addr0, old_addr0, new_addr1, old_addr1;
1329           u16 old_port0, new_port0, old_port1, new_port1;
1330           udp_header_t * udp0, * udp1;
1331           tcp_header_t * tcp0, * tcp1;
1332           icmp46_header_t * icmp0, * icmp1;
1333           snat_session_key_t key0, key1;
1334           u32 rx_fib_index0, rx_fib_index1;
1335           u32 proto0, proto1;
1336           snat_session_t * s0 = 0, * s1 = 0;
1337           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1338           u32 iph_offset0 = 0, iph_offset1 = 0;
1339
1340           /* Prefetch next iteration. */
1341           {
1342             vlib_buffer_t * p2, * p3;
1343
1344             p2 = vlib_get_buffer (vm, from[2]);
1345             p3 = vlib_get_buffer (vm, from[3]);
1346
1347             vlib_prefetch_buffer_header (p2, LOAD);
1348             vlib_prefetch_buffer_header (p3, LOAD);
1349
1350             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1351             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1352           }
1353
1354           /* speculatively enqueue b0 and b1 to the current next frame */
1355           to_next[0] = bi0 = from[0];
1356           to_next[1] = bi1 = from[1];
1357           from += 2;
1358           to_next += 2;
1359           n_left_from -= 2;
1360           n_left_to_next -= 2;
1361
1362           b0 = vlib_get_buffer (vm, bi0);
1363           b1 = vlib_get_buffer (vm, bi1);
1364
1365           if (is_output_feature)
1366             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1367
1368           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1369                  iph_offset0);
1370
1371           udp0 = ip4_next_header (ip0);
1372           tcp0 = (tcp_header_t *) udp0;
1373           icmp0 = (icmp46_header_t *) udp0;
1374
1375           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1376           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1377                                    sw_if_index0);
1378
1379           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1380
1381           if (PREDICT_FALSE(ip0->ttl == 1))
1382             {
1383               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1384               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1385                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1386                                            0);
1387               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1388               goto trace00;
1389             }
1390
1391           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1392
1393           /* Next configured feature, probably ip4-lookup */
1394           if (is_slow_path)
1395             {
1396               if (PREDICT_FALSE (proto0 == ~0))
1397                 {
1398                   s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1399                                                   thread_index, now, vm, node);
1400                   if (!s0)
1401                     next0 = SNAT_IN2OUT_NEXT_DROP;
1402                   goto trace00;
1403                 }
1404
1405               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1406                 {
1407                   next0 = icmp_in2out_slow_path
1408                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
1409                      node, next0, now, thread_index, &s0);
1410                   goto trace00;
1411                 }
1412             }
1413           else
1414             {
1415               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1416                 {
1417                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1418                   goto trace00;
1419                 }
1420
1421               if (ip4_is_fragment (ip0))
1422                 {
1423                   next0 = SNAT_IN2OUT_NEXT_REASS;
1424                   goto trace00;
1425                 }
1426             }
1427
1428           key0.addr = ip0->src_address;
1429           key0.port = udp0->src_port;
1430           key0.protocol = proto0;
1431           key0.fib_index = rx_fib_index0;
1432
1433           kv0.key = key0.as_u64;
1434
1435           if (PREDICT_FALSE (clib_bihash_search_8_8 (
1436               &sm->per_thread_data[thread_index].in2out, &kv0, &value0) != 0))
1437             {
1438               if (is_slow_path)
1439                 {
1440                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1441                       ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature)
1442                     goto trace00;
1443
1444                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1445                                      &s0, node, next0, thread_index);
1446                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1447                     goto trace00;
1448                 }
1449               else
1450                 {
1451                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1452                   goto trace00;
1453                 }
1454             }
1455           else
1456             {
1457               if (PREDICT_FALSE (value0.value == ~0ULL))
1458                 {
1459                   if (is_slow_path)
1460                     {
1461                       s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
1462                                           thread_index, now, vm, node);
1463                       if (!s0)
1464                         next0 = SNAT_IN2OUT_NEXT_DROP;
1465                       goto trace00;
1466                     }
1467                   else
1468                     {
1469                       next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1470                       goto trace00;
1471                     }
1472                 }
1473               else
1474                 {
1475                   s0 = pool_elt_at_index (
1476                     sm->per_thread_data[thread_index].sessions,
1477                     value0.value);
1478                 }
1479             }
1480
1481           b0->flags |= VNET_BUFFER_F_IS_NATED;
1482
1483           old_addr0 = ip0->src_address.as_u32;
1484           ip0->src_address = s0->out2in.addr;
1485           new_addr0 = ip0->src_address.as_u32;
1486           if (!is_output_feature)
1487             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1488
1489           sum0 = ip0->checksum;
1490           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1491                                  ip4_header_t,
1492                                  src_address /* changed member */);
1493           ip0->checksum = ip_csum_fold (sum0);
1494
1495           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1496             {
1497               old_port0 = tcp0->src_port;
1498               tcp0->src_port = s0->out2in.port;
1499               new_port0 = tcp0->src_port;
1500
1501               sum0 = tcp0->checksum;
1502               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1503                                      ip4_header_t,
1504                                      dst_address /* changed member */);
1505               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1506                                      ip4_header_t /* cheat */,
1507                                      length /* changed member */);
1508               tcp0->checksum = ip_csum_fold(sum0);
1509             }
1510           else
1511             {
1512               old_port0 = udp0->src_port;
1513               udp0->src_port = s0->out2in.port;
1514               udp0->checksum = 0;
1515             }
1516
1517           /* Accounting */
1518           s0->last_heard = now;
1519           s0->total_pkts++;
1520           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1521           /* Per-user LRU list maintenance */
1522           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1523                              s0->per_user_index);
1524           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1525                               s0->per_user_list_head_index,
1526                               s0->per_user_index);
1527         trace00:
1528
1529           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1530                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1531             {
1532               snat_in2out_trace_t *t =
1533                  vlib_add_trace (vm, node, b0, sizeof (*t));
1534               t->is_slow_path = is_slow_path;
1535               t->sw_if_index = sw_if_index0;
1536               t->next_index = next0;
1537                   t->session_index = ~0;
1538               if (s0)
1539                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1540             }
1541
1542           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1543
1544           if (is_output_feature)
1545             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1546
1547           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1548                  iph_offset1);
1549
1550           udp1 = ip4_next_header (ip1);
1551           tcp1 = (tcp_header_t *) udp1;
1552           icmp1 = (icmp46_header_t *) udp1;
1553
1554           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1555           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1556                                    sw_if_index1);
1557
1558           if (PREDICT_FALSE(ip1->ttl == 1))
1559             {
1560               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1561               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1562                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1563                                            0);
1564               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1565               goto trace01;
1566             }
1567
1568           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1569
1570           /* Next configured feature, probably ip4-lookup */
1571           if (is_slow_path)
1572             {
1573               if (PREDICT_FALSE (proto1 == ~0))
1574                 {
1575                   s1 = snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1,
1576                                                   thread_index, now, vm, node);
1577                   if (!s1)
1578                     next1 = SNAT_IN2OUT_NEXT_DROP;
1579                   goto trace01;
1580                 }
1581
1582               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1583                 {
1584                   next1 = icmp_in2out_slow_path
1585                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1586                      next1, now, thread_index, &s1);
1587                   goto trace01;
1588                 }
1589             }
1590           else
1591             {
1592               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1593                 {
1594                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1595                   goto trace01;
1596                 }
1597
1598               if (ip4_is_fragment (ip1))
1599                 {
1600                   next1 = SNAT_IN2OUT_NEXT_REASS;
1601                   goto trace01;
1602                 }
1603             }
1604
1605           b1->flags |= VNET_BUFFER_F_IS_NATED;
1606
1607           key1.addr = ip1->src_address;
1608           key1.port = udp1->src_port;
1609           key1.protocol = proto1;
1610           key1.fib_index = rx_fib_index1;
1611
1612           kv1.key = key1.as_u64;
1613
1614             if (PREDICT_FALSE(clib_bihash_search_8_8 (
1615                 &sm->per_thread_data[thread_index].in2out, &kv1, &value1) != 0))
1616             {
1617               if (is_slow_path)
1618                 {
1619                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
1620                       ip1, proto1, rx_fib_index1, thread_index)) && !is_output_feature)
1621                     goto trace01;
1622
1623                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1624                                      &s1, node, next1, thread_index);
1625                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1626                     goto trace01;
1627                 }
1628               else
1629                 {
1630                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1631                   goto trace01;
1632                 }
1633             }
1634           else
1635             {
1636               if (PREDICT_FALSE (value1.value == ~0ULL))
1637                 {
1638                   if (is_slow_path)
1639                     {
1640                       s1 = snat_in2out_lb(sm, b1, ip1, rx_fib_index1,
1641                                           thread_index, now, vm, node);
1642                       if (!s1)
1643                         next1 = SNAT_IN2OUT_NEXT_DROP;
1644                       goto trace01;
1645                     }
1646                   else
1647                     {
1648                       next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1649                       goto trace01;
1650                     }
1651                 }
1652               else
1653                 {
1654                   s1 = pool_elt_at_index (
1655                     sm->per_thread_data[thread_index].sessions,
1656                     value1.value);
1657                 }
1658             }
1659
1660           old_addr1 = ip1->src_address.as_u32;
1661           ip1->src_address = s1->out2in.addr;
1662           new_addr1 = ip1->src_address.as_u32;
1663           if (!is_output_feature)
1664             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1665
1666           sum1 = ip1->checksum;
1667           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1668                                  ip4_header_t,
1669                                  src_address /* changed member */);
1670           ip1->checksum = ip_csum_fold (sum1);
1671
1672           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1673             {
1674               old_port1 = tcp1->src_port;
1675               tcp1->src_port = s1->out2in.port;
1676               new_port1 = tcp1->src_port;
1677
1678               sum1 = tcp1->checksum;
1679               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1680                                      ip4_header_t,
1681                                      dst_address /* changed member */);
1682               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1683                                      ip4_header_t /* cheat */,
1684                                      length /* changed member */);
1685               tcp1->checksum = ip_csum_fold(sum1);
1686             }
1687           else
1688             {
1689               old_port1 = udp1->src_port;
1690               udp1->src_port = s1->out2in.port;
1691               udp1->checksum = 0;
1692             }
1693
1694           /* Accounting */
1695           s1->last_heard = now;
1696           s1->total_pkts++;
1697           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1698           /* Per-user LRU list maintenance */
1699           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1700                              s1->per_user_index);
1701           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1702                               s1->per_user_list_head_index,
1703                               s1->per_user_index);
1704         trace01:
1705
1706           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1707                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1708             {
1709               snat_in2out_trace_t *t =
1710                  vlib_add_trace (vm, node, b1, sizeof (*t));
1711               t->sw_if_index = sw_if_index1;
1712               t->next_index = next1;
1713               t->session_index = ~0;
1714               if (s1)
1715                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1716             }
1717
1718           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1719
1720           /* verify speculative enqueues, maybe switch current next frame */
1721           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1722                                            to_next, n_left_to_next,
1723                                            bi0, bi1, next0, next1);
1724         }
1725
1726       while (n_left_from > 0 && n_left_to_next > 0)
1727         {
1728           u32 bi0;
1729           vlib_buffer_t * b0;
1730           u32 next0;
1731           u32 sw_if_index0;
1732           ip4_header_t * ip0;
1733           ip_csum_t sum0;
1734           u32 new_addr0, old_addr0;
1735           u16 old_port0, new_port0;
1736           udp_header_t * udp0;
1737           tcp_header_t * tcp0;
1738           icmp46_header_t * icmp0;
1739           snat_session_key_t key0;
1740           u32 rx_fib_index0;
1741           u32 proto0;
1742           snat_session_t * s0 = 0;
1743           clib_bihash_kv_8_8_t kv0, value0;
1744           u32 iph_offset0 = 0;
1745
1746           /* speculatively enqueue b0 to the current next frame */
1747           bi0 = from[0];
1748           to_next[0] = bi0;
1749           from += 1;
1750           to_next += 1;
1751           n_left_from -= 1;
1752           n_left_to_next -= 1;
1753
1754           b0 = vlib_get_buffer (vm, bi0);
1755           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1756
1757           if (is_output_feature)
1758             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1759
1760           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1761                  iph_offset0);
1762
1763           udp0 = ip4_next_header (ip0);
1764           tcp0 = (tcp_header_t *) udp0;
1765           icmp0 = (icmp46_header_t *) udp0;
1766
1767           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1768           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1769                                    sw_if_index0);
1770
1771           if (PREDICT_FALSE(ip0->ttl == 1))
1772             {
1773               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1774               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1775                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1776                                            0);
1777               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1778               goto trace0;
1779             }
1780
1781           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1782
1783           /* Next configured feature, probably ip4-lookup */
1784           if (is_slow_path)
1785             {
1786               if (PREDICT_FALSE (proto0 == ~0))
1787                 {
1788                   s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1789                                                   thread_index, now, vm, node);
1790                   if (!s0)
1791                     next0 = SNAT_IN2OUT_NEXT_DROP;
1792                   goto trace0;
1793                 }
1794
1795               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1796                 {
1797                   next0 = icmp_in2out_slow_path
1798                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1799                      next0, now, thread_index, &s0);
1800                   goto trace0;
1801                 }
1802             }
1803           else
1804             {
1805               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1806                 {
1807                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1808                   goto trace0;
1809                 }
1810
1811               if (ip4_is_fragment (ip0))
1812                 {
1813                   next0 = SNAT_IN2OUT_NEXT_REASS;
1814                   goto trace0;
1815                 }
1816             }
1817
1818           key0.addr = ip0->src_address;
1819           key0.port = udp0->src_port;
1820           key0.protocol = proto0;
1821           key0.fib_index = rx_fib_index0;
1822
1823           kv0.key = key0.as_u64;
1824
1825           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out,
1826                                       &kv0, &value0))
1827             {
1828               if (is_slow_path)
1829                 {
1830                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1831                       ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature)
1832                     goto trace0;
1833
1834                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1835                                      &s0, node, next0, thread_index);
1836
1837                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1838                     goto trace0;
1839                 }
1840               else
1841                 {
1842                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1843                   goto trace0;
1844                 }
1845             }
1846           else
1847             {
1848               if (PREDICT_FALSE (value0.value == ~0ULL))
1849                 {
1850                   if (is_slow_path)
1851                     {
1852                       s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
1853                                           thread_index, now, vm, node);
1854                       if (!s0)
1855                         next0 = SNAT_IN2OUT_NEXT_DROP;
1856                       goto trace0;
1857                     }
1858                   else
1859                     {
1860                       next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1861                       goto trace0;
1862                     }
1863                 }
1864               else
1865                 {
1866                   s0 = pool_elt_at_index (
1867                     sm->per_thread_data[thread_index].sessions,
1868                     value0.value);
1869                 }
1870             }
1871
1872           b0->flags |= VNET_BUFFER_F_IS_NATED;
1873
1874           old_addr0 = ip0->src_address.as_u32;
1875           ip0->src_address = s0->out2in.addr;
1876           new_addr0 = ip0->src_address.as_u32;
1877           if (!is_output_feature)
1878             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1879
1880           sum0 = ip0->checksum;
1881           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1882                                  ip4_header_t,
1883                                  src_address /* changed member */);
1884           ip0->checksum = ip_csum_fold (sum0);
1885
1886           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1887             {
1888               old_port0 = tcp0->src_port;
1889               tcp0->src_port = s0->out2in.port;
1890               new_port0 = tcp0->src_port;
1891
1892               sum0 = tcp0->checksum;
1893               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1894                                      ip4_header_t,
1895                                      dst_address /* changed member */);
1896               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1897                                      ip4_header_t /* cheat */,
1898                                      length /* changed member */);
1899               tcp0->checksum = ip_csum_fold(sum0);
1900             }
1901           else
1902             {
1903               old_port0 = udp0->src_port;
1904               udp0->src_port = s0->out2in.port;
1905               udp0->checksum = 0;
1906             }
1907
1908           /* Accounting */
1909           s0->last_heard = now;
1910           s0->total_pkts++;
1911           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1912           /* Per-user LRU list maintenance */
1913           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1914                              s0->per_user_index);
1915           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1916                               s0->per_user_list_head_index,
1917                               s0->per_user_index);
1918
1919         trace0:
1920           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1921                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1922             {
1923               snat_in2out_trace_t *t =
1924                  vlib_add_trace (vm, node, b0, sizeof (*t));
1925               t->is_slow_path = is_slow_path;
1926               t->sw_if_index = sw_if_index0;
1927               t->next_index = next0;
1928                   t->session_index = ~0;
1929               if (s0)
1930                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1931             }
1932
1933           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1934
1935           /* verify speculative enqueue, maybe switch current next frame */
1936           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1937                                            to_next, n_left_to_next,
1938                                            bi0, next0);
1939         }
1940
1941       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1942     }
1943
1944   vlib_node_increment_counter (vm, stats_node_index,
1945                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1946                                pkts_processed);
1947   return frame->n_vectors;
1948 }
1949
1950 static uword
1951 snat_in2out_fast_path_fn (vlib_main_t * vm,
1952                           vlib_node_runtime_t * node,
1953                           vlib_frame_t * frame)
1954 {
1955   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0);
1956 }
1957
1958 VLIB_REGISTER_NODE (snat_in2out_node) = {
1959   .function = snat_in2out_fast_path_fn,
1960   .name = "nat44-in2out",
1961   .vector_size = sizeof (u32),
1962   .format_trace = format_snat_in2out_trace,
1963   .type = VLIB_NODE_TYPE_INTERNAL,
1964
1965   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1966   .error_strings = snat_in2out_error_strings,
1967
1968   .runtime_data_bytes = sizeof (snat_runtime_t),
1969
1970   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1971
1972   /* edit / add dispositions here */
1973   .next_nodes = {
1974     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1975     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1976     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
1977     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1978     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
1979   },
1980 };
1981
1982 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1983
1984 static uword
1985 snat_in2out_output_fast_path_fn (vlib_main_t * vm,
1986                                  vlib_node_runtime_t * node,
1987                                  vlib_frame_t * frame)
1988 {
1989   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1);
1990 }
1991
1992 VLIB_REGISTER_NODE (snat_in2out_output_node) = {
1993   .function = snat_in2out_output_fast_path_fn,
1994   .name = "nat44-in2out-output",
1995   .vector_size = sizeof (u32),
1996   .format_trace = format_snat_in2out_trace,
1997   .type = VLIB_NODE_TYPE_INTERNAL,
1998
1999   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2000   .error_strings = snat_in2out_error_strings,
2001
2002   .runtime_data_bytes = sizeof (snat_runtime_t),
2003
2004   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2005
2006   /* edit / add dispositions here */
2007   .next_nodes = {
2008     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2009     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2010     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2011     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2012     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2013   },
2014 };
2015
2016 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
2017                               snat_in2out_output_fast_path_fn);
2018
2019 static uword
2020 snat_in2out_slow_path_fn (vlib_main_t * vm,
2021                           vlib_node_runtime_t * node,
2022                           vlib_frame_t * frame)
2023 {
2024   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0);
2025 }
2026
2027 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
2028   .function = snat_in2out_slow_path_fn,
2029   .name = "nat44-in2out-slowpath",
2030   .vector_size = sizeof (u32),
2031   .format_trace = format_snat_in2out_trace,
2032   .type = VLIB_NODE_TYPE_INTERNAL,
2033
2034   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2035   .error_strings = snat_in2out_error_strings,
2036
2037   .runtime_data_bytes = sizeof (snat_runtime_t),
2038
2039   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2040
2041   /* edit / add dispositions here */
2042   .next_nodes = {
2043     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2044     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2045     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2046     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2047     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2048   },
2049 };
2050
2051 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
2052                               snat_in2out_slow_path_fn);
2053
2054 static uword
2055 snat_in2out_output_slow_path_fn (vlib_main_t * vm,
2056                                  vlib_node_runtime_t * node,
2057                                  vlib_frame_t * frame)
2058 {
2059   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1);
2060 }
2061
2062 VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
2063   .function = snat_in2out_output_slow_path_fn,
2064   .name = "nat44-in2out-output-slowpath",
2065   .vector_size = sizeof (u32),
2066   .format_trace = format_snat_in2out_trace,
2067   .type = VLIB_NODE_TYPE_INTERNAL,
2068
2069   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2070   .error_strings = snat_in2out_error_strings,
2071
2072   .runtime_data_bytes = sizeof (snat_runtime_t),
2073
2074   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2075
2076   /* edit / add dispositions here */
2077   .next_nodes = {
2078     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2079     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2080     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2081     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2082     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2083   },
2084 };
2085
2086 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
2087                               snat_in2out_output_slow_path_fn);
2088
2089 extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local;
2090
2091 static uword
2092 nat44_hairpinning_fn (vlib_main_t * vm,
2093                       vlib_node_runtime_t * node,
2094                       vlib_frame_t * frame)
2095 {
2096   u32 n_left_from, * from, * to_next;
2097   snat_in2out_next_t next_index;
2098   u32 pkts_processed = 0;
2099   snat_main_t * sm = &snat_main;
2100   vnet_feature_main_t *fm = &feature_main;
2101   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
2102   vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index];
2103
2104   from = vlib_frame_vector_args (frame);
2105   n_left_from = frame->n_vectors;
2106   next_index = node->cached_next_index;
2107
2108   while (n_left_from > 0)
2109     {
2110       u32 n_left_to_next;
2111
2112       vlib_get_next_frame (vm, node, next_index,
2113                            to_next, n_left_to_next);
2114
2115       while (n_left_from > 0 && n_left_to_next > 0)
2116         {
2117           u32 bi0;
2118           vlib_buffer_t * b0;
2119           u32 next0;
2120           ip4_header_t * ip0;
2121           u32 proto0;
2122           udp_header_t * udp0;
2123           tcp_header_t * tcp0;
2124
2125           /* speculatively enqueue b0 to the current next frame */
2126           bi0 = from[0];
2127           to_next[0] = bi0;
2128           from += 1;
2129           to_next += 1;
2130           n_left_from -= 1;
2131           n_left_to_next -= 1;
2132
2133           b0 = vlib_get_buffer (vm, bi0);
2134           ip0 = vlib_buffer_get_current (b0);
2135           udp0 = ip4_next_header (ip0);
2136           tcp0 = (tcp_header_t *) udp0;
2137
2138           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2139
2140           vnet_get_config_data (&cm->config_main, &b0->current_config_index,
2141                                 &next0, 0);
2142
2143           if (snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0))
2144             next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2145
2146           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2147
2148           /* verify speculative enqueue, maybe switch current next frame */
2149           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2150                                            to_next, n_left_to_next,
2151                                            bi0, next0);
2152          }
2153
2154       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2155     }
2156
2157   vlib_node_increment_counter (vm, nat44_hairpinning_node.index,
2158                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2159                                pkts_processed);
2160   return frame->n_vectors;
2161 }
2162
2163 VLIB_REGISTER_NODE (nat44_hairpinning_node) = {
2164   .function = nat44_hairpinning_fn,
2165   .name = "nat44-hairpinning",
2166   .vector_size = sizeof (u32),
2167   .type = VLIB_NODE_TYPE_INTERNAL,
2168   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2169   .error_strings = snat_in2out_error_strings,
2170   .n_next_nodes = 2,
2171   .next_nodes = {
2172     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2173     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2174   },
2175 };
2176
2177 VLIB_NODE_FUNCTION_MULTIARCH (nat44_hairpinning_node,
2178                               nat44_hairpinning_fn);
2179
2180 static inline void
2181 nat44_reass_hairpinning (snat_main_t *sm,
2182                          vlib_buffer_t * b0,
2183                          ip4_header_t * ip0,
2184                          u16 sport,
2185                          u16 dport,
2186                          u32 proto0)
2187 {
2188   snat_session_key_t key0, sm0;
2189   snat_session_t * s0;
2190   clib_bihash_kv_8_8_t kv0, value0;
2191   ip_csum_t sum0;
2192   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
2193   u16 new_dst_port0, old_dst_port0;
2194   udp_header_t * udp0;
2195   tcp_header_t * tcp0;
2196
2197   key0.addr = ip0->dst_address;
2198   key0.port = dport;
2199   key0.protocol = proto0;
2200   key0.fib_index = sm->outside_fib_index;
2201   kv0.key = key0.as_u64;
2202
2203   udp0 = ip4_next_header (ip0);
2204
2205   /* Check if destination is static mappings */
2206   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
2207     {
2208       new_dst_addr0 = sm0.addr.as_u32;
2209       new_dst_port0 = sm0.port;
2210       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2211     }
2212   /* or active sessions */
2213   else
2214     {
2215       if (sm->num_workers > 1)
2216         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
2217       else
2218         ti = sm->num_workers;
2219
2220       if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
2221         {
2222           si = value0.value;
2223           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
2224           new_dst_addr0 = s0->in2out.addr.as_u32;
2225           new_dst_port0 = s0->in2out.port;
2226           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
2227         }
2228     }
2229
2230   /* Destination is behind the same NAT, use internal address and port */
2231   if (new_dst_addr0)
2232     {
2233       old_dst_addr0 = ip0->dst_address.as_u32;
2234       ip0->dst_address.as_u32 = new_dst_addr0;
2235       sum0 = ip0->checksum;
2236       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2237                              ip4_header_t, dst_address);
2238       ip0->checksum = ip_csum_fold (sum0);
2239
2240       old_dst_port0 = dport;
2241       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0 &&
2242                        ip4_is_first_fragment (ip0)))
2243         {
2244           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2245             {
2246               tcp0 = ip4_next_header (ip0);
2247               tcp0->dst = new_dst_port0;
2248               sum0 = tcp0->checksum;
2249               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2250                                      ip4_header_t, dst_address);
2251               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
2252                                      ip4_header_t /* cheat */, length);
2253               tcp0->checksum = ip_csum_fold(sum0);
2254             }
2255           else
2256             {
2257               udp0->dst_port = new_dst_port0;
2258               udp0->checksum = 0;
2259             }
2260         }
2261       else
2262         {
2263           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2264             {
2265               tcp0 = ip4_next_header (ip0);
2266               sum0 = tcp0->checksum;
2267               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2268                                      ip4_header_t, dst_address);
2269               tcp0->checksum = ip_csum_fold(sum0);
2270             }
2271         }
2272     }
2273 }
2274
/*
 * Node function registered for "nat44-in2out-reass": in2out translation
 * of IPv4 fragments.
 *
 * Per buffer:
 *  - a reassembly context is looked up / created from the source address,
 *    destination address, fragment id and protocol;
 *  - on the first fragment the session is found in (or created for) the
 *    per-thread in2out table, its index is stored in the reassembly
 *    context, and fragments collected so far are fetched into
 *    fragments_to_loopback;
 *  - a non-first fragment that arrives before the session is known is
 *    added to the reassembly context ("cached") and not forwarded now;
 *  - otherwise the source address (and, on the first fragment, the source
 *    port) is rewritten, checksums are patched, hairpinning is applied
 *    and session accounting is updated.
 *
 * Once the input frame is exhausted, pending loopback fragments are
 * copied back into the frame's vector and processed by the same loop.
 *
 * Returns the number of vectors in the input frame.
 */
static uword
nat44_in2out_reass_node_fn (vlib_main_t * vm,
                            vlib_node_runtime_t * node,
                            vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next;
  snat_in2out_next_t next_index;
  u32 pkts_processed = 0;
  snat_main_t *sm = &snat_main;
  f64 now = vlib_time_now (vm);
  u32 thread_index = vlib_get_thread_index ();
  snat_main_per_thread_data_t *per_thread_data =
    &sm->per_thread_data[thread_index];
  /* Buffer-index vectors filled in by the reassembly helpers below. */
  u32 *fragments_to_drop = 0;
  u32 *fragments_to_loopback = 0;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
       {
          u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
          vlib_buffer_t *b0;
          u32 next0;
          /* Set when the buffer is kept by the reassembly context. */
          u8 cached0 = 0;
          ip4_header_t *ip0;
          nat_reass_ip4_t *reass0;
          udp_header_t * udp0;
          tcp_header_t * tcp0;
          snat_session_key_t key0;
          clib_bihash_kv_8_8_t kv0, value0;
          snat_session_t * s0 = 0;
          u16 old_port0, new_port0;
          ip_csum_t sum0;

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          next0 = SNAT_IN2OUT_NEXT_LOOKUP;

          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
          rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
                                                               sw_if_index0);

          /* NOTE(review): presumably a global "drop fragments" setting; the
           * argument 0 is assumed to select IPv4 - confirm in nat_reass. */
          if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
            {
              next0 = SNAT_IN2OUT_NEXT_DROP;
              b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
              goto trace0;
            }

          ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
          /* Both L4 views alias the bytes following the IP header; they
           * are only dereferenced below on the first fragment. */
          udp0 = ip4_next_header (ip0);
          tcp0 = (tcp_header_t *) udp0;
          proto0 = ip_proto_to_snat_proto (ip0->protocol);

          reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
                                                 ip0->dst_address,
                                                 ip0->fragment_id,
                                                 ip0->protocol,
                                                 1,
                                                 &fragments_to_drop);

          /* No context available: counted as MAX_REASS and dropped. */
          if (PREDICT_FALSE (!reass0))
            {
              next0 = SNAT_IN2OUT_NEXT_DROP;
              b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_REASS];
              goto trace0;
            }

          if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
            {
              /* First fragment carries the L4 header: resolve the session. */
              key0.addr = ip0->src_address;
              key0.port = udp0->src_port;
              key0.protocol = proto0;
              key0.fib_index = rx_fib_index0;
              kv0.key = key0.as_u64;

              /* Non-zero return means no existing in2out session. */
              if (clib_bihash_search_8_8 (&per_thread_data->in2out, &kv0, &value0))
                {
                  /* Forwarded untranslated (next0 is still LOOKUP). */
                  if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
                      ip0, proto0, rx_fib_index0, thread_index)))
                    goto trace0;

                  next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
                                     &s0, node, next0, thread_index);

                  if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
                    goto trace0;

                  /* Remember the session for the remaining fragments. */
                  reass0->sess_index = s0 - per_thread_data->sessions;
                }
              else
                {
                  s0 = pool_elt_at_index (per_thread_data->sessions,
                                          value0.value);
                  reass0->sess_index = value0.value;
                }
              /* Collect fragments held by the context for re-processing. */
              nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
            }
          else
            {
              /* Session not known yet (first fragment not seen): hand the
               * buffer to the reassembly context instead of forwarding. */
              if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
                {
                  if (nat_ip4_reass_add_fragment (reass0, bi0))
                    {
                      b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_FRAG];
                      next0 = SNAT_IN2OUT_NEXT_DROP;
                      goto trace0;
                    }
                  cached0 = 1;
                  goto trace0;
                }
              s0 = pool_elt_at_index (per_thread_data->sessions,
                                      reass0->sess_index);
            }

          /* Rewrite the source address and patch the IP header checksum. */
          old_addr0 = ip0->src_address.as_u32;
          ip0->src_address = s0->out2in.addr;
          new_addr0 = ip0->src_address.as_u32;
          vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;

          sum0 = ip0->checksum;
          sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
                                 ip4_header_t,
                                 src_address /* changed member */);
          ip0->checksum = ip_csum_fold (sum0);

          /* Ports and L4 checksum are rewritten on the first fragment only. */
          if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
            {
              if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
                {
                  old_port0 = tcp0->src_port;
                  tcp0->src_port = s0->out2in.port;
                  new_port0 = tcp0->src_port;

                  sum0 = tcp0->checksum;
                  sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
                                         ip4_header_t,
                                         dst_address /* changed member */);
                  sum0 = ip_csum_update (sum0, old_port0, new_port0,
                                         ip4_header_t /* cheat */,
                                         length /* changed member */);
                  tcp0->checksum = ip_csum_fold(sum0);
                }
              else
                {
                  /* Non-TCP: the checksum field is cleared rather than
                   * recomputed. */
                  old_port0 = udp0->src_port;
                  udp0->src_port = s0->out2in.port;
                  udp0->checksum = 0;
                }
            }

          /* Hairpinning */
          nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port,
                                   s0->ext_host_port, proto0);

          /* Accounting */
          s0->last_heard = now;
          s0->total_pkts++;
          s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
          /* Per-user LRU list maintenance */
          clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                             s0->per_user_index);
          clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                              s0->per_user_list_head_index,
                              s0->per_user_index);

        trace0:
          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              nat44_in2out_reass_trace_t *t =
                 vlib_add_trace (vm, node, b0, sizeof (*t));
              t->cached = cached0;
              t->sw_if_index = sw_if_index0;
              t->next_index = next0;
            }

          if (cached0)
            {
              /* Undo the speculative enqueue: the buffer is not forwarded. */
              n_left_to_next++;
              to_next--;
            }
          else
            {
              pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;

              /* verify speculative enqueue, maybe switch current next frame */
              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                               to_next, n_left_to_next,
                                               bi0, next0);
            }

          /* Input exhausted: refill the frame's own vector with pending
           * loopback fragments (at most VLIB_FRAME_SIZE at a time, taken
           * from the tail of the vector) and keep looping. */
          if (n_left_from == 0 && vec_len (fragments_to_loopback))
            {
              from = vlib_frame_vector_args (frame);
              u32 len = vec_len (fragments_to_loopback);
              if (len <= VLIB_FRAME_SIZE)
                {
                  clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
                  n_left_from = len;
                  vec_reset_length (fragments_to_loopback);
                }
              else
                {
                  clib_memcpy (from,
                               fragments_to_loopback + (len - VLIB_FRAME_SIZE),
                               sizeof (u32) * VLIB_FRAME_SIZE);
                  n_left_from = VLIB_FRAME_SIZE;
                  _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
                }
            }
       }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  vlib_node_increment_counter (vm, nat44_in2out_reass_node.index,
                               SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
                               pkts_processed);

  /* Hand the collected fragments_to_drop to the DROP next node with the
   * DROP_FRAGMENT error. */
  nat_send_all_to_node (vm, fragments_to_drop, node,
                        &node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT],
                        SNAT_IN2OUT_NEXT_DROP);

  vec_free (fragments_to_drop);
  vec_free (fragments_to_loopback);
  return frame->n_vectors;
}
2518
2519 VLIB_REGISTER_NODE (nat44_in2out_reass_node) = {
2520   .function = nat44_in2out_reass_node_fn,
2521   .name = "nat44-in2out-reass",
2522   .vector_size = sizeof (u32),
2523   .format_trace = format_nat44_in2out_reass_trace,
2524   .type = VLIB_NODE_TYPE_INTERNAL,
2525
2526   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2527   .error_strings = snat_in2out_error_strings,
2528
2529   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2530   .next_nodes = {
2531     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2532     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2533     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2534     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2535     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2536   },
2537 };
2538
2539 VLIB_NODE_FUNCTION_MULTIARCH (nat44_in2out_reass_node,
2540                               nat44_in2out_reass_node_fn);
2541
2542 /**************************/
2543 /*** deterministic mode ***/
2544 /**************************/
2545 static uword
2546 snat_det_in2out_node_fn (vlib_main_t * vm,
2547                          vlib_node_runtime_t * node,
2548                          vlib_frame_t * frame)
2549 {
2550   u32 n_left_from, * from, * to_next;
2551   snat_in2out_next_t next_index;
2552   u32 pkts_processed = 0;
2553   snat_main_t * sm = &snat_main;
2554   u32 now = (u32) vlib_time_now (vm);
2555   u32 thread_index = vlib_get_thread_index ();
2556
2557   from = vlib_frame_vector_args (frame);
2558   n_left_from = frame->n_vectors;
2559   next_index = node->cached_next_index;
2560
2561   while (n_left_from > 0)
2562     {
2563       u32 n_left_to_next;
2564
2565       vlib_get_next_frame (vm, node, next_index,
2566                            to_next, n_left_to_next);
2567
2568       while (n_left_from >= 4 && n_left_to_next >= 2)
2569         {
2570           u32 bi0, bi1;
2571           vlib_buffer_t * b0, * b1;
2572           u32 next0, next1;
2573           u32 sw_if_index0, sw_if_index1;
2574           ip4_header_t * ip0, * ip1;
2575           ip_csum_t sum0, sum1;
2576           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
2577           u16 old_port0, new_port0, lo_port0, i0;
2578           u16 old_port1, new_port1, lo_port1, i1;
2579           udp_header_t * udp0, * udp1;
2580           tcp_header_t * tcp0, * tcp1;
2581           u32 proto0, proto1;
2582           snat_det_out_key_t key0, key1;
2583           snat_det_map_t * dm0, * dm1;
2584           snat_det_session_t * ses0 = 0, * ses1 = 0;
2585           u32 rx_fib_index0, rx_fib_index1;
2586           icmp46_header_t * icmp0, * icmp1;
2587
2588           /* Prefetch next iteration. */
2589           {
2590             vlib_buffer_t * p2, * p3;
2591
2592             p2 = vlib_get_buffer (vm, from[2]);
2593             p3 = vlib_get_buffer (vm, from[3]);
2594
2595             vlib_prefetch_buffer_header (p2, LOAD);
2596             vlib_prefetch_buffer_header (p3, LOAD);
2597
2598             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2599             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2600           }
2601
2602           /* speculatively enqueue b0 and b1 to the current next frame */
2603           to_next[0] = bi0 = from[0];
2604           to_next[1] = bi1 = from[1];
2605           from += 2;
2606           to_next += 2;
2607           n_left_from -= 2;
2608           n_left_to_next -= 2;
2609
2610           b0 = vlib_get_buffer (vm, bi0);
2611           b1 = vlib_get_buffer (vm, bi1);
2612
2613           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2614           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
2615
2616           ip0 = vlib_buffer_get_current (b0);
2617           udp0 = ip4_next_header (ip0);
2618           tcp0 = (tcp_header_t *) udp0;
2619
2620           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2621
2622           if (PREDICT_FALSE(ip0->ttl == 1))
2623             {
2624               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2625               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2626                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2627                                            0);
2628               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2629               goto trace0;
2630             }
2631
2632           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2633
2634           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2635             {
2636               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2637               icmp0 = (icmp46_header_t *) udp0;
2638
2639               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2640                                   rx_fib_index0, node, next0, thread_index,
2641                                   &ses0, &dm0);
2642               goto trace0;
2643             }
2644
2645           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2646           if (PREDICT_FALSE(!dm0))
2647             {
2648               clib_warning("no match for internal host %U",
2649                            format_ip4_address, &ip0->src_address);
2650               next0 = SNAT_IN2OUT_NEXT_DROP;
2651               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2652               goto trace0;
2653             }
2654
2655           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2656
2657           key0.ext_host_addr = ip0->dst_address;
2658           key0.ext_host_port = tcp0->dst;
2659
2660           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2661           if (PREDICT_FALSE(!ses0))
2662             {
2663               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2664                 {
2665                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2666                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2667
2668                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2669                     continue;
2670
2671                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2672                   break;
2673                 }
2674               if (PREDICT_FALSE(!ses0))
2675                 {
2676                   /* too many sessions for user, send ICMP error packet */
2677
2678                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2679                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2680                                                ICMP4_destination_unreachable_destination_unreachable_host,
2681                                                0);
2682                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2683                   goto trace0;
2684                 }
2685             }
2686
2687           new_port0 = ses0->out.out_port;
2688
2689           old_addr0.as_u32 = ip0->src_address.as_u32;
2690           ip0->src_address.as_u32 = new_addr0.as_u32;
2691           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2692
2693           sum0 = ip0->checksum;
2694           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2695                                  ip4_header_t,
2696                                  src_address /* changed member */);
2697           ip0->checksum = ip_csum_fold (sum0);
2698
2699           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2700             {
2701               if (tcp0->flags & TCP_FLAG_SYN)
2702                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2703               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2704                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2705               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2706                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2707               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2708                 snat_det_ses_close(dm0, ses0);
2709               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2710                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2711               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2712                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2713
2714               old_port0 = tcp0->src;
2715               tcp0->src = new_port0;
2716
2717               sum0 = tcp0->checksum;
2718               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2719                                      ip4_header_t,
2720                                      dst_address /* changed member */);
2721               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2722                                      ip4_header_t /* cheat */,
2723                                      length /* changed member */);
2724               tcp0->checksum = ip_csum_fold(sum0);
2725             }
2726           else
2727             {
2728               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2729               old_port0 = udp0->src_port;
2730               udp0->src_port = new_port0;
2731               udp0->checksum = 0;
2732             }
2733
2734           switch(ses0->state)
2735             {
2736             case SNAT_SESSION_UDP_ACTIVE:
2737                 ses0->expire = now + sm->udp_timeout;
2738                 break;
2739             case SNAT_SESSION_TCP_SYN_SENT:
2740             case SNAT_SESSION_TCP_FIN_WAIT:
2741             case SNAT_SESSION_TCP_CLOSE_WAIT:
2742             case SNAT_SESSION_TCP_LAST_ACK:
2743                 ses0->expire = now + sm->tcp_transitory_timeout;
2744                 break;
2745             case SNAT_SESSION_TCP_ESTABLISHED:
2746                 ses0->expire = now + sm->tcp_established_timeout;
2747                 break;
2748             }
2749
2750         trace0:
2751           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2752                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2753             {
2754               snat_in2out_trace_t *t =
2755                  vlib_add_trace (vm, node, b0, sizeof (*t));
2756               t->is_slow_path = 0;
2757               t->sw_if_index = sw_if_index0;
2758               t->next_index = next0;
2759               t->session_index = ~0;
2760               if (ses0)
2761                 t->session_index = ses0 - dm0->sessions;
2762             }
2763
2764           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2765
2766           ip1 = vlib_buffer_get_current (b1);
2767           udp1 = ip4_next_header (ip1);
2768           tcp1 = (tcp_header_t *) udp1;
2769
2770           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2771
2772           if (PREDICT_FALSE(ip1->ttl == 1))
2773             {
2774               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2775               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2776                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2777                                            0);
2778               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2779               goto trace1;
2780             }
2781
2782           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2783
2784           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
2785             {
2786               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
2787               icmp1 = (icmp46_header_t *) udp1;
2788
2789               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
2790                                   rx_fib_index1, node, next1, thread_index,
2791                                   &ses1, &dm1);
2792               goto trace1;
2793             }
2794
2795           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
2796           if (PREDICT_FALSE(!dm1))
2797             {
2798               clib_warning("no match for internal host %U",
2799                            format_ip4_address, &ip0->src_address);
2800               next1 = SNAT_IN2OUT_NEXT_DROP;
2801               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2802               goto trace1;
2803             }
2804
2805           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
2806
2807           key1.ext_host_addr = ip1->dst_address;
2808           key1.ext_host_port = tcp1->dst;
2809
2810           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
2811           if (PREDICT_FALSE(!ses1))
2812             {
2813               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
2814                 {
2815                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
2816                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
2817
2818                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
2819                     continue;
2820
2821                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
2822                   break;
2823                 }
2824               if (PREDICT_FALSE(!ses1))
2825                 {
2826                   /* too many sessions for user, send ICMP error packet */
2827
2828                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2829                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
2830                                                ICMP4_destination_unreachable_destination_unreachable_host,
2831                                                0);
2832                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2833                   goto trace1;
2834                 }
2835             }
2836
2837           new_port1 = ses1->out.out_port;
2838
2839           old_addr1.as_u32 = ip1->src_address.as_u32;
2840           ip1->src_address.as_u32 = new_addr1.as_u32;
2841           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2842
2843           sum1 = ip1->checksum;
2844           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2845                                  ip4_header_t,
2846                                  src_address /* changed member */);
2847           ip1->checksum = ip_csum_fold (sum1);
2848
2849           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
2850             {
2851               if (tcp1->flags & TCP_FLAG_SYN)
2852                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
2853               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
2854                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2855               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
2856                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
2857               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
2858                 snat_det_ses_close(dm1, ses1);
2859               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2860                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
2861               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
2862                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2863
2864               old_port1 = tcp1->src;
2865               tcp1->src = new_port1;
2866
2867               sum1 = tcp1->checksum;
2868               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2869                                      ip4_header_t,
2870                                      dst_address /* changed member */);
2871               sum1 = ip_csum_update (sum1, old_port1, new_port1,
2872                                      ip4_header_t /* cheat */,
2873                                      length /* changed member */);
2874               tcp1->checksum = ip_csum_fold(sum1);
2875             }
2876           else
2877             {
2878               ses1->state = SNAT_SESSION_UDP_ACTIVE;
2879               old_port1 = udp1->src_port;
2880               udp1->src_port = new_port1;
2881               udp1->checksum = 0;
2882             }
2883
2884           switch(ses1->state)
2885             {
2886             case SNAT_SESSION_UDP_ACTIVE:
2887                 ses1->expire = now + sm->udp_timeout;
2888                 break;
2889             case SNAT_SESSION_TCP_SYN_SENT:
2890             case SNAT_SESSION_TCP_FIN_WAIT:
2891             case SNAT_SESSION_TCP_CLOSE_WAIT:
2892             case SNAT_SESSION_TCP_LAST_ACK:
2893                 ses1->expire = now + sm->tcp_transitory_timeout;
2894                 break;
2895             case SNAT_SESSION_TCP_ESTABLISHED:
2896                 ses1->expire = now + sm->tcp_established_timeout;
2897                 break;
2898             }
2899
2900         trace1:
2901           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2902                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2903             {
2904               snat_in2out_trace_t *t =
2905                  vlib_add_trace (vm, node, b1, sizeof (*t));
2906               t->is_slow_path = 0;
2907               t->sw_if_index = sw_if_index1;
2908               t->next_index = next1;
2909               t->session_index = ~0;
2910               if (ses1)
2911                 t->session_index = ses1 - dm1->sessions;
2912             }
2913
2914           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
2915
2916           /* verify speculative enqueues, maybe switch current next frame */
2917           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2918                                            to_next, n_left_to_next,
2919                                            bi0, bi1, next0, next1);
2920          }
2921
2922       while (n_left_from > 0 && n_left_to_next > 0)
2923         {
2924           u32 bi0;
2925           vlib_buffer_t * b0;
2926           u32 next0;
2927           u32 sw_if_index0;
2928           ip4_header_t * ip0;
2929           ip_csum_t sum0;
2930           ip4_address_t new_addr0, old_addr0;
2931           u16 old_port0, new_port0, lo_port0, i0;
2932           udp_header_t * udp0;
2933           tcp_header_t * tcp0;
2934           u32 proto0;
2935           snat_det_out_key_t key0;
2936           snat_det_map_t * dm0;
2937           snat_det_session_t * ses0 = 0;
2938           u32 rx_fib_index0;
2939           icmp46_header_t * icmp0;
2940
2941           /* speculatively enqueue b0 to the current next frame */
2942           bi0 = from[0];
2943           to_next[0] = bi0;
2944           from += 1;
2945           to_next += 1;
2946           n_left_from -= 1;
2947           n_left_to_next -= 1;
2948
2949           b0 = vlib_get_buffer (vm, bi0);
2950           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2951
2952           ip0 = vlib_buffer_get_current (b0);
2953           udp0 = ip4_next_header (ip0);
2954           tcp0 = (tcp_header_t *) udp0;
2955
2956           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2957
2958           if (PREDICT_FALSE(ip0->ttl == 1))
2959             {
2960               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2961               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2962                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2963                                            0);
2964               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2965               goto trace00;
2966             }
2967
2968           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2969
2970           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2971             {
2972               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2973               icmp0 = (icmp46_header_t *) udp0;
2974
2975               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2976                                   rx_fib_index0, node, next0, thread_index,
2977                                   &ses0, &dm0);
2978               goto trace00;
2979             }
2980
2981           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2982           if (PREDICT_FALSE(!dm0))
2983             {
2984               clib_warning("no match for internal host %U",
2985                            format_ip4_address, &ip0->src_address);
2986               next0 = SNAT_IN2OUT_NEXT_DROP;
2987               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2988               goto trace00;
2989             }
2990
2991           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2992
2993           key0.ext_host_addr = ip0->dst_address;
2994           key0.ext_host_port = tcp0->dst;
2995
2996           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2997           if (PREDICT_FALSE(!ses0))
2998             {
2999               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
3000                 {
3001                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
3002                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
3003
3004                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
3005                     continue;
3006
3007                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
3008                   break;
3009                 }
3010               if (PREDICT_FALSE(!ses0))
3011                 {
3012                   /* too many sessions for user, send ICMP error packet */
3013
3014                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3015                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
3016                                                ICMP4_destination_unreachable_destination_unreachable_host,
3017                                                0);
3018                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3019                   goto trace00;
3020                 }
3021             }
3022
3023           new_port0 = ses0->out.out_port;
3024
3025           old_addr0.as_u32 = ip0->src_address.as_u32;
3026           ip0->src_address.as_u32 = new_addr0.as_u32;
3027           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
3028
3029           sum0 = ip0->checksum;
3030           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3031                                  ip4_header_t,
3032                                  src_address /* changed member */);
3033           ip0->checksum = ip_csum_fold (sum0);
3034
3035           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3036             {
3037               if (tcp0->flags & TCP_FLAG_SYN)
3038                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
3039               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
3040                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
3041               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
3042                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
3043               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
3044                 snat_det_ses_close(dm0, ses0);
3045               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
3046                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
3047               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
3048                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
3049
3050               old_port0 = tcp0->src;
3051               tcp0->src = new_port0;
3052
3053               sum0 = tcp0->checksum;
3054               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3055                                      ip4_header_t,
3056                                      dst_address /* changed member */);
3057               sum0 = ip_csum_update (sum0, old_port0, new_port0,
3058                                      ip4_header_t /* cheat */,
3059                                      length /* changed member */);
3060               tcp0->checksum = ip_csum_fold(sum0);
3061             }
3062           else
3063             {
3064               ses0->state = SNAT_SESSION_UDP_ACTIVE;
3065               old_port0 = udp0->src_port;
3066               udp0->src_port = new_port0;
3067               udp0->checksum = 0;
3068             }
3069
3070           switch(ses0->state)
3071             {
3072             case SNAT_SESSION_UDP_ACTIVE:
3073                 ses0->expire = now + sm->udp_timeout;
3074                 break;
3075             case SNAT_SESSION_TCP_SYN_SENT:
3076             case SNAT_SESSION_TCP_FIN_WAIT:
3077             case SNAT_SESSION_TCP_CLOSE_WAIT:
3078             case SNAT_SESSION_TCP_LAST_ACK:
3079                 ses0->expire = now + sm->tcp_transitory_timeout;
3080                 break;
3081             case SNAT_SESSION_TCP_ESTABLISHED:
3082                 ses0->expire = now + sm->tcp_established_timeout;
3083                 break;
3084             }
3085
3086         trace00:
3087           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3088                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3089             {
3090               snat_in2out_trace_t *t =
3091                  vlib_add_trace (vm, node, b0, sizeof (*t));
3092               t->is_slow_path = 0;
3093               t->sw_if_index = sw_if_index0;
3094               t->next_index = next0;
3095               t->session_index = ~0;
3096               if (ses0)
3097                 t->session_index = ses0 - dm0->sessions;
3098             }
3099
3100           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3101
3102           /* verify speculative enqueue, maybe switch current next frame */
3103           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3104                                            to_next, n_left_to_next,
3105                                            bi0, next0);
3106         }
3107
3108       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3109     }
3110
3111   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
3112                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3113                                pkts_processed);
3114   return frame->n_vectors;
3115 }
3116
3117 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
3118   .function = snat_det_in2out_node_fn,
3119   .name = "nat44-det-in2out",
3120   .vector_size = sizeof (u32),
3121   .format_trace = format_snat_in2out_trace,
3122   .type = VLIB_NODE_TYPE_INTERNAL,
3123
3124   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3125   .error_strings = snat_in2out_error_strings,
3126
3127   .runtime_data_bytes = sizeof (snat_runtime_t),
3128
3129   .n_next_nodes = 3,
3130
3131   /* edit / add dispositions here */
3132   .next_nodes = {
3133     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3134     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3135     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3136   },
3137 };
3138
3139 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
3140
3141 /**
3142  * Get address and port values to be used for ICMP packet translation
3143  * and create session if needed
3144  *
3145  * @param[in,out] sm             NAT main
3146  * @param[in,out] node           NAT node runtime
3147  * @param[in] thread_index       thread index
3148  * @param[in,out] b0             buffer containing packet to be translated
3149  * @param[out] p_proto           protocol used for matching
3150  * @param[out] p_value           address and port after NAT translation
3151  * @param[out] p_dont_translate  if packet should not be translated
3152  * @param d                      optional parameter
3153  * @param e                      optional parameter
3154  */
3155 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
3156                           u32 thread_index, vlib_buffer_t *b0,
3157                           ip4_header_t *ip0, u8 *p_proto,
3158                           snat_session_key_t *p_value,
3159                           u8 *p_dont_translate, void *d, void *e)
3160 {
3161   icmp46_header_t *icmp0;
3162   u32 sw_if_index0;
3163   u32 rx_fib_index0;
3164   u8 protocol;
3165   snat_det_out_key_t key0;
3166   u8 dont_translate = 0;
3167   u32 next0 = ~0;
3168   icmp_echo_header_t *echo0, *inner_echo0 = 0;
3169   ip4_header_t *inner_ip0;
3170   void *l4_header = 0;
3171   icmp46_header_t *inner_icmp0;
3172   snat_det_map_t * dm0 = 0;
3173   ip4_address_t new_addr0;
3174   u16 lo_port0, i0;
3175   snat_det_session_t * ses0 = 0;
3176   ip4_address_t in_addr;
3177   u16 in_port;
3178
3179   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
3180   echo0 = (icmp_echo_header_t *)(icmp0+1);
3181   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3182   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
3183
3184   if (!icmp_is_error_message (icmp0))
3185     {
3186       protocol = SNAT_PROTOCOL_ICMP;
3187       in_addr = ip0->src_address;
3188       in_port = echo0->identifier;
3189     }
3190   else
3191     {
3192       inner_ip0 = (ip4_header_t *)(echo0+1);
3193       l4_header = ip4_next_header (inner_ip0);
3194       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
3195       in_addr = inner_ip0->dst_address;
3196       switch (protocol)
3197         {
3198         case SNAT_PROTOCOL_ICMP:
3199           inner_icmp0 = (icmp46_header_t*)l4_header;
3200           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
3201           in_port = inner_echo0->identifier;
3202           break;
3203         case SNAT_PROTOCOL_UDP:
3204         case SNAT_PROTOCOL_TCP:
3205           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
3206           break;
3207         default:
3208           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
3209           next0 = SNAT_IN2OUT_NEXT_DROP;
3210           goto out;
3211         }
3212     }
3213
3214   dm0 = snat_det_map_by_user(sm, &in_addr);
3215   if (PREDICT_FALSE(!dm0))
3216     {
3217       clib_warning("no match for internal host %U",
3218                    format_ip4_address, &in_addr);
3219       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
3220           IP_PROTOCOL_ICMP, rx_fib_index0)))
3221         {
3222           dont_translate = 1;
3223           goto out;
3224         }
3225       next0 = SNAT_IN2OUT_NEXT_DROP;
3226       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3227       goto out;
3228     }
3229
3230   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
3231
3232   key0.ext_host_addr = ip0->dst_address;
3233   key0.ext_host_port = 0;
3234
3235   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
3236   if (PREDICT_FALSE(!ses0))
3237     {
3238       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
3239           IP_PROTOCOL_ICMP, rx_fib_index0)))
3240         {
3241           dont_translate = 1;
3242           goto out;
3243         }
3244       if (icmp0->type != ICMP4_echo_request)
3245         {
3246           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3247           next0 = SNAT_IN2OUT_NEXT_DROP;
3248           goto out;
3249         }
3250       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
3251         {
3252           key0.out_port = clib_host_to_net_u16 (lo_port0 +
3253             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
3254
3255           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
3256             continue;
3257
3258           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
3259           break;
3260         }
3261       if (PREDICT_FALSE(!ses0))
3262         {
3263           next0 = SNAT_IN2OUT_NEXT_DROP;
3264           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
3265           goto out;
3266         }
3267     }
3268
3269   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
3270                     !icmp_is_error_message (icmp0)))
3271     {
3272       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3273       next0 = SNAT_IN2OUT_NEXT_DROP;
3274       goto out;
3275     }
3276
3277   u32 now = (u32) vlib_time_now (sm->vlib_main);
3278
3279   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
3280   ses0->expire = now + sm->icmp_timeout;
3281
3282 out:
3283   *p_proto = protocol;
3284   if (ses0)
3285     {
3286       p_value->addr = new_addr0;
3287       p_value->fib_index = sm->outside_fib_index;
3288       p_value->port = ses0->out.out_port;
3289     }
3290   *p_dont_translate = dont_translate;
3291   if (d)
3292     *(snat_det_session_t**)d = ses0;
3293   if (e)
3294     *(snat_det_map_t**)e = dm0;
3295   return next0;
3296 }
3297
3298 /**********************/
3299 /*** worker handoff ***/
3300 /**********************/
3301 static inline uword
3302 snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
3303                                       vlib_node_runtime_t * node,
3304                                       vlib_frame_t * frame,
3305                                       u8 is_output)
3306 {
3307   snat_main_t *sm = &snat_main;
3308   vlib_thread_main_t *tm = vlib_get_thread_main ();
3309   u32 n_left_from, *from, *to_next = 0;
3310   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
3311   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
3312     = 0;
3313   vlib_frame_queue_elt_t *hf = 0;
3314   vlib_frame_t *f = 0;
3315   int i;
3316   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
3317   u32 next_worker_index = 0;
3318   u32 current_worker_index = ~0;
3319   u32 thread_index = vlib_get_thread_index ();
3320   u32 fq_index;
3321   u32 to_node_index;
3322
3323   ASSERT (vec_len (sm->workers));
3324
3325   if (is_output)
3326     {
3327       fq_index = sm->fq_in2out_output_index;
3328       to_node_index = sm->in2out_output_node_index;
3329     }
3330   else
3331     {
3332       fq_index = sm->fq_in2out_index;
3333       to_node_index = sm->in2out_node_index;
3334     }
3335
3336   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
3337     {
3338       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
3339
3340       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
3341                                sm->first_worker_index + sm->num_workers - 1,
3342                                (vlib_frame_queue_t *) (~0));
3343     }
3344
3345   from = vlib_frame_vector_args (frame);
3346   n_left_from = frame->n_vectors;
3347
3348   while (n_left_from > 0)
3349     {
3350       u32 bi0;
3351       vlib_buffer_t *b0;
3352       u32 sw_if_index0;
3353       u32 rx_fib_index0;
3354       ip4_header_t * ip0;
3355       u8 do_handoff;
3356
3357       bi0 = from[0];
3358       from += 1;
3359       n_left_from -= 1;
3360
3361       b0 = vlib_get_buffer (vm, bi0);
3362
3363       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
3364       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3365
3366       ip0 = vlib_buffer_get_current (b0);
3367
3368       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
3369
3370       if (PREDICT_FALSE (next_worker_index != thread_index))
3371         {
3372           do_handoff = 1;
3373
3374           if (next_worker_index != current_worker_index)
3375             {
3376               if (hf)
3377                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3378
3379               hf = vlib_get_worker_handoff_queue_elt (fq_index,
3380                                                       next_worker_index,
3381                                                       handoff_queue_elt_by_worker_index);
3382
3383               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
3384               to_next_worker = &hf->buffer_index[hf->n_vectors];
3385               current_worker_index = next_worker_index;
3386             }
3387
3388           /* enqueue to correct worker thread */
3389           to_next_worker[0] = bi0;
3390           to_next_worker++;
3391           n_left_to_next_worker--;
3392
3393           if (n_left_to_next_worker == 0)
3394             {
3395               hf->n_vectors = VLIB_FRAME_SIZE;
3396               vlib_put_frame_queue_elt (hf);
3397               current_worker_index = ~0;
3398               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
3399               hf = 0;
3400             }
3401         }
3402       else
3403         {
3404           do_handoff = 0;
3405           /* if this is 1st frame */
3406           if (!f)
3407             {
3408               f = vlib_get_frame_to_node (vm, to_node_index);
3409               to_next = vlib_frame_vector_args (f);
3410             }
3411
3412           to_next[0] = bi0;
3413           to_next += 1;
3414           f->n_vectors++;
3415         }
3416
3417       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
3418                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3419         {
3420           snat_in2out_worker_handoff_trace_t *t =
3421             vlib_add_trace (vm, node, b0, sizeof (*t));
3422           t->next_worker_index = next_worker_index;
3423           t->do_handoff = do_handoff;
3424         }
3425     }
3426
3427   if (f)
3428     vlib_put_frame_to_node (vm, to_node_index, f);
3429
3430   if (hf)
3431     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3432
3433   /* Ship frames to the worker nodes */
3434   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
3435     {
3436       if (handoff_queue_elt_by_worker_index[i])
3437         {
3438           hf = handoff_queue_elt_by_worker_index[i];
3439           /*
3440            * It works better to let the handoff node
3441            * rate-adapt, always ship the handoff queue element.
3442            */
3443           if (1 || hf->n_vectors == hf->last_n_vectors)
3444             {
3445               vlib_put_frame_queue_elt (hf);
3446               handoff_queue_elt_by_worker_index[i] = 0;
3447             }
3448           else
3449             hf->last_n_vectors = hf->n_vectors;
3450         }
3451       congested_handoff_queue_by_worker_index[i] =
3452         (vlib_frame_queue_t *) (~0);
3453     }
3454   hf = 0;
3455   current_worker_index = ~0;
3456   return frame->n_vectors;
3457 }
3458
3459 static uword
3460 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
3461                                vlib_node_runtime_t * node,
3462                                vlib_frame_t * frame)
3463 {
3464   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
3465 }
3466
3467 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
3468   .function = snat_in2out_worker_handoff_fn,
3469   .name = "nat44-in2out-worker-handoff",
3470   .vector_size = sizeof (u32),
3471   .format_trace = format_snat_in2out_worker_handoff_trace,
3472   .type = VLIB_NODE_TYPE_INTERNAL,
3473
3474   .n_next_nodes = 1,
3475
3476   .next_nodes = {
3477     [0] = "error-drop",
3478   },
3479 };
3480
3481 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
3482                               snat_in2out_worker_handoff_fn);
3483
3484 static uword
3485 snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
3486                                       vlib_node_runtime_t * node,
3487                                       vlib_frame_t * frame)
3488 {
3489   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
3490 }
3491
3492 VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
3493   .function = snat_in2out_output_worker_handoff_fn,
3494   .name = "nat44-in2out-output-worker-handoff",
3495   .vector_size = sizeof (u32),
3496   .format_trace = format_snat_in2out_worker_handoff_trace,
3497   .type = VLIB_NODE_TYPE_INTERNAL,
3498
3499   .n_next_nodes = 1,
3500
3501   .next_nodes = {
3502     [0] = "error-drop",
3503   },
3504 };
3505
3506 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
3507                               snat_in2out_output_worker_handoff_fn);
3508
3509 static_always_inline int
3510 is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
3511 {
3512   snat_address_t * ap;
3513   clib_bihash_kv_8_8_t kv, value;
3514   snat_session_key_t m_key;
3515
3516   vec_foreach (ap, sm->addresses)
3517     {
3518       if (ap->addr.as_u32 == dst_addr->as_u32)
3519         return 1;
3520     }
3521
3522   m_key.addr.as_u32 = dst_addr->as_u32;
3523   m_key.fib_index = sm->outside_fib_index;
3524   m_key.port = 0;
3525   m_key.protocol = 0;
3526   kv.key = m_key.as_u64;
3527   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
3528     return 1;
3529
3530   return 0;
3531 }
3532
3533 static uword
3534 snat_hairpin_dst_fn (vlib_main_t * vm,
3535                      vlib_node_runtime_t * node,
3536                      vlib_frame_t * frame)
3537 {
3538   u32 n_left_from, * from, * to_next;
3539   snat_in2out_next_t next_index;
3540   u32 pkts_processed = 0;
3541   snat_main_t * sm = &snat_main;
3542
3543   from = vlib_frame_vector_args (frame);
3544   n_left_from = frame->n_vectors;
3545   next_index = node->cached_next_index;
3546
3547   while (n_left_from > 0)
3548     {
3549       u32 n_left_to_next;
3550
3551       vlib_get_next_frame (vm, node, next_index,
3552                            to_next, n_left_to_next);
3553
3554       while (n_left_from > 0 && n_left_to_next > 0)
3555         {
3556           u32 bi0;
3557           vlib_buffer_t * b0;
3558           u32 next0;
3559           ip4_header_t * ip0;
3560           u32 proto0;
3561
3562           /* speculatively enqueue b0 to the current next frame */
3563           bi0 = from[0];
3564           to_next[0] = bi0;
3565           from += 1;
3566           to_next += 1;
3567           n_left_from -= 1;
3568           n_left_to_next -= 1;
3569
3570           b0 = vlib_get_buffer (vm, bi0);
3571           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3572           ip0 = vlib_buffer_get_current (b0);
3573
3574           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3575
3576           vnet_buffer (b0)->snat.flags = 0;
3577           if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
3578             {
3579               if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
3580                 {
3581                   udp_header_t * udp0 = ip4_next_header (ip0);
3582                   tcp_header_t * tcp0 = (tcp_header_t *) udp0;
3583
3584                   snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3585                 }
3586               else if (proto0 == SNAT_PROTOCOL_ICMP)
3587                 {
3588                   icmp46_header_t * icmp0 = ip4_next_header (ip0);
3589
3590                   snat_icmp_hairpinning (sm, b0, ip0, icmp0);
3591                 }
3592               else
3593                 {
3594                   snat_hairpinning_unknown_proto (sm, b0, ip0);
3595                 }
3596
3597               vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
3598             }
3599
3600           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3601
3602           /* verify speculative enqueue, maybe switch current next frame */
3603           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3604                                            to_next, n_left_to_next,
3605                                            bi0, next0);
3606          }
3607
3608       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3609     }
3610
3611   vlib_node_increment_counter (vm, snat_hairpin_dst_node.index,
3612                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3613                                pkts_processed);
3614   return frame->n_vectors;
3615 }
3616
3617 VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
3618   .function = snat_hairpin_dst_fn,
3619   .name = "nat44-hairpin-dst",
3620   .vector_size = sizeof (u32),
3621   .type = VLIB_NODE_TYPE_INTERNAL,
3622   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3623   .error_strings = snat_in2out_error_strings,
3624   .n_next_nodes = 2,
3625   .next_nodes = {
3626     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3627     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3628   },
3629 };
3630
3631 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node,
3632                               snat_hairpin_dst_fn);
3633
3634 static uword
3635 snat_hairpin_src_fn (vlib_main_t * vm,
3636                      vlib_node_runtime_t * node,
3637                      vlib_frame_t * frame)
3638 {
3639   u32 n_left_from, * from, * to_next;
3640   snat_in2out_next_t next_index;
3641   u32 pkts_processed = 0;
3642   snat_main_t *sm = &snat_main;
3643
3644   from = vlib_frame_vector_args (frame);
3645   n_left_from = frame->n_vectors;
3646   next_index = node->cached_next_index;
3647
3648   while (n_left_from > 0)
3649     {
3650       u32 n_left_to_next;
3651
3652       vlib_get_next_frame (vm, node, next_index,
3653                            to_next, n_left_to_next);
3654
3655       while (n_left_from > 0 && n_left_to_next > 0)
3656         {
3657           u32 bi0;
3658           vlib_buffer_t * b0;
3659           u32 next0;
3660           snat_interface_t *i;
3661           u32 sw_if_index0;
3662
3663           /* speculatively enqueue b0 to the current next frame */
3664           bi0 = from[0];
3665           to_next[0] = bi0;
3666           from += 1;
3667           to_next += 1;
3668           n_left_from -= 1;
3669           n_left_to_next -= 1;
3670
3671           b0 = vlib_get_buffer (vm, bi0);
3672           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3673           next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT;
3674
3675           pool_foreach (i, sm->output_feature_interfaces,
3676           ({
3677             /* Only packets from NAT inside interface */
3678             if ((nat_interface_is_inside(i)) && (sw_if_index0 == i->sw_if_index))
3679               {
3680                 if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) &
3681                                     SNAT_FLAG_HAIRPINNING))
3682                   {
3683                     if (PREDICT_TRUE (sm->num_workers > 1))
3684                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
3685                     else
3686                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
3687                   }
3688                 break;
3689               }
3690           }));
3691
3692           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3693
3694           /* verify speculative enqueue, maybe switch current next frame */
3695           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3696                                            to_next, n_left_to_next,
3697                                            bi0, next0);
3698          }
3699
3700       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3701     }
3702
3703   vlib_node_increment_counter (vm, snat_hairpin_src_node.index,
3704                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3705                                pkts_processed);
3706   return frame->n_vectors;
3707 }
3708
3709 VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
3710   .function = snat_hairpin_src_fn,
3711   .name = "nat44-hairpin-src",
3712   .vector_size = sizeof (u32),
3713   .type = VLIB_NODE_TYPE_INTERNAL,
3714   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3715   .error_strings = snat_in2out_error_strings,
3716   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
3717   .next_nodes = {
3718      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
3719      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-in2out-output",
3720      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
3721      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
3722   },
3723 };
3724
3725 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node,
3726                               snat_hairpin_src_fn);
3727
3728 static uword
3729 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
3730                                 vlib_node_runtime_t * node,
3731                                 vlib_frame_t * frame)
3732 {
3733   u32 n_left_from, * from, * to_next;
3734   snat_in2out_next_t next_index;
3735   u32 pkts_processed = 0;
3736   snat_main_t * sm = &snat_main;
3737   u32 stats_node_index;
3738
3739   stats_node_index = snat_in2out_fast_node.index;
3740
3741   from = vlib_frame_vector_args (frame);
3742   n_left_from = frame->n_vectors;
3743   next_index = node->cached_next_index;
3744
3745   while (n_left_from > 0)
3746     {
3747       u32 n_left_to_next;
3748
3749       vlib_get_next_frame (vm, node, next_index,
3750                            to_next, n_left_to_next);
3751
3752       while (n_left_from > 0 && n_left_to_next > 0)
3753         {
3754           u32 bi0;
3755           vlib_buffer_t * b0;
3756           u32 next0;
3757           u32 sw_if_index0;
3758           ip4_header_t * ip0;
3759           ip_csum_t sum0;
3760           u32 new_addr0, old_addr0;
3761           u16 old_port0, new_port0;
3762           udp_header_t * udp0;
3763           tcp_header_t * tcp0;
3764           icmp46_header_t * icmp0;
3765           snat_session_key_t key0, sm0;
3766           u32 proto0;
3767           u32 rx_fib_index0;
3768
3769           /* speculatively enqueue b0 to the current next frame */
3770           bi0 = from[0];
3771           to_next[0] = bi0;
3772           from += 1;
3773           to_next += 1;
3774           n_left_from -= 1;
3775           n_left_to_next -= 1;
3776
3777           b0 = vlib_get_buffer (vm, bi0);
3778           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3779
3780           ip0 = vlib_buffer_get_current (b0);
3781           udp0 = ip4_next_header (ip0);
3782           tcp0 = (tcp_header_t *) udp0;
3783           icmp0 = (icmp46_header_t *) udp0;
3784
3785           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3786           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3787
3788           if (PREDICT_FALSE(ip0->ttl == 1))
3789             {
3790               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3791               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3792                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3793                                            0);
3794               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3795               goto trace0;
3796             }
3797
3798           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3799
3800           if (PREDICT_FALSE (proto0 == ~0))
3801               goto trace0;
3802
3803           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3804             {
3805               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
3806                                   rx_fib_index0, node, next0, ~0, 0, 0);
3807               goto trace0;
3808             }
3809
3810           key0.addr = ip0->src_address;
3811           key0.protocol = proto0;
3812           key0.port = udp0->src_port;
3813           key0.fib_index = rx_fib_index0;
3814
3815           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0, 0))
3816             {
3817               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3818               next0= SNAT_IN2OUT_NEXT_DROP;
3819               goto trace0;
3820             }
3821
3822           new_addr0 = sm0.addr.as_u32;
3823           new_port0 = sm0.port;
3824           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
3825           old_addr0 = ip0->src_address.as_u32;
3826           ip0->src_address.as_u32 = new_addr0;
3827
3828           sum0 = ip0->checksum;
3829           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3830                                  ip4_header_t,
3831                                  src_address /* changed member */);
3832           ip0->checksum = ip_csum_fold (sum0);
3833
3834           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
3835             {
3836               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3837                 {
3838                   old_port0 = tcp0->src_port;
3839                   tcp0->src_port = new_port0;
3840
3841                   sum0 = tcp0->checksum;
3842                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3843                                          ip4_header_t,
3844                                          dst_address /* changed member */);
3845                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
3846                                          ip4_header_t /* cheat */,
3847                                          length /* changed member */);
3848                   tcp0->checksum = ip_csum_fold(sum0);
3849                 }
3850               else
3851                 {
3852                   old_port0 = udp0->src_port;
3853                   udp0->src_port = new_port0;
3854                   udp0->checksum = 0;
3855                 }
3856             }
3857           else
3858             {
3859               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3860                 {
3861                   sum0 = tcp0->checksum;
3862                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3863                                          ip4_header_t,
3864                                          dst_address /* changed member */);
3865                   tcp0->checksum = ip_csum_fold(sum0);
3866                 }
3867             }
3868
3869           /* Hairpinning */
3870           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3871
3872         trace0:
3873           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3874                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3875             {
3876               snat_in2out_trace_t *t =
3877                  vlib_add_trace (vm, node, b0, sizeof (*t));
3878               t->sw_if_index = sw_if_index0;
3879               t->next_index = next0;
3880             }
3881
3882           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3883
3884           /* verify speculative enqueue, maybe switch current next frame */
3885           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3886                                            to_next, n_left_to_next,
3887                                            bi0, next0);
3888         }
3889
3890       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3891     }
3892
3893   vlib_node_increment_counter (vm, stats_node_index,
3894                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3895                                pkts_processed);
3896   return frame->n_vectors;
3897 }
3898
3899
3900 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
3901   .function = snat_in2out_fast_static_map_fn,
3902   .name = "nat44-in2out-fast",
3903   .vector_size = sizeof (u32),
3904   .format_trace = format_snat_in2out_fast_trace,
3905   .type = VLIB_NODE_TYPE_INTERNAL,
3906
3907   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3908   .error_strings = snat_in2out_error_strings,
3909
3910   .runtime_data_bytes = sizeof (snat_runtime_t),
3911
3912   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3913
3914   /* edit / add dispositions here */
3915   .next_nodes = {
3916     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3917     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3918     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
3919     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3920     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
3921   },
3922 };
3923
3924 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);