NAT44: asymmetrical load balancing static mapping rule (VPP-1132)
[vpp.git] / src / plugins / nat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <nat/nat.h>
25 #include <nat/nat_ipfix_logging.h>
26 #include <nat/nat_det.h>
27 #include <nat/nat_reass.h>
28
29 #include <vppinfra/hash.h>
30 #include <vppinfra/error.h>
31 #include <vppinfra/elog.h>
32
33 typedef struct {
34   u32 sw_if_index;
35   u32 next_index;
36   u32 session_index;
37   u32 is_slow_path;
38 } snat_in2out_trace_t;
39
40 typedef struct {
41   u32 next_worker_index;
42   u8 do_handoff;
43 } snat_in2out_worker_handoff_trace_t;
44
45 /* packet trace format function */
46 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
47 {
48   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
49   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
50   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
51   char * tag;
52
53   tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
54
55   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
56               t->sw_if_index, t->next_index, t->session_index);
57
58   return s;
59 }
60
61 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
62 {
63   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
64   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
65   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
66
67   s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
68               t->sw_if_index, t->next_index);
69
70   return s;
71 }
72
73 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
74 {
75   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
76   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
77   snat_in2out_worker_handoff_trace_t * t =
78     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
79   char * m;
80
81   m = t->do_handoff ? "next worker" : "same worker";
82   s = format (s, "NAT44_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
83
84   return s;
85 }
86
87 typedef struct {
88   u32 sw_if_index;
89   u32 next_index;
90   u8 cached;
91 } nat44_in2out_reass_trace_t;
92
93 static u8 * format_nat44_in2out_reass_trace (u8 * s, va_list * args)
94 {
95   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
96   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
97   nat44_in2out_reass_trace_t * t = va_arg (*args, nat44_in2out_reass_trace_t *);
98
99   s = format (s, "NAT44_IN2OUT_REASS: sw_if_index %d, next index %d, status %s",
100               t->sw_if_index, t->next_index,
101               t->cached ? "cached" : "translated");
102
103   return s;
104 }
105
106 vlib_node_registration_t snat_in2out_node;
107 vlib_node_registration_t snat_in2out_slowpath_node;
108 vlib_node_registration_t snat_in2out_fast_node;
109 vlib_node_registration_t snat_in2out_worker_handoff_node;
110 vlib_node_registration_t snat_det_in2out_node;
111 vlib_node_registration_t snat_in2out_output_node;
112 vlib_node_registration_t snat_in2out_output_slowpath_node;
113 vlib_node_registration_t snat_in2out_output_worker_handoff_node;
114 vlib_node_registration_t snat_hairpin_dst_node;
115 vlib_node_registration_t snat_hairpin_src_node;
116 vlib_node_registration_t nat44_hairpinning_node;
117 vlib_node_registration_t nat44_in2out_reass_node;
118
119
/*
 * X-macro listing the in2out error counters as _(symbol, description).
 * The enum and the string table below are both expanded from this list,
 * so their entries always line up index for index.
 */
#define foreach_snat_in2out_error                               \
  _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")               \
  _(IN2OUT_PACKETS, "Good in2out packets processed")            \
  _(OUT_OF_PORTS, "Out of ports")                               \
  _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")                \
  _(BAD_ICMP_TYPE, "unsupported ICMP type")                     \
  _(NO_TRANSLATION, "No translation")                           \
  _(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")         \
  _(DROP_FRAGMENT, "Drop fragment")                             \
  _(MAX_REASS, "Maximum reassemblies exceeded")                 \
  _(MAX_FRAG, "Maximum fragments per reassembly exceeded")

/** Error codes, one SNAT_IN2OUT_ERROR_<symbol> per list entry. */
typedef enum
{
#define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
  foreach_snat_in2out_error
#undef _
    SNAT_IN2OUT_N_ERROR,        /* number of entries, not an error itself */
} snat_in2out_error_t;

/** Descriptions indexed by snat_in2out_error_t. */
static char *snat_in2out_error_strings[] = {
#define _(sym,string) string,
  foreach_snat_in2out_error
#undef _
};
144
/** Next-node slots used by the in2out code in this file. */
typedef enum
{
  SNAT_IN2OUT_NEXT_LOOKUP,
  SNAT_IN2OUT_NEXT_DROP,
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_NEXT_REASS,
  SNAT_IN2OUT_N_NEXT,           /* number of slots, not a slot itself */
} snat_in2out_next_t;
153
/** Next-node slots for the hairpin source path. */
typedef enum
{
  SNAT_HAIRPIN_SRC_NEXT_DROP,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
  SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
  SNAT_HAIRPIN_SRC_N_NEXT,      /* number of slots, not a slot itself */
} snat_hairpin_next_t;
161
162 /**
163  * @brief Check if packet should be translated
164  *
165  * Packets aimed at outside interface and external address with active session
166  * should be translated.
167  *
168  * @param sm            NAT main
169  * @param rt            NAT runtime data
170  * @param sw_if_index0  index of the inside interface
171  * @param ip0           IPv4 header
172  * @param proto0        NAT protocol
173  * @param rx_fib_index0 RX FIB index
174  *
175  * @returns 0 if packet should be translated otherwise 1
176  */
177 static inline int
178 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
179                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
180                          u32 rx_fib_index0)
181 {
182   if (sm->out2in_dpo)
183     return 0;
184
185   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
186   fib_prefix_t pfx = {
187     .fp_proto = FIB_PROTOCOL_IP4,
188     .fp_len = 32,
189     .fp_addr = {
190         .ip4.as_u32 = ip0->dst_address.as_u32,
191     },
192   };
193
194   /* Don't NAT packet aimed at the intfc address */
195   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
196                                       ip0->dst_address.as_u32)))
197     return 1;
198
199   fei = fib_table_lookup (rx_fib_index0, &pfx);
200   if (FIB_NODE_INDEX_INVALID != fei)
201     {
202       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
203       if (sw_if_index == ~0)
204         {
205           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
206           if (FIB_NODE_INDEX_INVALID != fei)
207             sw_if_index = fib_entry_get_resolving_interface (fei);
208         }
209       snat_interface_t *i;
210       pool_foreach (i, sm->interfaces,
211       ({
212         /* NAT packet aimed at outside interface */
213         if ((nat_interface_is_outside(i)) && (sw_if_index == i->sw_if_index))
214           return 0;
215       }));
216     }
217
218   return 1;
219 }
220
221 static inline int
222 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
223                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
224                     u32 rx_fib_index0, u32 thread_index)
225 {
226   udp_header_t * udp0 = ip4_next_header (ip0);
227   snat_session_key_t key0, sm0;
228   clib_bihash_kv_8_8_t kv0, value0;
229
230   key0.addr = ip0->dst_address;
231   key0.port = udp0->dst_port;
232   key0.protocol = proto0;
233   key0.fib_index = sm->outside_fib_index;
234   kv0.key = key0.as_u64;
235
236   /* NAT packet aimed at external address if */
237   /* has active sessions */
238   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
239                               &value0))
240     {
241       /* or is static mappings */
242       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
243         return 0;
244     }
245   else
246     return 0;
247
248   if (sm->forwarding_enabled)
249     return 1;
250
251   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
252                                  rx_fib_index0);
253 }
254
255 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
256                       ip4_header_t * ip0,
257                       u32 rx_fib_index0,
258                       snat_session_key_t * key0,
259                       snat_session_t ** sessionp,
260                       vlib_node_runtime_t * node,
261                       u32 next0,
262                       u32 thread_index)
263 {
264   snat_user_t *u;
265   snat_session_t *s;
266   clib_bihash_kv_8_8_t kv0;
267   snat_session_key_t key1;
268   u32 address_index = ~0;
269   u32 outside_fib_index;
270   uword * p;
271   udp_header_t * udp0 = ip4_next_header (ip0);
272
273   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
274     {
275       b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
276       nat_ipfix_logging_max_sessions(sm->max_translations);
277       return SNAT_IN2OUT_NEXT_DROP;
278     }
279
280   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
281   if (! p)
282     {
283       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
284       return SNAT_IN2OUT_NEXT_DROP;
285     }
286   outside_fib_index = p[0];
287
288   key1.protocol = key0->protocol;
289
290   u = nat_user_get_or_create (sm, &ip0->src_address, rx_fib_index0,
291                               thread_index);
292   if (!u)
293     {
294       clib_warning ("create NAT user failed");
295       return SNAT_IN2OUT_NEXT_DROP;
296     }
297
298   /* First try to match static mapping by local address and port */
299   if (snat_static_mapping_match (sm, *key0, &key1, 0, 0, 0))
300     {
301       /* Try to create dynamic translation */
302       if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index0,
303                                                thread_index, &key1,
304                                                &address_index,
305                                                sm->port_per_thread,
306                                                sm->per_thread_data[thread_index].snat_thread_index))
307         {
308           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
309           return SNAT_IN2OUT_NEXT_DROP;
310         }
311       u->nsessions++;
312     }
313   else
314     {
315       u->nstaticsessions++;
316     }
317
318   s = nat_session_alloc_or_recycle (sm, u, thread_index);
319   if (!s)
320     {
321       clib_warning ("create NAT session failed");
322       return SNAT_IN2OUT_NEXT_DROP;
323     }
324
325   if (address_index == ~0)
326     s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
327   s->outside_address_index = address_index;
328   s->in2out = *key0;
329   s->out2in = key1;
330   s->out2in.protocol = key0->protocol;
331   s->out2in.fib_index = outside_fib_index;
332   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
333   s->ext_host_port = udp0->dst_port;
334   *sessionp = s;
335
336   /* Add to translation hashes */
337   kv0.key = s->in2out.as_u64;
338   kv0.value = s - sm->per_thread_data[thread_index].sessions;
339   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
340                                1 /* is_add */))
341       clib_warning ("in2out key add failed");
342
343   kv0.key = s->out2in.as_u64;
344   kv0.value = s - sm->per_thread_data[thread_index].sessions;
345
346   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
347                                1 /* is_add */))
348       clib_warning ("out2in key add failed");
349
350   /* log NAT event */
351   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
352                                       s->out2in.addr.as_u32,
353                                       s->in2out.protocol,
354                                       s->in2out.port,
355                                       s->out2in.port,
356                                       s->in2out.fib_index);
357   return next0;
358 }
359
360 static_always_inline
361 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
362                                  snat_session_key_t *p_key0)
363 {
364   icmp46_header_t *icmp0;
365   snat_session_key_t key0;
366   icmp_echo_header_t *echo0, *inner_echo0 = 0;
367   ip4_header_t *inner_ip0 = 0;
368   void *l4_header = 0;
369   icmp46_header_t *inner_icmp0;
370
371   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
372   echo0 = (icmp_echo_header_t *)(icmp0+1);
373
374   if (!icmp_is_error_message (icmp0))
375     {
376       key0.protocol = SNAT_PROTOCOL_ICMP;
377       key0.addr = ip0->src_address;
378       key0.port = echo0->identifier;
379     }
380   else
381     {
382       inner_ip0 = (ip4_header_t *)(echo0+1);
383       l4_header = ip4_next_header (inner_ip0);
384       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
385       key0.addr = inner_ip0->dst_address;
386       switch (key0.protocol)
387         {
388         case SNAT_PROTOCOL_ICMP:
389           inner_icmp0 = (icmp46_header_t*)l4_header;
390           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
391           key0.port = inner_echo0->identifier;
392           break;
393         case SNAT_PROTOCOL_UDP:
394         case SNAT_PROTOCOL_TCP:
395           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
396           break;
397         default:
398           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
399         }
400     }
401   *p_key0 = key0;
402   return -1; /* success */
403 }
404
405 /**
406  * Get address and port values to be used for ICMP packet translation
407  * and create session if needed
408  *
409  * @param[in,out] sm             NAT main
410  * @param[in,out] node           NAT node runtime
411  * @param[in] thread_index       thread index
412  * @param[in,out] b0             buffer containing packet to be translated
413  * @param[out] p_proto           protocol used for matching
414  * @param[out] p_value           address and port after NAT translation
415  * @param[out] p_dont_translate  if packet should not be translated
416  * @param d                      optional parameter
417  * @param e                      optional parameter
418  */
419 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
420                            u32 thread_index, vlib_buffer_t *b0,
421                            ip4_header_t *ip0, u8 *p_proto,
422                            snat_session_key_t *p_value,
423                            u8 *p_dont_translate, void *d, void *e)
424 {
425   icmp46_header_t *icmp0;
426   u32 sw_if_index0;
427   u32 rx_fib_index0;
428   snat_session_key_t key0;
429   snat_session_t *s0 = 0;
430   u8 dont_translate = 0;
431   clib_bihash_kv_8_8_t kv0, value0;
432   u32 next0 = ~0;
433   int err;
434
435   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
436   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
437   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
438
439   err = icmp_get_key (ip0, &key0);
440   if (err != -1)
441     {
442       b0->error = node->errors[err];
443       next0 = SNAT_IN2OUT_NEXT_DROP;
444       goto out;
445     }
446   key0.fib_index = rx_fib_index0;
447
448   kv0.key = key0.as_u64;
449
450   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
451                               &value0))
452     {
453       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
454           IP_PROTOCOL_ICMP, rx_fib_index0, thread_index) &&
455           vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0))
456         {
457           dont_translate = 1;
458           goto out;
459         }
460
461       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
462         {
463           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
464           next0 = SNAT_IN2OUT_NEXT_DROP;
465           goto out;
466         }
467
468       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
469                          &s0, node, next0, thread_index);
470
471       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
472         goto out;
473     }
474   else
475     {
476       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
477                         icmp0->type != ICMP4_echo_reply &&
478                         !icmp_is_error_message (icmp0)))
479         {
480           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
481           next0 = SNAT_IN2OUT_NEXT_DROP;
482           goto out;
483         }
484
485       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
486                               value0.value);
487     }
488
489 out:
490   *p_proto = key0.protocol;
491   if (s0)
492     *p_value = s0->out2in;
493   *p_dont_translate = dont_translate;
494   if (d)
495     *(snat_session_t**)d = s0;
496   return next0;
497 }
498
499 /**
500  * Get address and port values to be used for ICMP packet translation
501  *
502  * @param[in] sm                 NAT main
503  * @param[in,out] node           NAT node runtime
504  * @param[in] thread_index       thread index
505  * @param[in,out] b0             buffer containing packet to be translated
506  * @param[out] p_proto           protocol used for matching
507  * @param[out] p_value           address and port after NAT translation
508  * @param[out] p_dont_translate  if packet should not be translated
509  * @param d                      optional parameter
510  * @param e                      optional parameter
511  */
512 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
513                            u32 thread_index, vlib_buffer_t *b0,
514                            ip4_header_t *ip0, u8 *p_proto,
515                            snat_session_key_t *p_value,
516                            u8 *p_dont_translate, void *d, void *e)
517 {
518   icmp46_header_t *icmp0;
519   u32 sw_if_index0;
520   u32 rx_fib_index0;
521   snat_session_key_t key0;
522   snat_session_key_t sm0;
523   u8 dont_translate = 0;
524   u8 is_addr_only;
525   u32 next0 = ~0;
526   int err;
527
528   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
529   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
530   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
531
532   err = icmp_get_key (ip0, &key0);
533   if (err != -1)
534     {
535       b0->error = node->errors[err];
536       next0 = SNAT_IN2OUT_NEXT_DROP;
537       goto out2;
538     }
539   key0.fib_index = rx_fib_index0;
540
541   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only, 0))
542     {
543       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
544           IP_PROTOCOL_ICMP, rx_fib_index0)))
545         {
546           dont_translate = 1;
547           goto out;
548         }
549
550       if (icmp_is_error_message (icmp0))
551         {
552           next0 = SNAT_IN2OUT_NEXT_DROP;
553           goto out;
554         }
555
556       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
557       next0 = SNAT_IN2OUT_NEXT_DROP;
558       goto out;
559     }
560
561   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
562                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
563                     !icmp_is_error_message (icmp0)))
564     {
565       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
566       next0 = SNAT_IN2OUT_NEXT_DROP;
567       goto out;
568     }
569
570 out:
571   *p_value = sm0;
572 out2:
573   *p_proto = key0.protocol;
574   *p_dont_translate = dont_translate;
575   return next0;
576 }
577
578 static inline u32 icmp_in2out (snat_main_t *sm,
579                                vlib_buffer_t * b0,
580                                ip4_header_t * ip0,
581                                icmp46_header_t * icmp0,
582                                u32 sw_if_index0,
583                                u32 rx_fib_index0,
584                                vlib_node_runtime_t * node,
585                                u32 next0,
586                                u32 thread_index,
587                                void *d,
588                                void *e)
589 {
590   snat_session_key_t sm0;
591   u8 protocol;
592   icmp_echo_header_t *echo0, *inner_echo0 = 0;
593   ip4_header_t *inner_ip0;
594   void *l4_header = 0;
595   icmp46_header_t *inner_icmp0;
596   u8 dont_translate;
597   u32 new_addr0, old_addr0;
598   u16 old_id0, new_id0;
599   ip_csum_t sum0;
600   u16 checksum0;
601   u32 next0_tmp;
602
603   echo0 = (icmp_echo_header_t *)(icmp0+1);
604
605   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0, ip0,
606                                        &protocol, &sm0, &dont_translate, d, e);
607   if (next0_tmp != ~0)
608     next0 = next0_tmp;
609   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
610     goto out;
611
612   sum0 = ip_incremental_checksum (0, icmp0,
613                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
614   checksum0 = ~ip_csum_fold (sum0);
615   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
616     {
617       next0 = SNAT_IN2OUT_NEXT_DROP;
618       goto out;
619     }
620
621   old_addr0 = ip0->src_address.as_u32;
622   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
623   if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
624     vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
625
626   sum0 = ip0->checksum;
627   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
628                          src_address /* changed member */);
629   ip0->checksum = ip_csum_fold (sum0);
630
631   if (!icmp_is_error_message (icmp0))
632     {
633       new_id0 = sm0.port;
634       if (PREDICT_FALSE(new_id0 != echo0->identifier))
635         {
636           old_id0 = echo0->identifier;
637           new_id0 = sm0.port;
638           echo0->identifier = new_id0;
639
640           sum0 = icmp0->checksum;
641           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
642                                  identifier);
643           icmp0->checksum = ip_csum_fold (sum0);
644         }
645     }
646   else
647     {
648       inner_ip0 = (ip4_header_t *)(echo0+1);
649       l4_header = ip4_next_header (inner_ip0);
650
651       if (!ip4_header_checksum_is_valid (inner_ip0))
652         {
653           next0 = SNAT_IN2OUT_NEXT_DROP;
654           goto out;
655         }
656
657       old_addr0 = inner_ip0->dst_address.as_u32;
658       inner_ip0->dst_address = sm0.addr;
659       new_addr0 = inner_ip0->dst_address.as_u32;
660
661       sum0 = icmp0->checksum;
662       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
663                              dst_address /* changed member */);
664       icmp0->checksum = ip_csum_fold (sum0);
665
666       switch (protocol)
667         {
668           case SNAT_PROTOCOL_ICMP:
669             inner_icmp0 = (icmp46_header_t*)l4_header;
670             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
671
672             old_id0 = inner_echo0->identifier;
673             new_id0 = sm0.port;
674             inner_echo0->identifier = new_id0;
675
676             sum0 = icmp0->checksum;
677             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
678                                    identifier);
679             icmp0->checksum = ip_csum_fold (sum0);
680             break;
681           case SNAT_PROTOCOL_UDP:
682           case SNAT_PROTOCOL_TCP:
683             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
684             new_id0 = sm0.port;
685             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
686
687             sum0 = icmp0->checksum;
688             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
689                                    dst_port);
690             icmp0->checksum = ip_csum_fold (sum0);
691             break;
692           default:
693             ASSERT(0);
694         }
695     }
696
697 out:
698   return next0;
699 }
700
701 /**
702  * @brief Hairpinning
703  *
704  * Hairpinning allows two endpoints on the internal side of the NAT to
705  * communicate even if they only use each other's external IP addresses
706  * and ports.
707  *
708  * @param sm     NAT main.
709  * @param b0     Vlib buffer.
710  * @param ip0    IP header.
711  * @param udp0   UDP header.
712  * @param tcp0   TCP header.
713  * @param proto0 NAT protocol.
714  */
715 static inline int
716 snat_hairpinning (snat_main_t *sm,
717                   vlib_buffer_t * b0,
718                   ip4_header_t * ip0,
719                   udp_header_t * udp0,
720                   tcp_header_t * tcp0,
721                   u32 proto0)
722 {
723   snat_session_key_t key0, sm0;
724   snat_session_t * s0;
725   clib_bihash_kv_8_8_t kv0, value0;
726   ip_csum_t sum0;
727   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
728   u16 new_dst_port0, old_dst_port0;
729
730   key0.addr = ip0->dst_address;
731   key0.port = udp0->dst_port;
732   key0.protocol = proto0;
733   key0.fib_index = sm->outside_fib_index;
734   kv0.key = key0.as_u64;
735
736   /* Check if destination is static mappings */
737   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
738     {
739       new_dst_addr0 = sm0.addr.as_u32;
740       new_dst_port0 = sm0.port;
741       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
742     }
743   /* or active session */
744   else
745     {
746       if (sm->num_workers > 1)
747         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
748       else
749         ti = sm->num_workers;
750
751       if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
752         {
753           si = value0.value;
754
755           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
756           new_dst_addr0 = s0->in2out.addr.as_u32;
757           new_dst_port0 = s0->in2out.port;
758           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
759         }
760     }
761
762   /* Destination is behind the same NAT, use internal address and port */
763   if (new_dst_addr0)
764     {
765       old_dst_addr0 = ip0->dst_address.as_u32;
766       ip0->dst_address.as_u32 = new_dst_addr0;
767       sum0 = ip0->checksum;
768       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
769                              ip4_header_t, dst_address);
770       ip0->checksum = ip_csum_fold (sum0);
771
772       old_dst_port0 = tcp0->dst;
773       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
774         {
775           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
776             {
777               tcp0->dst = new_dst_port0;
778               sum0 = tcp0->checksum;
779               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
780                                      ip4_header_t, dst_address);
781               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
782                                      ip4_header_t /* cheat */, length);
783               tcp0->checksum = ip_csum_fold(sum0);
784             }
785           else
786             {
787               udp0->dst_port = new_dst_port0;
788               udp0->checksum = 0;
789             }
790         }
791       else
792         {
793           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
794             {
795               sum0 = tcp0->checksum;
796               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
797                                      ip4_header_t, dst_address);
798               tcp0->checksum = ip_csum_fold(sum0);
799             }
800         }
801       return 1;
802     }
803   return 0;
804 }
805
806 static inline void
807 snat_icmp_hairpinning (snat_main_t *sm,
808                        vlib_buffer_t * b0,
809                        ip4_header_t * ip0,
810                        icmp46_header_t * icmp0)
811 {
812   snat_session_key_t key0, sm0;
813   clib_bihash_kv_8_8_t kv0, value0;
814   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
815   ip_csum_t sum0;
816   snat_session_t *s0;
817
818   if (!icmp_is_error_message (icmp0))
819     {
820       icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
821       u16 icmp_id0 = echo0->identifier;
822       key0.addr = ip0->dst_address;
823       key0.port = icmp_id0;
824       key0.protocol = SNAT_PROTOCOL_ICMP;
825       key0.fib_index = sm->outside_fib_index;
826       kv0.key = key0.as_u64;
827
828       if (sm->num_workers > 1)
829         ti = (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread;
830       else
831         ti = sm->num_workers;
832
833       /* Check if destination is in active sessions */
834       if (clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0,
835                                   &value0))
836         {
837           /* or static mappings */
838           if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
839             {
840               new_dst_addr0 = sm0.addr.as_u32;
841               vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
842             }
843         }
844       else
845         {
846           si = value0.value;
847
848           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
849           new_dst_addr0 = s0->in2out.addr.as_u32;
850           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
851           echo0->identifier = s0->in2out.port;
852           sum0 = icmp0->checksum;
853           sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
854                                  icmp_echo_header_t, identifier);
855           icmp0->checksum = ip_csum_fold (sum0);
856         }
857
858       /* Destination is behind the same NAT, use internal address and port */
859       if (new_dst_addr0)
860         {
861           old_dst_addr0 = ip0->dst_address.as_u32;
862           ip0->dst_address.as_u32 = new_dst_addr0;
863           sum0 = ip0->checksum;
864           sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
865                                  ip4_header_t, dst_address);
866           ip0->checksum = ip_csum_fold (sum0);
867         }
868     }
869
870 }
871
872 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
873                                          vlib_buffer_t * b0,
874                                          ip4_header_t * ip0,
875                                          icmp46_header_t * icmp0,
876                                          u32 sw_if_index0,
877                                          u32 rx_fib_index0,
878                                          vlib_node_runtime_t * node,
879                                          u32 next0,
880                                          f64 now,
881                                          u32 thread_index,
882                                          snat_session_t ** p_s0)
883 {
884   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
885                       next0, thread_index, p_s0, 0);
886   snat_session_t * s0 = *p_s0;
887   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
888     {
889       /* Hairpinning */
890       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == 0)
891         snat_icmp_hairpinning(sm, b0, ip0, icmp0);
892       /* Accounting */
893       s0->last_heard = now;
894       s0->total_pkts++;
895       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
896       /* Per-user LRU list maintenance */
897       clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
898                          s0->per_user_index);
899       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
900                           s0->per_user_list_head_index,
901                           s0->per_user_index);
902     }
903   return next0;
904 }
905 static inline void
906 snat_hairpinning_unknown_proto (snat_main_t *sm,
907                                 vlib_buffer_t * b,
908                                 ip4_header_t * ip)
909 {
910   u32 old_addr, new_addr = 0, ti = 0;
911   clib_bihash_kv_8_8_t kv, value;
912   clib_bihash_kv_16_8_t s_kv, s_value;
913   nat_ed_ses_key_t key;
914   snat_session_key_t m_key;
915   snat_static_mapping_t *m;
916   ip_csum_t sum;
917   snat_session_t *s;
918
919   old_addr = ip->dst_address.as_u32;
920   key.l_addr.as_u32 = ip->dst_address.as_u32;
921   key.r_addr.as_u32 = ip->src_address.as_u32;
922   key.fib_index = sm->outside_fib_index;
923   key.proto = ip->protocol;
924   key.r_port = 0;
925   key.l_port = 0;
926   s_kv.key[0] = key.as_u64[0];
927   s_kv.key[1] = key.as_u64[1];
928   if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
929     {
930       m_key.addr = ip->dst_address;
931       m_key.fib_index = sm->outside_fib_index;
932       m_key.port = 0;
933       m_key.protocol = 0;
934       kv.key = m_key.as_u64;
935       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
936         return;
937
938       m = pool_elt_at_index (sm->static_mappings, value.value);
939       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
940         vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
941       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
942     }
943   else
944     {
945       if (sm->num_workers > 1)
946         ti = sm->worker_out2in_cb (ip, sm->outside_fib_index);
947       else
948         ti = sm->num_workers;
949
950       s = pool_elt_at_index (sm->per_thread_data[ti].sessions, s_value.value);
951       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
952         vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
953       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
954     }
955   sum = ip->checksum;
956   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
957   ip->checksum = ip_csum_fold (sum);
958 }
959
960 static snat_session_t *
961 snat_in2out_unknown_proto (snat_main_t *sm,
962                            vlib_buffer_t * b,
963                            ip4_header_t * ip,
964                            u32 rx_fib_index,
965                            u32 thread_index,
966                            f64 now,
967                            vlib_main_t * vm,
968                            vlib_node_runtime_t * node)
969 {
970   clib_bihash_kv_8_8_t kv, value;
971   clib_bihash_kv_16_8_t s_kv, s_value;
972   snat_static_mapping_t *m;
973   snat_session_key_t m_key;
974   u32 old_addr, new_addr = 0;
975   ip_csum_t sum;
976   snat_user_t *u;
977   dlist_elt_t *head, *elt;
978   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
979   u32 elt_index, head_index, ses_index;
980   snat_session_t * s;
981   nat_ed_ses_key_t key;
982   u32 address_index = ~0;
983   int i;
984   u8 is_sm = 0;
985
986   old_addr = ip->src_address.as_u32;
987
988   key.l_addr = ip->src_address;
989   key.r_addr = ip->dst_address;
990   key.fib_index = rx_fib_index;
991   key.proto = ip->protocol;
992   key.l_port = 0;
993   key.l_port = 0;
994   s_kv.key[0] = key.as_u64[0];
995   s_kv.key[1] = key.as_u64[1];
996
997   if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
998     {
999       s = pool_elt_at_index (tsm->sessions, s_value.value);
1000       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1001     }
1002   else
1003     {
1004       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
1005         {
1006           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
1007           nat_ipfix_logging_max_sessions(sm->max_translations);
1008           return 0;
1009         }
1010
1011       u = nat_user_get_or_create (sm, &ip->src_address, rx_fib_index,
1012                                   thread_index);
1013       if (!u)
1014         {
1015           clib_warning ("create NAT user failed");
1016           return 0;
1017         }
1018
1019       m_key.addr = ip->src_address;
1020       m_key.port = 0;
1021       m_key.protocol = 0;
1022       m_key.fib_index = rx_fib_index;
1023       kv.key = m_key.as_u64;
1024
1025       /* Try to find static mapping first */
1026       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
1027         {
1028           m = pool_elt_at_index (sm->static_mappings, value.value);
1029           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
1030           is_sm = 1;
1031           goto create_ses;
1032         }
1033       /* Fallback to 3-tuple key */
1034       else
1035         {
1036           /* Choose same out address as for TCP/UDP session to same destination */
1037           if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1038             {
1039               head_index = u->sessions_per_user_list_head_index;
1040               head = pool_elt_at_index (tsm->list_pool, head_index);
1041               elt_index = head->next;
1042               elt = pool_elt_at_index (tsm->list_pool, elt_index);
1043               ses_index = elt->value;
1044               while (ses_index != ~0)
1045                 {
1046                   s =  pool_elt_at_index (tsm->sessions, ses_index);
1047                   elt_index = elt->next;
1048                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1049                   ses_index = elt->value;
1050
1051                   if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
1052                     {
1053                       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1054                       address_index = s->outside_address_index;
1055
1056                       key.fib_index = sm->outside_fib_index;
1057                       key.l_addr.as_u32 = new_addr;
1058                       s_kv.key[0] = key.as_u64[0];
1059                       s_kv.key[1] = key.as_u64[1];
1060                       if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1061                         break;
1062
1063                       goto create_ses;
1064                     }
1065                 }
1066             }
1067           key.fib_index = sm->outside_fib_index;
1068           for (i = 0; i < vec_len (sm->addresses); i++)
1069             {
1070               key.l_addr.as_u32 = sm->addresses[i].addr.as_u32;
1071               s_kv.key[0] = key.as_u64[0];
1072               s_kv.key[1] = key.as_u64[1];
1073               if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1074                 {
1075                   new_addr = ip->src_address.as_u32 = key.l_addr.as_u32;
1076                   address_index = i;
1077                   goto create_ses;
1078                 }
1079             }
1080           return 0;
1081         }
1082
1083 create_ses:
1084       s = nat_session_alloc_or_recycle (sm, u, thread_index);
1085       if (!s)
1086         {
1087           clib_warning ("create NAT session failed");
1088           return 0;
1089         }
1090
1091       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1092       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
1093       s->outside_address_index = address_index;
1094       s->out2in.addr.as_u32 = new_addr;
1095       s->out2in.fib_index = sm->outside_fib_index;
1096       s->in2out.addr.as_u32 = old_addr;
1097       s->in2out.fib_index = rx_fib_index;
1098       s->in2out.port = s->out2in.port = ip->protocol;
1099       if (is_sm)
1100         {
1101           u->nstaticsessions++;
1102           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1103         }
1104       else
1105         {
1106           u->nsessions++;
1107         }
1108
1109       /* Add to lookup tables */
1110       key.l_addr.as_u32 = old_addr;
1111       key.r_addr = ip->dst_address;
1112       key.proto = ip->protocol;
1113       key.fib_index = rx_fib_index;
1114       s_kv.key[0] = key.as_u64[0];
1115       s_kv.key[1] = key.as_u64[1];
1116       s_kv.value = s - tsm->sessions;
1117       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1118         clib_warning ("in2out key add failed");
1119
1120       key.l_addr.as_u32 = new_addr;
1121       key.fib_index = sm->outside_fib_index;
1122       s_kv.key[0] = key.as_u64[0];
1123       s_kv.key[1] = key.as_u64[1];
1124       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1125         clib_warning ("out2in key add failed");
1126   }
1127
1128   /* Update IP checksum */
1129   sum = ip->checksum;
1130   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1131   ip->checksum = ip_csum_fold (sum);
1132
1133   /* Accounting */
1134   s->last_heard = now;
1135   s->total_pkts++;
1136   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1137   /* Per-user LRU list maintenance */
1138   clib_dlist_remove (tsm->list_pool, s->per_user_index);
1139   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1140                       s->per_user_index);
1141
1142   /* Hairpinning */
1143   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1144     snat_hairpinning_unknown_proto(sm, b, ip);
1145
1146   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1147     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1148
1149   return s;
1150 }
1151
1152 static snat_session_t *
1153 snat_in2out_lb (snat_main_t *sm,
1154                 vlib_buffer_t * b,
1155                 ip4_header_t * ip,
1156                 u32 rx_fib_index,
1157                 u32 thread_index,
1158                 f64 now,
1159                 vlib_main_t * vm,
1160                 vlib_node_runtime_t * node)
1161 {
1162   nat_ed_ses_key_t key;
1163   clib_bihash_kv_16_8_t s_kv, s_value;
1164   udp_header_t *udp = ip4_next_header (ip);
1165   tcp_header_t *tcp = (tcp_header_t *) udp;
1166   snat_session_t *s = 0;
1167   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1168   u32 old_addr, new_addr;
1169   u16 new_port, old_port;
1170   ip_csum_t sum;
1171   u32 proto = ip_proto_to_snat_proto (ip->protocol);
1172   snat_session_key_t e_key, l_key;
1173   snat_user_t *u;
1174
1175   old_addr = ip->src_address.as_u32;
1176
1177   key.l_addr = ip->src_address;
1178   key.r_addr = ip->dst_address;
1179   key.fib_index = rx_fib_index;
1180   key.proto = ip->protocol;
1181   key.r_port = udp->dst_port;
1182   key.l_port = udp->src_port;
1183   s_kv.key[0] = key.as_u64[0];
1184   s_kv.key[1] = key.as_u64[1];
1185
1186   if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
1187     {
1188       s = pool_elt_at_index (tsm->sessions, s_value.value);
1189     }
1190   else
1191     {
1192       if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index)))
1193         {
1194           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
1195           nat_ipfix_logging_max_sessions(sm->max_translations);
1196           return 0;
1197         }
1198
1199       l_key.addr = ip->src_address;
1200       l_key.port = udp->src_port;
1201       l_key.protocol = proto;
1202       l_key.fib_index = rx_fib_index;
1203       if (snat_static_mapping_match(sm, l_key, &e_key, 0, 0, 0))
1204         return 0;
1205
1206       u = nat_user_get_or_create (sm, &ip->src_address, rx_fib_index,
1207                                   thread_index);
1208       if (!u)
1209         {
1210           clib_warning ("create NAT user failed");
1211           return 0;
1212         }
1213
1214       s = nat_session_alloc_or_recycle (sm, u, thread_index);
1215       if (!s)
1216         {
1217           clib_warning ("create NAT session failed");
1218           return 0;
1219         }
1220
1221       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1222       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1223       s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
1224       s->outside_address_index = ~0;
1225       s->in2out = l_key;
1226       s->out2in = e_key;
1227       u->nstaticsessions++;
1228
1229       /* Add to lookup tables */
1230       s_kv.value = s - tsm->sessions;
1231       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1232         clib_warning ("in2out-ed key add failed");
1233
1234       key.l_addr = e_key.addr;
1235       key.fib_index = e_key.fib_index;
1236       key.l_port = e_key.port;
1237       s_kv.key[0] = key.as_u64[0];
1238       s_kv.key[1] = key.as_u64[1];
1239       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1240         clib_warning ("out2in-ed key add failed");
1241     }
1242
1243   new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1244
1245   /* Update IP checksum */
1246   sum = ip->checksum;
1247   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1248   if (is_twice_nat_session (s))
1249     sum = ip_csum_update (sum, ip->dst_address.as_u32,
1250                           s->ext_host_addr.as_u32, ip4_header_t, dst_address);
1251   ip->checksum = ip_csum_fold (sum);
1252
1253   if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
1254     {
1255       old_port = tcp->src_port;
1256       tcp->src_port = s->out2in.port;
1257       new_port = tcp->src_port;
1258
1259       sum = tcp->checksum;
1260       sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1261       sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
1262       if (is_twice_nat_session (s))
1263         {
1264           sum = ip_csum_update (sum, ip->dst_address.as_u32,
1265                                 s->ext_host_addr.as_u32, ip4_header_t,
1266                                 dst_address);
1267           sum = ip_csum_update (sum, tcp->dst_port, s->ext_host_port,
1268                                 ip4_header_t, length);
1269           tcp->dst_port = s->ext_host_port;
1270           ip->dst_address.as_u32 = s->ext_host_addr.as_u32;
1271         }
1272       tcp->checksum = ip_csum_fold(sum);
1273     }
1274   else
1275     {
1276       udp->src_port = s->out2in.port;
1277       if (is_twice_nat_session (s))
1278         {
1279           udp->dst_port = s->ext_host_port;
1280           ip->dst_address.as_u32 = s->ext_host_addr.as_u32;
1281         }
1282       udp->checksum = 0;
1283     }
1284
1285   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1286     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1287
1288   /* Accounting */
1289   s->last_heard = now;
1290   s->total_pkts++;
1291   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1292   /* Per-user LRU list maintenance */
1293   clib_dlist_remove (tsm->list_pool, s->per_user_index);
1294   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1295                       s->per_user_index);
1296   return s;
1297 }
1298
1299 static inline uword
1300 snat_in2out_node_fn_inline (vlib_main_t * vm,
1301                             vlib_node_runtime_t * node,
1302                             vlib_frame_t * frame, int is_slow_path,
1303                             int is_output_feature)
1304 {
1305   u32 n_left_from, * from, * to_next;
1306   snat_in2out_next_t next_index;
1307   u32 pkts_processed = 0;
1308   snat_main_t * sm = &snat_main;
1309   f64 now = vlib_time_now (vm);
1310   u32 stats_node_index;
1311   u32 thread_index = vlib_get_thread_index ();
1312
1313   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
1314     snat_in2out_node.index;
1315
1316   from = vlib_frame_vector_args (frame);
1317   n_left_from = frame->n_vectors;
1318   next_index = node->cached_next_index;
1319
1320   while (n_left_from > 0)
1321     {
1322       u32 n_left_to_next;
1323
1324       vlib_get_next_frame (vm, node, next_index,
1325                            to_next, n_left_to_next);
1326
1327       while (n_left_from >= 4 && n_left_to_next >= 2)
1328         {
1329           u32 bi0, bi1;
1330           vlib_buffer_t * b0, * b1;
1331           u32 next0, next1;
1332           u32 sw_if_index0, sw_if_index1;
1333           ip4_header_t * ip0, * ip1;
1334           ip_csum_t sum0, sum1;
1335           u32 new_addr0, old_addr0, new_addr1, old_addr1;
1336           u16 old_port0, new_port0, old_port1, new_port1;
1337           udp_header_t * udp0, * udp1;
1338           tcp_header_t * tcp0, * tcp1;
1339           icmp46_header_t * icmp0, * icmp1;
1340           snat_session_key_t key0, key1;
1341           u32 rx_fib_index0, rx_fib_index1;
1342           u32 proto0, proto1;
1343           snat_session_t * s0 = 0, * s1 = 0;
1344           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1345           u32 iph_offset0 = 0, iph_offset1 = 0;
1346
1347           /* Prefetch next iteration. */
1348           {
1349             vlib_buffer_t * p2, * p3;
1350
1351             p2 = vlib_get_buffer (vm, from[2]);
1352             p3 = vlib_get_buffer (vm, from[3]);
1353
1354             vlib_prefetch_buffer_header (p2, LOAD);
1355             vlib_prefetch_buffer_header (p3, LOAD);
1356
1357             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1358             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1359           }
1360
1361           /* speculatively enqueue b0 and b1 to the current next frame */
1362           to_next[0] = bi0 = from[0];
1363           to_next[1] = bi1 = from[1];
1364           from += 2;
1365           to_next += 2;
1366           n_left_from -= 2;
1367           n_left_to_next -= 2;
1368
1369           b0 = vlib_get_buffer (vm, bi0);
1370           b1 = vlib_get_buffer (vm, bi1);
1371
1372           if (is_output_feature)
1373             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1374
1375           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1376                  iph_offset0);
1377
1378           udp0 = ip4_next_header (ip0);
1379           tcp0 = (tcp_header_t *) udp0;
1380           icmp0 = (icmp46_header_t *) udp0;
1381
1382           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1383           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1384                                    sw_if_index0);
1385
1386           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1387
1388           if (PREDICT_FALSE(ip0->ttl == 1))
1389             {
1390               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1391               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1392                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1393                                            0);
1394               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1395               goto trace00;
1396             }
1397
1398           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1399
1400           /* Next configured feature, probably ip4-lookup */
1401           if (is_slow_path)
1402             {
1403               if (PREDICT_FALSE (proto0 == ~0))
1404                 {
1405                   s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1406                                                   thread_index, now, vm, node);
1407                   if (!s0)
1408                     next0 = SNAT_IN2OUT_NEXT_DROP;
1409                   goto trace00;
1410                 }
1411
1412               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1413                 {
1414                   next0 = icmp_in2out_slow_path
1415                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
1416                      node, next0, now, thread_index, &s0);
1417                   goto trace00;
1418                 }
1419             }
1420           else
1421             {
1422               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1423                 {
1424                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1425                   goto trace00;
1426                 }
1427
1428               if (ip4_is_fragment (ip0))
1429                 {
1430                   next0 = SNAT_IN2OUT_NEXT_REASS;
1431                   goto trace00;
1432                 }
1433             }
1434
1435           key0.addr = ip0->src_address;
1436           key0.port = udp0->src_port;
1437           key0.protocol = proto0;
1438           key0.fib_index = rx_fib_index0;
1439
1440           kv0.key = key0.as_u64;
1441
1442           if (PREDICT_FALSE (clib_bihash_search_8_8 (
1443               &sm->per_thread_data[thread_index].in2out, &kv0, &value0) != 0))
1444             {
1445               if (is_slow_path)
1446                 {
1447                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1448                       ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature)
1449                     goto trace00;
1450
1451                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1452                                      &s0, node, next0, thread_index);
1453                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1454                     goto trace00;
1455                 }
1456               else
1457                 {
1458                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1459                   goto trace00;
1460                 }
1461             }
1462           else
1463             {
1464               if (PREDICT_FALSE (value0.value == ~0ULL))
1465                 {
1466                   if (is_slow_path)
1467                     {
1468                       s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
1469                                           thread_index, now, vm, node);
1470                       if (!s0 && !sm->forwarding_enabled)
1471                         next0 = SNAT_IN2OUT_NEXT_DROP;
1472                       goto trace00;
1473                     }
1474                   else
1475                     {
1476                       next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1477                       goto trace00;
1478                     }
1479                 }
1480               else
1481                 {
1482                   s0 = pool_elt_at_index (
1483                     sm->per_thread_data[thread_index].sessions,
1484                     value0.value);
1485                 }
1486             }
1487
1488           b0->flags |= VNET_BUFFER_F_IS_NATED;
1489
1490           old_addr0 = ip0->src_address.as_u32;
1491           ip0->src_address = s0->out2in.addr;
1492           new_addr0 = ip0->src_address.as_u32;
1493           if (!is_output_feature)
1494             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1495
1496           sum0 = ip0->checksum;
1497           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1498                                  ip4_header_t,
1499                                  src_address /* changed member */);
1500           ip0->checksum = ip_csum_fold (sum0);
1501
1502           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1503             {
1504               old_port0 = tcp0->src_port;
1505               tcp0->src_port = s0->out2in.port;
1506               new_port0 = tcp0->src_port;
1507
1508               sum0 = tcp0->checksum;
1509               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1510                                      ip4_header_t,
1511                                      dst_address /* changed member */);
1512               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1513                                      ip4_header_t /* cheat */,
1514                                      length /* changed member */);
1515               tcp0->checksum = ip_csum_fold(sum0);
1516             }
1517           else
1518             {
1519               old_port0 = udp0->src_port;
1520               udp0->src_port = s0->out2in.port;
1521               udp0->checksum = 0;
1522             }
1523
1524           /* Accounting */
1525           s0->last_heard = now;
1526           s0->total_pkts++;
1527           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1528           /* Per-user LRU list maintenance */
1529           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1530                              s0->per_user_index);
1531           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1532                               s0->per_user_list_head_index,
1533                               s0->per_user_index);
1534         trace00:
1535
1536           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1537                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1538             {
1539               snat_in2out_trace_t *t =
1540                  vlib_add_trace (vm, node, b0, sizeof (*t));
1541               t->is_slow_path = is_slow_path;
1542               t->sw_if_index = sw_if_index0;
1543               t->next_index = next0;
1544                   t->session_index = ~0;
1545               if (s0)
1546                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1547             }
1548
1549           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1550
1551           if (is_output_feature)
1552             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1553
1554           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1555                  iph_offset1);
1556
1557           udp1 = ip4_next_header (ip1);
1558           tcp1 = (tcp_header_t *) udp1;
1559           icmp1 = (icmp46_header_t *) udp1;
1560
1561           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1562           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1563                                    sw_if_index1);
1564
1565           if (PREDICT_FALSE(ip1->ttl == 1))
1566             {
1567               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1568               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1569                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1570                                            0);
1571               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1572               goto trace01;
1573             }
1574
1575           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1576
1577           /* Next configured feature, probably ip4-lookup */
1578           if (is_slow_path)
1579             {
1580               if (PREDICT_FALSE (proto1 == ~0))
1581                 {
1582                   s1 = snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1,
1583                                                   thread_index, now, vm, node);
1584                   if (!s1)
1585                     next1 = SNAT_IN2OUT_NEXT_DROP;
1586                   goto trace01;
1587                 }
1588
1589               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1590                 {
1591                   next1 = icmp_in2out_slow_path
1592                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1593                      next1, now, thread_index, &s1);
1594                   goto trace01;
1595                 }
1596             }
1597           else
1598             {
1599               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1600                 {
1601                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1602                   goto trace01;
1603                 }
1604
1605               if (ip4_is_fragment (ip1))
1606                 {
1607                   next1 = SNAT_IN2OUT_NEXT_REASS;
1608                   goto trace01;
1609                 }
1610             }
1611
1612           b1->flags |= VNET_BUFFER_F_IS_NATED;
1613
1614           key1.addr = ip1->src_address;
1615           key1.port = udp1->src_port;
1616           key1.protocol = proto1;
1617           key1.fib_index = rx_fib_index1;
1618
1619           kv1.key = key1.as_u64;
1620
1621             if (PREDICT_FALSE(clib_bihash_search_8_8 (
1622                 &sm->per_thread_data[thread_index].in2out, &kv1, &value1) != 0))
1623             {
1624               if (is_slow_path)
1625                 {
1626                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
1627                       ip1, proto1, rx_fib_index1, thread_index)) && !is_output_feature)
1628                     goto trace01;
1629
1630                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1631                                      &s1, node, next1, thread_index);
1632                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1633                     goto trace01;
1634                 }
1635               else
1636                 {
1637                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1638                   goto trace01;
1639                 }
1640             }
1641           else
1642             {
1643               if (PREDICT_FALSE (value1.value == ~0ULL))
1644                 {
1645                   if (is_slow_path)
1646                     {
1647                       s1 = snat_in2out_lb(sm, b1, ip1, rx_fib_index1,
1648                                           thread_index, now, vm, node);
1649                       if (!s1 && !sm->forwarding_enabled)
1650                         next1 = SNAT_IN2OUT_NEXT_DROP;
1651                       goto trace01;
1652                     }
1653                   else
1654                     {
1655                       next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1656                       goto trace01;
1657                     }
1658                 }
1659               else
1660                 {
1661                   s1 = pool_elt_at_index (
1662                     sm->per_thread_data[thread_index].sessions,
1663                     value1.value);
1664                 }
1665             }
1666
1667           old_addr1 = ip1->src_address.as_u32;
1668           ip1->src_address = s1->out2in.addr;
1669           new_addr1 = ip1->src_address.as_u32;
1670           if (!is_output_feature)
1671             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1672
1673           sum1 = ip1->checksum;
1674           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1675                                  ip4_header_t,
1676                                  src_address /* changed member */);
1677           ip1->checksum = ip_csum_fold (sum1);
1678
1679           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1680             {
1681               old_port1 = tcp1->src_port;
1682               tcp1->src_port = s1->out2in.port;
1683               new_port1 = tcp1->src_port;
1684
1685               sum1 = tcp1->checksum;
1686               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1687                                      ip4_header_t,
1688                                      dst_address /* changed member */);
1689               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1690                                      ip4_header_t /* cheat */,
1691                                      length /* changed member */);
1692               tcp1->checksum = ip_csum_fold(sum1);
1693             }
1694           else
1695             {
1696               old_port1 = udp1->src_port;
1697               udp1->src_port = s1->out2in.port;
1698               udp1->checksum = 0;
1699             }
1700
1701           /* Accounting */
1702           s1->last_heard = now;
1703           s1->total_pkts++;
1704           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1705           /* Per-user LRU list maintenance */
1706           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1707                              s1->per_user_index);
1708           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1709                               s1->per_user_list_head_index,
1710                               s1->per_user_index);
1711         trace01:
1712
1713           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1714                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1715             {
1716               snat_in2out_trace_t *t =
1717                  vlib_add_trace (vm, node, b1, sizeof (*t));
1718               t->sw_if_index = sw_if_index1;
1719               t->next_index = next1;
1720               t->session_index = ~0;
1721               if (s1)
1722                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1723             }
1724
1725           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1726
1727           /* verify speculative enqueues, maybe switch current next frame */
1728           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1729                                            to_next, n_left_to_next,
1730                                            bi0, bi1, next0, next1);
1731         }
1732
1733       while (n_left_from > 0 && n_left_to_next > 0)
1734         {
1735           u32 bi0;
1736           vlib_buffer_t * b0;
1737           u32 next0;
1738           u32 sw_if_index0;
1739           ip4_header_t * ip0;
1740           ip_csum_t sum0;
1741           u32 new_addr0, old_addr0;
1742           u16 old_port0, new_port0;
1743           udp_header_t * udp0;
1744           tcp_header_t * tcp0;
1745           icmp46_header_t * icmp0;
1746           snat_session_key_t key0;
1747           u32 rx_fib_index0;
1748           u32 proto0;
1749           snat_session_t * s0 = 0;
1750           clib_bihash_kv_8_8_t kv0, value0;
1751           u32 iph_offset0 = 0;
1752
1753           /* speculatively enqueue b0 to the current next frame */
1754           bi0 = from[0];
1755           to_next[0] = bi0;
1756           from += 1;
1757           to_next += 1;
1758           n_left_from -= 1;
1759           n_left_to_next -= 1;
1760
1761           b0 = vlib_get_buffer (vm, bi0);
1762           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1763
1764           if (is_output_feature)
1765             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1766
1767           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1768                  iph_offset0);
1769
1770           udp0 = ip4_next_header (ip0);
1771           tcp0 = (tcp_header_t *) udp0;
1772           icmp0 = (icmp46_header_t *) udp0;
1773
1774           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1775           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1776                                    sw_if_index0);
1777
1778           if (PREDICT_FALSE(ip0->ttl == 1))
1779             {
1780               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1781               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1782                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1783                                            0);
1784               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1785               goto trace0;
1786             }
1787
1788           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1789
1790           /* Next configured feature, probably ip4-lookup */
1791           if (is_slow_path)
1792             {
1793               if (PREDICT_FALSE (proto0 == ~0))
1794                 {
1795                   s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1796                                                   thread_index, now, vm, node);
1797                   if (!s0)
1798                     next0 = SNAT_IN2OUT_NEXT_DROP;
1799                   goto trace0;
1800                 }
1801
1802               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1803                 {
1804                   next0 = icmp_in2out_slow_path
1805                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1806                      next0, now, thread_index, &s0);
1807                   goto trace0;
1808                 }
1809             }
1810           else
1811             {
1812               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1813                 {
1814                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1815                   goto trace0;
1816                 }
1817
1818               if (ip4_is_fragment (ip0))
1819                 {
1820                   next0 = SNAT_IN2OUT_NEXT_REASS;
1821                   goto trace0;
1822                 }
1823             }
1824
1825           key0.addr = ip0->src_address;
1826           key0.port = udp0->src_port;
1827           key0.protocol = proto0;
1828           key0.fib_index = rx_fib_index0;
1829
1830           kv0.key = key0.as_u64;
1831
1832           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out,
1833                                       &kv0, &value0))
1834             {
1835               if (is_slow_path)
1836                 {
1837                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1838                       ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature)
1839                     goto trace0;
1840
1841                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1842                                      &s0, node, next0, thread_index);
1843
1844                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1845                     goto trace0;
1846                 }
1847               else
1848                 {
1849                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1850                   goto trace0;
1851                 }
1852             }
1853           else
1854             {
1855               if (PREDICT_FALSE (value0.value == ~0ULL))
1856                 {
1857                   if (is_slow_path)
1858                     {
1859                       s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
1860                                           thread_index, now, vm, node);
1861                       if (!s0 && !sm->forwarding_enabled)
1862                         next0 = SNAT_IN2OUT_NEXT_DROP;
1863                       goto trace0;
1864                     }
1865                   else
1866                     {
1867                       next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1868                       goto trace0;
1869                     }
1870                 }
1871               else
1872                 {
1873                   s0 = pool_elt_at_index (
1874                     sm->per_thread_data[thread_index].sessions,
1875                     value0.value);
1876                 }
1877             }
1878
1879           b0->flags |= VNET_BUFFER_F_IS_NATED;
1880
1881           old_addr0 = ip0->src_address.as_u32;
1882           ip0->src_address = s0->out2in.addr;
1883           new_addr0 = ip0->src_address.as_u32;
1884           if (!is_output_feature)
1885             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1886
1887           sum0 = ip0->checksum;
1888           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1889                                  ip4_header_t,
1890                                  src_address /* changed member */);
1891           ip0->checksum = ip_csum_fold (sum0);
1892
1893           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1894             {
1895               old_port0 = tcp0->src_port;
1896               tcp0->src_port = s0->out2in.port;
1897               new_port0 = tcp0->src_port;
1898
1899               sum0 = tcp0->checksum;
1900               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1901                                      ip4_header_t,
1902                                      dst_address /* changed member */);
1903               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1904                                      ip4_header_t /* cheat */,
1905                                      length /* changed member */);
1906               tcp0->checksum = ip_csum_fold(sum0);
1907             }
1908           else
1909             {
1910               old_port0 = udp0->src_port;
1911               udp0->src_port = s0->out2in.port;
1912               udp0->checksum = 0;
1913             }
1914
1915           /* Accounting */
1916           s0->last_heard = now;
1917           s0->total_pkts++;
1918           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1919           /* Per-user LRU list maintenance */
1920           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1921                              s0->per_user_index);
1922           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1923                               s0->per_user_list_head_index,
1924                               s0->per_user_index);
1925
1926         trace0:
1927           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1928                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1929             {
1930               snat_in2out_trace_t *t =
1931                  vlib_add_trace (vm, node, b0, sizeof (*t));
1932               t->is_slow_path = is_slow_path;
1933               t->sw_if_index = sw_if_index0;
1934               t->next_index = next0;
1935                   t->session_index = ~0;
1936               if (s0)
1937                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1938             }
1939
1940           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1941
1942           /* verify speculative enqueue, maybe switch current next frame */
1943           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1944                                            to_next, n_left_to_next,
1945                                            bi0, next0);
1946         }
1947
1948       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1949     }
1950
1951   vlib_node_increment_counter (vm, stats_node_index,
1952                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1953                                pkts_processed);
1954   return frame->n_vectors;
1955 }
1956
1957 static uword
1958 snat_in2out_fast_path_fn (vlib_main_t * vm,
1959                           vlib_node_runtime_t * node,
1960                           vlib_frame_t * frame)
1961 {
1962   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0);
1963 }
1964
1965 VLIB_REGISTER_NODE (snat_in2out_node) = {
1966   .function = snat_in2out_fast_path_fn,
1967   .name = "nat44-in2out",
1968   .vector_size = sizeof (u32),
1969   .format_trace = format_snat_in2out_trace,
1970   .type = VLIB_NODE_TYPE_INTERNAL,
1971
1972   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1973   .error_strings = snat_in2out_error_strings,
1974
1975   .runtime_data_bytes = sizeof (snat_runtime_t),
1976
1977   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1978
1979   /* edit / add dispositions here */
1980   .next_nodes = {
1981     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1982     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1983     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
1984     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1985     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
1986   },
1987 };
1988
1989 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1990
1991 static uword
1992 snat_in2out_output_fast_path_fn (vlib_main_t * vm,
1993                                  vlib_node_runtime_t * node,
1994                                  vlib_frame_t * frame)
1995 {
1996   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1);
1997 }
1998
1999 VLIB_REGISTER_NODE (snat_in2out_output_node) = {
2000   .function = snat_in2out_output_fast_path_fn,
2001   .name = "nat44-in2out-output",
2002   .vector_size = sizeof (u32),
2003   .format_trace = format_snat_in2out_trace,
2004   .type = VLIB_NODE_TYPE_INTERNAL,
2005
2006   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2007   .error_strings = snat_in2out_error_strings,
2008
2009   .runtime_data_bytes = sizeof (snat_runtime_t),
2010
2011   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2012
2013   /* edit / add dispositions here */
2014   .next_nodes = {
2015     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2016     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2017     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2018     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2019     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2020   },
2021 };
2022
2023 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
2024                               snat_in2out_output_fast_path_fn);
2025
2026 static uword
2027 snat_in2out_slow_path_fn (vlib_main_t * vm,
2028                           vlib_node_runtime_t * node,
2029                           vlib_frame_t * frame)
2030 {
2031   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0);
2032 }
2033
2034 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
2035   .function = snat_in2out_slow_path_fn,
2036   .name = "nat44-in2out-slowpath",
2037   .vector_size = sizeof (u32),
2038   .format_trace = format_snat_in2out_trace,
2039   .type = VLIB_NODE_TYPE_INTERNAL,
2040
2041   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2042   .error_strings = snat_in2out_error_strings,
2043
2044   .runtime_data_bytes = sizeof (snat_runtime_t),
2045
2046   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2047
2048   /* edit / add dispositions here */
2049   .next_nodes = {
2050     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2051     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2052     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2053     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2054     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2055   },
2056 };
2057
2058 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
2059                               snat_in2out_slow_path_fn);
2060
2061 static uword
2062 snat_in2out_output_slow_path_fn (vlib_main_t * vm,
2063                                  vlib_node_runtime_t * node,
2064                                  vlib_frame_t * frame)
2065 {
2066   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1);
2067 }
2068
2069 VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
2070   .function = snat_in2out_output_slow_path_fn,
2071   .name = "nat44-in2out-output-slowpath",
2072   .vector_size = sizeof (u32),
2073   .format_trace = format_snat_in2out_trace,
2074   .type = VLIB_NODE_TYPE_INTERNAL,
2075
2076   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2077   .error_strings = snat_in2out_error_strings,
2078
2079   .runtime_data_bytes = sizeof (snat_runtime_t),
2080
2081   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2082
2083   /* edit / add dispositions here */
2084   .next_nodes = {
2085     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2086     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2087     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2088     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2089     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2090   },
2091 };
2092
2093 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
2094                               snat_in2out_output_slow_path_fn);
2095
2096 extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local;
2097
2098 static uword
2099 nat44_hairpinning_fn (vlib_main_t * vm,
2100                       vlib_node_runtime_t * node,
2101                       vlib_frame_t * frame)
2102 {
2103   u32 n_left_from, * from, * to_next;
2104   snat_in2out_next_t next_index;
2105   u32 pkts_processed = 0;
2106   snat_main_t * sm = &snat_main;
2107   vnet_feature_main_t *fm = &feature_main;
2108   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
2109   vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index];
2110
2111   from = vlib_frame_vector_args (frame);
2112   n_left_from = frame->n_vectors;
2113   next_index = node->cached_next_index;
2114
2115   while (n_left_from > 0)
2116     {
2117       u32 n_left_to_next;
2118
2119       vlib_get_next_frame (vm, node, next_index,
2120                            to_next, n_left_to_next);
2121
2122       while (n_left_from > 0 && n_left_to_next > 0)
2123         {
2124           u32 bi0;
2125           vlib_buffer_t * b0;
2126           u32 next0;
2127           ip4_header_t * ip0;
2128           u32 proto0;
2129           udp_header_t * udp0;
2130           tcp_header_t * tcp0;
2131
2132           /* speculatively enqueue b0 to the current next frame */
2133           bi0 = from[0];
2134           to_next[0] = bi0;
2135           from += 1;
2136           to_next += 1;
2137           n_left_from -= 1;
2138           n_left_to_next -= 1;
2139
2140           b0 = vlib_get_buffer (vm, bi0);
2141           ip0 = vlib_buffer_get_current (b0);
2142           udp0 = ip4_next_header (ip0);
2143           tcp0 = (tcp_header_t *) udp0;
2144
2145           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2146
2147           vnet_get_config_data (&cm->config_main, &b0->current_config_index,
2148                                 &next0, 0);
2149
2150           if (snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0))
2151             next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2152
2153           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2154
2155           /* verify speculative enqueue, maybe switch current next frame */
2156           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2157                                            to_next, n_left_to_next,
2158                                            bi0, next0);
2159          }
2160
2161       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2162     }
2163
2164   vlib_node_increment_counter (vm, nat44_hairpinning_node.index,
2165                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2166                                pkts_processed);
2167   return frame->n_vectors;
2168 }
2169
2170 VLIB_REGISTER_NODE (nat44_hairpinning_node) = {
2171   .function = nat44_hairpinning_fn,
2172   .name = "nat44-hairpinning",
2173   .vector_size = sizeof (u32),
2174   .type = VLIB_NODE_TYPE_INTERNAL,
2175   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2176   .error_strings = snat_in2out_error_strings,
2177   .n_next_nodes = 2,
2178   .next_nodes = {
2179     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2180     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2181   },
2182 };
2183
2184 VLIB_NODE_FUNCTION_MULTIARCH (nat44_hairpinning_node,
2185                               nat44_hairpinning_fn);
2186
2187 static inline void
2188 nat44_reass_hairpinning (snat_main_t *sm,
2189                          vlib_buffer_t * b0,
2190                          ip4_header_t * ip0,
2191                          u16 sport,
2192                          u16 dport,
2193                          u32 proto0)
2194 {
2195   snat_session_key_t key0, sm0;
2196   snat_session_t * s0;
2197   clib_bihash_kv_8_8_t kv0, value0;
2198   ip_csum_t sum0;
2199   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
2200   u16 new_dst_port0, old_dst_port0;
2201   udp_header_t * udp0;
2202   tcp_header_t * tcp0;
2203
2204   key0.addr = ip0->dst_address;
2205   key0.port = dport;
2206   key0.protocol = proto0;
2207   key0.fib_index = sm->outside_fib_index;
2208   kv0.key = key0.as_u64;
2209
2210   udp0 = ip4_next_header (ip0);
2211
2212   /* Check if destination is static mappings */
2213   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
2214     {
2215       new_dst_addr0 = sm0.addr.as_u32;
2216       new_dst_port0 = sm0.port;
2217       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2218     }
2219   /* or active sessions */
2220   else
2221     {
2222       if (sm->num_workers > 1)
2223         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
2224       else
2225         ti = sm->num_workers;
2226
2227       if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
2228         {
2229           si = value0.value;
2230           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
2231           new_dst_addr0 = s0->in2out.addr.as_u32;
2232           new_dst_port0 = s0->in2out.port;
2233           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
2234         }
2235     }
2236
2237   /* Destination is behind the same NAT, use internal address and port */
2238   if (new_dst_addr0)
2239     {
2240       old_dst_addr0 = ip0->dst_address.as_u32;
2241       ip0->dst_address.as_u32 = new_dst_addr0;
2242       sum0 = ip0->checksum;
2243       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2244                              ip4_header_t, dst_address);
2245       ip0->checksum = ip_csum_fold (sum0);
2246
2247       old_dst_port0 = dport;
2248       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0 &&
2249                        ip4_is_first_fragment (ip0)))
2250         {
2251           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2252             {
2253               tcp0 = ip4_next_header (ip0);
2254               tcp0->dst = new_dst_port0;
2255               sum0 = tcp0->checksum;
2256               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2257                                      ip4_header_t, dst_address);
2258               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
2259                                      ip4_header_t /* cheat */, length);
2260               tcp0->checksum = ip_csum_fold(sum0);
2261             }
2262           else
2263             {
2264               udp0->dst_port = new_dst_port0;
2265               udp0->checksum = 0;
2266             }
2267         }
2268       else
2269         {
2270           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2271             {
2272               tcp0 = ip4_next_header (ip0);
2273               sum0 = tcp0->checksum;
2274               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2275                                      ip4_header_t, dst_address);
2276               tcp0->checksum = ip_csum_fold(sum0);
2277             }
2278         }
2279     }
2280 }
2281
2282 static uword
2283 nat44_in2out_reass_node_fn (vlib_main_t * vm,
2284                             vlib_node_runtime_t * node,
2285                             vlib_frame_t * frame)
2286 {
2287   u32 n_left_from, *from, *to_next;
2288   snat_in2out_next_t next_index;
2289   u32 pkts_processed = 0;
2290   snat_main_t *sm = &snat_main;
2291   f64 now = vlib_time_now (vm);
2292   u32 thread_index = vlib_get_thread_index ();
2293   snat_main_per_thread_data_t *per_thread_data =
2294     &sm->per_thread_data[thread_index];
2295   u32 *fragments_to_drop = 0;
2296   u32 *fragments_to_loopback = 0;
2297
2298   from = vlib_frame_vector_args (frame);
2299   n_left_from = frame->n_vectors;
2300   next_index = node->cached_next_index;
2301
2302   while (n_left_from > 0)
2303     {
2304       u32 n_left_to_next;
2305
2306       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2307
2308       while (n_left_from > 0 && n_left_to_next > 0)
2309        {
2310           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
2311           vlib_buffer_t *b0;
2312           u32 next0;
2313           u8 cached0 = 0;
2314           ip4_header_t *ip0;
2315           nat_reass_ip4_t *reass0;
2316           udp_header_t * udp0;
2317           tcp_header_t * tcp0;
2318           snat_session_key_t key0;
2319           clib_bihash_kv_8_8_t kv0, value0;
2320           snat_session_t * s0 = 0;
2321           u16 old_port0, new_port0;
2322           ip_csum_t sum0;
2323
2324           /* speculatively enqueue b0 to the current next frame */
2325           bi0 = from[0];
2326           to_next[0] = bi0;
2327           from += 1;
2328           to_next += 1;
2329           n_left_from -= 1;
2330           n_left_to_next -= 1;
2331
2332           b0 = vlib_get_buffer (vm, bi0);
2333           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2334
2335           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2336           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2337                                                                sw_if_index0);
2338
2339           if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
2340             {
2341               next0 = SNAT_IN2OUT_NEXT_DROP;
2342               b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
2343               goto trace0;
2344             }
2345
2346           ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
2347           udp0 = ip4_next_header (ip0);
2348           tcp0 = (tcp_header_t *) udp0;
2349           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2350
2351           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
2352                                                  ip0->dst_address,
2353                                                  ip0->fragment_id,
2354                                                  ip0->protocol,
2355                                                  1,
2356                                                  &fragments_to_drop);
2357
2358           if (PREDICT_FALSE (!reass0))
2359             {
2360               next0 = SNAT_IN2OUT_NEXT_DROP;
2361               b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_REASS];
2362               goto trace0;
2363             }
2364
2365           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
2366             {
2367               key0.addr = ip0->src_address;
2368               key0.port = udp0->src_port;
2369               key0.protocol = proto0;
2370               key0.fib_index = rx_fib_index0;
2371               kv0.key = key0.as_u64;
2372
2373               if (clib_bihash_search_8_8 (&per_thread_data->in2out, &kv0, &value0))
2374                 {
2375                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
2376                       ip0, proto0, rx_fib_index0, thread_index)))
2377                     goto trace0;
2378
2379                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
2380                                      &s0, node, next0, thread_index);
2381
2382                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
2383                     goto trace0;
2384
2385                   reass0->sess_index = s0 - per_thread_data->sessions;
2386                 }
2387               else
2388                 {
2389                   s0 = pool_elt_at_index (per_thread_data->sessions,
2390                                           value0.value);
2391                   reass0->sess_index = value0.value;
2392                 }
2393               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
2394             }
2395           else
2396             {
2397               if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
2398                 {
2399                   if (nat_ip4_reass_add_fragment (reass0, bi0))
2400                     {
2401                       b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_FRAG];
2402                       next0 = SNAT_IN2OUT_NEXT_DROP;
2403                       goto trace0;
2404                     }
2405                   cached0 = 1;
2406                   goto trace0;
2407                 }
2408               s0 = pool_elt_at_index (per_thread_data->sessions,
2409                                       reass0->sess_index);
2410             }
2411
2412           old_addr0 = ip0->src_address.as_u32;
2413           ip0->src_address = s0->out2in.addr;
2414           new_addr0 = ip0->src_address.as_u32;
2415           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
2416
2417           sum0 = ip0->checksum;
2418           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2419                                  ip4_header_t,
2420                                  src_address /* changed member */);
2421           ip0->checksum = ip_csum_fold (sum0);
2422
2423           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
2424             {
2425               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2426                 {
2427                   old_port0 = tcp0->src_port;
2428                   tcp0->src_port = s0->out2in.port;
2429                   new_port0 = tcp0->src_port;
2430
2431                   sum0 = tcp0->checksum;
2432                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2433                                          ip4_header_t,
2434                                          dst_address /* changed member */);
2435                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2436                                          ip4_header_t /* cheat */,
2437                                          length /* changed member */);
2438                   tcp0->checksum = ip_csum_fold(sum0);
2439                 }
2440               else
2441                 {
2442                   old_port0 = udp0->src_port;
2443                   udp0->src_port = s0->out2in.port;
2444                   udp0->checksum = 0;
2445                 }
2446             }
2447
2448           /* Hairpinning */
2449           nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port,
2450                                    s0->ext_host_port, proto0);
2451
2452           /* Accounting */
2453           s0->last_heard = now;
2454           s0->total_pkts++;
2455           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
2456           /* Per-user LRU list maintenance */
2457           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
2458                              s0->per_user_index);
2459           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
2460                               s0->per_user_list_head_index,
2461                               s0->per_user_index);
2462
2463         trace0:
2464           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2465                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2466             {
2467               nat44_in2out_reass_trace_t *t =
2468                  vlib_add_trace (vm, node, b0, sizeof (*t));
2469               t->cached = cached0;
2470               t->sw_if_index = sw_if_index0;
2471               t->next_index = next0;
2472             }
2473
2474           if (cached0)
2475             {
2476               n_left_to_next++;
2477               to_next--;
2478             }
2479           else
2480             {
2481               pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2482
2483               /* verify speculative enqueue, maybe switch current next frame */
2484               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2485                                                to_next, n_left_to_next,
2486                                                bi0, next0);
2487             }
2488
2489           if (n_left_from == 0 && vec_len (fragments_to_loopback))
2490             {
2491               from = vlib_frame_vector_args (frame);
2492               u32 len = vec_len (fragments_to_loopback);
2493               if (len <= VLIB_FRAME_SIZE)
2494                 {
2495                   clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
2496                   n_left_from = len;
2497                   vec_reset_length (fragments_to_loopback);
2498                 }
2499               else
2500                 {
2501                   clib_memcpy (from,
2502                                fragments_to_loopback + (len - VLIB_FRAME_SIZE),
2503                                sizeof (u32) * VLIB_FRAME_SIZE);
2504                   n_left_from = VLIB_FRAME_SIZE;
2505                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
2506                 }
2507             }
2508        }
2509
2510       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2511     }
2512
2513   vlib_node_increment_counter (vm, nat44_in2out_reass_node.index,
2514                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2515                                pkts_processed);
2516
2517   nat_send_all_to_node (vm, fragments_to_drop, node,
2518                         &node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT],
2519                         SNAT_IN2OUT_NEXT_DROP);
2520
2521   vec_free (fragments_to_drop);
2522   vec_free (fragments_to_loopback);
2523   return frame->n_vectors;
2524 }
2525
2526 VLIB_REGISTER_NODE (nat44_in2out_reass_node) = {
2527   .function = nat44_in2out_reass_node_fn,
2528   .name = "nat44-in2out-reass",
2529   .vector_size = sizeof (u32),
2530   .format_trace = format_nat44_in2out_reass_trace,
2531   .type = VLIB_NODE_TYPE_INTERNAL,
2532
2533   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2534   .error_strings = snat_in2out_error_strings,
2535
2536   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2537   .next_nodes = {
2538     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2539     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2540     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2541     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2542     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2543   },
2544 };
2545
2546 VLIB_NODE_FUNCTION_MULTIARCH (nat44_in2out_reass_node,
2547                               nat44_in2out_reass_node_fn);
2548
2549 /**************************/
2550 /*** deterministic mode ***/
2551 /**************************/
2552 static uword
2553 snat_det_in2out_node_fn (vlib_main_t * vm,
2554                          vlib_node_runtime_t * node,
2555                          vlib_frame_t * frame)
2556 {
2557   u32 n_left_from, * from, * to_next;
2558   snat_in2out_next_t next_index;
2559   u32 pkts_processed = 0;
2560   snat_main_t * sm = &snat_main;
2561   u32 now = (u32) vlib_time_now (vm);
2562   u32 thread_index = vlib_get_thread_index ();
2563
2564   from = vlib_frame_vector_args (frame);
2565   n_left_from = frame->n_vectors;
2566   next_index = node->cached_next_index;
2567
2568   while (n_left_from > 0)
2569     {
2570       u32 n_left_to_next;
2571
2572       vlib_get_next_frame (vm, node, next_index,
2573                            to_next, n_left_to_next);
2574
2575       while (n_left_from >= 4 && n_left_to_next >= 2)
2576         {
2577           u32 bi0, bi1;
2578           vlib_buffer_t * b0, * b1;
2579           u32 next0, next1;
2580           u32 sw_if_index0, sw_if_index1;
2581           ip4_header_t * ip0, * ip1;
2582           ip_csum_t sum0, sum1;
2583           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
2584           u16 old_port0, new_port0, lo_port0, i0;
2585           u16 old_port1, new_port1, lo_port1, i1;
2586           udp_header_t * udp0, * udp1;
2587           tcp_header_t * tcp0, * tcp1;
2588           u32 proto0, proto1;
2589           snat_det_out_key_t key0, key1;
2590           snat_det_map_t * dm0, * dm1;
2591           snat_det_session_t * ses0 = 0, * ses1 = 0;
2592           u32 rx_fib_index0, rx_fib_index1;
2593           icmp46_header_t * icmp0, * icmp1;
2594
2595           /* Prefetch next iteration. */
2596           {
2597             vlib_buffer_t * p2, * p3;
2598
2599             p2 = vlib_get_buffer (vm, from[2]);
2600             p3 = vlib_get_buffer (vm, from[3]);
2601
2602             vlib_prefetch_buffer_header (p2, LOAD);
2603             vlib_prefetch_buffer_header (p3, LOAD);
2604
2605             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2606             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2607           }
2608
2609           /* speculatively enqueue b0 and b1 to the current next frame */
2610           to_next[0] = bi0 = from[0];
2611           to_next[1] = bi1 = from[1];
2612           from += 2;
2613           to_next += 2;
2614           n_left_from -= 2;
2615           n_left_to_next -= 2;
2616
2617           b0 = vlib_get_buffer (vm, bi0);
2618           b1 = vlib_get_buffer (vm, bi1);
2619
2620           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2621           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
2622
2623           ip0 = vlib_buffer_get_current (b0);
2624           udp0 = ip4_next_header (ip0);
2625           tcp0 = (tcp_header_t *) udp0;
2626
2627           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2628
2629           if (PREDICT_FALSE(ip0->ttl == 1))
2630             {
2631               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2632               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2633                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2634                                            0);
2635               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2636               goto trace0;
2637             }
2638
2639           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2640
2641           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2642             {
2643               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2644               icmp0 = (icmp46_header_t *) udp0;
2645
2646               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2647                                   rx_fib_index0, node, next0, thread_index,
2648                                   &ses0, &dm0);
2649               goto trace0;
2650             }
2651
2652           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2653           if (PREDICT_FALSE(!dm0))
2654             {
2655               clib_warning("no match for internal host %U",
2656                            format_ip4_address, &ip0->src_address);
2657               next0 = SNAT_IN2OUT_NEXT_DROP;
2658               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2659               goto trace0;
2660             }
2661
2662           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2663
2664           key0.ext_host_addr = ip0->dst_address;
2665           key0.ext_host_port = tcp0->dst;
2666
2667           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2668           if (PREDICT_FALSE(!ses0))
2669             {
2670               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2671                 {
2672                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2673                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2674
2675                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2676                     continue;
2677
2678                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2679                   break;
2680                 }
2681               if (PREDICT_FALSE(!ses0))
2682                 {
2683                   /* too many sessions for user, send ICMP error packet */
2684
2685                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2686                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2687                                                ICMP4_destination_unreachable_destination_unreachable_host,
2688                                                0);
2689                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2690                   goto trace0;
2691                 }
2692             }
2693
2694           new_port0 = ses0->out.out_port;
2695
2696           old_addr0.as_u32 = ip0->src_address.as_u32;
2697           ip0->src_address.as_u32 = new_addr0.as_u32;
2698           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2699
2700           sum0 = ip0->checksum;
2701           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2702                                  ip4_header_t,
2703                                  src_address /* changed member */);
2704           ip0->checksum = ip_csum_fold (sum0);
2705
2706           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2707             {
2708               if (tcp0->flags & TCP_FLAG_SYN)
2709                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2710               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2711                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2712               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2713                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2714               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2715                 snat_det_ses_close(dm0, ses0);
2716               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2717                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2718               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2719                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2720
2721               old_port0 = tcp0->src;
2722               tcp0->src = new_port0;
2723
2724               sum0 = tcp0->checksum;
2725               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2726                                      ip4_header_t,
2727                                      dst_address /* changed member */);
2728               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2729                                      ip4_header_t /* cheat */,
2730                                      length /* changed member */);
2731               tcp0->checksum = ip_csum_fold(sum0);
2732             }
2733           else
2734             {
2735               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2736               old_port0 = udp0->src_port;
2737               udp0->src_port = new_port0;
2738               udp0->checksum = 0;
2739             }
2740
2741           switch(ses0->state)
2742             {
2743             case SNAT_SESSION_UDP_ACTIVE:
2744                 ses0->expire = now + sm->udp_timeout;
2745                 break;
2746             case SNAT_SESSION_TCP_SYN_SENT:
2747             case SNAT_SESSION_TCP_FIN_WAIT:
2748             case SNAT_SESSION_TCP_CLOSE_WAIT:
2749             case SNAT_SESSION_TCP_LAST_ACK:
2750                 ses0->expire = now + sm->tcp_transitory_timeout;
2751                 break;
2752             case SNAT_SESSION_TCP_ESTABLISHED:
2753                 ses0->expire = now + sm->tcp_established_timeout;
2754                 break;
2755             }
2756
2757         trace0:
2758           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2759                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2760             {
2761               snat_in2out_trace_t *t =
2762                  vlib_add_trace (vm, node, b0, sizeof (*t));
2763               t->is_slow_path = 0;
2764               t->sw_if_index = sw_if_index0;
2765               t->next_index = next0;
2766               t->session_index = ~0;
2767               if (ses0)
2768                 t->session_index = ses0 - dm0->sessions;
2769             }
2770
2771           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2772
2773           ip1 = vlib_buffer_get_current (b1);
2774           udp1 = ip4_next_header (ip1);
2775           tcp1 = (tcp_header_t *) udp1;
2776
2777           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2778
2779           if (PREDICT_FALSE(ip1->ttl == 1))
2780             {
2781               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2782               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2783                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2784                                            0);
2785               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2786               goto trace1;
2787             }
2788
2789           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2790
2791           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
2792             {
2793               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
2794               icmp1 = (icmp46_header_t *) udp1;
2795
2796               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
2797                                   rx_fib_index1, node, next1, thread_index,
2798                                   &ses1, &dm1);
2799               goto trace1;
2800             }
2801
2802           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
2803           if (PREDICT_FALSE(!dm1))
2804             {
2805               clib_warning("no match for internal host %U",
2806                            format_ip4_address, &ip0->src_address);
2807               next1 = SNAT_IN2OUT_NEXT_DROP;
2808               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2809               goto trace1;
2810             }
2811
2812           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
2813
2814           key1.ext_host_addr = ip1->dst_address;
2815           key1.ext_host_port = tcp1->dst;
2816
2817           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
2818           if (PREDICT_FALSE(!ses1))
2819             {
2820               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
2821                 {
2822                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
2823                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
2824
2825                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
2826                     continue;
2827
2828                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
2829                   break;
2830                 }
2831               if (PREDICT_FALSE(!ses1))
2832                 {
2833                   /* too many sessions for user, send ICMP error packet */
2834
2835                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2836                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
2837                                                ICMP4_destination_unreachable_destination_unreachable_host,
2838                                                0);
2839                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2840                   goto trace1;
2841                 }
2842             }
2843
2844           new_port1 = ses1->out.out_port;
2845
2846           old_addr1.as_u32 = ip1->src_address.as_u32;
2847           ip1->src_address.as_u32 = new_addr1.as_u32;
2848           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2849
2850           sum1 = ip1->checksum;
2851           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2852                                  ip4_header_t,
2853                                  src_address /* changed member */);
2854           ip1->checksum = ip_csum_fold (sum1);
2855
2856           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
2857             {
2858               if (tcp1->flags & TCP_FLAG_SYN)
2859                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
2860               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
2861                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2862               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
2863                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
2864               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
2865                 snat_det_ses_close(dm1, ses1);
2866               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2867                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
2868               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
2869                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2870
2871               old_port1 = tcp1->src;
2872               tcp1->src = new_port1;
2873
2874               sum1 = tcp1->checksum;
2875               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2876                                      ip4_header_t,
2877                                      dst_address /* changed member */);
2878               sum1 = ip_csum_update (sum1, old_port1, new_port1,
2879                                      ip4_header_t /* cheat */,
2880                                      length /* changed member */);
2881               tcp1->checksum = ip_csum_fold(sum1);
2882             }
2883           else
2884             {
2885               ses1->state = SNAT_SESSION_UDP_ACTIVE;
2886               old_port1 = udp1->src_port;
2887               udp1->src_port = new_port1;
2888               udp1->checksum = 0;
2889             }
2890
2891           switch(ses1->state)
2892             {
2893             case SNAT_SESSION_UDP_ACTIVE:
2894                 ses1->expire = now + sm->udp_timeout;
2895                 break;
2896             case SNAT_SESSION_TCP_SYN_SENT:
2897             case SNAT_SESSION_TCP_FIN_WAIT:
2898             case SNAT_SESSION_TCP_CLOSE_WAIT:
2899             case SNAT_SESSION_TCP_LAST_ACK:
2900                 ses1->expire = now + sm->tcp_transitory_timeout;
2901                 break;
2902             case SNAT_SESSION_TCP_ESTABLISHED:
2903                 ses1->expire = now + sm->tcp_established_timeout;
2904                 break;
2905             }
2906
2907         trace1:
2908           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2909                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2910             {
2911               snat_in2out_trace_t *t =
2912                  vlib_add_trace (vm, node, b1, sizeof (*t));
2913               t->is_slow_path = 0;
2914               t->sw_if_index = sw_if_index1;
2915               t->next_index = next1;
2916               t->session_index = ~0;
2917               if (ses1)
2918                 t->session_index = ses1 - dm1->sessions;
2919             }
2920
2921           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
2922
2923           /* verify speculative enqueues, maybe switch current next frame */
2924           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2925                                            to_next, n_left_to_next,
2926                                            bi0, bi1, next0, next1);
2927          }
2928
2929       while (n_left_from > 0 && n_left_to_next > 0)
2930         {
2931           u32 bi0;
2932           vlib_buffer_t * b0;
2933           u32 next0;
2934           u32 sw_if_index0;
2935           ip4_header_t * ip0;
2936           ip_csum_t sum0;
2937           ip4_address_t new_addr0, old_addr0;
2938           u16 old_port0, new_port0, lo_port0, i0;
2939           udp_header_t * udp0;
2940           tcp_header_t * tcp0;
2941           u32 proto0;
2942           snat_det_out_key_t key0;
2943           snat_det_map_t * dm0;
2944           snat_det_session_t * ses0 = 0;
2945           u32 rx_fib_index0;
2946           icmp46_header_t * icmp0;
2947
2948           /* speculatively enqueue b0 to the current next frame */
2949           bi0 = from[0];
2950           to_next[0] = bi0;
2951           from += 1;
2952           to_next += 1;
2953           n_left_from -= 1;
2954           n_left_to_next -= 1;
2955
2956           b0 = vlib_get_buffer (vm, bi0);
2957           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2958
2959           ip0 = vlib_buffer_get_current (b0);
2960           udp0 = ip4_next_header (ip0);
2961           tcp0 = (tcp_header_t *) udp0;
2962
2963           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2964
2965           if (PREDICT_FALSE(ip0->ttl == 1))
2966             {
2967               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2968               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2969                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2970                                            0);
2971               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2972               goto trace00;
2973             }
2974
2975           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2976
2977           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2978             {
2979               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2980               icmp0 = (icmp46_header_t *) udp0;
2981
2982               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2983                                   rx_fib_index0, node, next0, thread_index,
2984                                   &ses0, &dm0);
2985               goto trace00;
2986             }
2987
2988           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2989           if (PREDICT_FALSE(!dm0))
2990             {
2991               clib_warning("no match for internal host %U",
2992                            format_ip4_address, &ip0->src_address);
2993               next0 = SNAT_IN2OUT_NEXT_DROP;
2994               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2995               goto trace00;
2996             }
2997
2998           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2999
3000           key0.ext_host_addr = ip0->dst_address;
3001           key0.ext_host_port = tcp0->dst;
3002
3003           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
3004           if (PREDICT_FALSE(!ses0))
3005             {
3006               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
3007                 {
3008                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
3009                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
3010
3011                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
3012                     continue;
3013
3014                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
3015                   break;
3016                 }
3017               if (PREDICT_FALSE(!ses0))
3018                 {
3019                   /* too many sessions for user, send ICMP error packet */
3020
3021                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3022                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
3023                                                ICMP4_destination_unreachable_destination_unreachable_host,
3024                                                0);
3025                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3026                   goto trace00;
3027                 }
3028             }
3029
3030           new_port0 = ses0->out.out_port;
3031
3032           old_addr0.as_u32 = ip0->src_address.as_u32;
3033           ip0->src_address.as_u32 = new_addr0.as_u32;
3034           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
3035
3036           sum0 = ip0->checksum;
3037           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3038                                  ip4_header_t,
3039                                  src_address /* changed member */);
3040           ip0->checksum = ip_csum_fold (sum0);
3041
3042           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3043             {
3044               if (tcp0->flags & TCP_FLAG_SYN)
3045                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
3046               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
3047                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
3048               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
3049                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
3050               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
3051                 snat_det_ses_close(dm0, ses0);
3052               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
3053                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
3054               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
3055                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
3056
3057               old_port0 = tcp0->src;
3058               tcp0->src = new_port0;
3059
3060               sum0 = tcp0->checksum;
3061               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3062                                      ip4_header_t,
3063                                      dst_address /* changed member */);
3064               sum0 = ip_csum_update (sum0, old_port0, new_port0,
3065                                      ip4_header_t /* cheat */,
3066                                      length /* changed member */);
3067               tcp0->checksum = ip_csum_fold(sum0);
3068             }
3069           else
3070             {
3071               ses0->state = SNAT_SESSION_UDP_ACTIVE;
3072               old_port0 = udp0->src_port;
3073               udp0->src_port = new_port0;
3074               udp0->checksum = 0;
3075             }
3076
3077           switch(ses0->state)
3078             {
3079             case SNAT_SESSION_UDP_ACTIVE:
3080                 ses0->expire = now + sm->udp_timeout;
3081                 break;
3082             case SNAT_SESSION_TCP_SYN_SENT:
3083             case SNAT_SESSION_TCP_FIN_WAIT:
3084             case SNAT_SESSION_TCP_CLOSE_WAIT:
3085             case SNAT_SESSION_TCP_LAST_ACK:
3086                 ses0->expire = now + sm->tcp_transitory_timeout;
3087                 break;
3088             case SNAT_SESSION_TCP_ESTABLISHED:
3089                 ses0->expire = now + sm->tcp_established_timeout;
3090                 break;
3091             }
3092
3093         trace00:
3094           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3095                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3096             {
3097               snat_in2out_trace_t *t =
3098                  vlib_add_trace (vm, node, b0, sizeof (*t));
3099               t->is_slow_path = 0;
3100               t->sw_if_index = sw_if_index0;
3101               t->next_index = next0;
3102               t->session_index = ~0;
3103               if (ses0)
3104                 t->session_index = ses0 - dm0->sessions;
3105             }
3106
3107           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3108
3109           /* verify speculative enqueue, maybe switch current next frame */
3110           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3111                                            to_next, n_left_to_next,
3112                                            bi0, next0);
3113         }
3114
3115       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3116     }
3117
3118   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
3119                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3120                                pkts_processed);
3121   return frame->n_vectors;
3122 }
3123
3124 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
3125   .function = snat_det_in2out_node_fn,
3126   .name = "nat44-det-in2out",
3127   .vector_size = sizeof (u32),
3128   .format_trace = format_snat_in2out_trace,
3129   .type = VLIB_NODE_TYPE_INTERNAL,
3130
3131   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3132   .error_strings = snat_in2out_error_strings,
3133
3134   .runtime_data_bytes = sizeof (snat_runtime_t),
3135
3136   .n_next_nodes = 3,
3137
3138   /* edit / add dispositions here */
3139   .next_nodes = {
3140     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3141     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3142     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3143   },
3144 };
3145
3146 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
3147
3148 /**
3149  * Get address and port values to be used for ICMP packet translation
3150  * and create session if needed
3151  *
3152  * @param[in,out] sm             NAT main
3153  * @param[in,out] node           NAT node runtime
3154  * @param[in] thread_index       thread index
3155  * @param[in,out] b0             buffer containing packet to be translated
3156  * @param[out] p_proto           protocol used for matching
3157  * @param[out] p_value           address and port after NAT translation
3158  * @param[out] p_dont_translate  if packet should not be translated
3159  * @param d                      optional parameter
3160  * @param e                      optional parameter
3161  */
3162 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
3163                           u32 thread_index, vlib_buffer_t *b0,
3164                           ip4_header_t *ip0, u8 *p_proto,
3165                           snat_session_key_t *p_value,
3166                           u8 *p_dont_translate, void *d, void *e)
3167 {
3168   icmp46_header_t *icmp0;
3169   u32 sw_if_index0;
3170   u32 rx_fib_index0;
3171   u8 protocol;
3172   snat_det_out_key_t key0;
3173   u8 dont_translate = 0;
3174   u32 next0 = ~0;
3175   icmp_echo_header_t *echo0, *inner_echo0 = 0;
3176   ip4_header_t *inner_ip0;
3177   void *l4_header = 0;
3178   icmp46_header_t *inner_icmp0;
3179   snat_det_map_t * dm0 = 0;
3180   ip4_address_t new_addr0;
3181   u16 lo_port0, i0;
3182   snat_det_session_t * ses0 = 0;
3183   ip4_address_t in_addr;
3184   u16 in_port;
3185
3186   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
3187   echo0 = (icmp_echo_header_t *)(icmp0+1);
3188   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3189   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
3190
3191   if (!icmp_is_error_message (icmp0))
3192     {
3193       protocol = SNAT_PROTOCOL_ICMP;
3194       in_addr = ip0->src_address;
3195       in_port = echo0->identifier;
3196     }
3197   else
3198     {
3199       inner_ip0 = (ip4_header_t *)(echo0+1);
3200       l4_header = ip4_next_header (inner_ip0);
3201       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
3202       in_addr = inner_ip0->dst_address;
3203       switch (protocol)
3204         {
3205         case SNAT_PROTOCOL_ICMP:
3206           inner_icmp0 = (icmp46_header_t*)l4_header;
3207           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
3208           in_port = inner_echo0->identifier;
3209           break;
3210         case SNAT_PROTOCOL_UDP:
3211         case SNAT_PROTOCOL_TCP:
3212           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
3213           break;
3214         default:
3215           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
3216           next0 = SNAT_IN2OUT_NEXT_DROP;
3217           goto out;
3218         }
3219     }
3220
3221   dm0 = snat_det_map_by_user(sm, &in_addr);
3222   if (PREDICT_FALSE(!dm0))
3223     {
3224       clib_warning("no match for internal host %U",
3225                    format_ip4_address, &in_addr);
3226       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
3227           IP_PROTOCOL_ICMP, rx_fib_index0)))
3228         {
3229           dont_translate = 1;
3230           goto out;
3231         }
3232       next0 = SNAT_IN2OUT_NEXT_DROP;
3233       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3234       goto out;
3235     }
3236
3237   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
3238
3239   key0.ext_host_addr = ip0->dst_address;
3240   key0.ext_host_port = 0;
3241
3242   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
3243   if (PREDICT_FALSE(!ses0))
3244     {
3245       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
3246           IP_PROTOCOL_ICMP, rx_fib_index0)))
3247         {
3248           dont_translate = 1;
3249           goto out;
3250         }
3251       if (icmp0->type != ICMP4_echo_request)
3252         {
3253           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3254           next0 = SNAT_IN2OUT_NEXT_DROP;
3255           goto out;
3256         }
3257       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
3258         {
3259           key0.out_port = clib_host_to_net_u16 (lo_port0 +
3260             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
3261
3262           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
3263             continue;
3264
3265           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
3266           break;
3267         }
3268       if (PREDICT_FALSE(!ses0))
3269         {
3270           next0 = SNAT_IN2OUT_NEXT_DROP;
3271           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
3272           goto out;
3273         }
3274     }
3275
3276   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
3277                     !icmp_is_error_message (icmp0)))
3278     {
3279       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3280       next0 = SNAT_IN2OUT_NEXT_DROP;
3281       goto out;
3282     }
3283
3284   u32 now = (u32) vlib_time_now (sm->vlib_main);
3285
3286   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
3287   ses0->expire = now + sm->icmp_timeout;
3288
3289 out:
3290   *p_proto = protocol;
3291   if (ses0)
3292     {
3293       p_value->addr = new_addr0;
3294       p_value->fib_index = sm->outside_fib_index;
3295       p_value->port = ses0->out.out_port;
3296     }
3297   *p_dont_translate = dont_translate;
3298   if (d)
3299     *(snat_det_session_t**)d = ses0;
3300   if (e)
3301     *(snat_det_map_t**)e = dm0;
3302   return next0;
3303 }
3304
3305 /**********************/
3306 /*** worker handoff ***/
3307 /**********************/
3308 static inline uword
3309 snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
3310                                       vlib_node_runtime_t * node,
3311                                       vlib_frame_t * frame,
3312                                       u8 is_output)
3313 {
3314   snat_main_t *sm = &snat_main;
3315   vlib_thread_main_t *tm = vlib_get_thread_main ();
3316   u32 n_left_from, *from, *to_next = 0;
3317   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
3318   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
3319     = 0;
3320   vlib_frame_queue_elt_t *hf = 0;
3321   vlib_frame_t *f = 0;
3322   int i;
3323   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
3324   u32 next_worker_index = 0;
3325   u32 current_worker_index = ~0;
3326   u32 thread_index = vlib_get_thread_index ();
3327   u32 fq_index;
3328   u32 to_node_index;
3329
3330   ASSERT (vec_len (sm->workers));
3331
3332   if (is_output)
3333     {
3334       fq_index = sm->fq_in2out_output_index;
3335       to_node_index = sm->in2out_output_node_index;
3336     }
3337   else
3338     {
3339       fq_index = sm->fq_in2out_index;
3340       to_node_index = sm->in2out_node_index;
3341     }
3342
3343   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
3344     {
3345       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
3346
3347       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
3348                                sm->first_worker_index + sm->num_workers - 1,
3349                                (vlib_frame_queue_t *) (~0));
3350     }
3351
3352   from = vlib_frame_vector_args (frame);
3353   n_left_from = frame->n_vectors;
3354
3355   while (n_left_from > 0)
3356     {
3357       u32 bi0;
3358       vlib_buffer_t *b0;
3359       u32 sw_if_index0;
3360       u32 rx_fib_index0;
3361       ip4_header_t * ip0;
3362       u8 do_handoff;
3363
3364       bi0 = from[0];
3365       from += 1;
3366       n_left_from -= 1;
3367
3368       b0 = vlib_get_buffer (vm, bi0);
3369
3370       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
3371       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3372
3373       ip0 = vlib_buffer_get_current (b0);
3374
3375       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
3376
3377       if (PREDICT_FALSE (next_worker_index != thread_index))
3378         {
3379           do_handoff = 1;
3380
3381           if (next_worker_index != current_worker_index)
3382             {
3383               if (hf)
3384                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3385
3386               hf = vlib_get_worker_handoff_queue_elt (fq_index,
3387                                                       next_worker_index,
3388                                                       handoff_queue_elt_by_worker_index);
3389
3390               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
3391               to_next_worker = &hf->buffer_index[hf->n_vectors];
3392               current_worker_index = next_worker_index;
3393             }
3394
3395           /* enqueue to correct worker thread */
3396           to_next_worker[0] = bi0;
3397           to_next_worker++;
3398           n_left_to_next_worker--;
3399
3400           if (n_left_to_next_worker == 0)
3401             {
3402               hf->n_vectors = VLIB_FRAME_SIZE;
3403               vlib_put_frame_queue_elt (hf);
3404               current_worker_index = ~0;
3405               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
3406               hf = 0;
3407             }
3408         }
3409       else
3410         {
3411           do_handoff = 0;
3412           /* if this is 1st frame */
3413           if (!f)
3414             {
3415               f = vlib_get_frame_to_node (vm, to_node_index);
3416               to_next = vlib_frame_vector_args (f);
3417             }
3418
3419           to_next[0] = bi0;
3420           to_next += 1;
3421           f->n_vectors++;
3422         }
3423
3424       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
3425                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3426         {
3427           snat_in2out_worker_handoff_trace_t *t =
3428             vlib_add_trace (vm, node, b0, sizeof (*t));
3429           t->next_worker_index = next_worker_index;
3430           t->do_handoff = do_handoff;
3431         }
3432     }
3433
3434   if (f)
3435     vlib_put_frame_to_node (vm, to_node_index, f);
3436
3437   if (hf)
3438     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3439
3440   /* Ship frames to the worker nodes */
3441   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
3442     {
3443       if (handoff_queue_elt_by_worker_index[i])
3444         {
3445           hf = handoff_queue_elt_by_worker_index[i];
3446           /*
3447            * It works better to let the handoff node
3448            * rate-adapt, always ship the handoff queue element.
3449            */
3450           if (1 || hf->n_vectors == hf->last_n_vectors)
3451             {
3452               vlib_put_frame_queue_elt (hf);
3453               handoff_queue_elt_by_worker_index[i] = 0;
3454             }
3455           else
3456             hf->last_n_vectors = hf->n_vectors;
3457         }
3458       congested_handoff_queue_by_worker_index[i] =
3459         (vlib_frame_queue_t *) (~0);
3460     }
3461   hf = 0;
3462   current_worker_index = ~0;
3463   return frame->n_vectors;
3464 }
3465
3466 static uword
3467 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
3468                                vlib_node_runtime_t * node,
3469                                vlib_frame_t * frame)
3470 {
3471   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
3472 }
3473
3474 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
3475   .function = snat_in2out_worker_handoff_fn,
3476   .name = "nat44-in2out-worker-handoff",
3477   .vector_size = sizeof (u32),
3478   .format_trace = format_snat_in2out_worker_handoff_trace,
3479   .type = VLIB_NODE_TYPE_INTERNAL,
3480
3481   .n_next_nodes = 1,
3482
3483   .next_nodes = {
3484     [0] = "error-drop",
3485   },
3486 };
3487
3488 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
3489                               snat_in2out_worker_handoff_fn);
3490
3491 static uword
3492 snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
3493                                       vlib_node_runtime_t * node,
3494                                       vlib_frame_t * frame)
3495 {
3496   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
3497 }
3498
3499 VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
3500   .function = snat_in2out_output_worker_handoff_fn,
3501   .name = "nat44-in2out-output-worker-handoff",
3502   .vector_size = sizeof (u32),
3503   .format_trace = format_snat_in2out_worker_handoff_trace,
3504   .type = VLIB_NODE_TYPE_INTERNAL,
3505
3506   .n_next_nodes = 1,
3507
3508   .next_nodes = {
3509     [0] = "error-drop",
3510   },
3511 };
3512
3513 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
3514                               snat_in2out_output_worker_handoff_fn);
3515
3516 static_always_inline int
3517 is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
3518 {
3519   snat_address_t * ap;
3520   clib_bihash_kv_8_8_t kv, value;
3521   snat_session_key_t m_key;
3522
3523   vec_foreach (ap, sm->addresses)
3524     {
3525       if (ap->addr.as_u32 == dst_addr->as_u32)
3526         return 1;
3527     }
3528
3529   m_key.addr.as_u32 = dst_addr->as_u32;
3530   m_key.fib_index = sm->outside_fib_index;
3531   m_key.port = 0;
3532   m_key.protocol = 0;
3533   kv.key = m_key.as_u64;
3534   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
3535     return 1;
3536
3537   return 0;
3538 }
3539
3540 static uword
3541 snat_hairpin_dst_fn (vlib_main_t * vm,
3542                      vlib_node_runtime_t * node,
3543                      vlib_frame_t * frame)
3544 {
3545   u32 n_left_from, * from, * to_next;
3546   snat_in2out_next_t next_index;
3547   u32 pkts_processed = 0;
3548   snat_main_t * sm = &snat_main;
3549
3550   from = vlib_frame_vector_args (frame);
3551   n_left_from = frame->n_vectors;
3552   next_index = node->cached_next_index;
3553
3554   while (n_left_from > 0)
3555     {
3556       u32 n_left_to_next;
3557
3558       vlib_get_next_frame (vm, node, next_index,
3559                            to_next, n_left_to_next);
3560
3561       while (n_left_from > 0 && n_left_to_next > 0)
3562         {
3563           u32 bi0;
3564           vlib_buffer_t * b0;
3565           u32 next0;
3566           ip4_header_t * ip0;
3567           u32 proto0;
3568
3569           /* speculatively enqueue b0 to the current next frame */
3570           bi0 = from[0];
3571           to_next[0] = bi0;
3572           from += 1;
3573           to_next += 1;
3574           n_left_from -= 1;
3575           n_left_to_next -= 1;
3576
3577           b0 = vlib_get_buffer (vm, bi0);
3578           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3579           ip0 = vlib_buffer_get_current (b0);
3580
3581           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3582
3583           vnet_buffer (b0)->snat.flags = 0;
3584           if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
3585             {
3586               if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
3587                 {
3588                   udp_header_t * udp0 = ip4_next_header (ip0);
3589                   tcp_header_t * tcp0 = (tcp_header_t *) udp0;
3590
3591                   snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3592                 }
3593               else if (proto0 == SNAT_PROTOCOL_ICMP)
3594                 {
3595                   icmp46_header_t * icmp0 = ip4_next_header (ip0);
3596
3597                   snat_icmp_hairpinning (sm, b0, ip0, icmp0);
3598                 }
3599               else
3600                 {
3601                   snat_hairpinning_unknown_proto (sm, b0, ip0);
3602                 }
3603
3604               vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
3605             }
3606
3607           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3608
3609           /* verify speculative enqueue, maybe switch current next frame */
3610           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3611                                            to_next, n_left_to_next,
3612                                            bi0, next0);
3613          }
3614
3615       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3616     }
3617
3618   vlib_node_increment_counter (vm, snat_hairpin_dst_node.index,
3619                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3620                                pkts_processed);
3621   return frame->n_vectors;
3622 }
3623
3624 VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
3625   .function = snat_hairpin_dst_fn,
3626   .name = "nat44-hairpin-dst",
3627   .vector_size = sizeof (u32),
3628   .type = VLIB_NODE_TYPE_INTERNAL,
3629   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3630   .error_strings = snat_in2out_error_strings,
3631   .n_next_nodes = 2,
3632   .next_nodes = {
3633     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3634     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3635   },
3636 };
3637
3638 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node,
3639                               snat_hairpin_dst_fn);
3640
3641 static uword
3642 snat_hairpin_src_fn (vlib_main_t * vm,
3643                      vlib_node_runtime_t * node,
3644                      vlib_frame_t * frame)
3645 {
3646   u32 n_left_from, * from, * to_next;
3647   snat_in2out_next_t next_index;
3648   u32 pkts_processed = 0;
3649   snat_main_t *sm = &snat_main;
3650
3651   from = vlib_frame_vector_args (frame);
3652   n_left_from = frame->n_vectors;
3653   next_index = node->cached_next_index;
3654
3655   while (n_left_from > 0)
3656     {
3657       u32 n_left_to_next;
3658
3659       vlib_get_next_frame (vm, node, next_index,
3660                            to_next, n_left_to_next);
3661
3662       while (n_left_from > 0 && n_left_to_next > 0)
3663         {
3664           u32 bi0;
3665           vlib_buffer_t * b0;
3666           u32 next0;
3667           snat_interface_t *i;
3668           u32 sw_if_index0;
3669
3670           /* speculatively enqueue b0 to the current next frame */
3671           bi0 = from[0];
3672           to_next[0] = bi0;
3673           from += 1;
3674           to_next += 1;
3675           n_left_from -= 1;
3676           n_left_to_next -= 1;
3677
3678           b0 = vlib_get_buffer (vm, bi0);
3679           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3680           next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT;
3681
3682           pool_foreach (i, sm->output_feature_interfaces,
3683           ({
3684             /* Only packets from NAT inside interface */
3685             if ((nat_interface_is_inside(i)) && (sw_if_index0 == i->sw_if_index))
3686               {
3687                 if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) &
3688                                     SNAT_FLAG_HAIRPINNING))
3689                   {
3690                     if (PREDICT_TRUE (sm->num_workers > 1))
3691                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
3692                     else
3693                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
3694                   }
3695                 break;
3696               }
3697           }));
3698
3699           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3700
3701           /* verify speculative enqueue, maybe switch current next frame */
3702           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3703                                            to_next, n_left_to_next,
3704                                            bi0, next0);
3705          }
3706
3707       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3708     }
3709
3710   vlib_node_increment_counter (vm, snat_hairpin_src_node.index,
3711                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3712                                pkts_processed);
3713   return frame->n_vectors;
3714 }
3715
3716 VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
3717   .function = snat_hairpin_src_fn,
3718   .name = "nat44-hairpin-src",
3719   .vector_size = sizeof (u32),
3720   .type = VLIB_NODE_TYPE_INTERNAL,
3721   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3722   .error_strings = snat_in2out_error_strings,
3723   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
3724   .next_nodes = {
3725      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
3726      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-in2out-output",
3727      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
3728      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
3729   },
3730 };
3731
3732 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node,
3733                               snat_hairpin_src_fn);
3734
3735 static uword
3736 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
3737                                 vlib_node_runtime_t * node,
3738                                 vlib_frame_t * frame)
3739 {
3740   u32 n_left_from, * from, * to_next;
3741   snat_in2out_next_t next_index;
3742   u32 pkts_processed = 0;
3743   snat_main_t * sm = &snat_main;
3744   u32 stats_node_index;
3745
3746   stats_node_index = snat_in2out_fast_node.index;
3747
3748   from = vlib_frame_vector_args (frame);
3749   n_left_from = frame->n_vectors;
3750   next_index = node->cached_next_index;
3751
3752   while (n_left_from > 0)
3753     {
3754       u32 n_left_to_next;
3755
3756       vlib_get_next_frame (vm, node, next_index,
3757                            to_next, n_left_to_next);
3758
3759       while (n_left_from > 0 && n_left_to_next > 0)
3760         {
3761           u32 bi0;
3762           vlib_buffer_t * b0;
3763           u32 next0;
3764           u32 sw_if_index0;
3765           ip4_header_t * ip0;
3766           ip_csum_t sum0;
3767           u32 new_addr0, old_addr0;
3768           u16 old_port0, new_port0;
3769           udp_header_t * udp0;
3770           tcp_header_t * tcp0;
3771           icmp46_header_t * icmp0;
3772           snat_session_key_t key0, sm0;
3773           u32 proto0;
3774           u32 rx_fib_index0;
3775
3776           /* speculatively enqueue b0 to the current next frame */
3777           bi0 = from[0];
3778           to_next[0] = bi0;
3779           from += 1;
3780           to_next += 1;
3781           n_left_from -= 1;
3782           n_left_to_next -= 1;
3783
3784           b0 = vlib_get_buffer (vm, bi0);
3785           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3786
3787           ip0 = vlib_buffer_get_current (b0);
3788           udp0 = ip4_next_header (ip0);
3789           tcp0 = (tcp_header_t *) udp0;
3790           icmp0 = (icmp46_header_t *) udp0;
3791
3792           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3793           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3794
3795           if (PREDICT_FALSE(ip0->ttl == 1))
3796             {
3797               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3798               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3799                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3800                                            0);
3801               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3802               goto trace0;
3803             }
3804
3805           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3806
3807           if (PREDICT_FALSE (proto0 == ~0))
3808               goto trace0;
3809
3810           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3811             {
3812               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
3813                                   rx_fib_index0, node, next0, ~0, 0, 0);
3814               goto trace0;
3815             }
3816
3817           key0.addr = ip0->src_address;
3818           key0.protocol = proto0;
3819           key0.port = udp0->src_port;
3820           key0.fib_index = rx_fib_index0;
3821
3822           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0, 0))
3823             {
3824               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3825               next0= SNAT_IN2OUT_NEXT_DROP;
3826               goto trace0;
3827             }
3828
3829           new_addr0 = sm0.addr.as_u32;
3830           new_port0 = sm0.port;
3831           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
3832           old_addr0 = ip0->src_address.as_u32;
3833           ip0->src_address.as_u32 = new_addr0;
3834
3835           sum0 = ip0->checksum;
3836           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3837                                  ip4_header_t,
3838                                  src_address /* changed member */);
3839           ip0->checksum = ip_csum_fold (sum0);
3840
3841           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
3842             {
3843               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3844                 {
3845                   old_port0 = tcp0->src_port;
3846                   tcp0->src_port = new_port0;
3847
3848                   sum0 = tcp0->checksum;
3849                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3850                                          ip4_header_t,
3851                                          dst_address /* changed member */);
3852                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
3853                                          ip4_header_t /* cheat */,
3854                                          length /* changed member */);
3855                   tcp0->checksum = ip_csum_fold(sum0);
3856                 }
3857               else
3858                 {
3859                   old_port0 = udp0->src_port;
3860                   udp0->src_port = new_port0;
3861                   udp0->checksum = 0;
3862                 }
3863             }
3864           else
3865             {
3866               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3867                 {
3868                   sum0 = tcp0->checksum;
3869                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3870                                          ip4_header_t,
3871                                          dst_address /* changed member */);
3872                   tcp0->checksum = ip_csum_fold(sum0);
3873                 }
3874             }
3875
3876           /* Hairpinning */
3877           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3878
3879         trace0:
3880           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3881                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3882             {
3883               snat_in2out_trace_t *t =
3884                  vlib_add_trace (vm, node, b0, sizeof (*t));
3885               t->sw_if_index = sw_if_index0;
3886               t->next_index = next0;
3887             }
3888
3889           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3890
3891           /* verify speculative enqueue, maybe switch current next frame */
3892           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3893                                            to_next, n_left_to_next,
3894                                            bi0, next0);
3895         }
3896
3897       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3898     }
3899
3900   vlib_node_increment_counter (vm, stats_node_index,
3901                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3902                                pkts_processed);
3903   return frame->n_vectors;
3904 }
3905
3906
3907 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
3908   .function = snat_in2out_fast_static_map_fn,
3909   .name = "nat44-in2out-fast",
3910   .vector_size = sizeof (u32),
3911   .format_trace = format_snat_in2out_fast_trace,
3912   .type = VLIB_NODE_TYPE_INTERNAL,
3913
3914   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3915   .error_strings = snat_in2out_error_strings,
3916
3917   .runtime_data_bytes = sizeof (snat_runtime_t),
3918
3919   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3920
3921   /* edit / add dispositions here */
3922   .next_nodes = {
3923     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3924     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3925     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
3926     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3927     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
3928   },
3929 };
3930
3931 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);