SNAT: IP fragmentation (VPP-890)
[vpp.git] / src / plugins / nat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <nat/nat.h>
25 #include <nat/nat_ipfix_logging.h>
26 #include <nat/nat_det.h>
27 #include <nat/nat_reass.h>
28
29 #include <vppinfra/hash.h>
30 #include <vppinfra/error.h>
31 #include <vppinfra/elog.h>
32
33 typedef struct {
34   u32 sw_if_index;
35   u32 next_index;
36   u32 session_index;
37   u32 is_slow_path;
38 } snat_in2out_trace_t;
39
40 typedef struct {
41   u32 next_worker_index;
42   u8 do_handoff;
43 } snat_in2out_worker_handoff_trace_t;
44
45 /* packet trace format function */
46 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
47 {
48   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
49   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
50   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
51   char * tag;
52
53   tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
54
55   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
56               t->sw_if_index, t->next_index, t->session_index);
57
58   return s;
59 }
60
61 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
62 {
63   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
64   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
65   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
66
67   s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
68               t->sw_if_index, t->next_index);
69
70   return s;
71 }
72
73 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
74 {
75   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
76   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
77   snat_in2out_worker_handoff_trace_t * t =
78     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
79   char * m;
80
81   m = t->do_handoff ? "next worker" : "same worker";
82   s = format (s, "NAT44_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
83
84   return s;
85 }
86
87 typedef struct {
88   u32 sw_if_index;
89   u32 next_index;
90   u8 cached;
91 } nat44_in2out_reass_trace_t;
92
93 static u8 * format_nat44_in2out_reass_trace (u8 * s, va_list * args)
94 {
95   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
96   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
97   nat44_in2out_reass_trace_t * t = va_arg (*args, nat44_in2out_reass_trace_t *);
98
99   s = format (s, "NAT44_IN2OUT_REASS: sw_if_index %d, next index %d, status %s",
100               t->sw_if_index, t->next_index,
101               t->cached ? "cached" : "translated");
102
103   return s;
104 }
105
106 vlib_node_registration_t snat_in2out_node;
107 vlib_node_registration_t snat_in2out_slowpath_node;
108 vlib_node_registration_t snat_in2out_fast_node;
109 vlib_node_registration_t snat_in2out_worker_handoff_node;
110 vlib_node_registration_t snat_det_in2out_node;
111 vlib_node_registration_t snat_in2out_output_node;
112 vlib_node_registration_t snat_in2out_output_slowpath_node;
113 vlib_node_registration_t snat_in2out_output_worker_handoff_node;
114 vlib_node_registration_t snat_hairpin_dst_node;
115 vlib_node_registration_t snat_hairpin_src_node;
116 vlib_node_registration_t nat44_hairpinning_node;
117 vlib_node_registration_t nat44_in2out_reass_node;
118
119
/*
 * X-macro list of in2out error counters: _(SYMBOL, "description").
 * The list is expanded twice below, once into the error enum and once into
 * the matching string table, so the two always stay in the same order.
 */
#define foreach_snat_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(IN2OUT_PACKETS, "Good in2out packets processed")      \
_(OUT_OF_PORTS, "Out of ports")                         \
_(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(NO_TRANSLATION, "No translation")                     \
_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")   \
_(DROP_FRAGMENT, "Drop fragment")                       \
_(MAX_REASS, "Maximum reassemblies exceeded")           \
_(MAX_FRAG, "Maximum fragments per reassembly exceeded")

/* Error codes, SNAT_IN2OUT_ERROR_<SYMBOL>; used to index node->errors[]. */
typedef enum
{
#define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
  foreach_snat_in2out_error
#undef _
    SNAT_IN2OUT_N_ERROR,
} snat_in2out_error_t;

/* Human-readable descriptions, in the same order as snat_in2out_error_t. */
static char *snat_in2out_error_strings[] = {
#define _(sym,string) string,
  foreach_snat_in2out_error
#undef _
};
144
/* Next-node indices used by the in2out processing code in this file. */
typedef enum
{
  SNAT_IN2OUT_NEXT_LOOKUP,
  SNAT_IN2OUT_NEXT_DROP,	/* returned on every failure path below */
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_NEXT_REASS,
  SNAT_IN2OUT_N_NEXT,		/* number of next indices, keep last */
} snat_in2out_next_t;
153
/* Next-node indices for the hairpin source node. */
typedef enum
{
  SNAT_HAIRPIN_SRC_NEXT_DROP,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
  SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
  SNAT_HAIRPIN_SRC_N_NEXT,	/* number of next indices, keep last */
} snat_hairpin_next_t;
161
162 /**
163  * @brief Check if packet should be translated
164  *
165  * Packets aimed at outside interface and external addresss with active session
166  * should be translated.
167  *
168  * @param sm            NAT main
169  * @param rt            NAT runtime data
170  * @param sw_if_index0  index of the inside interface
171  * @param ip0           IPv4 header
172  * @param proto0        NAT protocol
173  * @param rx_fib_index0 RX FIB index
174  *
175  * @returns 0 if packet should be translated otherwise 1
176  */
177 static inline int
178 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
179                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
180                          u32 rx_fib_index0)
181 {
182   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
183   fib_prefix_t pfx = {
184     .fp_proto = FIB_PROTOCOL_IP4,
185     .fp_len = 32,
186     .fp_addr = {
187         .ip4.as_u32 = ip0->dst_address.as_u32,
188     },
189   };
190
191   /* Don't NAT packet aimed at the intfc address */
192   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
193                                       ip0->dst_address.as_u32)))
194     return 1;
195
196   fei = fib_table_lookup (rx_fib_index0, &pfx);
197   if (FIB_NODE_INDEX_INVALID != fei)
198     {
199       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
200       if (sw_if_index == ~0)
201         {
202           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
203           if (FIB_NODE_INDEX_INVALID != fei)
204             sw_if_index = fib_entry_get_resolving_interface (fei);
205         }
206       snat_interface_t *i;
207       pool_foreach (i, sm->interfaces,
208       ({
209         /* NAT packet aimed at outside interface */
210         if ((nat_interface_is_outside(i)) && (sw_if_index == i->sw_if_index))
211           return 0;
212       }));
213     }
214
215   return 1;
216 }
217
218 static inline int
219 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
220                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
221                     u32 rx_fib_index0, u32 thread_index)
222 {
223   udp_header_t * udp0 = ip4_next_header (ip0);
224   snat_session_key_t key0, sm0;
225   clib_bihash_kv_8_8_t kv0, value0;
226
227   key0.addr = ip0->dst_address;
228   key0.port = udp0->dst_port;
229   key0.protocol = proto0;
230   key0.fib_index = sm->outside_fib_index;
231   kv0.key = key0.as_u64;
232
233   /* NAT packet aimed at external address if */
234   /* has active sessions */
235   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
236                               &value0))
237     {
238       /* or is static mappings */
239       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
240         return 0;
241     }
242   else
243     return 0;
244
245   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
246                                  rx_fib_index0);
247 }
248
249 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
250                       ip4_header_t * ip0,
251                       u32 rx_fib_index0,
252                       snat_session_key_t * key0,
253                       snat_session_t ** sessionp,
254                       vlib_node_runtime_t * node,
255                       u32 next0,
256                       u32 thread_index)
257 {
258   snat_user_t *u;
259   snat_user_key_t user_key;
260   snat_session_t *s;
261   clib_bihash_kv_8_8_t kv0, value0;
262   u32 oldest_per_user_translation_list_index;
263   dlist_elt_t * oldest_per_user_translation_list_elt;
264   dlist_elt_t * per_user_translation_list_elt;
265   dlist_elt_t * per_user_list_head_elt;
266   u32 session_index;
267   snat_session_key_t key1;
268   u32 address_index = ~0;
269   u32 outside_fib_index;
270   uword * p;
271   udp_header_t * udp0 = ip4_next_header (ip0);
272
273   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
274     {
275       b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
276       return SNAT_IN2OUT_NEXT_DROP;
277     }
278
279   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
280   if (! p)
281     {
282       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
283       return SNAT_IN2OUT_NEXT_DROP;
284     }
285   outside_fib_index = p[0];
286
287   key1.protocol = key0->protocol;
288   user_key.addr = ip0->src_address;
289   user_key.fib_index = rx_fib_index0;
290   kv0.key = user_key.as_u64;
291
292   /* Ever heard of the "user" = src ip4 address before? */
293   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].user_hash,
294                               &kv0, &value0))
295     {
296       /* no, make a new one */
297       pool_get (sm->per_thread_data[thread_index].users, u);
298       memset (u, 0, sizeof (*u));
299       u->addr = ip0->src_address;
300       u->fib_index = rx_fib_index0;
301
302       pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);
303
304       u->sessions_per_user_list_head_index = per_user_list_head_elt -
305         sm->per_thread_data[thread_index].list_pool;
306
307       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
308                        u->sessions_per_user_list_head_index);
309
310       kv0.value = u - sm->per_thread_data[thread_index].users;
311
312       /* add user */
313       clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].user_hash,
314                                &kv0, 1 /* is_add */);
315     }
316   else
317     {
318       u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
319                              value0.value);
320     }
321
322   /* Over quota? Recycle the least recently used dynamic translation */
323   if (u->nsessions >= sm->max_translations_per_user)
324     {
325       /* Remove the oldest dynamic translation */
326       do {
327           oldest_per_user_translation_list_index =
328             clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
329                                     u->sessions_per_user_list_head_index);
330
331           ASSERT (oldest_per_user_translation_list_index != ~0);
332
333           /* add it back to the end of the LRU list */
334           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
335                               u->sessions_per_user_list_head_index,
336                               oldest_per_user_translation_list_index);
337           /* Get the list element */
338           oldest_per_user_translation_list_elt =
339             pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
340                                oldest_per_user_translation_list_index);
341
342           /* Get the session index from the list element */
343           session_index = oldest_per_user_translation_list_elt->value;
344
345           /* Get the session */
346           s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
347                                  session_index);
348       } while (snat_is_session_static (s));
349
350       if (snat_is_unk_proto_session (s))
351         {
352           clib_bihash_kv_16_8_t up_kv;
353           nat_ed_ses_key_t key;
354
355           /* Remove from lookup tables */
356           key.l_addr = s->in2out.addr;
357           key.r_addr = s->ext_host_addr;
358           key.fib_index = s->in2out.fib_index;
359           key.proto = s->in2out.port;
360           key.rsvd = 0;
361           key.l_port = 0;
362           up_kv.key[0] = key.as_u64[0];
363           up_kv.key[1] = key.as_u64[1];
364           if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &up_kv, 0))
365             clib_warning ("in2out key del failed");
366
367           key.l_addr = s->out2in.addr;
368           key.fib_index = s->out2in.fib_index;
369           up_kv.key[0] = key.as_u64[0];
370           up_kv.key[1] = key.as_u64[1];
371           if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &up_kv, 0))
372             clib_warning ("out2in key del failed");
373         }
374       else
375         {
376           /* Remove in2out, out2in keys */
377           kv0.key = s->in2out.as_u64;
378           if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out,
379                                        &kv0, 0 /* is_add */))
380               clib_warning ("in2out key delete failed");
381           kv0.key = s->out2in.as_u64;
382           if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in,
383                                        &kv0, 0 /* is_add */))
384               clib_warning ("out2in key delete failed");
385
386           /* log NAT event */
387           snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
388                                               s->out2in.addr.as_u32,
389                                               s->in2out.protocol,
390                                               s->in2out.port,
391                                               s->out2in.port,
392                                               s->in2out.fib_index);
393
394           snat_free_outside_address_and_port
395             (sm->addresses, thread_index, &s->out2in, s->outside_address_index);
396         }
397       s->outside_address_index = ~0;
398
399       if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index0,
400                                                thread_index, &key1,
401                                                &address_index, sm->vrf_mode,
402                                                sm->port_per_thread,
403                                                sm->per_thread_data[thread_index].snat_thread_index))
404         {
405           ASSERT(0);
406
407           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
408           return SNAT_IN2OUT_NEXT_DROP;
409         }
410       s->outside_address_index = address_index;
411     }
412   else
413     {
414       u8 static_mapping = 1;
415
416       /* First try to match static mapping by local address and port */
417       if (snat_static_mapping_match (sm, *key0, &key1, 0, 0))
418         {
419           static_mapping = 0;
420           /* Try to create dynamic translation */
421           if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index0,
422                                                    thread_index, &key1,
423                                                    &address_index, sm->vrf_mode,
424                                                    sm->port_per_thread,
425                                                    sm->per_thread_data[thread_index].snat_thread_index))
426             {
427               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
428               return SNAT_IN2OUT_NEXT_DROP;
429             }
430         }
431
432       /* Create a new session */
433       pool_get (sm->per_thread_data[thread_index].sessions, s);
434       memset (s, 0, sizeof (*s));
435
436       s->outside_address_index = address_index;
437
438       if (static_mapping)
439         {
440           u->nstaticsessions++;
441           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
442         }
443       else
444         {
445           u->nsessions++;
446         }
447
448       /* Create list elts */
449       pool_get (sm->per_thread_data[thread_index].list_pool,
450                 per_user_translation_list_elt);
451       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
452                        per_user_translation_list_elt -
453                        sm->per_thread_data[thread_index].list_pool);
454
455       per_user_translation_list_elt->value =
456         s - sm->per_thread_data[thread_index].sessions;
457       s->per_user_index = per_user_translation_list_elt -
458                           sm->per_thread_data[thread_index].list_pool;
459       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
460
461       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
462                           s->per_user_list_head_index,
463                           per_user_translation_list_elt -
464                           sm->per_thread_data[thread_index].list_pool);
465    }
466
467   s->in2out = *key0;
468   s->out2in = key1;
469   s->out2in.protocol = key0->protocol;
470   s->out2in.fib_index = outside_fib_index;
471   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
472   s->ext_host_port = udp0->dst_port;
473   *sessionp = s;
474
475   /* Add to translation hashes */
476   kv0.key = s->in2out.as_u64;
477   kv0.value = s - sm->per_thread_data[thread_index].sessions;
478   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
479                                1 /* is_add */))
480       clib_warning ("in2out key add failed");
481
482   kv0.key = s->out2in.as_u64;
483   kv0.value = s - sm->per_thread_data[thread_index].sessions;
484
485   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
486                                1 /* is_add */))
487       clib_warning ("out2in key add failed");
488
489   /* log NAT event */
490   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
491                                       s->out2in.addr.as_u32,
492                                       s->in2out.protocol,
493                                       s->in2out.port,
494                                       s->out2in.port,
495                                       s->in2out.fib_index);
496   return next0;
497 }
498
499 static_always_inline
500 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
501                                  snat_session_key_t *p_key0)
502 {
503   icmp46_header_t *icmp0;
504   snat_session_key_t key0;
505   icmp_echo_header_t *echo0, *inner_echo0 = 0;
506   ip4_header_t *inner_ip0 = 0;
507   void *l4_header = 0;
508   icmp46_header_t *inner_icmp0;
509
510   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
511   echo0 = (icmp_echo_header_t *)(icmp0+1);
512
513   if (!icmp_is_error_message (icmp0))
514     {
515       key0.protocol = SNAT_PROTOCOL_ICMP;
516       key0.addr = ip0->src_address;
517       key0.port = echo0->identifier;
518     }
519   else
520     {
521       inner_ip0 = (ip4_header_t *)(echo0+1);
522       l4_header = ip4_next_header (inner_ip0);
523       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
524       key0.addr = inner_ip0->dst_address;
525       switch (key0.protocol)
526         {
527         case SNAT_PROTOCOL_ICMP:
528           inner_icmp0 = (icmp46_header_t*)l4_header;
529           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
530           key0.port = inner_echo0->identifier;
531           break;
532         case SNAT_PROTOCOL_UDP:
533         case SNAT_PROTOCOL_TCP:
534           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
535           break;
536         default:
537           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
538         }
539     }
540   *p_key0 = key0;
541   return -1; /* success */
542 }
543
544 /**
545  * Get address and port values to be used for ICMP packet translation
546  * and create session if needed
547  *
548  * @param[in,out] sm             NAT main
549  * @param[in,out] node           NAT node runtime
550  * @param[in] thread_index       thread index
551  * @param[in,out] b0             buffer containing packet to be translated
552  * @param[out] p_proto           protocol used for matching
553  * @param[out] p_value           address and port after NAT translation
554  * @param[out] p_dont_translate  if packet should not be translated
555  * @param d                      optional parameter
556  * @param e                      optional parameter
557  */
558 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
559                            u32 thread_index, vlib_buffer_t *b0,
560                            ip4_header_t *ip0, u8 *p_proto,
561                            snat_session_key_t *p_value,
562                            u8 *p_dont_translate, void *d, void *e)
563 {
564   icmp46_header_t *icmp0;
565   u32 sw_if_index0;
566   u32 rx_fib_index0;
567   snat_session_key_t key0;
568   snat_session_t *s0 = 0;
569   u8 dont_translate = 0;
570   clib_bihash_kv_8_8_t kv0, value0;
571   u32 next0 = ~0;
572   int err;
573
574   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
575   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
576   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
577
578   err = icmp_get_key (ip0, &key0);
579   if (err != -1)
580     {
581       b0->error = node->errors[err];
582       next0 = SNAT_IN2OUT_NEXT_DROP;
583       goto out;
584     }
585   key0.fib_index = rx_fib_index0;
586
587   kv0.key = key0.as_u64;
588
589   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
590                               &value0))
591     {
592       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
593           IP_PROTOCOL_ICMP, rx_fib_index0, thread_index) &&
594           vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0))
595         {
596           dont_translate = 1;
597           goto out;
598         }
599
600       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
601         {
602           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
603           next0 = SNAT_IN2OUT_NEXT_DROP;
604           goto out;
605         }
606
607       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
608                          &s0, node, next0, thread_index);
609
610       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
611         goto out;
612     }
613   else
614     {
615       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
616                         icmp0->type != ICMP4_echo_reply &&
617                         !icmp_is_error_message (icmp0)))
618         {
619           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
620           next0 = SNAT_IN2OUT_NEXT_DROP;
621           goto out;
622         }
623
624       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
625                               value0.value);
626     }
627
628 out:
629   *p_proto = key0.protocol;
630   if (s0)
631     *p_value = s0->out2in;
632   *p_dont_translate = dont_translate;
633   if (d)
634     *(snat_session_t**)d = s0;
635   return next0;
636 }
637
638 /**
639  * Get address and port values to be used for ICMP packet translation
640  *
641  * @param[in] sm                 NAT main
642  * @param[in,out] node           NAT node runtime
643  * @param[in] thread_index       thread index
644  * @param[in,out] b0             buffer containing packet to be translated
645  * @param[out] p_proto           protocol used for matching
646  * @param[out] p_value           address and port after NAT translation
647  * @param[out] p_dont_translate  if packet should not be translated
648  * @param d                      optional parameter
649  * @param e                      optional parameter
650  */
651 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
652                            u32 thread_index, vlib_buffer_t *b0,
653                            ip4_header_t *ip0, u8 *p_proto,
654                            snat_session_key_t *p_value,
655                            u8 *p_dont_translate, void *d, void *e)
656 {
657   icmp46_header_t *icmp0;
658   u32 sw_if_index0;
659   u32 rx_fib_index0;
660   snat_session_key_t key0;
661   snat_session_key_t sm0;
662   u8 dont_translate = 0;
663   u8 is_addr_only;
664   u32 next0 = ~0;
665   int err;
666
667   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
668   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
669   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
670
671   err = icmp_get_key (ip0, &key0);
672   if (err != -1)
673     {
674       b0->error = node->errors[err];
675       next0 = SNAT_IN2OUT_NEXT_DROP;
676       goto out2;
677     }
678   key0.fib_index = rx_fib_index0;
679
680   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only))
681     {
682       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
683           IP_PROTOCOL_ICMP, rx_fib_index0)))
684         {
685           dont_translate = 1;
686           goto out;
687         }
688
689       if (icmp_is_error_message (icmp0))
690         {
691           next0 = SNAT_IN2OUT_NEXT_DROP;
692           goto out;
693         }
694
695       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
696       next0 = SNAT_IN2OUT_NEXT_DROP;
697       goto out;
698     }
699
700   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
701                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
702                     !icmp_is_error_message (icmp0)))
703     {
704       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
705       next0 = SNAT_IN2OUT_NEXT_DROP;
706       goto out;
707     }
708
709 out:
710   *p_value = sm0;
711 out2:
712   *p_proto = key0.protocol;
713   *p_dont_translate = dont_translate;
714   return next0;
715 }
716
717 static inline u32 icmp_in2out (snat_main_t *sm,
718                                vlib_buffer_t * b0,
719                                ip4_header_t * ip0,
720                                icmp46_header_t * icmp0,
721                                u32 sw_if_index0,
722                                u32 rx_fib_index0,
723                                vlib_node_runtime_t * node,
724                                u32 next0,
725                                u32 thread_index,
726                                void *d,
727                                void *e)
728 {
729   snat_session_key_t sm0;
730   u8 protocol;
731   icmp_echo_header_t *echo0, *inner_echo0 = 0;
732   ip4_header_t *inner_ip0;
733   void *l4_header = 0;
734   icmp46_header_t *inner_icmp0;
735   u8 dont_translate;
736   u32 new_addr0, old_addr0;
737   u16 old_id0, new_id0;
738   ip_csum_t sum0;
739   u16 checksum0;
740   u32 next0_tmp;
741
742   echo0 = (icmp_echo_header_t *)(icmp0+1);
743
744   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0, ip0,
745                                        &protocol, &sm0, &dont_translate, d, e);
746   if (next0_tmp != ~0)
747     next0 = next0_tmp;
748   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
749     goto out;
750
751   sum0 = ip_incremental_checksum (0, icmp0,
752                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
753   checksum0 = ~ip_csum_fold (sum0);
754   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
755     {
756       next0 = SNAT_IN2OUT_NEXT_DROP;
757       goto out;
758     }
759
760   old_addr0 = ip0->src_address.as_u32;
761   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
762   if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
763     vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
764
765   sum0 = ip0->checksum;
766   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
767                          src_address /* changed member */);
768   ip0->checksum = ip_csum_fold (sum0);
769
770   if (!icmp_is_error_message (icmp0))
771     {
772       new_id0 = sm0.port;
773       if (PREDICT_FALSE(new_id0 != echo0->identifier))
774         {
775           old_id0 = echo0->identifier;
776           new_id0 = sm0.port;
777           echo0->identifier = new_id0;
778
779           sum0 = icmp0->checksum;
780           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
781                                  identifier);
782           icmp0->checksum = ip_csum_fold (sum0);
783         }
784     }
785   else
786     {
787       inner_ip0 = (ip4_header_t *)(echo0+1);
788       l4_header = ip4_next_header (inner_ip0);
789
790       if (!ip4_header_checksum_is_valid (inner_ip0))
791         {
792           next0 = SNAT_IN2OUT_NEXT_DROP;
793           goto out;
794         }
795
796       old_addr0 = inner_ip0->dst_address.as_u32;
797       inner_ip0->dst_address = sm0.addr;
798       new_addr0 = inner_ip0->dst_address.as_u32;
799
800       sum0 = icmp0->checksum;
801       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
802                              dst_address /* changed member */);
803       icmp0->checksum = ip_csum_fold (sum0);
804
805       switch (protocol)
806         {
807           case SNAT_PROTOCOL_ICMP:
808             inner_icmp0 = (icmp46_header_t*)l4_header;
809             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
810
811             old_id0 = inner_echo0->identifier;
812             new_id0 = sm0.port;
813             inner_echo0->identifier = new_id0;
814
815             sum0 = icmp0->checksum;
816             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
817                                    identifier);
818             icmp0->checksum = ip_csum_fold (sum0);
819             break;
820           case SNAT_PROTOCOL_UDP:
821           case SNAT_PROTOCOL_TCP:
822             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
823             new_id0 = sm0.port;
824             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
825
826             sum0 = icmp0->checksum;
827             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
828                                    dst_port);
829             icmp0->checksum = ip_csum_fold (sum0);
830             break;
831           default:
832             ASSERT(0);
833         }
834     }
835
836 out:
837   return next0;
838 }
839
840 /**
841  * @brief Hairpinning
842  *
843  * Hairpinning allows two endpoints on the internal side of the NAT to
844  * communicate even if they only use each other's external IP addresses
845  * and ports.
846  *
847  * @param sm     NAT main.
848  * @param b0     Vlib buffer.
849  * @param ip0    IP header.
850  * @param udp0   UDP header.
851  * @param tcp0   TCP header.
852  * @param proto0 NAT protocol.
853  */
854 static inline int
855 snat_hairpinning (snat_main_t *sm,
856                   vlib_buffer_t * b0,
857                   ip4_header_t * ip0,
858                   udp_header_t * udp0,
859                   tcp_header_t * tcp0,
860                   u32 proto0)
861 {
862   snat_session_key_t key0, sm0;
863   snat_session_t * s0;
864   clib_bihash_kv_8_8_t kv0, value0;
865   ip_csum_t sum0;
866   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
867   u16 new_dst_port0, old_dst_port0;
868
869   key0.addr = ip0->dst_address;
870   key0.port = udp0->dst_port;
871   key0.protocol = proto0;
872   key0.fib_index = sm->outside_fib_index;
873   kv0.key = key0.as_u64;
874
875   /* Check if destination is static mappings */
876   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
877     {
878       new_dst_addr0 = sm0.addr.as_u32;
879       new_dst_port0 = sm0.port;
880       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
881     }
882   /* or active session */
883   else
884     {
885       if (sm->num_workers > 1)
886         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
887       else
888         ti = sm->num_workers;
889
890       if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
891         {
892           si = value0.value;
893
894           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
895           new_dst_addr0 = s0->in2out.addr.as_u32;
896           new_dst_port0 = s0->in2out.port;
897           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
898         }
899     }
900
901   /* Destination is behind the same NAT, use internal address and port */
902   if (new_dst_addr0)
903     {
904       old_dst_addr0 = ip0->dst_address.as_u32;
905       ip0->dst_address.as_u32 = new_dst_addr0;
906       sum0 = ip0->checksum;
907       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
908                              ip4_header_t, dst_address);
909       ip0->checksum = ip_csum_fold (sum0);
910
911       old_dst_port0 = tcp0->dst;
912       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
913         {
914           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
915             {
916               tcp0->dst = new_dst_port0;
917               sum0 = tcp0->checksum;
918               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
919                                      ip4_header_t, dst_address);
920               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
921                                      ip4_header_t /* cheat */, length);
922               tcp0->checksum = ip_csum_fold(sum0);
923             }
924           else
925             {
926               udp0->dst_port = new_dst_port0;
927               udp0->checksum = 0;
928             }
929         }
930       else
931         {
932           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
933             {
934               sum0 = tcp0->checksum;
935               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
936                                      ip4_header_t, dst_address);
937               tcp0->checksum = ip_csum_fold(sum0);
938             }
939         }
940       return 1;
941     }
942   return 0;
943 }
944
945 static inline void
946 snat_icmp_hairpinning (snat_main_t *sm,
947                        vlib_buffer_t * b0,
948                        ip4_header_t * ip0,
949                        icmp46_header_t * icmp0)
950 {
951   snat_session_key_t key0, sm0;
952   clib_bihash_kv_8_8_t kv0, value0;
953   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
954   ip_csum_t sum0;
955   snat_session_t *s0;
956
957   if (!icmp_is_error_message (icmp0))
958     {
959       icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
960       u16 icmp_id0 = echo0->identifier;
961       key0.addr = ip0->dst_address;
962       key0.port = icmp_id0;
963       key0.protocol = SNAT_PROTOCOL_ICMP;
964       key0.fib_index = sm->outside_fib_index;
965       kv0.key = key0.as_u64;
966
967       if (sm->num_workers > 1)
968         ti = (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread;
969       else
970         ti = sm->num_workers;
971
972       /* Check if destination is in active sessions */
973       if (clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0,
974                                   &value0))
975         {
976           /* or static mappings */
977           if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
978             {
979               new_dst_addr0 = sm0.addr.as_u32;
980               vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
981             }
982         }
983       else
984         {
985           si = value0.value;
986
987           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
988           new_dst_addr0 = s0->in2out.addr.as_u32;
989           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
990           echo0->identifier = s0->in2out.port;
991           sum0 = icmp0->checksum;
992           sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
993                                  icmp_echo_header_t, identifier);
994           icmp0->checksum = ip_csum_fold (sum0);
995         }
996
997       /* Destination is behind the same NAT, use internal address and port */
998       if (new_dst_addr0)
999         {
1000           old_dst_addr0 = ip0->dst_address.as_u32;
1001           ip0->dst_address.as_u32 = new_dst_addr0;
1002           sum0 = ip0->checksum;
1003           sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
1004                                  ip4_header_t, dst_address);
1005           ip0->checksum = ip_csum_fold (sum0);
1006         }
1007     }
1008
1009 }
1010
1011 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
1012                                          vlib_buffer_t * b0,
1013                                          ip4_header_t * ip0,
1014                                          icmp46_header_t * icmp0,
1015                                          u32 sw_if_index0,
1016                                          u32 rx_fib_index0,
1017                                          vlib_node_runtime_t * node,
1018                                          u32 next0,
1019                                          f64 now,
1020                                          u32 thread_index,
1021                                          snat_session_t ** p_s0)
1022 {
1023   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1024                       next0, thread_index, p_s0, 0);
1025   snat_session_t * s0 = *p_s0;
1026   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
1027     {
1028       /* Hairpinning */
1029       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == 0)
1030         snat_icmp_hairpinning(sm, b0, ip0, icmp0);
1031       /* Accounting */
1032       s0->last_heard = now;
1033       s0->total_pkts++;
1034       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
1035       /* Per-user LRU list maintenance for dynamic translations */
1036       if (!snat_is_session_static (s0))
1037         {
1038           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1039                              s0->per_user_index);
1040           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1041                               s0->per_user_list_head_index,
1042                               s0->per_user_index);
1043         }
1044     }
1045   return next0;
1046 }
1047 static inline void
1048 snat_hairpinning_unknown_proto (snat_main_t *sm,
1049                                 vlib_buffer_t * b,
1050                                 ip4_header_t * ip)
1051 {
1052   u32 old_addr, new_addr = 0, ti = 0;
1053   clib_bihash_kv_8_8_t kv, value;
1054   clib_bihash_kv_16_8_t s_kv, s_value;
1055   nat_ed_ses_key_t key;
1056   snat_session_key_t m_key;
1057   snat_static_mapping_t *m;
1058   ip_csum_t sum;
1059   snat_session_t *s;
1060
1061   old_addr = ip->dst_address.as_u32;
1062   key.l_addr.as_u32 = ip->dst_address.as_u32;
1063   key.r_addr.as_u32 = ip->src_address.as_u32;
1064   key.fib_index = sm->outside_fib_index;
1065   key.proto = ip->protocol;
1066   key.rsvd = 0;
1067   key.l_port = 0;
1068   s_kv.key[0] = key.as_u64[0];
1069   s_kv.key[1] = key.as_u64[1];
1070   if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1071     {
1072       m_key.addr = ip->dst_address;
1073       m_key.fib_index = sm->outside_fib_index;
1074       m_key.port = 0;
1075       m_key.protocol = 0;
1076       kv.key = m_key.as_u64;
1077       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1078         return;
1079
1080       m = pool_elt_at_index (sm->static_mappings, value.value);
1081       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1082         vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
1083       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
1084     }
1085   else
1086     {
1087       if (sm->num_workers > 1)
1088         ti = sm->worker_out2in_cb (ip, sm->outside_fib_index);
1089       else
1090         ti = sm->num_workers;
1091
1092       s = pool_elt_at_index (sm->per_thread_data[ti].sessions, s_value.value);
1093       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1094         vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
1095       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
1096     }
1097   sum = ip->checksum;
1098   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
1099   ip->checksum = ip_csum_fold (sum);
1100 }
1101
1102 static snat_session_t *
1103 snat_in2out_unknown_proto (snat_main_t *sm,
1104                            vlib_buffer_t * b,
1105                            ip4_header_t * ip,
1106                            u32 rx_fib_index,
1107                            u32 thread_index,
1108                            f64 now,
1109                            vlib_main_t * vm,
1110                            vlib_node_runtime_t * node)
1111 {
1112   clib_bihash_kv_8_8_t kv, value;
1113   clib_bihash_kv_16_8_t s_kv, s_value;
1114   snat_static_mapping_t *m;
1115   snat_session_key_t m_key;
1116   u32 old_addr, new_addr = 0;
1117   ip_csum_t sum;
1118   snat_user_key_t u_key;
1119   snat_user_t *u;
1120   dlist_elt_t *head, *elt, *oldest;
1121   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1122   u32 elt_index, head_index, ses_index, oldest_index;
1123   snat_session_t * s;
1124   nat_ed_ses_key_t key;
1125   u32 address_index = ~0;
1126   int i;
1127   u8 is_sm = 0;
1128
1129   old_addr = ip->src_address.as_u32;
1130
1131   key.l_addr = ip->src_address;
1132   key.r_addr = ip->dst_address;
1133   key.fib_index = rx_fib_index;
1134   key.proto = ip->protocol;
1135   key.rsvd = 0;
1136   key.l_port = 0;
1137   s_kv.key[0] = key.as_u64[0];
1138   s_kv.key[1] = key.as_u64[1];
1139
1140   if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
1141     {
1142       s = pool_elt_at_index (tsm->sessions, s_value.value);
1143       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1144     }
1145   else
1146     {
1147       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
1148         {
1149           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
1150           return 0;
1151         }
1152
1153       u_key.addr = ip->src_address;
1154       u_key.fib_index = rx_fib_index;
1155       kv.key = u_key.as_u64;
1156
1157       /* Ever heard of the "user" = src ip4 address before? */
1158       if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1159         {
1160           /* no, make a new one */
1161           pool_get (tsm->users, u);
1162           memset (u, 0, sizeof (*u));
1163           u->addr = ip->src_address;
1164           u->fib_index = rx_fib_index;
1165
1166           pool_get (tsm->list_pool, head);
1167           u->sessions_per_user_list_head_index = head - tsm->list_pool;
1168
1169           clib_dlist_init (tsm->list_pool,
1170                            u->sessions_per_user_list_head_index);
1171
1172           kv.value = u - tsm->users;
1173
1174           /* add user */
1175           clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1);
1176         }
1177       else
1178         {
1179           u = pool_elt_at_index (tsm->users, value.value);
1180         }
1181
1182       m_key.addr = ip->src_address;
1183       m_key.port = 0;
1184       m_key.protocol = 0;
1185       m_key.fib_index = rx_fib_index;
1186       kv.key = m_key.as_u64;
1187
1188       /* Try to find static mapping first */
1189       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
1190         {
1191           m = pool_elt_at_index (sm->static_mappings, value.value);
1192           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
1193           is_sm = 1;
1194           goto create_ses;
1195         }
1196       /* Fallback to 3-tuple key */
1197       else
1198         {
1199           /* Choose same out address as for TCP/UDP session to same destination */
1200           if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1201             {
1202               head_index = u->sessions_per_user_list_head_index;
1203               head = pool_elt_at_index (tsm->list_pool, head_index);
1204               elt_index = head->next;
1205               elt = pool_elt_at_index (tsm->list_pool, elt_index);
1206               ses_index = elt->value;
1207               while (ses_index != ~0)
1208                 {
1209                   s =  pool_elt_at_index (tsm->sessions, ses_index);
1210                   elt_index = elt->next;
1211                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1212                   ses_index = elt->value;
1213
1214                   if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
1215                     {
1216                       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1217                       address_index = s->outside_address_index;
1218
1219                       key.fib_index = sm->outside_fib_index;
1220                       key.l_addr.as_u32 = new_addr;
1221                       s_kv.key[0] = key.as_u64[0];
1222                       s_kv.key[1] = key.as_u64[1];
1223                       if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1224                         break;
1225
1226                       goto create_ses;
1227                     }
1228                 }
1229             }
1230           key.fib_index = sm->outside_fib_index;
1231           for (i = 0; i < vec_len (sm->addresses); i++)
1232             {
1233               key.l_addr.as_u32 = sm->addresses[i].addr.as_u32;
1234               s_kv.key[0] = key.as_u64[0];
1235               s_kv.key[1] = key.as_u64[1];
1236               if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1237                 {
1238                   new_addr = ip->src_address.as_u32 = key.l_addr.as_u32;
1239                   address_index = i;
1240                   goto create_ses;
1241                 }
1242             }
1243           return 0;
1244         }
1245
1246 create_ses:
1247       /* Over quota? Recycle the least recently used dynamic translation */
1248       if (u->nsessions >= sm->max_translations_per_user && !is_sm)
1249         {
1250           /* Remove the oldest dynamic translation */
1251           do {
1252               oldest_index = clib_dlist_remove_head (
1253                 tsm->list_pool, u->sessions_per_user_list_head_index);
1254
1255               ASSERT (oldest_index != ~0);
1256
1257               /* add it back to the end of the LRU list */
1258               clib_dlist_addtail (tsm->list_pool,
1259                                   u->sessions_per_user_list_head_index,
1260                                   oldest_index);
1261               /* Get the list element */
1262               oldest = pool_elt_at_index (tsm->list_pool, oldest_index);
1263
1264               /* Get the session index from the list element */
1265               ses_index = oldest->value;
1266
1267               /* Get the session */
1268               s = pool_elt_at_index (tsm->sessions, ses_index);
1269           } while (snat_is_session_static (s));
1270
1271           if (snat_is_unk_proto_session (s))
1272             {
1273               /* Remove from lookup tables */
1274               key.l_addr = s->in2out.addr;
1275               key.r_addr = s->ext_host_addr;
1276               key.fib_index = s->in2out.fib_index;
1277               key.proto = s->in2out.port;
1278               s_kv.key[0] = key.as_u64[0];
1279               s_kv.key[1] = key.as_u64[1];
1280               if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 0))
1281                 clib_warning ("in2out key del failed");
1282
1283               key.l_addr = s->out2in.addr;
1284               key.fib_index = s->out2in.fib_index;
1285               s_kv.key[0] = key.as_u64[0];
1286               s_kv.key[1] = key.as_u64[1];
1287               if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 0))
1288                 clib_warning ("out2in key del failed");
1289             }
1290           else
1291             {
1292               /* log NAT event */
1293               snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
1294                                                   s->out2in.addr.as_u32,
1295                                                   s->in2out.protocol,
1296                                                   s->in2out.port,
1297                                                   s->out2in.port,
1298                                                   s->in2out.fib_index);
1299
1300               snat_free_outside_address_and_port (sm->addresses, thread_index,
1301                                                   &s->out2in,
1302                                                   s->outside_address_index);
1303
1304               /* Remove in2out, out2in keys */
1305               kv.key = s->in2out.as_u64;
1306               if (clib_bihash_add_del_8_8 (
1307                     &sm->per_thread_data[thread_index].in2out, &kv, 0))
1308                 clib_warning ("in2out key del failed");
1309               kv.key = s->out2in.as_u64;
1310               if (clib_bihash_add_del_8_8 (
1311                     &sm->per_thread_data[thread_index].out2in, &kv, 0))
1312                 clib_warning ("out2in key del failed");
1313             }
1314         }
1315       else
1316         {
1317           /* Create a new session */
1318           pool_get (tsm->sessions, s);
1319           memset (s, 0, sizeof (*s));
1320
1321           /* Create list elts */
1322           pool_get (tsm->list_pool, elt);
1323           clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
1324           elt->value = s - tsm->sessions;
1325           s->per_user_index = elt - tsm->list_pool;
1326           s->per_user_list_head_index = u->sessions_per_user_list_head_index;
1327           clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1328                               s->per_user_index);
1329         }
1330
1331       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1332       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
1333       s->outside_address_index = address_index;
1334       s->out2in.addr.as_u32 = new_addr;
1335       s->out2in.fib_index = sm->outside_fib_index;
1336       s->in2out.addr.as_u32 = old_addr;
1337       s->in2out.fib_index = rx_fib_index;
1338       s->in2out.port = s->out2in.port = ip->protocol;
1339       if (is_sm)
1340         {
1341           u->nstaticsessions++;
1342           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1343         }
1344       else
1345         {
1346           u->nsessions++;
1347         }
1348
1349       /* Add to lookup tables */
1350       key.l_addr.as_u32 = old_addr;
1351       key.r_addr = ip->dst_address;
1352       key.proto = ip->protocol;
1353       key.fib_index = rx_fib_index;
1354       s_kv.key[0] = key.as_u64[0];
1355       s_kv.key[1] = key.as_u64[1];
1356       s_kv.value = s - tsm->sessions;
1357       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1358         clib_warning ("in2out key add failed");
1359
1360       key.l_addr.as_u32 = new_addr;
1361       key.fib_index = sm->outside_fib_index;
1362       s_kv.key[0] = key.as_u64[0];
1363       s_kv.key[1] = key.as_u64[1];
1364       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1365         clib_warning ("out2in key add failed");
1366   }
1367
1368   /* Update IP checksum */
1369   sum = ip->checksum;
1370   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1371   ip->checksum = ip_csum_fold (sum);
1372
1373   /* Accounting */
1374   s->last_heard = now;
1375   s->total_pkts++;
1376   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1377   /* Per-user LRU list maintenance */
1378   clib_dlist_remove (tsm->list_pool, s->per_user_index);
1379   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1380                       s->per_user_index);
1381
1382   /* Hairpinning */
1383   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1384     snat_hairpinning_unknown_proto(sm, b, ip);
1385
1386   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1387     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1388
1389   return s;
1390 }
1391
1392 static snat_session_t *
1393 snat_in2out_lb (snat_main_t *sm,
1394                 vlib_buffer_t * b,
1395                 ip4_header_t * ip,
1396                 u32 rx_fib_index,
1397                 u32 thread_index,
1398                 f64 now,
1399                 vlib_main_t * vm,
1400                 vlib_node_runtime_t * node)
1401 {
1402   nat_ed_ses_key_t key;
1403   clib_bihash_kv_16_8_t s_kv, s_value;
1404   udp_header_t *udp = ip4_next_header (ip);
1405   tcp_header_t *tcp = (tcp_header_t *) udp;
1406   snat_session_t *s = 0;
1407   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1408   u32 old_addr, new_addr;
1409   u16 new_port, old_port;
1410   ip_csum_t sum;
1411   u32 proto = ip_proto_to_snat_proto (ip->protocol);
1412   snat_session_key_t e_key, l_key;
1413   clib_bihash_kv_8_8_t kv, value;
1414   snat_user_key_t u_key;
1415   snat_user_t *u;
1416   dlist_elt_t *head, *elt;
1417
1418   old_addr = ip->src_address.as_u32;
1419
1420   key.l_addr = ip->src_address;
1421   key.r_addr = ip->dst_address;
1422   key.fib_index = rx_fib_index;
1423   key.proto = ip->protocol;
1424   key.rsvd = 0;
1425   key.l_port = udp->src_port;
1426   s_kv.key[0] = key.as_u64[0];
1427   s_kv.key[1] = key.as_u64[1];
1428
1429   if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
1430     {
1431       s = pool_elt_at_index (tsm->sessions, s_value.value);
1432     }
1433   else
1434     {
1435       if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index)))
1436         {
1437           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
1438           return 0;
1439         }
1440
1441       l_key.addr = ip->src_address;
1442       l_key.port = udp->src_port;
1443       l_key.protocol = proto;
1444       l_key.fib_index = rx_fib_index;
1445       if (snat_static_mapping_match(sm, l_key, &e_key, 0, 0))
1446         return 0;
1447
1448       u_key.addr = ip->src_address;
1449       u_key.fib_index = rx_fib_index;
1450       kv.key = u_key.as_u64;
1451
1452       /* Ever heard of the "user" = src ip4 address before? */
1453       if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1454         {
1455           /* no, make a new one */
1456           pool_get (tsm->users, u);
1457           memset (u, 0, sizeof (*u));
1458           u->addr = ip->src_address;
1459           u->fib_index = rx_fib_index;
1460
1461           pool_get (tsm->list_pool, head);
1462           u->sessions_per_user_list_head_index = head - tsm->list_pool;
1463
1464           clib_dlist_init (tsm->list_pool,
1465                            u->sessions_per_user_list_head_index);
1466
1467           kv.value = u - tsm->users;
1468
1469           /* add user */
1470           if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1))
1471             clib_warning ("user key add failed");
1472         }
1473       else
1474         {
1475           u = pool_elt_at_index (tsm->users, value.value);
1476         }
1477
1478       /* Create a new session */
1479       pool_get (tsm->sessions, s);
1480       memset (s, 0, sizeof (*s));
1481
1482       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1483       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1484       s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
1485       s->outside_address_index = ~0;
1486       s->in2out = l_key;
1487       s->out2in = e_key;
1488       u->nstaticsessions++;
1489
1490       /* Create list elts */
1491       pool_get (tsm->list_pool, elt);
1492       clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
1493       elt->value = s - tsm->sessions;
1494       s->per_user_index = elt - tsm->list_pool;
1495       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
1496       clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1497                           s->per_user_index);
1498
1499       /* Add to lookup tables */
1500       s_kv.value = s - tsm->sessions;
1501       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1502         clib_warning ("in2out-ed key add failed");
1503
1504       key.l_addr = e_key.addr;
1505       key.fib_index = e_key.fib_index;
1506       key.l_port = e_key.port;
1507       s_kv.key[0] = key.as_u64[0];
1508       s_kv.key[1] = key.as_u64[1];
1509       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1510         clib_warning ("out2in-ed key add failed");
1511     }
1512
1513   new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1514
1515   /* Update IP checksum */
1516   sum = ip->checksum;
1517   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1518   ip->checksum = ip_csum_fold (sum);
1519
1520   if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
1521     {
1522       old_port = tcp->src_port;
1523       tcp->src_port = s->out2in.port;
1524       new_port = tcp->src_port;
1525
1526       sum = tcp->checksum;
1527       sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1528       sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
1529       tcp->checksum = ip_csum_fold(sum);
1530     }
1531   else
1532     {
1533       udp->src_port = s->out2in.port;
1534       udp->checksum = 0;
1535     }
1536
1537   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1538     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1539
1540   /* Accounting */
1541   s->last_heard = now;
1542   s->total_pkts++;
1543   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1544   return s;
1545 }
1546
1547 static inline uword
1548 snat_in2out_node_fn_inline (vlib_main_t * vm,
1549                             vlib_node_runtime_t * node,
1550                             vlib_frame_t * frame, int is_slow_path,
1551                             int is_output_feature)
1552 {
1553   u32 n_left_from, * from, * to_next;
1554   snat_in2out_next_t next_index;
1555   u32 pkts_processed = 0;
1556   snat_main_t * sm = &snat_main;
1557   f64 now = vlib_time_now (vm);
1558   u32 stats_node_index;
1559   u32 thread_index = vlib_get_thread_index ();
1560
1561   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
1562     snat_in2out_node.index;
1563
1564   from = vlib_frame_vector_args (frame);
1565   n_left_from = frame->n_vectors;
1566   next_index = node->cached_next_index;
1567
1568   while (n_left_from > 0)
1569     {
1570       u32 n_left_to_next;
1571
1572       vlib_get_next_frame (vm, node, next_index,
1573                            to_next, n_left_to_next);
1574
1575       while (n_left_from >= 4 && n_left_to_next >= 2)
1576         {
1577           u32 bi0, bi1;
1578           vlib_buffer_t * b0, * b1;
1579           u32 next0, next1;
1580           u32 sw_if_index0, sw_if_index1;
1581           ip4_header_t * ip0, * ip1;
1582           ip_csum_t sum0, sum1;
1583           u32 new_addr0, old_addr0, new_addr1, old_addr1;
1584           u16 old_port0, new_port0, old_port1, new_port1;
1585           udp_header_t * udp0, * udp1;
1586           tcp_header_t * tcp0, * tcp1;
1587           icmp46_header_t * icmp0, * icmp1;
1588           snat_session_key_t key0, key1;
1589           u32 rx_fib_index0, rx_fib_index1;
1590           u32 proto0, proto1;
1591           snat_session_t * s0 = 0, * s1 = 0;
1592           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1593           u32 iph_offset0 = 0, iph_offset1 = 0;
1594
1595           /* Prefetch next iteration. */
1596           {
1597             vlib_buffer_t * p2, * p3;
1598
1599             p2 = vlib_get_buffer (vm, from[2]);
1600             p3 = vlib_get_buffer (vm, from[3]);
1601
1602             vlib_prefetch_buffer_header (p2, LOAD);
1603             vlib_prefetch_buffer_header (p3, LOAD);
1604
1605             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1606             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1607           }
1608
1609           /* speculatively enqueue b0 and b1 to the current next frame */
1610           to_next[0] = bi0 = from[0];
1611           to_next[1] = bi1 = from[1];
1612           from += 2;
1613           to_next += 2;
1614           n_left_from -= 2;
1615           n_left_to_next -= 2;
1616
1617           b0 = vlib_get_buffer (vm, bi0);
1618           b1 = vlib_get_buffer (vm, bi1);
1619
1620           if (is_output_feature)
1621             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1622
1623           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1624                  iph_offset0);
1625
1626           udp0 = ip4_next_header (ip0);
1627           tcp0 = (tcp_header_t *) udp0;
1628           icmp0 = (icmp46_header_t *) udp0;
1629
1630           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1631           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1632                                    sw_if_index0);
1633
1634           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1635
1636           if (PREDICT_FALSE(ip0->ttl == 1))
1637             {
1638               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1639               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1640                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1641                                            0);
1642               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1643               goto trace00;
1644             }
1645
1646           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1647
1648           /* Next configured feature, probably ip4-lookup */
1649           if (is_slow_path)
1650             {
1651               if (PREDICT_FALSE (proto0 == ~0))
1652                 {
1653                   s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1654                                                   thread_index, now, vm, node);
1655                   if (!s0)
1656                     next0 = SNAT_IN2OUT_NEXT_DROP;
1657                   goto trace00;
1658                 }
1659
1660               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1661                 {
1662                   next0 = icmp_in2out_slow_path
1663                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
1664                      node, next0, now, thread_index, &s0);
1665                   goto trace00;
1666                 }
1667             }
1668           else
1669             {
1670               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1671                 {
1672                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1673                   goto trace00;
1674                 }
1675
1676               if (ip4_is_fragment (ip0))
1677                 {
1678                   next0 = SNAT_IN2OUT_NEXT_REASS;
1679                   goto trace00;
1680                 }
1681             }
1682
1683           key0.addr = ip0->src_address;
1684           key0.port = udp0->src_port;
1685           key0.protocol = proto0;
1686           key0.fib_index = rx_fib_index0;
1687
1688           kv0.key = key0.as_u64;
1689
1690           if (PREDICT_FALSE (clib_bihash_search_8_8 (
1691               &sm->per_thread_data[thread_index].in2out, &kv0, &value0) != 0))
1692             {
1693               if (is_slow_path)
1694                 {
1695                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1696                       ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature)
1697                     goto trace00;
1698
1699                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1700                                      &s0, node, next0, thread_index);
1701                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1702                     goto trace00;
1703                 }
1704               else
1705                 {
1706                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1707                   goto trace00;
1708                 }
1709             }
1710           else
1711             {
1712               if (PREDICT_FALSE (value0.value == ~0ULL))
1713                 {
1714                   if (is_slow_path)
1715                     {
1716                       s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
1717                                           thread_index, now, vm, node);
1718                       if (!s0)
1719                         next0 = SNAT_IN2OUT_NEXT_DROP;
1720                       goto trace00;
1721                     }
1722                   else
1723                     {
1724                       next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1725                       goto trace00;
1726                     }
1727                 }
1728               else
1729                 {
1730                   s0 = pool_elt_at_index (
1731                     sm->per_thread_data[thread_index].sessions,
1732                     value0.value);
1733                 }
1734             }
1735
1736           b0->flags |= VNET_BUFFER_F_IS_NATED;
1737
1738           old_addr0 = ip0->src_address.as_u32;
1739           ip0->src_address = s0->out2in.addr;
1740           new_addr0 = ip0->src_address.as_u32;
1741           if (!is_output_feature)
1742             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1743
1744           sum0 = ip0->checksum;
1745           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1746                                  ip4_header_t,
1747                                  src_address /* changed member */);
1748           ip0->checksum = ip_csum_fold (sum0);
1749
1750           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1751             {
1752               old_port0 = tcp0->src_port;
1753               tcp0->src_port = s0->out2in.port;
1754               new_port0 = tcp0->src_port;
1755
1756               sum0 = tcp0->checksum;
1757               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1758                                      ip4_header_t,
1759                                      dst_address /* changed member */);
1760               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1761                                      ip4_header_t /* cheat */,
1762                                      length /* changed member */);
1763               tcp0->checksum = ip_csum_fold(sum0);
1764             }
1765           else
1766             {
1767               old_port0 = udp0->src_port;
1768               udp0->src_port = s0->out2in.port;
1769               udp0->checksum = 0;
1770             }
1771
1772           /* Accounting */
1773           s0->last_heard = now;
1774           s0->total_pkts++;
1775           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1776           /* Per-user LRU list maintenance for dynamic translation */
1777           if (!snat_is_session_static (s0))
1778             {
1779               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1780                                  s0->per_user_index);
1781               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1782                                   s0->per_user_list_head_index,
1783                                   s0->per_user_index);
1784             }
1785         trace00:
1786
1787           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1788                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1789             {
1790               snat_in2out_trace_t *t =
1791                  vlib_add_trace (vm, node, b0, sizeof (*t));
1792               t->is_slow_path = is_slow_path;
1793               t->sw_if_index = sw_if_index0;
1794               t->next_index = next0;
1795                   t->session_index = ~0;
1796               if (s0)
1797                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1798             }
1799
1800           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1801
1802           if (is_output_feature)
1803             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1804
1805           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1806                  iph_offset1);
1807
1808           udp1 = ip4_next_header (ip1);
1809           tcp1 = (tcp_header_t *) udp1;
1810           icmp1 = (icmp46_header_t *) udp1;
1811
1812           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1813           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1814                                    sw_if_index1);
1815
1816           if (PREDICT_FALSE(ip1->ttl == 1))
1817             {
1818               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1819               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1820                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1821                                            0);
1822               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1823               goto trace01;
1824             }
1825
1826           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1827
1828           /* Next configured feature, probably ip4-lookup */
1829           if (is_slow_path)
1830             {
1831               if (PREDICT_FALSE (proto1 == ~0))
1832                 {
1833                   s1 = snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1,
1834                                                   thread_index, now, vm, node);
1835                   if (!s1)
1836                     next1 = SNAT_IN2OUT_NEXT_DROP;
1837                   goto trace01;
1838                 }
1839
1840               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1841                 {
1842                   next1 = icmp_in2out_slow_path
1843                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1844                      next1, now, thread_index, &s1);
1845                   goto trace01;
1846                 }
1847             }
1848           else
1849             {
1850               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1851                 {
1852                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1853                   goto trace01;
1854                 }
1855
1856               if (ip4_is_fragment (ip1))
1857                 {
1858                   next0 = SNAT_IN2OUT_NEXT_REASS;
1859                   goto trace01;
1860                 }
1861             }
1862
1863           b1->flags |= VNET_BUFFER_F_IS_NATED;
1864
1865           key1.addr = ip1->src_address;
1866           key1.port = udp1->src_port;
1867           key1.protocol = proto1;
1868           key1.fib_index = rx_fib_index1;
1869
1870           kv1.key = key1.as_u64;
1871
1872             if (PREDICT_FALSE(clib_bihash_search_8_8 (
1873                 &sm->per_thread_data[thread_index].in2out, &kv1, &value1) != 0))
1874             {
1875               if (is_slow_path)
1876                 {
1877                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
1878                       ip1, proto1, rx_fib_index1, thread_index)) && !is_output_feature)
1879                     goto trace01;
1880
1881                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1882                                      &s1, node, next1, thread_index);
1883                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1884                     goto trace01;
1885                 }
1886               else
1887                 {
1888                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1889                   goto trace01;
1890                 }
1891             }
1892           else
1893             {
1894               if (PREDICT_FALSE (value1.value == ~0ULL))
1895                 {
1896                   if (is_slow_path)
1897                     {
1898                       s1 = snat_in2out_lb(sm, b1, ip1, rx_fib_index1,
1899                                           thread_index, now, vm, node);
1900                       if (!s1)
1901                         next1 = SNAT_IN2OUT_NEXT_DROP;
1902                       goto trace01;
1903                     }
1904                   else
1905                     {
1906                       next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1907                       goto trace01;
1908                     }
1909                 }
1910               else
1911                 {
1912                   s1 = pool_elt_at_index (
1913                     sm->per_thread_data[thread_index].sessions,
1914                     value1.value);
1915                 }
1916             }
1917
1918           old_addr1 = ip1->src_address.as_u32;
1919           ip1->src_address = s1->out2in.addr;
1920           new_addr1 = ip1->src_address.as_u32;
1921           if (!is_output_feature)
1922             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1923
1924           sum1 = ip1->checksum;
1925           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1926                                  ip4_header_t,
1927                                  src_address /* changed member */);
1928           ip1->checksum = ip_csum_fold (sum1);
1929
1930           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1931             {
1932               old_port1 = tcp1->src_port;
1933               tcp1->src_port = s1->out2in.port;
1934               new_port1 = tcp1->src_port;
1935
1936               sum1 = tcp1->checksum;
1937               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1938                                      ip4_header_t,
1939                                      dst_address /* changed member */);
1940               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1941                                      ip4_header_t /* cheat */,
1942                                      length /* changed member */);
1943               tcp1->checksum = ip_csum_fold(sum1);
1944             }
1945           else
1946             {
1947               old_port1 = udp1->src_port;
1948               udp1->src_port = s1->out2in.port;
1949               udp1->checksum = 0;
1950             }
1951
1952           /* Accounting */
1953           s1->last_heard = now;
1954           s1->total_pkts++;
1955           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1956           /* Per-user LRU list maintenance for dynamic translation */
1957           if (!snat_is_session_static (s1))
1958             {
1959               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1960                                  s1->per_user_index);
1961               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1962                                   s1->per_user_list_head_index,
1963                                   s1->per_user_index);
1964             }
1965         trace01:
1966
1967           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1968                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1969             {
1970               snat_in2out_trace_t *t =
1971                  vlib_add_trace (vm, node, b1, sizeof (*t));
1972               t->sw_if_index = sw_if_index1;
1973               t->next_index = next1;
1974               t->session_index = ~0;
1975               if (s1)
1976                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1977             }
1978
1979           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1980
1981           /* verify speculative enqueues, maybe switch current next frame */
1982           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1983                                            to_next, n_left_to_next,
1984                                            bi0, bi1, next0, next1);
1985         }
1986
1987       while (n_left_from > 0 && n_left_to_next > 0)
1988         {
1989           u32 bi0;
1990           vlib_buffer_t * b0;
1991           u32 next0;
1992           u32 sw_if_index0;
1993           ip4_header_t * ip0;
1994           ip_csum_t sum0;
1995           u32 new_addr0, old_addr0;
1996           u16 old_port0, new_port0;
1997           udp_header_t * udp0;
1998           tcp_header_t * tcp0;
1999           icmp46_header_t * icmp0;
2000           snat_session_key_t key0;
2001           u32 rx_fib_index0;
2002           u32 proto0;
2003           snat_session_t * s0 = 0;
2004           clib_bihash_kv_8_8_t kv0, value0;
2005           u32 iph_offset0 = 0;
2006
2007           /* speculatively enqueue b0 to the current next frame */
2008           bi0 = from[0];
2009           to_next[0] = bi0;
2010           from += 1;
2011           to_next += 1;
2012           n_left_from -= 1;
2013           n_left_to_next -= 1;
2014
2015           b0 = vlib_get_buffer (vm, bi0);
2016           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2017
2018           if (is_output_feature)
2019             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
2020
2021           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
2022                  iph_offset0);
2023
2024           udp0 = ip4_next_header (ip0);
2025           tcp0 = (tcp_header_t *) udp0;
2026           icmp0 = (icmp46_header_t *) udp0;
2027
2028           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2029           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
2030                                    sw_if_index0);
2031
2032           if (PREDICT_FALSE(ip0->ttl == 1))
2033             {
2034               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2035               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2036                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2037                                            0);
2038               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2039               goto trace0;
2040             }
2041
2042           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2043
2044           /* Next configured feature, probably ip4-lookup */
2045           if (is_slow_path)
2046             {
2047               if (PREDICT_FALSE (proto0 == ~0))
2048                 {
2049                   s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
2050                                                   thread_index, now, vm, node);
2051                   if (!s0)
2052                     next0 = SNAT_IN2OUT_NEXT_DROP;
2053                   goto trace0;
2054                 }
2055
2056               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2057                 {
2058                   next0 = icmp_in2out_slow_path
2059                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
2060                      next0, now, thread_index, &s0);
2061                   goto trace0;
2062                 }
2063             }
2064           else
2065             {
2066               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
2067                 {
2068                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2069                   goto trace0;
2070                 }
2071
2072               if (ip4_is_fragment (ip0))
2073                 {
2074                   next0 = SNAT_IN2OUT_NEXT_REASS;
2075                   goto trace0;
2076                 }
2077             }
2078
2079           key0.addr = ip0->src_address;
2080           key0.port = udp0->src_port;
2081           key0.protocol = proto0;
2082           key0.fib_index = rx_fib_index0;
2083
2084           kv0.key = key0.as_u64;
2085
2086           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out,
2087                                       &kv0, &value0))
2088             {
2089               if (is_slow_path)
2090                 {
2091                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
2092                       ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature)
2093                     goto trace0;
2094
2095                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
2096                                      &s0, node, next0, thread_index);
2097
2098                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
2099                     goto trace0;
2100                 }
2101               else
2102                 {
2103                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2104                   goto trace0;
2105                 }
2106             }
2107           else
2108             {
2109               if (PREDICT_FALSE (value0.value == ~0ULL))
2110                 {
2111                   if (is_slow_path)
2112                     {
2113                       s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
2114                                           thread_index, now, vm, node);
2115                       if (!s0)
2116                         next0 = SNAT_IN2OUT_NEXT_DROP;
2117                       goto trace0;
2118                     }
2119                   else
2120                     {
2121                       next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2122                       goto trace0;
2123                     }
2124                 }
2125               else
2126                 {
2127                   s0 = pool_elt_at_index (
2128                     sm->per_thread_data[thread_index].sessions,
2129                     value0.value);
2130                 }
2131             }
2132
2133           b0->flags |= VNET_BUFFER_F_IS_NATED;
2134
2135           old_addr0 = ip0->src_address.as_u32;
2136           ip0->src_address = s0->out2in.addr;
2137           new_addr0 = ip0->src_address.as_u32;
2138           if (!is_output_feature)
2139             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
2140
2141           sum0 = ip0->checksum;
2142           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2143                                  ip4_header_t,
2144                                  src_address /* changed member */);
2145           ip0->checksum = ip_csum_fold (sum0);
2146
2147           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2148             {
2149               old_port0 = tcp0->src_port;
2150               tcp0->src_port = s0->out2in.port;
2151               new_port0 = tcp0->src_port;
2152
2153               sum0 = tcp0->checksum;
2154               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2155                                      ip4_header_t,
2156                                      dst_address /* changed member */);
2157               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2158                                      ip4_header_t /* cheat */,
2159                                      length /* changed member */);
2160               tcp0->checksum = ip_csum_fold(sum0);
2161             }
2162           else
2163             {
2164               old_port0 = udp0->src_port;
2165               udp0->src_port = s0->out2in.port;
2166               udp0->checksum = 0;
2167             }
2168
2169           /* Accounting */
2170           s0->last_heard = now;
2171           s0->total_pkts++;
2172           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
2173           /* Per-user LRU list maintenance for dynamic translation */
2174           if (!snat_is_session_static (s0))
2175             {
2176               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
2177                                  s0->per_user_index);
2178               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
2179                                   s0->per_user_list_head_index,
2180                                   s0->per_user_index);
2181             }
2182
2183         trace0:
2184           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2185                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2186             {
2187               snat_in2out_trace_t *t =
2188                  vlib_add_trace (vm, node, b0, sizeof (*t));
2189               t->is_slow_path = is_slow_path;
2190               t->sw_if_index = sw_if_index0;
2191               t->next_index = next0;
2192                   t->session_index = ~0;
2193               if (s0)
2194                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
2195             }
2196
2197           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2198
2199           /* verify speculative enqueue, maybe switch current next frame */
2200           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2201                                            to_next, n_left_to_next,
2202                                            bi0, next0);
2203         }
2204
2205       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2206     }
2207
2208   vlib_node_increment_counter (vm, stats_node_index,
2209                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2210                                pkts_processed);
2211   return frame->n_vectors;
2212 }
2213
/*
 * Node function for the "nat44-in2out" graph node (it is the .function of
 * the snat_in2out_node registration that follows).
 *
 * Thin wrapper: all work is done by snat_in2out_node_fn_inline, called here
 * with is_slow_path = 0. In that mode the inline function does not translate
 * ICMP, unknown-protocol or session-lookup-miss packets itself but sends
 * them to SNAT_IN2OUT_NEXT_SLOW_PATH, and sends IP fragments to
 * SNAT_IN2OUT_NEXT_REASS.
 *
 * Returns the inline function's result, which is frame->n_vectors.
 */
2214 static uword
2215 snat_in2out_fast_path_fn (vlib_main_t * vm,
2216                           vlib_node_runtime_t * node,
2217                           vlib_frame_t * frame)
2218 {
2219   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0 /* presumably is_output_feature - confirm against the inline signature */);
2220 }
2221
/*
 * Graph node registration for "nat44-in2out", driven by
 * snat_in2out_fast_path_fn. Traces are rendered by format_snat_in2out_trace
 * and the error counters are named by snat_in2out_error_strings.
 */
2222 VLIB_REGISTER_NODE (snat_in2out_node) = {
2223   .function = snat_in2out_fast_path_fn,
2224   .name = "nat44-in2out",
2225   .vector_size = sizeof (u32),
2226   .format_trace = format_snat_in2out_trace,
2227   .type = VLIB_NODE_TYPE_INTERNAL,
2228
2229   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2230   .error_strings = snat_in2out_error_strings,
2231
2232   .runtime_data_bytes = sizeof (snat_runtime_t),
2233
2234   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2235
2236   /* edit / add dispositions here */
     /* One entry per snat_in2out_next_t value used by the node function:
      * translated packets go to "ip4-lookup", packets the fast path will not
      * handle go to "nat44-in2out-slowpath", fragments to "nat44-in2out-reass". */
2237   .next_nodes = {
2238     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2239     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2240     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2241     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2242     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2243   },
2244 };
2245
/* NOTE(review): presumably emits per-CPU-architecture variants of the node
 * function - confirm against the macro definition in vlib. */
2246 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
2247
/*
 * Node function for the "nat44-in2out-output" graph node (it is the
 * .function of the snat_in2out_output_node registration that follows).
 *
 * Thin wrapper around snat_in2out_node_fn_inline with is_slow_path = 0 and
 * a final argument of 1, which is presumably is_output_feature (confirm
 * against the inline signature). When is_output_feature is set, the inline
 * function locates the IP header at ip.save_rewrite_length bytes past the
 * buffer's current position and leaves sw_if_index[VLIB_TX] untouched after
 * translation.
 *
 * Returns the inline function's result, which is frame->n_vectors.
 */
2248 static uword
2249 snat_in2out_output_fast_path_fn (vlib_main_t * vm,
2250                                  vlib_node_runtime_t * node,
2251                                  vlib_frame_t * frame)
2252 {
2253   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1 /* presumably is_output_feature - confirm */);
2254 }
2255
/*
 * Graph node registration for "nat44-in2out-output", driven by
 * snat_in2out_output_fast_path_fn. Same trace formatter and error strings
 * as "nat44-in2out"; the next-node table differs as noted below.
 */
2256 VLIB_REGISTER_NODE (snat_in2out_output_node) = {
2257   .function = snat_in2out_output_fast_path_fn,
2258   .name = "nat44-in2out-output",
2259   .vector_size = sizeof (u32),
2260   .format_trace = format_snat_in2out_trace,
2261   .type = VLIB_NODE_TYPE_INTERNAL,
2262
2263   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2264   .error_strings = snat_in2out_error_strings,
2265
2266   .runtime_data_bytes = sizeof (snat_runtime_t),
2267
2268   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2269
2270   /* edit / add dispositions here */
     /* Unlike "nat44-in2out", SNAT_IN2OUT_NEXT_LOOKUP leads to
      * "interface-output" and the slow path is the output-feature variant.
      * NOTE(review): fragments still go to the shared "nat44-in2out-reass"
      * node - confirm that node handles the output-feature case. */
2271   .next_nodes = {
2272     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2273     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2274     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2275     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2276     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2277   },
2278 };
2279
/* NOTE(review): presumably emits per-CPU-architecture variants of the node
 * function - confirm against the macro definition in vlib. */
2280 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
2281                               snat_in2out_output_fast_path_fn);
2282
/*
 * Node function for the "nat44-in2out-slowpath" graph node (it is the
 * .function of the snat_in2out_slowpath_node registration that follows).
 *
 * Thin wrapper around snat_in2out_node_fn_inline with is_slow_path = 1.
 * In that mode the inline function itself handles what the fast path defers:
 * unknown protocols (snat_in2out_unknown_proto), ICMP
 * (icmp_in2out_slow_path) and session-lookup misses (slow_path).
 *
 * Returns the inline function's result, which is frame->n_vectors.
 */
2283 static uword
2284 snat_in2out_slow_path_fn (vlib_main_t * vm,
2285                           vlib_node_runtime_t * node,
2286                           vlib_frame_t * frame)
2287 {
2288   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0 /* presumably is_output_feature - confirm against the inline signature */);
2289 }
2290
/*
 * Graph node registration for "nat44-in2out-slowpath", driven by
 * snat_in2out_slow_path_fn. This is the node that "nat44-in2out" names as
 * its SNAT_IN2OUT_NEXT_SLOW_PATH disposition.
 */
2291 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
2292   .function = snat_in2out_slow_path_fn,
2293   .name = "nat44-in2out-slowpath",
2294   .vector_size = sizeof (u32),
2295   .format_trace = format_snat_in2out_trace,
2296   .type = VLIB_NODE_TYPE_INTERNAL,
2297
2298   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2299   .error_strings = snat_in2out_error_strings,
2300
2301   .runtime_data_bytes = sizeof (snat_runtime_t),
2302
2303   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2304
2305   /* edit / add dispositions here */
     /* Same table as "nat44-in2out", so SNAT_IN2OUT_NEXT_SLOW_PATH points
      * back at this node.
      * NOTE(review): the is_slow_path branches of the node function do not
      * appear to select SLOW_PATH or REASS - confirm those two entries are
      * only here to fill the SNAT_IN2OUT_N_NEXT-sized table. */
2306   .next_nodes = {
2307     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2308     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2309     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2310     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2311     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2312   },
2313 };
2314
/* NOTE(review): presumably emits per-CPU-architecture variants of the node
 * function - confirm against the macro definition in vlib. */
2315 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
2316                               snat_in2out_slow_path_fn);
2317
/*
 * Node function for the "nat44-in2out-output-slowpath" graph node (it is
 * the .function of the snat_in2out_output_slowpath_node registration that
 * follows).
 *
 * Thin wrapper around snat_in2out_node_fn_inline with is_slow_path = 1 and
 * a final argument of 1, which is presumably is_output_feature (confirm
 * against the inline signature).
 *
 * Returns the inline function's result, which is frame->n_vectors.
 */
2318 static uword
2319 snat_in2out_output_slow_path_fn (vlib_main_t * vm,
2320                                  vlib_node_runtime_t * node,
2321                                  vlib_frame_t * frame)
2322 {
2323   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1 /* presumably is_output_feature - confirm */);
2324 }
2325
/*
 * Graph node registration for "nat44-in2out-output-slowpath", driven by
 * snat_in2out_output_slow_path_fn. This is the node that
 * "nat44-in2out-output" names as its SNAT_IN2OUT_NEXT_SLOW_PATH disposition.
 */
2326 VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
2327   .function = snat_in2out_output_slow_path_fn,
2328   .name = "nat44-in2out-output-slowpath",
2329   .vector_size = sizeof (u32),
2330   .format_trace = format_snat_in2out_trace,
2331   .type = VLIB_NODE_TYPE_INTERNAL,
2332
2333   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2334   .error_strings = snat_in2out_error_strings,
2335
2336   .runtime_data_bytes = sizeof (snat_runtime_t),
2337
2338   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2339
2340   /* edit / add dispositions here */
     /* Same table as "nat44-in2out-output": translated packets leave via
      * "interface-output" and SNAT_IN2OUT_NEXT_SLOW_PATH points back at
      * this node. */
2341   .next_nodes = {
2342     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2343     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2344     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2345     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2346     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2347   },
2348 };
2349
/* NOTE(review): presumably emits per-CPU-architecture variants of the node
 * function - confirm against the macro definition in vlib. */
2350 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
2351                               snat_in2out_output_slow_path_fn);
2352
/* Feature arc registration defined outside this file. nat44_hairpinning_fn
 * reads its feature_arc_index to pick the matching entry of
 * feature_main.feature_config_mains. */
2353 extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local;
2354
/*
 * Node function for the "nat44-hairpinning" graph node.
 *
 * Walks the frame one buffer at a time. For each buffer it takes the default
 * next index from the feature configuration of the arc identified by
 * vnet_feat_arc_ip4_local, then calls snat_hairpinning(); when that returns
 * non-zero the packet is redirected to SNAT_IN2OUT_NEXT_LOOKUP instead.
 *
 * Every packet whose final next0 is not SNAT_IN2OUT_NEXT_DROP is counted
 * under SNAT_IN2OUT_ERROR_IN2OUT_PACKETS on nat44_hairpinning_node.
 *
 * Returns frame->n_vectors.
 */
2355 static uword
2356 nat44_hairpinning_fn (vlib_main_t * vm,
2357                       vlib_node_runtime_t * node,
2358                       vlib_frame_t * frame)
2359 {
2360   u32 n_left_from, * from, * to_next;
2361   snat_in2out_next_t next_index;
2362   u32 pkts_processed = 0;
2363   snat_main_t * sm = &snat_main;
2364   vnet_feature_main_t *fm = &feature_main;
2365   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
2366   vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index];
2367
2368   from = vlib_frame_vector_args (frame);
2369   n_left_from = frame->n_vectors;
2370   next_index = node->cached_next_index;
2371
2372   while (n_left_from > 0)
2373     {
2374       u32 n_left_to_next;
2375
2376       vlib_get_next_frame (vm, node, next_index,
2377                            to_next, n_left_to_next);
2378
2379       while (n_left_from > 0 && n_left_to_next > 0)
2380         {
2381           u32 bi0;
2382           vlib_buffer_t * b0;
2383           u32 next0;
2384           ip4_header_t * ip0;
2385           u32 proto0;
2386           udp_header_t * udp0;
2387           tcp_header_t * tcp0;
2388
2389           /* speculatively enqueue b0 to the current next frame */
2390           bi0 = from[0];
2391           to_next[0] = bi0;
2392           from += 1;
2393           to_next += 1;
2394           n_left_from -= 1;
2395           n_left_to_next -= 1;
2396
               /* The IP header is taken at the buffer's current position;
                * udp0 and tcp0 are two typed views of the same L4 header. */
2397           b0 = vlib_get_buffer (vm, bi0);
2398           ip0 = vlib_buffer_get_current (b0);
2399           udp0 = ip4_next_header (ip0);
2400           tcp0 = (tcp_header_t *) udp0;
2401
2402           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2403
               /* Default disposition: next0 is filled in from the feature
                * arc configuration for this buffer. */
2404           vnet_get_config_data (&cm->config_main, &b0->current_config_index,
2405                                 &next0, 0);
2406
               /* A non-zero result from snat_hairpinning overrides the
                * feature-arc choice and sends the packet to the LOOKUP next. */
2407           if (snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0))
2408             next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2409
               /* NOTE(review): next0 may come from the feature arc yet is
                * compared with a snat_in2out_next_t value - confirm the two
                * index spaces agree on what DROP is. */
2410           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2411
2412           /* verify speculative enqueue, maybe switch current next frame */
2413           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2414                                            to_next, n_left_to_next,
2415                                            bi0, next0);
2416          }
2417
2418       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2419     }
2420
2421   vlib_node_increment_counter (vm, nat44_hairpinning_node.index,
2422                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2423                                pkts_processed);
2424   return frame->n_vectors;
2425 }
2426
/*
 * Graph node registration for "nat44-hairpinning", driven by
 * nat44_hairpinning_fn. It reuses snat_in2out_error_strings for its
 * counters, sets no .format_trace, and declares only the first two
 * snat_in2out_next_t dispositions (DROP and LOOKUP) as static next nodes.
 */
2427 VLIB_REGISTER_NODE (nat44_hairpinning_node) = {
2428   .function = nat44_hairpinning_fn,
2429   .name = "nat44-hairpinning",
2430   .vector_size = sizeof (u32),
2431   .type = VLIB_NODE_TYPE_INTERNAL,
2432   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2433   .error_strings = snat_in2out_error_strings,
2434   .n_next_nodes = 2, /* must match the number of entries in .next_nodes below */
2435   .next_nodes = {
2436     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2437     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2438   },
2439 };
2440
/* NOTE(review): presumably emits per-CPU-architecture variants of the node
 * function - confirm against the macro definition in vlib. */
2441 VLIB_NODE_FUNCTION_MULTIARCH (nat44_hairpinning_node,
2442                               nat44_hairpinning_fn);
2443
/*
 * Hairpinning for packets handled by the NAT44 in2out fragment path.
 *
 * Builds an outside-side key from the packet's destination address, dport,
 * proto0 and sm->outside_fib_index, and looks it up first among the static
 * mappings and then in an out2in session table.  On a hit the IPv4
 * destination address is replaced by the matching inside address, the IPv4
 * header checksum is patched, and sw_if_index[VLIB_TX] of b0 is set to the
 * matching inside FIB index.  The L4 header (destination port and TCP
 * checksum) is only touched when ip0 is a first fragment, because later
 * fragments do not carry one.
 *
 * sm      NAT main structure (static mappings, per-thread session tables)
 * b0      buffer holding the packet; only its TX FIB index is written
 * ip0     IPv4 header of the packet, modified in place
 * sport   not used by this function
 * dport   destination port of the flow; it is stored straight into the
 *         lookup key and converted with clib_net_to_host_u16() below, so it
 *         is expected in network byte order
 * proto0  SNAT protocol of the flow (SNAT_PROTOCOL_TCP selects the TCP
 *         checksum handling, everything else is treated as UDP)
 *
 * NOTE(review): when only the address changes (port identical), a UDP
 * checksum is left untouched here; the caller visible in this file zeroes
 * it on first fragments before calling -- confirm for any other caller.
 */
2444 static inline void
2445 nat44_reass_hairpinning (snat_main_t *sm,
2446                          vlib_buffer_t * b0,
2447                          ip4_header_t * ip0,
2448                          u16 sport,
2449                          u16 dport,
2450                          u32 proto0)
2451 {
2452   snat_session_key_t key0, sm0;
2453   snat_session_t * s0;
2454   clib_bihash_kv_8_8_t kv0, value0;
2455   ip_csum_t sum0;
2456   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
       /* Initialised so it is never read indeterminate; it is only meaningful
        * together with a non-zero new_dst_addr0. */
2457   u16 new_dst_port0 = 0, old_dst_port0;
2458   udp_header_t * udp0;
2459   tcp_header_t * tcp0;
2460
2461   key0.addr = ip0->dst_address;
2462   key0.port = dport;
2463   key0.protocol = proto0;
2464   key0.fib_index = sm->outside_fib_index;
2465   kv0.key = key0.as_u64;
2466
       /* Only dereferenced below when ip0 is a first fragment. */
2467   udp0 = ip4_next_header (ip0);
2468
2469   /* Check if destination is a static mapping */
2470   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
2471     {
2472       new_dst_addr0 = sm0.addr.as_u32;
2473       new_dst_port0 = sm0.port;
2474       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2475     }
2476   /* or active sessions */
2477   else
2478     {
           /* Pick the per-thread table from the outside port.  The port must
            * come from dport, not from the packet: a non-first fragment has
            * no L4 header, so udp0->dst_port would be payload bytes. */
2479       if (sm->num_workers > 1)
2480         ti = (clib_net_to_host_u16 (dport) - 1024) / sm->port_per_thread;
2481       else
2482         ti = sm->num_workers;
2483
2484       if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
2485         {
2486           si = value0.value;
2487           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
2488           new_dst_addr0 = s0->in2out.addr.as_u32;
2489           new_dst_port0 = s0->in2out.port;
2490           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
2491         }
2492     }
2493
2494   /* Destination is behind the same NAT, use internal address and port */
2495   if (new_dst_addr0)
2496     {
           /* The IPv4 header is present in every fragment: always rewrite the
            * destination address and patch the header checksum. */
2497       old_dst_addr0 = ip0->dst_address.as_u32;
2498       ip0->dst_address.as_u32 = new_dst_addr0;
2499       sum0 = ip0->checksum;
2500       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2501                              ip4_header_t, dst_address);
2502       ip0->checksum = ip_csum_fold (sum0);
2503
2504       old_dst_port0 = dport;
2505       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0 &&
2506                        ip4_is_first_fragment (ip0)))
2507         {
               /* First fragment and the port changes: rewrite the port and
                * fix up the L4 checksum for both address and port. */
2508           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2509             {
2510               tcp0 = ip4_next_header (ip0);
2511               tcp0->dst = new_dst_port0;
2512               sum0 = tcp0->checksum;
2513               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2514                                      ip4_header_t, dst_address);
2515               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
2516                                      ip4_header_t /* cheat */, length);
2517               tcp0->checksum = ip_csum_fold(sum0);
2518             }
2519           else
2520             {
2521               udp0->dst_port = new_dst_port0;
2522               udp0->checksum = 0;
2523             }
2524         }
2525       else
2526         {
               /* Port unchanged, or not a first fragment.  The TCP checksum
                * covers the destination address, so patch it -- but only in
                * the first fragment.  In any other fragment the bytes at the
                * TCP checksum offset are payload and must not be modified. */
2527           if (proto0 == SNAT_PROTOCOL_TCP && ip4_is_first_fragment (ip0))
2528             {
2529               tcp0 = ip4_next_header (ip0);
2530               sum0 = tcp0->checksum;
2531               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2532                                      ip4_header_t, dst_address);
2533               tcp0->checksum = ip_csum_fold(sum0);
2534             }
2535         }
2536     }
2537 }
2538
/*
 * Node function registered as "nat44-in2out-reass": in2out NAT44
 * translation of IPv4 fragments.
 *
 * Every buffer of the frame is attached to a reassembly context obtained
 * from nat_ip4_reass_find_or_create().  A first fragment resolves the
 * session -- in2out hash lookup, falling back to slow_path() on a miss --
 * and stores the session index in the context.  A later fragment reuses
 * that index, or is cached with nat_ip4_reass_add_fragment() while the
 * index is still ~0.  Fragments with a session get the source address
 * rewritten; a first fragment additionally gets the source port and the
 * L4 checksum rewritten.  Hairpinning and session accounting follow.
 *
 * Buffer indices returned by nat_ip4_reass_get_frags() are copied back
 * into the input frame once it has been drained and are processed by the
 * same loop.  Buffer indices collected in fragments_to_drop are handed to
 * nat_send_all_to_node() at the end.
 *
 * Returns frame->n_vectors.
 */
2539 static uword
2540 nat44_in2out_reass_node_fn (vlib_main_t * vm,
2541                             vlib_node_runtime_t * node,
2542                             vlib_frame_t * frame)
2543 {
2544   u32 n_left_from, *from, *to_next;
2545   snat_in2out_next_t next_index;
2546   u32 pkts_processed = 0;
2547   snat_main_t *sm = &snat_main;
2548   f64 now = vlib_time_now (vm);
2549   u32 thread_index = vlib_get_thread_index ();
2550   snat_main_per_thread_data_t *per_thread_data =
2551     &sm->per_thread_data[thread_index];
       /* Vectors of buffer indices filled by the reassembly helpers below;
        * both are freed before returning. */
2552   u32 *fragments_to_drop = 0;
2553   u32 *fragments_to_loopback = 0;
2554
2555   from = vlib_frame_vector_args (frame);
2556   n_left_from = frame->n_vectors;
2557   next_index = node->cached_next_index;
2558
2559   while (n_left_from > 0)
2560     {
2561       u32 n_left_to_next;
2562
2563       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2564
2565       while (n_left_from > 0 && n_left_to_next > 0)
2566        {
2567           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
2568           vlib_buffer_t *b0;
2569           u32 next0;
               /* Set to 1 when the buffer is kept by the reassembly context
                * instead of being enqueued to a next node. */
2570           u8 cached0 = 0;
2571           ip4_header_t *ip0;
2572           nat_reass_ip4_t *reass0;
2573           udp_header_t * udp0;
2574           tcp_header_t * tcp0;
2575           snat_session_key_t key0;
2576           clib_bihash_kv_8_8_t kv0, value0;
2577           snat_session_t * s0 = 0;
2578           u16 old_port0, new_port0;
2579           ip_csum_t sum0;
2580
2581           /* speculatively enqueue b0 to the current next frame */
2582           bi0 = from[0];
2583           to_next[0] = bi0;
2584           from += 1;
2585           to_next += 1;
2586           n_left_from -= 1;
2587           n_left_to_next -= 1;
2588
2589           b0 = vlib_get_buffer (vm, bi0);
2590           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2591
2592           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2593           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2594                                                                sw_if_index0);
2595
               /* NOTE(review): the argument 0 presumably selects the IPv4
                * setting of the drop-fragments switch -- confirm in
                * nat_reass.h. */
2596           if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
2597             {
2598               next0 = SNAT_IN2OUT_NEXT_DROP;
2599               b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
2600               goto trace0;
2601             }
2602
2603           ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
               /* udp0 and tcp0 alias the same bytes right after the IPv4
                * header; they are only dereferenced for first fragments. */
2604           udp0 = ip4_next_header (ip0);
2605           tcp0 = (tcp_header_t *) udp0;
2606           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2607
               /* Reassembly context is identified by source/destination
                * address, fragment id and IP protocol.  The call may append
                * buffer indices to fragments_to_drop.
                * NOTE(review): meaning of the literal 1 is not visible here --
                * confirm in nat_reass.h. */
2608           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
2609                                                  ip0->dst_address,
2610                                                  ip0->fragment_id,
2611                                                  ip0->protocol,
2612                                                  1,
2613                                                  &fragments_to_drop);
2614
               /* No context could be obtained: drop the fragment. */
2615           if (PREDICT_FALSE (!reass0))
2616             {
2617               next0 = SNAT_IN2OUT_NEXT_DROP;
2618               b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_REASS];
2619               goto trace0;
2620             }
2621
2622           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
2623             {
                   /* The first fragment carries the L4 header, so the session
                    * can be resolved from source address/port, protocol and
                    * RX FIB index. */
2624               key0.addr = ip0->src_address;
2625               key0.port = udp0->src_port;
2626               key0.protocol = proto0;
2627               key0.fib_index = rx_fib_index0;
2628               kv0.key = key0.as_u64;
2629
                   /* Non-zero search result: no existing in2out session. */
2630               if (clib_bihash_search_8_8 (&per_thread_data->in2out, &kv0, &value0))
2631                 {
                       /* Not to be translated: the packet continues to next0
                        * (still SNAT_IN2OUT_NEXT_LOOKUP) unmodified and the
                        * context keeps its current session index. */
2632                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
2633                       ip0, proto0, rx_fib_index0, thread_index)))
2634                     goto trace0;
2635
2636                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
2637                                      &s0, node, next0, thread_index);
2638
2639                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
2640                     goto trace0;
2641
                       /* Remember the session (as pool index) for the
                        * remaining fragments of this packet. */
2642                   reass0->sess_index = s0 - per_thread_data->sessions;
2643                 }
2644               else
2645                 {
2646                   s0 = pool_elt_at_index (per_thread_data->sessions,
2647                                           value0.value);
2648                   reass0->sess_index = value0.value;
2649                 }
                   /* Fetch buffer indices held by the context into
                    * fragments_to_loopback -- presumably the fragments cached
                    * before the session was known. */
2650               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
2651             }
2652           else
2653             {
                   /* Non-first fragment with no session recorded yet: hand
                    * the buffer to the reassembly context instead of
                    * forwarding it. */
2654               if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
2655                 {
2656                   if (nat_ip4_reass_add_fragment (reass0, bi0))
2657                     {
2658                       b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_FRAG];
2659                       next0 = SNAT_IN2OUT_NEXT_DROP;
2660                       goto trace0;
2661                     }
2662                   cached0 = 1;
2663                   goto trace0;
2664                 }
2665               s0 = pool_elt_at_index (per_thread_data->sessions,
2666                                       reass0->sess_index);
2667             }
2668
               /* Every fragment has an IPv4 header: rewrite the source
                * address, set the TX FIB index and patch the header checksum. */
2669           old_addr0 = ip0->src_address.as_u32;
2670           ip0->src_address = s0->out2in.addr;
2671           new_addr0 = ip0->src_address.as_u32;
2672           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
2673
2674           sum0 = ip0->checksum;
2675           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2676                                  ip4_header_t,
2677                                  src_address /* changed member */);
2678           ip0->checksum = ip_csum_fold (sum0);
2679
               /* The L4 header is rewritten only in the first fragment.  TCP
                * gets an incremental checksum update for address and port;
                * anything else is handled as UDP with the checksum zeroed. */
2680           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
2681             {
2682               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2683                 {
2684                   old_port0 = tcp0->src_port;
2685                   tcp0->src_port = s0->out2in.port;
2686                   new_port0 = tcp0->src_port;
2687
2688                   sum0 = tcp0->checksum;
2689                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2690                                          ip4_header_t,
2691                                          dst_address /* changed member */);
2692                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2693                                          ip4_header_t /* cheat */,
2694                                          length /* changed member */);
2695                   tcp0->checksum = ip_csum_fold(sum0);
2696                 }
2697               else
2698                 {
                       /* old_port0 is assigned but not read on this path. */
2699                   old_port0 = udp0->src_port;
2700                   udp0->src_port = s0->out2in.port;
2701                   udp0->checksum = 0;
2702                 }
2703             }
2704
2705           /* Hairpinning */
2706           nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port,
2707                                    s0->ext_host_port, proto0);
2708
2709           /* Accounting */
2710           s0->last_heard = now;
2711           s0->total_pkts++;
2712           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
2713           /* Per-user LRU list maintenance for dynamic translation */
2714           if (!snat_is_session_static (s0))
2715             {
2716               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
2717                                  s0->per_user_index);
2718               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
2719                                   s0->per_user_list_head_index,
2720                                   s0->per_user_index);
2721             }
2722
2723         trace0:
2724           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2725                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2726             {
2727               nat44_in2out_reass_trace_t *t =
2728                  vlib_add_trace (vm, node, b0, sizeof (*t));
2729               t->cached = cached0;
2730               t->sw_if_index = sw_if_index0;
2731               t->next_index = next0;
2732             }
2733
               /* A cached buffer must not go to the next node: take back the
                * slot that was filled speculatively above. */
2734           if (cached0)
2735             {
2736               n_left_to_next++;
2737               to_next--;
2738             }
2739           else
2740             {
                   /* Counts every buffer that is not sent to drop. */
2741               pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2742
2743               /* verify speculative enqueue, maybe switch current next frame */
2744               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2745                                                to_next, n_left_to_next,
2746                                                bi0, next0);
2747             }
2748
               /* Input drained but loopback fragments are pending: overwrite
                * the start of the input frame with them and keep looping.  At
                * most VLIB_FRAME_SIZE indices are taken per refill, from the
                * tail of the vector when it holds more than that. */
2749           if (n_left_from == 0 && vec_len (fragments_to_loopback))
2750             {
2751               from = vlib_frame_vector_args (frame);
2752               u32 len = vec_len (fragments_to_loopback);
2753               if (len <= VLIB_FRAME_SIZE)
2754                 {
2755                   clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
2756                   n_left_from = len;
2757                   vec_reset_length (fragments_to_loopback);
2758                 }
2759               else
2760                 {
2761                   clib_memcpy (from,
2762                                fragments_to_loopback + (len - VLIB_FRAME_SIZE),
2763                                sizeof (u32) * VLIB_FRAME_SIZE);
2764                   n_left_from = VLIB_FRAME_SIZE;
2765                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
2766                 }
2767             }
2768        }
2769
2770       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2771     }
2772
2773   vlib_node_increment_counter (vm, nat44_in2out_reass_node.index,
2774                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2775                                pkts_processed);
2776
       /* Hand the buffers collected in fragments_to_drop to the drop next
        * node with the DROP_FRAGMENT error. */
2777   nat_send_all_to_node (vm, fragments_to_drop, node,
2778                         &node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT],
2779                         SNAT_IN2OUT_NEXT_DROP);
2780
2781   vec_free (fragments_to_drop);
2782   vec_free (fragments_to_loopback);
2783   return frame->n_vectors;
2784 }
2785
/*
 * Graph node registration for "nat44-in2out-reass".
 *
 * The node is an internal node that runs nat44_in2out_reass_node_fn on
 * vectors of u32 elements, formats its traces with
 * format_nat44_in2out_reass_trace and reuses the in2out error string
 * table.  It declares the full set of SNAT_IN2OUT_N_NEXT next nodes.
 */
2786 VLIB_REGISTER_NODE (nat44_in2out_reass_node) = {
2787   .function = nat44_in2out_reass_node_fn,
2788   .name = "nat44-in2out-reass",
2789   .vector_size = sizeof (u32),
2790   .format_trace = format_nat44_in2out_reass_trace,
2791   .type = VLIB_NODE_TYPE_INTERNAL,
2792
2793   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2794   .error_strings = snat_in2out_error_strings,
2795
2796   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2797   .next_nodes = {
2798     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2799     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2800     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2801     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
         /* The REASS slot names this very node. */
2802     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2803   },
2804 };
2805
     /* Ties nat44_in2out_reass_node_fn to the node above; presumably this
      * sets up the per-CPU-architecture variants of the function -- confirm
      * against the macro definition. */
2806 VLIB_NODE_FUNCTION_MULTIARCH (nat44_in2out_reass_node,
2807                               nat44_in2out_reass_node_fn);
2808
2809 /**************************/
2810 /*** deterministic mode ***/
2811 /**************************/
2812 static uword
2813 snat_det_in2out_node_fn (vlib_main_t * vm,
2814                          vlib_node_runtime_t * node,
2815                          vlib_frame_t * frame)
2816 {
2817   u32 n_left_from, * from, * to_next;
2818   snat_in2out_next_t next_index;
2819   u32 pkts_processed = 0;
2820   snat_main_t * sm = &snat_main;
2821   u32 now = (u32) vlib_time_now (vm);
2822   u32 thread_index = vlib_get_thread_index ();
2823
2824   from = vlib_frame_vector_args (frame);
2825   n_left_from = frame->n_vectors;
2826   next_index = node->cached_next_index;
2827
2828   while (n_left_from > 0)
2829     {
2830       u32 n_left_to_next;
2831
2832       vlib_get_next_frame (vm, node, next_index,
2833                            to_next, n_left_to_next);
2834
2835       while (n_left_from >= 4 && n_left_to_next >= 2)
2836         {
2837           u32 bi0, bi1;
2838           vlib_buffer_t * b0, * b1;
2839           u32 next0, next1;
2840           u32 sw_if_index0, sw_if_index1;
2841           ip4_header_t * ip0, * ip1;
2842           ip_csum_t sum0, sum1;
2843           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
2844           u16 old_port0, new_port0, lo_port0, i0;
2845           u16 old_port1, new_port1, lo_port1, i1;
2846           udp_header_t * udp0, * udp1;
2847           tcp_header_t * tcp0, * tcp1;
2848           u32 proto0, proto1;
2849           snat_det_out_key_t key0, key1;
2850           snat_det_map_t * dm0, * dm1;
2851           snat_det_session_t * ses0 = 0, * ses1 = 0;
2852           u32 rx_fib_index0, rx_fib_index1;
2853           icmp46_header_t * icmp0, * icmp1;
2854
2855           /* Prefetch next iteration. */
2856           {
2857             vlib_buffer_t * p2, * p3;
2858
2859             p2 = vlib_get_buffer (vm, from[2]);
2860             p3 = vlib_get_buffer (vm, from[3]);
2861
2862             vlib_prefetch_buffer_header (p2, LOAD);
2863             vlib_prefetch_buffer_header (p3, LOAD);
2864
2865             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2866             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2867           }
2868
2869           /* speculatively enqueue b0 and b1 to the current next frame */
2870           to_next[0] = bi0 = from[0];
2871           to_next[1] = bi1 = from[1];
2872           from += 2;
2873           to_next += 2;
2874           n_left_from -= 2;
2875           n_left_to_next -= 2;
2876
2877           b0 = vlib_get_buffer (vm, bi0);
2878           b1 = vlib_get_buffer (vm, bi1);
2879
2880           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2881           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
2882
2883           ip0 = vlib_buffer_get_current (b0);
2884           udp0 = ip4_next_header (ip0);
2885           tcp0 = (tcp_header_t *) udp0;
2886
2887           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2888
2889           if (PREDICT_FALSE(ip0->ttl == 1))
2890             {
2891               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2892               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2893                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2894                                            0);
2895               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2896               goto trace0;
2897             }
2898
2899           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2900
2901           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2902             {
2903               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2904               icmp0 = (icmp46_header_t *) udp0;
2905
2906               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2907                                   rx_fib_index0, node, next0, thread_index,
2908                                   &ses0, &dm0);
2909               goto trace0;
2910             }
2911
2912           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2913           if (PREDICT_FALSE(!dm0))
2914             {
2915               clib_warning("no match for internal host %U",
2916                            format_ip4_address, &ip0->src_address);
2917               next0 = SNAT_IN2OUT_NEXT_DROP;
2918               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2919               goto trace0;
2920             }
2921
2922           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2923
2924           key0.ext_host_addr = ip0->dst_address;
2925           key0.ext_host_port = tcp0->dst;
2926
2927           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2928           if (PREDICT_FALSE(!ses0))
2929             {
2930               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2931                 {
2932                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2933                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2934
2935                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2936                     continue;
2937
2938                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2939                   break;
2940                 }
2941               if (PREDICT_FALSE(!ses0))
2942                 {
2943                   /* too many sessions for user, send ICMP error packet */
2944
2945                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2946                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2947                                                ICMP4_destination_unreachable_destination_unreachable_host,
2948                                                0);
2949                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2950                   goto trace0;
2951                 }
2952             }
2953
2954           new_port0 = ses0->out.out_port;
2955
2956           old_addr0.as_u32 = ip0->src_address.as_u32;
2957           ip0->src_address.as_u32 = new_addr0.as_u32;
2958           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2959
2960           sum0 = ip0->checksum;
2961           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2962                                  ip4_header_t,
2963                                  src_address /* changed member */);
2964           ip0->checksum = ip_csum_fold (sum0);
2965
2966           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2967             {
2968               if (tcp0->flags & TCP_FLAG_SYN)
2969                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2970               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2971                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2972               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2973                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2974               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2975                 snat_det_ses_close(dm0, ses0);
2976               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2977                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2978               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2979                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2980
2981               old_port0 = tcp0->src;
2982               tcp0->src = new_port0;
2983
2984               sum0 = tcp0->checksum;
2985               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2986                                      ip4_header_t,
2987                                      dst_address /* changed member */);
2988               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2989                                      ip4_header_t /* cheat */,
2990                                      length /* changed member */);
2991               tcp0->checksum = ip_csum_fold(sum0);
2992             }
2993           else
2994             {
2995               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2996               old_port0 = udp0->src_port;
2997               udp0->src_port = new_port0;
2998               udp0->checksum = 0;
2999             }
3000
3001           switch(ses0->state)
3002             {
3003             case SNAT_SESSION_UDP_ACTIVE:
3004                 ses0->expire = now + sm->udp_timeout;
3005                 break;
3006             case SNAT_SESSION_TCP_SYN_SENT:
3007             case SNAT_SESSION_TCP_FIN_WAIT:
3008             case SNAT_SESSION_TCP_CLOSE_WAIT:
3009             case SNAT_SESSION_TCP_LAST_ACK:
3010                 ses0->expire = now + sm->tcp_transitory_timeout;
3011                 break;
3012             case SNAT_SESSION_TCP_ESTABLISHED:
3013                 ses0->expire = now + sm->tcp_established_timeout;
3014                 break;
3015             }
3016
3017         trace0:
3018           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3019                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3020             {
3021               snat_in2out_trace_t *t =
3022                  vlib_add_trace (vm, node, b0, sizeof (*t));
3023               t->is_slow_path = 0;
3024               t->sw_if_index = sw_if_index0;
3025               t->next_index = next0;
3026               t->session_index = ~0;
3027               if (ses0)
3028                 t->session_index = ses0 - dm0->sessions;
3029             }
3030
3031           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3032
3033           ip1 = vlib_buffer_get_current (b1);
3034           udp1 = ip4_next_header (ip1);
3035           tcp1 = (tcp_header_t *) udp1;
3036
3037           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
3038
3039           if (PREDICT_FALSE(ip1->ttl == 1))
3040             {
3041               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3042               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
3043                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3044                                            0);
3045               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3046               goto trace1;
3047             }
3048
3049           proto1 = ip_proto_to_snat_proto (ip1->protocol);
3050
3051           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
3052             {
3053               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
3054               icmp1 = (icmp46_header_t *) udp1;
3055
3056               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
3057                                   rx_fib_index1, node, next1, thread_index,
3058                                   &ses1, &dm1);
3059               goto trace1;
3060             }
3061
3062           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
3063           if (PREDICT_FALSE(!dm1))
3064             {
3065               clib_warning("no match for internal host %U",
3066                            format_ip4_address, &ip0->src_address);
3067               next1 = SNAT_IN2OUT_NEXT_DROP;
3068               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3069               goto trace1;
3070             }
3071
3072           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
3073
3074           key1.ext_host_addr = ip1->dst_address;
3075           key1.ext_host_port = tcp1->dst;
3076
3077           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
3078           if (PREDICT_FALSE(!ses1))
3079             {
3080               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
3081                 {
3082                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
3083                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
3084
3085                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
3086                     continue;
3087
3088                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
3089                   break;
3090                 }
3091               if (PREDICT_FALSE(!ses1))
3092                 {
3093                   /* too many sessions for user, send ICMP error packet */
3094
3095                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3096                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
3097                                                ICMP4_destination_unreachable_destination_unreachable_host,
3098                                                0);
3099                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3100                   goto trace1;
3101                 }
3102             }
3103
3104           new_port1 = ses1->out.out_port;
3105
3106           old_addr1.as_u32 = ip1->src_address.as_u32;
3107           ip1->src_address.as_u32 = new_addr1.as_u32;
3108           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
3109
3110           sum1 = ip1->checksum;
3111           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
3112                                  ip4_header_t,
3113                                  src_address /* changed member */);
3114           ip1->checksum = ip_csum_fold (sum1);
3115
3116           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
3117             {
3118               if (tcp1->flags & TCP_FLAG_SYN)
3119                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
3120               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
3121                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
3122               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
3123                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
3124               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
3125                 snat_det_ses_close(dm1, ses1);
3126               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
3127                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
3128               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
3129                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
3130
3131               old_port1 = tcp1->src;
3132               tcp1->src = new_port1;
3133
3134               sum1 = tcp1->checksum;
3135               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
3136                                      ip4_header_t,
3137                                      dst_address /* changed member */);
3138               sum1 = ip_csum_update (sum1, old_port1, new_port1,
3139                                      ip4_header_t /* cheat */,
3140                                      length /* changed member */);
3141               tcp1->checksum = ip_csum_fold(sum1);
3142             }
3143           else
3144             {
3145               ses1->state = SNAT_SESSION_UDP_ACTIVE;
3146               old_port1 = udp1->src_port;
3147               udp1->src_port = new_port1;
3148               udp1->checksum = 0;
3149             }
3150
3151           switch(ses1->state)
3152             {
3153             case SNAT_SESSION_UDP_ACTIVE:
3154                 ses1->expire = now + sm->udp_timeout;
3155                 break;
3156             case SNAT_SESSION_TCP_SYN_SENT:
3157             case SNAT_SESSION_TCP_FIN_WAIT:
3158             case SNAT_SESSION_TCP_CLOSE_WAIT:
3159             case SNAT_SESSION_TCP_LAST_ACK:
3160                 ses1->expire = now + sm->tcp_transitory_timeout;
3161                 break;
3162             case SNAT_SESSION_TCP_ESTABLISHED:
3163                 ses1->expire = now + sm->tcp_established_timeout;
3164                 break;
3165             }
3166
3167         trace1:
3168           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3169                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
3170             {
3171               snat_in2out_trace_t *t =
3172                  vlib_add_trace (vm, node, b1, sizeof (*t));
3173               t->is_slow_path = 0;
3174               t->sw_if_index = sw_if_index1;
3175               t->next_index = next1;
3176               t->session_index = ~0;
3177               if (ses1)
3178                 t->session_index = ses1 - dm1->sessions;
3179             }
3180
3181           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
3182
3183           /* verify speculative enqueues, maybe switch current next frame */
3184           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
3185                                            to_next, n_left_to_next,
3186                                            bi0, bi1, next0, next1);
3187          }
3188
3189       while (n_left_from > 0 && n_left_to_next > 0)
3190         {
3191           u32 bi0;
3192           vlib_buffer_t * b0;
3193           u32 next0;
3194           u32 sw_if_index0;
3195           ip4_header_t * ip0;
3196           ip_csum_t sum0;
3197           ip4_address_t new_addr0, old_addr0;
3198           u16 old_port0, new_port0, lo_port0, i0;
3199           udp_header_t * udp0;
3200           tcp_header_t * tcp0;
3201           u32 proto0;
3202           snat_det_out_key_t key0;
3203           snat_det_map_t * dm0;
3204           snat_det_session_t * ses0 = 0;
3205           u32 rx_fib_index0;
3206           icmp46_header_t * icmp0;
3207
3208           /* speculatively enqueue b0 to the current next frame */
3209           bi0 = from[0];
3210           to_next[0] = bi0;
3211           from += 1;
3212           to_next += 1;
3213           n_left_from -= 1;
3214           n_left_to_next -= 1;
3215
3216           b0 = vlib_get_buffer (vm, bi0);
3217           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3218
3219           ip0 = vlib_buffer_get_current (b0);
3220           udp0 = ip4_next_header (ip0);
3221           tcp0 = (tcp_header_t *) udp0;
3222
3223           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3224
3225           if (PREDICT_FALSE(ip0->ttl == 1))
3226             {
3227               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3228               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3229                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3230                                            0);
3231               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3232               goto trace00;
3233             }
3234
3235           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3236
3237           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
3238             {
3239               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3240               icmp0 = (icmp46_header_t *) udp0;
3241
3242               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
3243                                   rx_fib_index0, node, next0, thread_index,
3244                                   &ses0, &dm0);
3245               goto trace00;
3246             }
3247
3248           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
3249           if (PREDICT_FALSE(!dm0))
3250             {
3251               clib_warning("no match for internal host %U",
3252                            format_ip4_address, &ip0->src_address);
3253               next0 = SNAT_IN2OUT_NEXT_DROP;
3254               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3255               goto trace00;
3256             }
3257
3258           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
3259
3260           key0.ext_host_addr = ip0->dst_address;
3261           key0.ext_host_port = tcp0->dst;
3262
3263           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
3264           if (PREDICT_FALSE(!ses0))
3265             {
3266               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
3267                 {
3268                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
3269                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
3270
3271                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
3272                     continue;
3273
3274                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
3275                   break;
3276                 }
3277               if (PREDICT_FALSE(!ses0))
3278                 {
3279                   /* too many sessions for user, send ICMP error packet */
3280
3281                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3282                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
3283                                                ICMP4_destination_unreachable_destination_unreachable_host,
3284                                                0);
3285                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3286                   goto trace00;
3287                 }
3288             }
3289
3290           new_port0 = ses0->out.out_port;
3291
3292           old_addr0.as_u32 = ip0->src_address.as_u32;
3293           ip0->src_address.as_u32 = new_addr0.as_u32;
3294           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
3295
3296           sum0 = ip0->checksum;
3297           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3298                                  ip4_header_t,
3299                                  src_address /* changed member */);
3300           ip0->checksum = ip_csum_fold (sum0);
3301
3302           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3303             {
3304               if (tcp0->flags & TCP_FLAG_SYN)
3305                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
3306               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
3307                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
3308               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
3309                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
3310               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
3311                 snat_det_ses_close(dm0, ses0);
3312               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
3313                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
3314               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
3315                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
3316
3317               old_port0 = tcp0->src;
3318               tcp0->src = new_port0;
3319
3320               sum0 = tcp0->checksum;
3321               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3322                                      ip4_header_t,
3323                                      dst_address /* changed member */);
3324               sum0 = ip_csum_update (sum0, old_port0, new_port0,
3325                                      ip4_header_t /* cheat */,
3326                                      length /* changed member */);
3327               tcp0->checksum = ip_csum_fold(sum0);
3328             }
3329           else
3330             {
3331               ses0->state = SNAT_SESSION_UDP_ACTIVE;
3332               old_port0 = udp0->src_port;
3333               udp0->src_port = new_port0;
3334               udp0->checksum = 0;
3335             }
3336
3337           switch(ses0->state)
3338             {
3339             case SNAT_SESSION_UDP_ACTIVE:
3340                 ses0->expire = now + sm->udp_timeout;
3341                 break;
3342             case SNAT_SESSION_TCP_SYN_SENT:
3343             case SNAT_SESSION_TCP_FIN_WAIT:
3344             case SNAT_SESSION_TCP_CLOSE_WAIT:
3345             case SNAT_SESSION_TCP_LAST_ACK:
3346                 ses0->expire = now + sm->tcp_transitory_timeout;
3347                 break;
3348             case SNAT_SESSION_TCP_ESTABLISHED:
3349                 ses0->expire = now + sm->tcp_established_timeout;
3350                 break;
3351             }
3352
3353         trace00:
3354           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3355                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3356             {
3357               snat_in2out_trace_t *t =
3358                  vlib_add_trace (vm, node, b0, sizeof (*t));
3359               t->is_slow_path = 0;
3360               t->sw_if_index = sw_if_index0;
3361               t->next_index = next0;
3362               t->session_index = ~0;
3363               if (ses0)
3364                 t->session_index = ses0 - dm0->sessions;
3365             }
3366
3367           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3368
3369           /* verify speculative enqueue, maybe switch current next frame */
3370           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3371                                            to_next, n_left_to_next,
3372                                            bi0, next0);
3373         }
3374
3375       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3376     }
3377
3378   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
3379                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3380                                pkts_processed);
3381   return frame->n_vectors;
3382 }
3383
3384 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
3385   .function = snat_det_in2out_node_fn,
3386   .name = "nat44-det-in2out",
3387   .vector_size = sizeof (u32),
3388   .format_trace = format_snat_in2out_trace,
3389   .type = VLIB_NODE_TYPE_INTERNAL,
3390
3391   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3392   .error_strings = snat_in2out_error_strings,
3393
3394   .runtime_data_bytes = sizeof (snat_runtime_t),
3395
3396   .n_next_nodes = 3,
3397
3398   /* edit / add dispositions here */
3399   .next_nodes = {
3400     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3401     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3402     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3403   },
3404 };
3405
3406 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
3407
3408 /**
3409  * Get address and port values to be used for ICMP packet translation
3410  * and create session if needed
3411  *
3412  * @param[in,out] sm             NAT main
3413  * @param[in,out] node           NAT node runtime
3414  * @param[in] thread_index       thread index
3415  * @param[in,out] b0             buffer containing packet to be translated
3416  * @param[out] p_proto           protocol used for matching
3417  * @param[out] p_value           address and port after NAT translation
3418  * @param[out] p_dont_translate  if packet should not be translated
3419  * @param d                      optional parameter
3420  * @param e                      optional parameter
3421  */
3422 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
3423                           u32 thread_index, vlib_buffer_t *b0,
3424                           ip4_header_t *ip0, u8 *p_proto,
3425                           snat_session_key_t *p_value,
3426                           u8 *p_dont_translate, void *d, void *e)
3427 {
3428   icmp46_header_t *icmp0;
3429   u32 sw_if_index0;
3430   u32 rx_fib_index0;
3431   u8 protocol;
3432   snat_det_out_key_t key0;
3433   u8 dont_translate = 0;
3434   u32 next0 = ~0;
3435   icmp_echo_header_t *echo0, *inner_echo0 = 0;
3436   ip4_header_t *inner_ip0;
3437   void *l4_header = 0;
3438   icmp46_header_t *inner_icmp0;
3439   snat_det_map_t * dm0 = 0;
3440   ip4_address_t new_addr0;
3441   u16 lo_port0, i0;
3442   snat_det_session_t * ses0 = 0;
3443   ip4_address_t in_addr;
3444   u16 in_port;
3445
3446   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
3447   echo0 = (icmp_echo_header_t *)(icmp0+1);
3448   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3449   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
3450
3451   if (!icmp_is_error_message (icmp0))
3452     {
3453       protocol = SNAT_PROTOCOL_ICMP;
3454       in_addr = ip0->src_address;
3455       in_port = echo0->identifier;
3456     }
3457   else
3458     {
3459       inner_ip0 = (ip4_header_t *)(echo0+1);
3460       l4_header = ip4_next_header (inner_ip0);
3461       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
3462       in_addr = inner_ip0->dst_address;
3463       switch (protocol)
3464         {
3465         case SNAT_PROTOCOL_ICMP:
3466           inner_icmp0 = (icmp46_header_t*)l4_header;
3467           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
3468           in_port = inner_echo0->identifier;
3469           break;
3470         case SNAT_PROTOCOL_UDP:
3471         case SNAT_PROTOCOL_TCP:
3472           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
3473           break;
3474         default:
3475           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
3476           next0 = SNAT_IN2OUT_NEXT_DROP;
3477           goto out;
3478         }
3479     }
3480
3481   dm0 = snat_det_map_by_user(sm, &in_addr);
3482   if (PREDICT_FALSE(!dm0))
3483     {
3484       clib_warning("no match for internal host %U",
3485                    format_ip4_address, &in_addr);
3486       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
3487           IP_PROTOCOL_ICMP, rx_fib_index0)))
3488         {
3489           dont_translate = 1;
3490           goto out;
3491         }
3492       next0 = SNAT_IN2OUT_NEXT_DROP;
3493       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3494       goto out;
3495     }
3496
3497   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
3498
3499   key0.ext_host_addr = ip0->dst_address;
3500   key0.ext_host_port = 0;
3501
3502   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
3503   if (PREDICT_FALSE(!ses0))
3504     {
3505       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
3506           IP_PROTOCOL_ICMP, rx_fib_index0)))
3507         {
3508           dont_translate = 1;
3509           goto out;
3510         }
3511       if (icmp0->type != ICMP4_echo_request)
3512         {
3513           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3514           next0 = SNAT_IN2OUT_NEXT_DROP;
3515           goto out;
3516         }
3517       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
3518         {
3519           key0.out_port = clib_host_to_net_u16 (lo_port0 +
3520             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
3521
3522           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
3523             continue;
3524
3525           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
3526           break;
3527         }
3528       if (PREDICT_FALSE(!ses0))
3529         {
3530           next0 = SNAT_IN2OUT_NEXT_DROP;
3531           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
3532           goto out;
3533         }
3534     }
3535
3536   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
3537                     !icmp_is_error_message (icmp0)))
3538     {
3539       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3540       next0 = SNAT_IN2OUT_NEXT_DROP;
3541       goto out;
3542     }
3543
3544   u32 now = (u32) vlib_time_now (sm->vlib_main);
3545
3546   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
3547   ses0->expire = now + sm->icmp_timeout;
3548
3549 out:
3550   *p_proto = protocol;
3551   if (ses0)
3552     {
3553       p_value->addr = new_addr0;
3554       p_value->fib_index = sm->outside_fib_index;
3555       p_value->port = ses0->out.out_port;
3556     }
3557   *p_dont_translate = dont_translate;
3558   if (d)
3559     *(snat_det_session_t**)d = ses0;
3560   if (e)
3561     *(snat_det_map_t**)e = dm0;
3562   return next0;
3563 }
3564
3565 /**********************/
3566 /*** worker handoff ***/
3567 /**********************/
3568 static inline uword
3569 snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
3570                                       vlib_node_runtime_t * node,
3571                                       vlib_frame_t * frame,
3572                                       u8 is_output)
3573 {
3574   snat_main_t *sm = &snat_main;
3575   vlib_thread_main_t *tm = vlib_get_thread_main ();
3576   u32 n_left_from, *from, *to_next = 0;
3577   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
3578   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
3579     = 0;
3580   vlib_frame_queue_elt_t *hf = 0;
3581   vlib_frame_t *f = 0;
3582   int i;
3583   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
3584   u32 next_worker_index = 0;
3585   u32 current_worker_index = ~0;
3586   u32 thread_index = vlib_get_thread_index ();
3587   u32 fq_index;
3588   u32 to_node_index;
3589
3590   ASSERT (vec_len (sm->workers));
3591
3592   if (is_output)
3593     {
3594       fq_index = sm->fq_in2out_output_index;
3595       to_node_index = sm->in2out_output_node_index;
3596     }
3597   else
3598     {
3599       fq_index = sm->fq_in2out_index;
3600       to_node_index = sm->in2out_node_index;
3601     }
3602
3603   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
3604     {
3605       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
3606
3607       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
3608                                sm->first_worker_index + sm->num_workers - 1,
3609                                (vlib_frame_queue_t *) (~0));
3610     }
3611
3612   from = vlib_frame_vector_args (frame);
3613   n_left_from = frame->n_vectors;
3614
3615   while (n_left_from > 0)
3616     {
3617       u32 bi0;
3618       vlib_buffer_t *b0;
3619       u32 sw_if_index0;
3620       u32 rx_fib_index0;
3621       ip4_header_t * ip0;
3622       u8 do_handoff;
3623
3624       bi0 = from[0];
3625       from += 1;
3626       n_left_from -= 1;
3627
3628       b0 = vlib_get_buffer (vm, bi0);
3629
3630       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
3631       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3632
3633       ip0 = vlib_buffer_get_current (b0);
3634
3635       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
3636
3637       if (PREDICT_FALSE (next_worker_index != thread_index))
3638         {
3639           do_handoff = 1;
3640
3641           if (next_worker_index != current_worker_index)
3642             {
3643               if (hf)
3644                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3645
3646               hf = vlib_get_worker_handoff_queue_elt (fq_index,
3647                                                       next_worker_index,
3648                                                       handoff_queue_elt_by_worker_index);
3649
3650               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
3651               to_next_worker = &hf->buffer_index[hf->n_vectors];
3652               current_worker_index = next_worker_index;
3653             }
3654
3655           /* enqueue to correct worker thread */
3656           to_next_worker[0] = bi0;
3657           to_next_worker++;
3658           n_left_to_next_worker--;
3659
3660           if (n_left_to_next_worker == 0)
3661             {
3662               hf->n_vectors = VLIB_FRAME_SIZE;
3663               vlib_put_frame_queue_elt (hf);
3664               current_worker_index = ~0;
3665               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
3666               hf = 0;
3667             }
3668         }
3669       else
3670         {
3671           do_handoff = 0;
3672           /* if this is 1st frame */
3673           if (!f)
3674             {
3675               f = vlib_get_frame_to_node (vm, to_node_index);
3676               to_next = vlib_frame_vector_args (f);
3677             }
3678
3679           to_next[0] = bi0;
3680           to_next += 1;
3681           f->n_vectors++;
3682         }
3683
3684       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
3685                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3686         {
3687           snat_in2out_worker_handoff_trace_t *t =
3688             vlib_add_trace (vm, node, b0, sizeof (*t));
3689           t->next_worker_index = next_worker_index;
3690           t->do_handoff = do_handoff;
3691         }
3692     }
3693
3694   if (f)
3695     vlib_put_frame_to_node (vm, to_node_index, f);
3696
3697   if (hf)
3698     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3699
3700   /* Ship frames to the worker nodes */
3701   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
3702     {
3703       if (handoff_queue_elt_by_worker_index[i])
3704         {
3705           hf = handoff_queue_elt_by_worker_index[i];
3706           /*
3707            * It works better to let the handoff node
3708            * rate-adapt, always ship the handoff queue element.
3709            */
3710           if (1 || hf->n_vectors == hf->last_n_vectors)
3711             {
3712               vlib_put_frame_queue_elt (hf);
3713               handoff_queue_elt_by_worker_index[i] = 0;
3714             }
3715           else
3716             hf->last_n_vectors = hf->n_vectors;
3717         }
3718       congested_handoff_queue_by_worker_index[i] =
3719         (vlib_frame_queue_t *) (~0);
3720     }
3721   hf = 0;
3722   current_worker_index = ~0;
3723   return frame->n_vectors;
3724 }
3725
3726 static uword
3727 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
3728                                vlib_node_runtime_t * node,
3729                                vlib_frame_t * frame)
3730 {
3731   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
3732 }
3733
3734 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
3735   .function = snat_in2out_worker_handoff_fn,
3736   .name = "nat44-in2out-worker-handoff",
3737   .vector_size = sizeof (u32),
3738   .format_trace = format_snat_in2out_worker_handoff_trace,
3739   .type = VLIB_NODE_TYPE_INTERNAL,
3740
3741   .n_next_nodes = 1,
3742
3743   .next_nodes = {
3744     [0] = "error-drop",
3745   },
3746 };
3747
3748 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
3749                               snat_in2out_worker_handoff_fn);
3750
3751 static uword
3752 snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
3753                                       vlib_node_runtime_t * node,
3754                                       vlib_frame_t * frame)
3755 {
3756   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
3757 }
3758
3759 VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
3760   .function = snat_in2out_output_worker_handoff_fn,
3761   .name = "nat44-in2out-output-worker-handoff",
3762   .vector_size = sizeof (u32),
3763   .format_trace = format_snat_in2out_worker_handoff_trace,
3764   .type = VLIB_NODE_TYPE_INTERNAL,
3765
3766   .n_next_nodes = 1,
3767
3768   .next_nodes = {
3769     [0] = "error-drop",
3770   },
3771 };
3772
3773 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
3774                               snat_in2out_output_worker_handoff_fn);
3775
3776 static_always_inline int
3777 is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
3778 {
3779   snat_address_t * ap;
3780   clib_bihash_kv_8_8_t kv, value;
3781   snat_session_key_t m_key;
3782
3783   vec_foreach (ap, sm->addresses)
3784     {
3785       if (ap->addr.as_u32 == dst_addr->as_u32)
3786         return 1;
3787     }
3788
3789   m_key.addr.as_u32 = dst_addr->as_u32;
3790   m_key.fib_index = sm->outside_fib_index;
3791   m_key.port = 0;
3792   m_key.protocol = 0;
3793   kv.key = m_key.as_u64;
3794   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
3795     return 1;
3796
3797   return 0;
3798 }
3799
3800 static uword
3801 snat_hairpin_dst_fn (vlib_main_t * vm,
3802                      vlib_node_runtime_t * node,
3803                      vlib_frame_t * frame)
3804 {
3805   u32 n_left_from, * from, * to_next;
3806   snat_in2out_next_t next_index;
3807   u32 pkts_processed = 0;
3808   snat_main_t * sm = &snat_main;
3809
3810   from = vlib_frame_vector_args (frame);
3811   n_left_from = frame->n_vectors;
3812   next_index = node->cached_next_index;
3813
3814   while (n_left_from > 0)
3815     {
3816       u32 n_left_to_next;
3817
3818       vlib_get_next_frame (vm, node, next_index,
3819                            to_next, n_left_to_next);
3820
3821       while (n_left_from > 0 && n_left_to_next > 0)
3822         {
3823           u32 bi0;
3824           vlib_buffer_t * b0;
3825           u32 next0;
3826           ip4_header_t * ip0;
3827           u32 proto0;
3828
3829           /* speculatively enqueue b0 to the current next frame */
3830           bi0 = from[0];
3831           to_next[0] = bi0;
3832           from += 1;
3833           to_next += 1;
3834           n_left_from -= 1;
3835           n_left_to_next -= 1;
3836
3837           b0 = vlib_get_buffer (vm, bi0);
3838           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3839           ip0 = vlib_buffer_get_current (b0);
3840
3841           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3842
3843           vnet_buffer (b0)->snat.flags = 0;
3844           if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
3845             {
3846               if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
3847                 {
3848                   udp_header_t * udp0 = ip4_next_header (ip0);
3849                   tcp_header_t * tcp0 = (tcp_header_t *) udp0;
3850
3851                   snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3852                 }
3853               else if (proto0 == SNAT_PROTOCOL_ICMP)
3854                 {
3855                   icmp46_header_t * icmp0 = ip4_next_header (ip0);
3856
3857                   snat_icmp_hairpinning (sm, b0, ip0, icmp0);
3858                 }
3859               else
3860                 {
3861                   snat_hairpinning_unknown_proto (sm, b0, ip0);
3862                 }
3863
3864               vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
3865               clib_warning("is hairpinning");
3866             }
3867
3868           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3869
3870           /* verify speculative enqueue, maybe switch current next frame */
3871           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3872                                            to_next, n_left_to_next,
3873                                            bi0, next0);
3874          }
3875
3876       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3877     }
3878
3879   vlib_node_increment_counter (vm, snat_hairpin_dst_node.index,
3880                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3881                                pkts_processed);
3882   return frame->n_vectors;
3883 }
3884
3885 VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
3886   .function = snat_hairpin_dst_fn,
3887   .name = "nat44-hairpin-dst",
3888   .vector_size = sizeof (u32),
3889   .type = VLIB_NODE_TYPE_INTERNAL,
3890   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3891   .error_strings = snat_in2out_error_strings,
3892   .n_next_nodes = 2,
3893   .next_nodes = {
3894     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3895     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3896   },
3897 };
3898
3899 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node,
3900                               snat_hairpin_dst_fn);
3901
3902 static uword
3903 snat_hairpin_src_fn (vlib_main_t * vm,
3904                      vlib_node_runtime_t * node,
3905                      vlib_frame_t * frame)
3906 {
3907   u32 n_left_from, * from, * to_next;
3908   snat_in2out_next_t next_index;
3909   u32 pkts_processed = 0;
3910   snat_main_t *sm = &snat_main;
3911
3912   from = vlib_frame_vector_args (frame);
3913   n_left_from = frame->n_vectors;
3914   next_index = node->cached_next_index;
3915
3916   while (n_left_from > 0)
3917     {
3918       u32 n_left_to_next;
3919
3920       vlib_get_next_frame (vm, node, next_index,
3921                            to_next, n_left_to_next);
3922
3923       while (n_left_from > 0 && n_left_to_next > 0)
3924         {
3925           u32 bi0;
3926           vlib_buffer_t * b0;
3927           u32 next0;
3928           snat_interface_t *i;
3929           u32 sw_if_index0;
3930
3931           /* speculatively enqueue b0 to the current next frame */
3932           bi0 = from[0];
3933           to_next[0] = bi0;
3934           from += 1;
3935           to_next += 1;
3936           n_left_from -= 1;
3937           n_left_to_next -= 1;
3938
3939           b0 = vlib_get_buffer (vm, bi0);
3940           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3941           next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT;
3942
3943           pool_foreach (i, sm->output_feature_interfaces,
3944           ({
3945             /* Only packets from NAT inside interface */
3946             if ((nat_interface_is_inside(i)) && (sw_if_index0 == i->sw_if_index))
3947               {
3948                 if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) &
3949                                     SNAT_FLAG_HAIRPINNING))
3950                   {
3951                     if (PREDICT_TRUE (sm->num_workers > 1))
3952                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
3953                     else
3954                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
3955                   }
3956                 break;
3957               }
3958           }));
3959
3960           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3961
3962           /* verify speculative enqueue, maybe switch current next frame */
3963           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3964                                            to_next, n_left_to_next,
3965                                            bi0, next0);
3966          }
3967
3968       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3969     }
3970
3971   vlib_node_increment_counter (vm, snat_hairpin_src_node.index,
3972                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3973                                pkts_processed);
3974   return frame->n_vectors;
3975 }
3976
3977 VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
3978   .function = snat_hairpin_src_fn,
3979   .name = "nat44-hairpin-src",
3980   .vector_size = sizeof (u32),
3981   .type = VLIB_NODE_TYPE_INTERNAL,
3982   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3983   .error_strings = snat_in2out_error_strings,
3984   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
3985   .next_nodes = {
3986      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
3987      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-in2out-output",
3988      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
3989      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
3990   },
3991 };
3992
3993 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node,
3994                               snat_hairpin_src_fn);
3995
3996 static uword
3997 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
3998                                 vlib_node_runtime_t * node,
3999                                 vlib_frame_t * frame)
4000 {
4001   u32 n_left_from, * from, * to_next;
4002   snat_in2out_next_t next_index;
4003   u32 pkts_processed = 0;
4004   snat_main_t * sm = &snat_main;
4005   u32 stats_node_index;
4006
4007   stats_node_index = snat_in2out_fast_node.index;
4008
4009   from = vlib_frame_vector_args (frame);
4010   n_left_from = frame->n_vectors;
4011   next_index = node->cached_next_index;
4012
4013   while (n_left_from > 0)
4014     {
4015       u32 n_left_to_next;
4016
4017       vlib_get_next_frame (vm, node, next_index,
4018                            to_next, n_left_to_next);
4019
4020       while (n_left_from > 0 && n_left_to_next > 0)
4021         {
4022           u32 bi0;
4023           vlib_buffer_t * b0;
4024           u32 next0;
4025           u32 sw_if_index0;
4026           ip4_header_t * ip0;
4027           ip_csum_t sum0;
4028           u32 new_addr0, old_addr0;
4029           u16 old_port0, new_port0;
4030           udp_header_t * udp0;
4031           tcp_header_t * tcp0;
4032           icmp46_header_t * icmp0;
4033           snat_session_key_t key0, sm0;
4034           u32 proto0;
4035           u32 rx_fib_index0;
4036
4037           /* speculatively enqueue b0 to the current next frame */
4038           bi0 = from[0];
4039           to_next[0] = bi0;
4040           from += 1;
4041           to_next += 1;
4042           n_left_from -= 1;
4043           n_left_to_next -= 1;
4044
4045           b0 = vlib_get_buffer (vm, bi0);
4046           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
4047
4048           ip0 = vlib_buffer_get_current (b0);
4049           udp0 = ip4_next_header (ip0);
4050           tcp0 = (tcp_header_t *) udp0;
4051           icmp0 = (icmp46_header_t *) udp0;
4052
4053           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
4054           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
4055
4056           if (PREDICT_FALSE(ip0->ttl == 1))
4057             {
4058               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
4059               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
4060                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
4061                                            0);
4062               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
4063               goto trace0;
4064             }
4065
4066           proto0 = ip_proto_to_snat_proto (ip0->protocol);
4067
4068           if (PREDICT_FALSE (proto0 == ~0))
4069               goto trace0;
4070
4071           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
4072             {
4073               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
4074                                   rx_fib_index0, node, next0, ~0, 0, 0);
4075               goto trace0;
4076             }
4077
4078           key0.addr = ip0->src_address;
4079           key0.protocol = proto0;
4080           key0.port = udp0->src_port;
4081           key0.fib_index = rx_fib_index0;
4082
4083           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0))
4084             {
4085               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
4086               next0= SNAT_IN2OUT_NEXT_DROP;
4087               goto trace0;
4088             }
4089
4090           new_addr0 = sm0.addr.as_u32;
4091           new_port0 = sm0.port;
4092           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
4093           old_addr0 = ip0->src_address.as_u32;
4094           ip0->src_address.as_u32 = new_addr0;
4095
4096           sum0 = ip0->checksum;
4097           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
4098                                  ip4_header_t,
4099                                  src_address /* changed member */);
4100           ip0->checksum = ip_csum_fold (sum0);
4101
4102           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
4103             {
4104               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
4105                 {
4106                   old_port0 = tcp0->src_port;
4107                   tcp0->src_port = new_port0;
4108
4109                   sum0 = tcp0->checksum;
4110                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
4111                                          ip4_header_t,
4112                                          dst_address /* changed member */);
4113                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
4114                                          ip4_header_t /* cheat */,
4115                                          length /* changed member */);
4116                   tcp0->checksum = ip_csum_fold(sum0);
4117                 }
4118               else
4119                 {
4120                   old_port0 = udp0->src_port;
4121                   udp0->src_port = new_port0;
4122                   udp0->checksum = 0;
4123                 }
4124             }
4125           else
4126             {
4127               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
4128                 {
4129                   sum0 = tcp0->checksum;
4130                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
4131                                          ip4_header_t,
4132                                          dst_address /* changed member */);
4133                   tcp0->checksum = ip_csum_fold(sum0);
4134                 }
4135             }
4136
4137           /* Hairpinning */
4138           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
4139
4140         trace0:
4141           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
4142                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
4143             {
4144               snat_in2out_trace_t *t =
4145                  vlib_add_trace (vm, node, b0, sizeof (*t));
4146               t->sw_if_index = sw_if_index0;
4147               t->next_index = next0;
4148             }
4149
4150           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
4151
4152           /* verify speculative enqueue, maybe switch current next frame */
4153           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
4154                                            to_next, n_left_to_next,
4155                                            bi0, next0);
4156         }
4157
4158       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
4159     }
4160
4161   vlib_node_increment_counter (vm, stats_node_index,
4162                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
4163                                pkts_processed);
4164   return frame->n_vectors;
4165 }
4166
4167
/*
 * Graph-node registration for the NAT44 in2out "fast" node.
 *
 * Ties the node name "nat44-in2out-fast" to its dispatch function
 * snat_in2out_fast_static_map_fn and declares the trace formatter, the
 * error-counter strings and the next-node arcs that function may select.
 *
 * Next indices assigned directly by the visible part of the dispatch
 * function:
 *   - SNAT_IN2OUT_NEXT_LOOKUP     default disposition for every packet
 *   - SNAT_IN2OUT_NEXT_ICMP_ERROR taken when the IPv4 TTL equals 1
 *   - SNAT_IN2OUT_NEXT_DROP       taken when snat_static_mapping_match()
 *                                 returns non-zero for the packet's key
 * For ICMP packets the next index is whatever icmp_in2out() returns.
 * NOTE(review): SLOW_PATH and REASS are not assigned directly in the part
 * of the dispatch function visible here; presumably they are listed so
 * that every index below SNAT_IN2OUT_N_NEXT has a target - confirm.
 */
4168 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
4169   .function = snat_in2out_fast_static_map_fn,
4170   .name = "nat44-in2out-fast",
4171   .vector_size = sizeof (u32),   /* frame entries are u32-sized; the dispatch function uses them as buffer indices */
4172   .format_trace = format_snat_in2out_fast_trace,   /* formatter for the trace records this node adds */
4173   .type = VLIB_NODE_TYPE_INTERNAL,
4174
4175   .n_errors = ARRAY_LEN(snat_in2out_error_strings),   /* counter count derived from the string table itself */
4176   .error_strings = snat_in2out_error_strings,   /* dispatch function indexes node->errors[] with SNAT_IN2OUT_ERROR_* */
4177
4178   .runtime_data_bytes = sizeof (snat_runtime_t),
4179
4180   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
4181
4182   /* next-node arcs, indexed by SNAT_IN2OUT_NEXT_*; edit / add dispositions here */
4183   .next_nodes = {
4184     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
4185     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
4186     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
4187     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
4188     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
4189   },
4190 };
4191
/*
 * Associates snat_in2out_fast_static_map_fn with snat_in2out_fast_node via
 * the vlib multiarch macro.
 * NOTE(review): presumably this emits CPU-architecture-specific variants of
 * the dispatch function and selects one at runtime - confirm against the
 * macro definition in the vlib headers.
 */
4192 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);