89054a01b6409103c71b9d675533b1f53af6bd66
[vpp.git] / src / plugins / snat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <snat/snat.h>
25 #include <snat/snat_ipfix_logging.h>
26 #include <snat/snat_det.h>
27
28 #include <vppinfra/hash.h>
29 #include <vppinfra/error.h>
30 #include <vppinfra/elog.h>
31
/* Per-packet trace record of the in2out nodes; rendered by
 * format_snat_in2out_trace and format_snat_in2out_fast_trace. */
32 typedef struct {
33   u32 sw_if_index;   /* printed as "sw_if_index" */
34   u32 next_index;    /* printed as "next index" */
35   u32 session_index; /* printed as "session" (not shown by the fast formatter) */
36   u32 is_slow_path;  /* non-zero selects the SLOW_PATH tag, zero the FAST_PATH tag */
37 } snat_in2out_trace_t;
38
/* Trace record rendered by format_snat_in2out_worker_handoff_trace. */
39 typedef struct {
40   u32 next_worker_index; /* worker index printed after the tag */
41   u8 do_handoff;         /* non-zero prints "next worker", zero "same worker" */
42 } snat_in2out_worker_handoff_trace_t;
43
44 /* packet trace format function */
45 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
46 {
47   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
50   char * tag;
51
52   tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
53   
54   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
55               t->sw_if_index, t->next_index, t->session_index);
56
57   return s;
58 }
59
60 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
61 {
62   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
63   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
64   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
65
66   s = format (s, "SANT_IN2OUT_FAST: sw_if_index %d, next index %d", 
67               t->sw_if_index, t->next_index);
68
69   return s;
70 }
71
72 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
73 {
74   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
75   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
76   snat_in2out_worker_handoff_trace_t * t =
77     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
78   char * m;
79
80   m = t->do_handoff ? "next worker" : "same worker";
81   s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
82
83   return s;
84 }
85
/* Node registrations declared up front so code in this file can refer to
 * them (snat_in2out_node_fn_inline reads the .index of the first two). */
86 vlib_node_registration_t snat_in2out_node;
87 vlib_node_registration_t snat_in2out_slowpath_node;
88 vlib_node_registration_t snat_in2out_fast_node;
89 vlib_node_registration_t snat_in2out_worker_handoff_node;
90 vlib_node_registration_t snat_det_in2out_node;
91
/* X-macro list of the in2out error counters, one _(symbol, description)
 * entry each. The user supplies the definition of _ before expanding. */
92 #define foreach_snat_in2out_error                       \
93 _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
94 _(IN2OUT_PACKETS, "Good in2out packets processed")      \
95 _(OUT_OF_PORTS, "Out of ports")                         \
96 _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
97 _(BAD_ICMP_TYPE, "icmp type not echo-request")          \
98 _(NO_TRANSLATION, "No translation")
99   
/* One SNAT_IN2OUT_ERROR_<symbol> enumerator per entry of
 * foreach_snat_in2out_error; SNAT_IN2OUT_N_ERROR is the number of entries. */
100 typedef enum {
101 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
102   foreach_snat_in2out_error
103 #undef _
104   SNAT_IN2OUT_N_ERROR,
105 } snat_in2out_error_t;
106
/* Description strings of foreach_snat_in2out_error, in list order, so the
 * array is indexed by the matching snat_in2out_error_t value. */
107 static char * snat_in2out_error_strings[] = {
108 #define _(sym,string) string,
109   foreach_snat_in2out_error
110 #undef _
111 };
112
/* Next-node indices returned by the in2out code; SNAT_IN2OUT_N_NEXT is the
 * number of entries.
 * NOTE(review): which graph node each index maps to is set in the node
 * registration, which is not part of this excerpt - confirm there. */
113 typedef enum {
114   SNAT_IN2OUT_NEXT_LOOKUP,
115   SNAT_IN2OUT_NEXT_DROP,      /* returned whenever a packet is rejected */
116   SNAT_IN2OUT_NEXT_ICMP_ERROR,
117   SNAT_IN2OUT_NEXT_SLOW_PATH,
118   SNAT_IN2OUT_N_NEXT,
119 } snat_in2out_next_t;
120
121 /**
122  * @brief Check if packet should be translated
123  *
124  * Packets aimed at outside interface and external addresss with active session
125  * should be translated.
126  *
127  * @param sm            SNAT main
128  * @param rt            SNAT runtime data
129  * @param sw_if_index0  index of the inside interface
130  * @param ip0           IPv4 header
131  * @param proto0        SNAT protocol
132  * @param rx_fib_index0 RX FIB index
133  *
134  * @returns 0 if packet should be translated otherwise 1
135  */
136 static inline int
137 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
138                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
139                          u32 rx_fib_index0)
140 {
141   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
142   fib_prefix_t pfx = {
143     .fp_proto = FIB_PROTOCOL_IP4,
144     .fp_len = 32,
145     .fp_addr = {
146         .ip4.as_u32 = ip0->dst_address.as_u32,
147     },
148   };
149
150   /* Don't NAT packet aimed at the intfc address */
151   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
152                                       ip0->dst_address.as_u32)))
153     return 1;
154
155   fei = fib_table_lookup (rx_fib_index0, &pfx);
156   if (FIB_NODE_INDEX_INVALID != fei)
157     {
158       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
159       if (sw_if_index == ~0)
160         {
161           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
162           if (FIB_NODE_INDEX_INVALID != fei)
163             sw_if_index = fib_entry_get_resolving_interface (fei);
164         }
165       snat_interface_t *i;
166       pool_foreach (i, sm->interfaces,
167       ({
168         /* NAT packet aimed at outside interface */
169         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
170           return 0;
171       }));
172     }
173
174   return 1;
175 }
176
177 static inline int
178 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
179                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
180                     u32 rx_fib_index0)
181 {
182   udp_header_t * udp0 = ip4_next_header (ip0);
183   snat_session_key_t key0, sm0;
184   clib_bihash_kv_8_8_t kv0, value0;
185
186   key0.addr = ip0->dst_address;
187   key0.port = udp0->dst_port;
188   key0.protocol = proto0;
189   key0.fib_index = sm->outside_fib_index;
190   kv0.key = key0.as_u64;
191
192   /* NAT packet aimed at external address if */
193   /* has active sessions */
194   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
195     {
196       /* or is static mappings */
197       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
198         return 0;
199     }
200   else
201     return 0;
202
203   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
204                                  rx_fib_index0);
205 }
206
/*
 * Create (or recycle) the session used to translate a packet for which no
 * in2out entry exists yet.
 *
 * The user is looked up by (source address, RX FIB index) and created on
 * first sight. When the user already holds sm->max_translations_per_user
 * dynamic sessions, the first non-static session found from the head of the
 * user's list is reused: its hash entries and outside address/port are
 * released and a new outside address/port is allocated. Otherwise a new
 * session is taken from the per-thread pool, using the static mapping that
 * matches *key0 or, when none matches, a newly allocated outside
 * address/port. The session is then added to the in2out, out2in and
 * worker_by_out hashes and reported through
 * snat_ipfix_logging_nat44_ses_create.
 *
 * sm             SNAT main
 * b0             buffer; b0->error is set when the packet is dropped
 * ip0            IPv4 header of the packet
 * rx_fib_index0  RX FIB index
 * key0           in2out key of the packet
 * sessionp       out: new or recycled session (not written on failure)
 * node           node runtime providing the error counters
 * next0          next index handed back unchanged on success
 * thread_index   selects the per-thread user, session and list pools
 *
 * Returns next0 on success and SNAT_IN2OUT_NEXT_DROP on failure.
 */
207 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
208                       ip4_header_t * ip0,
209                       u32 rx_fib_index0,
210                       snat_session_key_t * key0,
211                       snat_session_t ** sessionp,
212                       vlib_node_runtime_t * node,
213                       u32 next0,
214                       u32 thread_index)
215 {
216   snat_user_t *u;
217   snat_user_key_t user_key;
218   snat_session_t *s;
219   clib_bihash_kv_8_8_t kv0, value0;
220   u32 oldest_per_user_translation_list_index;
221   dlist_elt_t * oldest_per_user_translation_list_elt;
222   dlist_elt_t * per_user_translation_list_elt;
223   dlist_elt_t * per_user_list_head_elt;
224   u32 session_index;
225   snat_session_key_t key1;
226   u32 address_index = ~0;
227   u32 outside_fib_index;
228   uword * p;
229   snat_worker_key_t worker_by_out_key;
230
  /* Map the configured outside VRF id to a FIB index; drop when unknown */
231   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
232   if (! p)
233     {
234       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
235       return SNAT_IN2OUT_NEXT_DROP;
236     }
237   outside_fib_index = p[0];
238
239   key1.protocol = key0->protocol;
240   user_key.addr = ip0->src_address;
241   user_key.fib_index = rx_fib_index0;
242   kv0.key = user_key.as_u64;
243   
244   /* Ever heard of the "user" = src ip4 address before? */
245   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
246     {
247       /* no, make a new one */
248       pool_get (sm->per_thread_data[thread_index].users, u);
249       memset (u, 0, sizeof (*u));
250       u->addr = ip0->src_address;
251       u->fib_index = rx_fib_index0;
252
      /* Every user owns a list head for its sessions */
253       pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);
254
255       u->sessions_per_user_list_head_index = per_user_list_head_elt -
256         sm->per_thread_data[thread_index].list_pool;
257
258       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
259                        u->sessions_per_user_list_head_index);
260
      /* The hash value is the user's index in the per-thread user pool */
261       kv0.value = u - sm->per_thread_data[thread_index].users;
262
263       /* add user */
264       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
265     }
266   else
267     {
268       u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
269                              value0.value);
270     }
271
272   /* Over quota? Recycle the least recently used dynamic translation */
273   if (u->nsessions >= sm->max_translations_per_user)
274     {
275       /* Remove the oldest dynamic translation */
      /* Each pass moves the list head to the tail; static sessions are
       * skipped, so the loop ends on the first dynamic session */
276       do {
277           oldest_per_user_translation_list_index =
278             clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
279                                     u->sessions_per_user_list_head_index);
280
281           ASSERT (oldest_per_user_translation_list_index != ~0);
282
283           /* add it back to the end of the LRU list */
284           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
285                               u->sessions_per_user_list_head_index,
286                               oldest_per_user_translation_list_index);
287           /* Get the list element */
288           oldest_per_user_translation_list_elt =
289             pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
290                                oldest_per_user_translation_list_index);
291
292           /* Get the session index from the list element */
293           session_index = oldest_per_user_translation_list_elt->value;
294
295           /* Get the session */
296           s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
297                                  session_index);
298       } while (snat_is_session_static (s));
299
300       /* Remove in2out, out2in keys */
301       kv0.key = s->in2out.as_u64;
302       if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
303           clib_warning ("in2out key delete failed");
304       kv0.key = s->out2in.as_u64;
305       if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
306           clib_warning ("out2in key delete failed");
307
308       /* log NAT event */
309       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
310                                           s->out2in.addr.as_u32,
311                                           s->in2out.protocol,
312                                           s->in2out.port,
313                                           s->out2in.port,
314                                           s->in2out.fib_index);
315
316       snat_free_outside_address_and_port 
317         (sm, &s->out2in, s->outside_address_index);
318       s->outside_address_index = ~0;
319
320       if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
321                                                &address_index))
322         {
          /* NOTE(review): presumably not expected to fail because an
           * address/port was released just above - confirm */
323           ASSERT(0);
324
325           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
326           return SNAT_IN2OUT_NEXT_DROP;
327         }
328       s->outside_address_index = address_index;
329     }
330   else
331     {
332       u8 static_mapping = 1;
333
334       /* First try to match static mapping by local address and port */
335       if (snat_static_mapping_match (sm, *key0, &key1, 0))
336         {
337           static_mapping = 0;
338           /* Try to create dynamic translation */
339           if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
340                                                    &address_index))
341             {
342               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
343               return SNAT_IN2OUT_NEXT_DROP;
344             }
345         }
346
347       /* Create a new session */
348       pool_get (sm->per_thread_data[thread_index].sessions, s);
349       memset (s, 0, sizeof (*s));
350       
      /* Stays ~0 for a static mapping: no address/port was allocated */
351       s->outside_address_index = address_index;
352
353       if (static_mapping)
354         {
355           u->nstaticsessions++;
356           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
357         }
358       else
359         {
360           u->nsessions++;
361         }
362
363       /* Create list elts */
364       pool_get (sm->per_thread_data[thread_index].list_pool,
365                 per_user_translation_list_elt);
366       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
367                        per_user_translation_list_elt -
368                        sm->per_thread_data[thread_index].list_pool);
369
      /* Link list element and session to each other by pool index */
370       per_user_translation_list_elt->value =
371         s - sm->per_thread_data[thread_index].sessions;
372       s->per_user_index = per_user_translation_list_elt -
373                           sm->per_thread_data[thread_index].list_pool;
374       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
375
376       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
377                           s->per_user_list_head_index,
378                           per_user_translation_list_elt -
379                           sm->per_thread_data[thread_index].list_pool);
380    }
381   
  /* Common tail: fill in both keys of the new or recycled session */
382   s->in2out = *key0;
383   s->out2in = key1;
384   s->out2in.protocol = key0->protocol;
385   s->out2in.fib_index = outside_fib_index;
386   *sessionp = s;
387
388   /* Add to translation hashes */
389   kv0.key = s->in2out.as_u64;
390   kv0.value = s - sm->per_thread_data[thread_index].sessions;
391   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
392       clib_warning ("in2out key add failed");
393   
394   kv0.key = s->out2in.as_u64;
395   kv0.value = s - sm->per_thread_data[thread_index].sessions;
396   
397   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
398       clib_warning ("out2in key add failed");
399
400   /* Add to translated packets worker lookup */
401   worker_by_out_key.addr = s->out2in.addr;
402   worker_by_out_key.port = s->out2in.port;
403   worker_by_out_key.fib_index = s->out2in.fib_index;
404   kv0.key = worker_by_out_key.as_u64;
405   kv0.value = thread_index;
406   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
407
408   /* log NAT event */
409   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
410                                       s->out2in.addr.as_u32,
411                                       s->in2out.protocol,
412                                       s->in2out.port,
413                                       s->out2in.port,
414                                       s->in2out.fib_index);
415   return next0;
416 }
417
418 static_always_inline
419 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
420                                  snat_session_key_t *p_key0)
421 {
422   icmp46_header_t *icmp0;
423   snat_session_key_t key0;
424   icmp_echo_header_t *echo0, *inner_echo0 = 0;
425   ip4_header_t *inner_ip0 = 0;
426   void *l4_header = 0;
427   icmp46_header_t *inner_icmp0;
428
429   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
430   echo0 = (icmp_echo_header_t *)(icmp0+1);
431
432   if (!icmp_is_error_message (icmp0))
433     {
434       key0.protocol = SNAT_PROTOCOL_ICMP;
435       key0.addr = ip0->src_address;
436       key0.port = echo0->identifier;
437     }
438   else
439     {
440       inner_ip0 = (ip4_header_t *)(echo0+1);
441       l4_header = ip4_next_header (inner_ip0);
442       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
443       key0.addr = inner_ip0->dst_address;
444       switch (key0.protocol)
445         {
446         case SNAT_PROTOCOL_ICMP:
447           inner_icmp0 = (icmp46_header_t*)l4_header;
448           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
449           key0.port = inner_echo0->identifier;
450           break;
451         case SNAT_PROTOCOL_UDP:
452         case SNAT_PROTOCOL_TCP:
453           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
454           break;
455         default:
456           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
457         }
458     }
459   *p_key0 = key0;
460   return -1; /* success */
461 }
462
463 /**
464  * Get address and port values to be used for packet SNAT translation
465  * and create session if needed
466  *
467  * @param[in,out] sm             SNAT main
468  * @param[in,out] node           SNAT node runtime
469  * @param[in] thread_index       thread index
470  * @param[in,out] b0             buffer containing packet to be translated
471  * @param[out] p_proto           protocol used for matching
472  * @param[out] p_value           address and port after NAT translation
473  * @param[out] p_dont_translate  if packet should not be translated
474  * @param d                      optional parameter
475  * @param e                      optional parameter
476  */
477 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
478                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
479                            snat_session_key_t *p_value,
480                            u8 *p_dont_translate, void *d, void *e)
481 {
482   ip4_header_t *ip0;
483   icmp46_header_t *icmp0;
484   u32 sw_if_index0;
485   u32 rx_fib_index0;
486   snat_session_key_t key0;
487   snat_session_t *s0 = 0;
488   u8 dont_translate = 0;
489   clib_bihash_kv_8_8_t kv0, value0;
490   u32 next0 = ~0;
491   int err;
492
493   ip0 = vlib_buffer_get_current (b0);
494   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
495   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
496   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
497
498   err = icmp_get_key (ip0, &key0);
499   if (err != -1)
500     {
501       b0->error = node->errors[err];
502       next0 = SNAT_IN2OUT_NEXT_DROP;
503       goto out;
504     }
505   key0.fib_index = rx_fib_index0;
506
507   kv0.key = key0.as_u64;
508
509   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
510     {
511       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
512           IP_PROTOCOL_ICMP, rx_fib_index0)))
513         {
514           dont_translate = 1;
515           goto out;
516         }
517
518       if (icmp_is_error_message (icmp0))
519         {
520           next0 = SNAT_IN2OUT_NEXT_DROP;
521           goto out;
522         }
523
524       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
525                          &s0, node, next0, thread_index);
526
527       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
528         goto out;
529     }
530   else
531     s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
532                             value0.value);
533
534   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
535                     !icmp_is_error_message (icmp0)))
536     {
537       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
538       next0 = SNAT_IN2OUT_NEXT_DROP;
539       goto out;
540     }
541
542 out:
543   *p_proto = key0.protocol;
544   if (s0)
545     *p_value = s0->out2in;
546   *p_dont_translate = dont_translate;
547   if (d)
548     *(snat_session_t**)d = s0;
549   return next0;
550 }
551
552 /**
553  * Get address and port values to be used for packet SNAT translation
554  *
555  * @param[in] sm                 SNAT main
556  * @param[in,out] node           SNAT node runtime
557  * @param[in] thread_index       thread index
558  * @param[in,out] b0             buffer containing packet to be translated
559  * @param[out] p_proto           protocol used for matching
560  * @param[out] p_value           address and port after NAT translation
561  * @param[out] p_dont_translate  if packet should not be translated
562  * @param d                      optional parameter
563  * @param e                      optional parameter
564  */
565 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
566                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
567                            snat_session_key_t *p_value,
568                            u8 *p_dont_translate, void *d, void *e)
569 {
570   ip4_header_t *ip0;
571   icmp46_header_t *icmp0;
572   u32 sw_if_index0;
573   u32 rx_fib_index0;
574   snat_session_key_t key0;
575   snat_session_key_t sm0;
576   u8 dont_translate = 0;
577   u32 next0 = ~0;
578   int err;
579
580   ip0 = vlib_buffer_get_current (b0);
581   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
582   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
583   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
584
585   err = icmp_get_key (ip0, &key0);
586   if (err != -1)
587     {
588       b0->error = node->errors[err];
589       next0 = SNAT_IN2OUT_NEXT_DROP;
590       goto out2;
591     }
592   key0.fib_index = rx_fib_index0;
593
594   if (snat_static_mapping_match(sm, key0, &sm0, 0))
595     {
596       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
597           IP_PROTOCOL_ICMP, rx_fib_index0)))
598         {
599           dont_translate = 1;
600           goto out;
601         }
602
603       if (icmp_is_error_message (icmp0))
604         {
605           next0 = SNAT_IN2OUT_NEXT_DROP;
606           goto out;
607         }
608
609       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
610       next0 = SNAT_IN2OUT_NEXT_DROP;
611       goto out;
612     }
613
614   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
615                     !icmp_is_error_message (icmp0)))
616     {
617       if (icmp0->type != ICMP4_echo_reply || key0.port != sm0.port)
618         {
619           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
620           next0 = SNAT_IN2OUT_NEXT_DROP;
621           goto out;
622         }
623     }
624
625 out:
626   *p_value = sm0;
627 out2:
628   *p_proto = key0.protocol;
629   *p_dont_translate = dont_translate;
630   return next0;
631 }
632
633 static inline u32 icmp_in2out (snat_main_t *sm,
634                                vlib_buffer_t * b0,
635                                ip4_header_t * ip0,
636                                icmp46_header_t * icmp0,
637                                u32 sw_if_index0,
638                                u32 rx_fib_index0,
639                                vlib_node_runtime_t * node,
640                                u32 next0,
641                                u32 thread_index,
642                                void *d,
643                                void *e)
644 {
645   snat_session_key_t sm0;
646   u8 protocol;
647   icmp_echo_header_t *echo0, *inner_echo0 = 0;
648   ip4_header_t *inner_ip0;
649   void *l4_header = 0;
650   icmp46_header_t *inner_icmp0;
651   u8 dont_translate;
652   u32 new_addr0, old_addr0;
653   u16 old_id0, new_id0;
654   ip_csum_t sum0;
655   u16 checksum0;
656   u32 next0_tmp;
657
658   echo0 = (icmp_echo_header_t *)(icmp0+1);
659
660   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0,
661                                        &protocol, &sm0, &dont_translate, d, e);
662   if (next0_tmp != ~0)
663     next0 = next0_tmp;
664   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
665     goto out;
666
667   sum0 = ip_incremental_checksum (0, icmp0,
668                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
669   checksum0 = ~ip_csum_fold (sum0);
670   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
671     {
672       next0 = SNAT_IN2OUT_NEXT_DROP;
673       goto out;
674     }
675
676   old_addr0 = ip0->src_address.as_u32;
677   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
678   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
679
680   sum0 = ip0->checksum;
681   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
682                          src_address /* changed member */);
683   ip0->checksum = ip_csum_fold (sum0);
684   
685   if (!icmp_is_error_message (icmp0))
686     {
687       new_id0 = sm0.port;
688       if (PREDICT_FALSE(new_id0 != echo0->identifier))
689         {
690           old_id0 = echo0->identifier;
691           new_id0 = sm0.port;
692           echo0->identifier = new_id0;
693
694           sum0 = icmp0->checksum;
695           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
696                                  identifier);
697           icmp0->checksum = ip_csum_fold (sum0);
698         }
699     }
700   else
701     {
702       inner_ip0 = (ip4_header_t *)(echo0+1);
703       l4_header = ip4_next_header (inner_ip0);
704
705       if (!ip4_header_checksum_is_valid (inner_ip0))
706         {
707           next0 = SNAT_IN2OUT_NEXT_DROP;
708           goto out;
709         }
710
711       old_addr0 = inner_ip0->dst_address.as_u32;
712       inner_ip0->dst_address = sm0.addr;
713       new_addr0 = inner_ip0->dst_address.as_u32;
714
715       sum0 = icmp0->checksum;
716       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
717                              dst_address /* changed member */);
718       icmp0->checksum = ip_csum_fold (sum0);
719
720       switch (protocol)
721         {
722           case SNAT_PROTOCOL_ICMP:
723             inner_icmp0 = (icmp46_header_t*)l4_header;
724             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
725
726             old_id0 = inner_echo0->identifier;
727             new_id0 = sm0.port;
728             inner_echo0->identifier = new_id0;
729
730             sum0 = icmp0->checksum;
731             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
732                                    identifier);
733             icmp0->checksum = ip_csum_fold (sum0);
734             break;
735           case SNAT_PROTOCOL_UDP:
736           case SNAT_PROTOCOL_TCP:
737             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
738             new_id0 = sm0.port;
739             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
740
741             sum0 = icmp0->checksum;
742             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
743                                    dst_port);
744             icmp0->checksum = ip_csum_fold (sum0);
745             break;
746           default:
747             ASSERT(0);
748         }
749     }
750
751 out:
752   return next0;
753 }
754
755 /**
756  * @brief Hairpinning
757  *
758  * Hairpinning allows two endpoints on the internal side of the NAT to
759  * communicate even if they only use each other's external IP addresses
760  * and ports.
761  *
762  * @param sm     SNAT main.
763  * @param b0     Vlib buffer.
764  * @param ip0    IP header.
765  * @param udp0   UDP header.
766  * @param tcp0   TCP header.
767  * @param proto0 SNAT protocol.
768  */
769 static inline void
770 snat_hairpinning (snat_main_t *sm,
771                   vlib_buffer_t * b0,
772                   ip4_header_t * ip0,
773                   udp_header_t * udp0,
774                   tcp_header_t * tcp0,
775                   u32 proto0)
776 {
777   snat_session_key_t key0, sm0;
778   snat_worker_key_t k0;
779   snat_session_t * s0;
780   clib_bihash_kv_8_8_t kv0, value0;
781   ip_csum_t sum0;
782   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
783   u16 new_dst_port0, old_dst_port0;
784
785   key0.addr = ip0->dst_address;
786   key0.port = udp0->dst_port;
787   key0.protocol = proto0;
788   key0.fib_index = sm->outside_fib_index;
789   kv0.key = key0.as_u64;
790
791   /* Check if destination is in active sessions */
792   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
793     {
794       /* or static mappings */
795       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
796         {
797           new_dst_addr0 = sm0.addr.as_u32;
798           new_dst_port0 = sm0.port;
799           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
800         }
801     }
802   else
803     {
804       si = value0.value;
805       if (sm->num_workers > 1)
806         {
807           k0.addr = ip0->dst_address;
808           k0.port = udp0->dst_port;
809           k0.fib_index = sm->outside_fib_index;
810           kv0.key = k0.as_u64;
811           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
812             ASSERT(0);
813           else
814             ti = value0.value;
815         }
816       else
817         ti = sm->num_workers;
818
819       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
820       new_dst_addr0 = s0->in2out.addr.as_u32;
821       new_dst_port0 = s0->in2out.port;
822       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
823     }
824
825   /* Destination is behind the same NAT, use internal address and port */
826   if (new_dst_addr0)
827     {
828       old_dst_addr0 = ip0->dst_address.as_u32;
829       ip0->dst_address.as_u32 = new_dst_addr0;
830       sum0 = ip0->checksum;
831       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
832                              ip4_header_t, dst_address);
833       ip0->checksum = ip_csum_fold (sum0);
834
835       old_dst_port0 = tcp0->dst;
836       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
837         {
838           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
839             {
840               tcp0->dst = new_dst_port0;
841               sum0 = tcp0->checksum;
842               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
843                                      ip4_header_t, dst_address);
844               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
845                                      ip4_header_t /* cheat */, length);
846               tcp0->checksum = ip_csum_fold(sum0);
847             }
848           else
849             {
850               udp0->dst_port = new_dst_port0;
851               udp0->checksum = 0;
852             }
853         }
854     }
855 }
856
857 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
858                                          vlib_buffer_t * b0,
859                                          ip4_header_t * ip0,
860                                          icmp46_header_t * icmp0,
861                                          u32 sw_if_index0,
862                                          u32 rx_fib_index0,
863                                          vlib_node_runtime_t * node,
864                                          u32 next0,
865                                          f64 now,
866                                          u32 thread_index,
867                                          snat_session_t ** p_s0)
868 {
869   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
870                       next0, thread_index, p_s0, 0);
871   snat_session_t * s0 = *p_s0;
872   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
873     {
874       /* Accounting */
875       s0->last_heard = now;
876       s0->total_pkts++;
877       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
878       /* Per-user LRU list maintenance for dynamic translations */
879       if (!snat_is_session_static (s0))
880         {
881           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
882                              s0->per_user_index);
883           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
884                               s0->per_user_list_head_index,
885                               s0->per_user_index);
886         }
887     }
888   return next0;
889 }
890
891 static inline uword
892 snat_in2out_node_fn_inline (vlib_main_t * vm,
893                             vlib_node_runtime_t * node,
894                             vlib_frame_t * frame, int is_slow_path)
895 {
896   u32 n_left_from, * from, * to_next;
897   snat_in2out_next_t next_index;
898   u32 pkts_processed = 0;
899   snat_main_t * sm = &snat_main;
900   f64 now = vlib_time_now (vm);
901   u32 stats_node_index;
902   u32 thread_index = vlib_get_thread_index ();
903
904   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
905     snat_in2out_node.index;
906
907   from = vlib_frame_vector_args (frame);
908   n_left_from = frame->n_vectors;
909   next_index = node->cached_next_index;
910
911   while (n_left_from > 0)
912     {
913       u32 n_left_to_next;
914
915       vlib_get_next_frame (vm, node, next_index,
916                            to_next, n_left_to_next);
917
918       while (n_left_from >= 4 && n_left_to_next >= 2)
919         {
920           u32 bi0, bi1;
921           vlib_buffer_t * b0, * b1;
922           u32 next0, next1;
923           u32 sw_if_index0, sw_if_index1;
924           ip4_header_t * ip0, * ip1;
925           ip_csum_t sum0, sum1;
926           u32 new_addr0, old_addr0, new_addr1, old_addr1;
927           u16 old_port0, new_port0, old_port1, new_port1;
928           udp_header_t * udp0, * udp1;
929           tcp_header_t * tcp0, * tcp1;
930           icmp46_header_t * icmp0, * icmp1;
931           snat_session_key_t key0, key1;
932           u32 rx_fib_index0, rx_fib_index1;
933           u32 proto0, proto1;
934           snat_session_t * s0 = 0, * s1 = 0;
935           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
936           
937           /* Prefetch next iteration. */
938           {
939             vlib_buffer_t * p2, * p3;
940             
941             p2 = vlib_get_buffer (vm, from[2]);
942             p3 = vlib_get_buffer (vm, from[3]);
943             
944             vlib_prefetch_buffer_header (p2, LOAD);
945             vlib_prefetch_buffer_header (p3, LOAD);
946
947             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
948             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
949           }
950
951           /* speculatively enqueue b0 and b1 to the current next frame */
952           to_next[0] = bi0 = from[0];
953           to_next[1] = bi1 = from[1];
954           from += 2;
955           to_next += 2;
956           n_left_from -= 2;
957           n_left_to_next -= 2;
958           
959           b0 = vlib_get_buffer (vm, bi0);
960           b1 = vlib_get_buffer (vm, bi1);
961
962           ip0 = vlib_buffer_get_current (b0);
963           udp0 = ip4_next_header (ip0);
964           tcp0 = (tcp_header_t *) udp0;
965           icmp0 = (icmp46_header_t *) udp0;
966
967           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
968           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
969                                    sw_if_index0);
970
971           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
972
973           if (PREDICT_FALSE(ip0->ttl == 1))
974             {
975               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
976               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
977                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
978                                            0);
979               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
980               goto trace00;
981             }
982
983           proto0 = ip_proto_to_snat_proto (ip0->protocol);
984
985           /* Next configured feature, probably ip4-lookup */
986           if (is_slow_path)
987             {
988               if (PREDICT_FALSE (proto0 == ~0))
989                 goto trace00;
990               
991               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
992                 {
993                   next0 = icmp_in2out_slow_path 
994                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
995                      node, next0, now, thread_index, &s0);
996                   goto trace00;
997                 }
998             }
999           else
1000             {
1001               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1002                 {
1003                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1004                   goto trace00;
1005                 }
1006             }
1007
1008           key0.addr = ip0->src_address;
1009           key0.port = udp0->src_port;
1010           key0.protocol = proto0;
1011           key0.fib_index = rx_fib_index0;
1012           
1013           kv0.key = key0.as_u64;
1014
1015           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
1016             {
1017               if (is_slow_path)
1018                 {
1019                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
1020                       proto0, rx_fib_index0)))
1021                     goto trace00;
1022
1023                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1024                                      &s0, node, next0, thread_index);
1025                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1026                     goto trace00;
1027                 }
1028               else
1029                 {
1030                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1031                   goto trace00;
1032                 }
1033             }
1034           else
1035             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1036                                     value0.value);
1037
1038           old_addr0 = ip0->src_address.as_u32;
1039           ip0->src_address = s0->out2in.addr;
1040           new_addr0 = ip0->src_address.as_u32;
1041           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1042
1043           sum0 = ip0->checksum;
1044           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1045                                  ip4_header_t,
1046                                  src_address /* changed member */);
1047           ip0->checksum = ip_csum_fold (sum0);
1048
1049           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1050             {
1051               old_port0 = tcp0->src_port;
1052               tcp0->src_port = s0->out2in.port;
1053               new_port0 = tcp0->src_port;
1054
1055               sum0 = tcp0->checksum;
1056               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1057                                      ip4_header_t,
1058                                      dst_address /* changed member */);
1059               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1060                                      ip4_header_t /* cheat */,
1061                                      length /* changed member */);
1062               tcp0->checksum = ip_csum_fold(sum0);
1063             }
1064           else
1065             {
1066               old_port0 = udp0->src_port;
1067               udp0->src_port = s0->out2in.port;
1068               udp0->checksum = 0;
1069             }
1070
1071           /* Hairpinning */
1072           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1073
1074           /* Accounting */
1075           s0->last_heard = now;
1076           s0->total_pkts++;
1077           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1078           /* Per-user LRU list maintenance for dynamic translation */
1079           if (!snat_is_session_static (s0))
1080             {
1081               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1082                                  s0->per_user_index);
1083               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1084                                   s0->per_user_list_head_index,
1085                                   s0->per_user_index);
1086             }
1087         trace00:
1088
1089           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1090                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1091             {
1092               snat_in2out_trace_t *t = 
1093                  vlib_add_trace (vm, node, b0, sizeof (*t));
1094               t->is_slow_path = is_slow_path;
1095               t->sw_if_index = sw_if_index0;
1096               t->next_index = next0;
1097                   t->session_index = ~0;
1098               if (s0)
1099                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1100             }
1101
1102           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1103
1104           ip1 = vlib_buffer_get_current (b1);
1105           udp1 = ip4_next_header (ip1);
1106           tcp1 = (tcp_header_t *) udp1;
1107           icmp1 = (icmp46_header_t *) udp1;
1108
1109           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1110           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1111                                    sw_if_index1);
1112
1113           if (PREDICT_FALSE(ip1->ttl == 1))
1114             {
1115               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1116               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1117                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1118                                            0);
1119               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1120               goto trace01;
1121             }
1122
1123           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1124
1125           /* Next configured feature, probably ip4-lookup */
1126           if (is_slow_path)
1127             {
1128               if (PREDICT_FALSE (proto1 == ~0))
1129                 goto trace01;
1130               
1131               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1132                 {
1133                   next1 = icmp_in2out_slow_path 
1134                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1135                      next1, now, thread_index, &s1);
1136                   goto trace01;
1137                 }
1138             }
1139           else
1140             {
1141               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1142                 {
1143                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1144                   goto trace01;
1145                 }
1146             }
1147
1148           key1.addr = ip1->src_address;
1149           key1.port = udp1->src_port;
1150           key1.protocol = proto1;
1151           key1.fib_index = rx_fib_index1;
1152           
1153           kv1.key = key1.as_u64;
1154
1155             if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
1156             {
1157               if (is_slow_path)
1158                 {
1159                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1, ip1,
1160                       proto1, rx_fib_index1)))
1161                     goto trace01;
1162
1163                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1164                                      &s1, node, next1, thread_index);
1165                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1166                     goto trace01;
1167                 }
1168               else
1169                 {
1170                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1171                   goto trace01;
1172                 }
1173             }
1174           else
1175             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1176                                     value1.value);
1177
1178           old_addr1 = ip1->src_address.as_u32;
1179           ip1->src_address = s1->out2in.addr;
1180           new_addr1 = ip1->src_address.as_u32;
1181           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1182
1183           sum1 = ip1->checksum;
1184           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1185                                  ip4_header_t,
1186                                  src_address /* changed member */);
1187           ip1->checksum = ip_csum_fold (sum1);
1188
1189           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1190             {
1191               old_port1 = tcp1->src_port;
1192               tcp1->src_port = s1->out2in.port;
1193               new_port1 = tcp1->src_port;
1194
1195               sum1 = tcp1->checksum;
1196               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1197                                      ip4_header_t,
1198                                      dst_address /* changed member */);
1199               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1200                                      ip4_header_t /* cheat */,
1201                                      length /* changed member */);
1202               tcp1->checksum = ip_csum_fold(sum1);
1203             }
1204           else
1205             {
1206               old_port1 = udp1->src_port;
1207               udp1->src_port = s1->out2in.port;
1208               udp1->checksum = 0;
1209             }
1210
1211           /* Hairpinning */
1212           snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
1213
1214           /* Accounting */
1215           s1->last_heard = now;
1216           s1->total_pkts++;
1217           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1218           /* Per-user LRU list maintenance for dynamic translation */
1219           if (!snat_is_session_static (s1))
1220             {
1221               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1222                                  s1->per_user_index);
1223               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1224                                   s1->per_user_list_head_index,
1225                                   s1->per_user_index);
1226             }
1227         trace01:
1228
1229           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1230                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
1231             {
1232               snat_in2out_trace_t *t = 
1233                  vlib_add_trace (vm, node, b1, sizeof (*t));
1234               t->sw_if_index = sw_if_index1;
1235               t->next_index = next1;
1236               t->session_index = ~0;
1237               if (s1)
1238                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1239             }
1240
1241           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1242
1243           /* verify speculative enqueues, maybe switch current next frame */
1244           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1245                                            to_next, n_left_to_next,
1246                                            bi0, bi1, next0, next1);
1247         }
1248
1249       while (n_left_from > 0 && n_left_to_next > 0)
1250         {
1251           u32 bi0;
1252           vlib_buffer_t * b0;
1253           u32 next0;
1254           u32 sw_if_index0;
1255           ip4_header_t * ip0;
1256           ip_csum_t sum0;
1257           u32 new_addr0, old_addr0;
1258           u16 old_port0, new_port0;
1259           udp_header_t * udp0;
1260           tcp_header_t * tcp0;
1261           icmp46_header_t * icmp0;
1262           snat_session_key_t key0;
1263           u32 rx_fib_index0;
1264           u32 proto0;
1265           snat_session_t * s0 = 0;
1266           clib_bihash_kv_8_8_t kv0, value0;
1267           
1268           /* speculatively enqueue b0 to the current next frame */
1269           bi0 = from[0];
1270           to_next[0] = bi0;
1271           from += 1;
1272           to_next += 1;
1273           n_left_from -= 1;
1274           n_left_to_next -= 1;
1275
1276           b0 = vlib_get_buffer (vm, bi0);
1277           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1278
1279           ip0 = vlib_buffer_get_current (b0);
1280           udp0 = ip4_next_header (ip0);
1281           tcp0 = (tcp_header_t *) udp0;
1282           icmp0 = (icmp46_header_t *) udp0;
1283
1284           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1285           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1286                                    sw_if_index0);
1287
1288           if (PREDICT_FALSE(ip0->ttl == 1))
1289             {
1290               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1291               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1292                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1293                                            0);
1294               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1295               goto trace0;
1296             }
1297
1298           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1299
1300           /* Next configured feature, probably ip4-lookup */
1301           if (is_slow_path)
1302             {
1303               if (PREDICT_FALSE (proto0 == ~0))
1304                 goto trace0;
1305               
1306               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1307                 {
1308                   next0 = icmp_in2out_slow_path 
1309                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1310                      next0, now, thread_index, &s0);
1311                   goto trace0;
1312                 }
1313             }
1314           else
1315             {
1316               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1317                 {
1318                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1319                   goto trace0;
1320                 }
1321             }
1322
1323           key0.addr = ip0->src_address;
1324           key0.port = udp0->src_port;
1325           key0.protocol = proto0;
1326           key0.fib_index = rx_fib_index0;
1327           
1328           kv0.key = key0.as_u64;
1329
1330           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
1331             {
1332               if (is_slow_path)
1333                 {
1334                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
1335                       proto0, rx_fib_index0)))
1336                     goto trace0;
1337
1338                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1339                                      &s0, node, next0, thread_index);
1340
1341                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1342                     goto trace0;
1343                 }
1344               else
1345                 {
1346                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1347                   goto trace0;
1348                 }
1349             }
1350           else
1351             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1352                                     value0.value);
1353
1354           old_addr0 = ip0->src_address.as_u32;
1355           ip0->src_address = s0->out2in.addr;
1356           new_addr0 = ip0->src_address.as_u32;
1357           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1358
1359           sum0 = ip0->checksum;
1360           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1361                                  ip4_header_t,
1362                                  src_address /* changed member */);
1363           ip0->checksum = ip_csum_fold (sum0);
1364
1365           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1366             {
1367               old_port0 = tcp0->src_port;
1368               tcp0->src_port = s0->out2in.port;
1369               new_port0 = tcp0->src_port;
1370
1371               sum0 = tcp0->checksum;
1372               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1373                                      ip4_header_t,
1374                                      dst_address /* changed member */);
1375               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1376                                      ip4_header_t /* cheat */,
1377                                      length /* changed member */);
1378               tcp0->checksum = ip_csum_fold(sum0);
1379             }
1380           else
1381             {
1382               old_port0 = udp0->src_port;
1383               udp0->src_port = s0->out2in.port;
1384               udp0->checksum = 0;
1385             }
1386
1387           /* Hairpinning */
1388           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1389
1390           /* Accounting */
1391           s0->last_heard = now;
1392           s0->total_pkts++;
1393           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1394           /* Per-user LRU list maintenance for dynamic translation */
1395           if (!snat_is_session_static (s0))
1396             {
1397               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1398                                  s0->per_user_index);
1399               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1400                                   s0->per_user_list_head_index,
1401                                   s0->per_user_index);
1402             }
1403
1404         trace0:
1405           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1406                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1407             {
1408               snat_in2out_trace_t *t = 
1409                  vlib_add_trace (vm, node, b0, sizeof (*t));
1410               t->is_slow_path = is_slow_path;
1411               t->sw_if_index = sw_if_index0;
1412               t->next_index = next0;
1413                   t->session_index = ~0;
1414               if (s0)
1415                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1416             }
1417
1418           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1419
1420           /* verify speculative enqueue, maybe switch current next frame */
1421           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1422                                            to_next, n_left_to_next,
1423                                            bi0, next0);
1424         }
1425
1426       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1427     }
1428
1429   vlib_node_increment_counter (vm, stats_node_index, 
1430                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, 
1431                                pkts_processed);
1432   return frame->n_vectors;
1433 }
1434
1435 static uword
1436 snat_in2out_fast_path_fn (vlib_main_t * vm,
1437                           vlib_node_runtime_t * node,
1438                           vlib_frame_t * frame)
1439 {
1440   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */);
1441 }
1442
1443 VLIB_REGISTER_NODE (snat_in2out_node) = {
1444   .function = snat_in2out_fast_path_fn,
1445   .name = "snat-in2out",
1446   .vector_size = sizeof (u32),
1447   .format_trace = format_snat_in2out_trace,
1448   .type = VLIB_NODE_TYPE_INTERNAL,
1449   
1450   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1451   .error_strings = snat_in2out_error_strings,
1452
1453   .runtime_data_bytes = sizeof (snat_runtime_t),
1454   
1455   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1456
1457   /* edit / add dispositions here */
1458   .next_nodes = {
1459     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1460     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1461     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1462     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1463   },
1464 };
1465
1466 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1467
1468 static uword
1469 snat_in2out_slow_path_fn (vlib_main_t * vm,
1470                           vlib_node_runtime_t * node,
1471                           vlib_frame_t * frame)
1472 {
1473   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */);
1474 }
1475
1476 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1477   .function = snat_in2out_slow_path_fn,
1478   .name = "snat-in2out-slowpath",
1479   .vector_size = sizeof (u32),
1480   .format_trace = format_snat_in2out_trace,
1481   .type = VLIB_NODE_TYPE_INTERNAL,
1482   
1483   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1484   .error_strings = snat_in2out_error_strings,
1485
1486   .runtime_data_bytes = sizeof (snat_runtime_t),
1487   
1488   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1489
1490   /* edit / add dispositions here */
1491   .next_nodes = {
1492     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1493     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1494     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1495     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1496   },
1497 };
1498
1499 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
1500
1501 /**************************/
1502 /*** deterministic mode ***/
1503 /**************************/
1504 static uword
1505 snat_det_in2out_node_fn (vlib_main_t * vm,
1506                          vlib_node_runtime_t * node,
1507                          vlib_frame_t * frame)
1508 {
1509   u32 n_left_from, * from, * to_next;
1510   snat_in2out_next_t next_index;
1511   u32 pkts_processed = 0;
1512   snat_main_t * sm = &snat_main;
1513   u32 now = (u32) vlib_time_now (vm);
1514   u32 thread_index = os_get_cpu_number ();
1515
1516   from = vlib_frame_vector_args (frame);
1517   n_left_from = frame->n_vectors;
1518   next_index = node->cached_next_index;
1519
1520   while (n_left_from > 0)
1521     {
1522       u32 n_left_to_next;
1523
1524       vlib_get_next_frame (vm, node, next_index,
1525                            to_next, n_left_to_next);
1526
1527       while (n_left_from >= 4 && n_left_to_next >= 2)
1528         {
1529           u32 bi0, bi1;
1530           vlib_buffer_t * b0, * b1;
1531           u32 next0, next1;
1532           u32 sw_if_index0, sw_if_index1;
1533           ip4_header_t * ip0, * ip1;
1534           ip_csum_t sum0, sum1;
1535           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
1536           u16 old_port0, new_port0, lo_port0, i0;
1537           u16 old_port1, new_port1, lo_port1, i1;
1538           udp_header_t * udp0, * udp1;
1539           tcp_header_t * tcp0, * tcp1;
1540           u32 proto0, proto1;
1541           snat_det_out_key_t key0, key1;
1542           snat_det_map_t * dm0, * dm1;
1543           snat_det_session_t * ses0 = 0, * ses1 = 0;
1544           u32 rx_fib_index0, rx_fib_index1;
1545           icmp46_header_t * icmp0, * icmp1;
1546
1547           /* Prefetch next iteration. */
1548           {
1549             vlib_buffer_t * p2, * p3;
1550
1551             p2 = vlib_get_buffer (vm, from[2]);
1552             p3 = vlib_get_buffer (vm, from[3]);
1553
1554             vlib_prefetch_buffer_header (p2, LOAD);
1555             vlib_prefetch_buffer_header (p3, LOAD);
1556
1557             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1558             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1559           }
1560
1561           /* speculatively enqueue b0 and b1 to the current next frame */
1562           to_next[0] = bi0 = from[0];
1563           to_next[1] = bi1 = from[1];
1564           from += 2;
1565           to_next += 2;
1566           n_left_from -= 2;
1567           n_left_to_next -= 2;
1568
1569           b0 = vlib_get_buffer (vm, bi0);
1570           b1 = vlib_get_buffer (vm, bi1);
1571
1572           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1573           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1574
1575           ip0 = vlib_buffer_get_current (b0);
1576           udp0 = ip4_next_header (ip0);
1577           tcp0 = (tcp_header_t *) udp0;
1578
1579           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1580
1581           if (PREDICT_FALSE(ip0->ttl == 1))
1582             {
1583               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1584               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1585                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1586                                            0);
1587               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1588               goto trace0;
1589             }
1590
1591           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1592
1593           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1594             {
1595               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1596               icmp0 = (icmp46_header_t *) udp0;
1597
1598               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
1599                                   rx_fib_index0, node, next0, thread_index,
1600                                   &ses0, &dm0);
1601               goto trace0;
1602             }
1603
1604           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
1605           if (PREDICT_FALSE(!dm0))
1606             {
1607               clib_warning("no match for internal host %U",
1608                            format_ip4_address, &ip0->src_address);
1609               next0 = SNAT_IN2OUT_NEXT_DROP;
1610               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1611               goto trace0;
1612             }
1613
1614           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
1615
1616           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src);
1617           if (PREDICT_FALSE(!ses0))
1618             {
1619               key0.ext_host_addr = ip0->dst_address;
1620               key0.ext_host_port = tcp0->dst;
1621               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
1622                 {
1623                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
1624                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
1625
1626                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
1627                     continue;
1628
1629                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
1630                   break;
1631                 }
1632               if (PREDICT_FALSE(!ses0))
1633                 {
1634                   next0 = SNAT_IN2OUT_NEXT_DROP;
1635                   b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
1636                   goto trace0;
1637                 }
1638             }
1639
1640           new_port0 = ses0->out.out_port;
1641
1642           old_addr0.as_u32 = ip0->src_address.as_u32;
1643           ip0->src_address.as_u32 = new_addr0.as_u32;
1644           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1645
1646           sum0 = ip0->checksum;
1647           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1648                                  ip4_header_t,
1649                                  src_address /* changed member */);
1650           ip0->checksum = ip_csum_fold (sum0);
1651
1652           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1653             {
1654               if (tcp0->flags & TCP_FLAG_SYN)
1655                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
1656               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
1657                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1658               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1659                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
1660               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
1661                 snat_det_ses_close(dm0, ses0);
1662               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1663                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
1664               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
1665                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1666
1667               old_port0 = tcp0->src;
1668               tcp0->src = new_port0;
1669
1670               sum0 = tcp0->checksum;
1671               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1672                                      ip4_header_t,
1673                                      dst_address /* changed member */);
1674               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1675                                      ip4_header_t /* cheat */,
1676                                      length /* changed member */);
1677               tcp0->checksum = ip_csum_fold(sum0);
1678             }
1679           else
1680             {
1681               ses0->state = SNAT_SESSION_UDP_ACTIVE;
1682               old_port0 = udp0->src_port;
1683               udp0->src_port = new_port0;
1684               udp0->checksum = 0;
1685             }
1686
1687           switch(ses0->state)
1688             {
1689             case SNAT_SESSION_UDP_ACTIVE:
1690                 ses0->expire = now + SNAT_UDP_TIMEOUT;
1691                 break;
1692             case SNAT_SESSION_TCP_SYN_SENT:
1693             case SNAT_SESSION_TCP_FIN_WAIT:
1694             case SNAT_SESSION_TCP_CLOSE_WAIT:
1695             case SNAT_SESSION_TCP_LAST_ACK:
1696                 ses0->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT;
1697                 break;
1698             case SNAT_SESSION_TCP_ESTABLISHED:
1699                 ses0->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT;
1700                 break;
1701             }
1702
1703         trace0:
1704           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1705                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1706             {
1707               snat_in2out_trace_t *t =
1708                  vlib_add_trace (vm, node, b0, sizeof (*t));
1709               t->is_slow_path = 0;
1710               t->sw_if_index = sw_if_index0;
1711               t->next_index = next0;
1712               t->session_index = ~0;
1713               if (ses0)
1714                 t->session_index = ses0 - dm0->sessions;
1715             }
1716
1717           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1718
1719           ip1 = vlib_buffer_get_current (b1);
1720           udp1 = ip4_next_header (ip1);
1721           tcp1 = (tcp_header_t *) udp1;
1722
1723           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1724
1725           if (PREDICT_FALSE(ip1->ttl == 1))
1726             {
1727               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1728               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1729                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1730                                            0);
1731               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1732               goto trace1;
1733             }
1734
1735           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1736
1737           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
1738             {
1739               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
1740               icmp1 = (icmp46_header_t *) udp1;
1741
1742               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
1743                                   rx_fib_index1, node, next1, thread_index,
1744                                   &ses1, &dm1);
1745               goto trace1;
1746             }
1747
1748           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
1749           if (PREDICT_FALSE(!dm1))
1750             {
1751               clib_warning("no match for internal host %U",
1752                            format_ip4_address, &ip0->src_address);
1753               next1 = SNAT_IN2OUT_NEXT_DROP;
1754               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1755               goto trace1;
1756             }
1757
1758           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
1759
1760           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src);
1761           if (PREDICT_FALSE(!ses1))
1762             {
1763               key1.ext_host_addr = ip1->dst_address;
1764               key1.ext_host_port = tcp1->dst;
1765               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
1766                 {
1767                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
1768                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
1769
1770                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
1771                     continue;
1772
1773                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
1774                   break;
1775                 }
1776               if (PREDICT_FALSE(!ses1))
1777                 {
1778                   next1 = SNAT_IN2OUT_NEXT_DROP;
1779                   b1->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
1780                   goto trace1;
1781                 }
1782             }
1783
1784           new_port1 = ses1->out.out_port;
1785
1786           old_addr1.as_u32 = ip1->src_address.as_u32;
1787           ip1->src_address.as_u32 = new_addr1.as_u32;
1788           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1789
1790           sum1 = ip1->checksum;
1791           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1792                                  ip4_header_t,
1793                                  src_address /* changed member */);
1794           ip1->checksum = ip_csum_fold (sum1);
1795
1796           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1797             {
1798               if (tcp1->flags & TCP_FLAG_SYN)
1799                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
1800               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
1801                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
1802               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
1803                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
1804               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
1805                 snat_det_ses_close(dm1, ses1);
1806               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1807                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
1808               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
1809                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
1810
1811               old_port1 = tcp1->src;
1812               tcp1->src = new_port1;
1813
1814               sum1 = tcp1->checksum;
1815               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1816                                      ip4_header_t,
1817                                      dst_address /* changed member */);
1818               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1819                                      ip4_header_t /* cheat */,
1820                                      length /* changed member */);
1821               tcp1->checksum = ip_csum_fold(sum1);
1822             }
1823           else
1824             {
1825               ses1->state = SNAT_SESSION_UDP_ACTIVE;
1826               old_port1 = udp1->src_port;
1827               udp1->src_port = new_port1;
1828               udp1->checksum = 0;
1829             }
1830
1831           switch(ses1->state)
1832             {
1833             case SNAT_SESSION_UDP_ACTIVE:
1834                 ses1->expire = now + SNAT_UDP_TIMEOUT;
1835                 break;
1836             case SNAT_SESSION_TCP_SYN_SENT:
1837             case SNAT_SESSION_TCP_FIN_WAIT:
1838             case SNAT_SESSION_TCP_CLOSE_WAIT:
1839             case SNAT_SESSION_TCP_LAST_ACK:
1840                 ses1->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT;
1841                 break;
1842             case SNAT_SESSION_TCP_ESTABLISHED:
1843                 ses1->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT;
1844                 break;
1845             }
1846
1847         trace1:
1848           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1849                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1850             {
1851               snat_in2out_trace_t *t =
1852                  vlib_add_trace (vm, node, b1, sizeof (*t));
1853               t->is_slow_path = 0;
1854               t->sw_if_index = sw_if_index1;
1855               t->next_index = next1;
1856               t->session_index = ~0;
1857               if (ses1)
1858                 t->session_index = ses1 - dm1->sessions;
1859             }
1860
1861           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1862
1863           /* verify speculative enqueues, maybe switch current next frame */
1864           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1865                                            to_next, n_left_to_next,
1866                                            bi0, bi1, next0, next1);
1867          }
1868
1869       while (n_left_from > 0 && n_left_to_next > 0)
1870         {
1871           u32 bi0;
1872           vlib_buffer_t * b0;
1873           u32 next0;
1874           u32 sw_if_index0;
1875           ip4_header_t * ip0;
1876           ip_csum_t sum0;
1877           ip4_address_t new_addr0, old_addr0;
1878           u16 old_port0, new_port0, lo_port0, i0;
1879           udp_header_t * udp0;
1880           tcp_header_t * tcp0;
1881           u32 proto0;
1882           snat_det_out_key_t key0;
1883           snat_det_map_t * dm0;
1884           snat_det_session_t * ses0 = 0;
1885           u32 rx_fib_index0;
1886           icmp46_header_t * icmp0;
1887
1888           /* speculatively enqueue b0 to the current next frame */
1889           bi0 = from[0];
1890           to_next[0] = bi0;
1891           from += 1;
1892           to_next += 1;
1893           n_left_from -= 1;
1894           n_left_to_next -= 1;
1895
1896           b0 = vlib_get_buffer (vm, bi0);
1897           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1898
1899           ip0 = vlib_buffer_get_current (b0);
1900           udp0 = ip4_next_header (ip0);
1901           tcp0 = (tcp_header_t *) udp0;
1902
1903           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1904
1905           if (PREDICT_FALSE(ip0->ttl == 1))
1906             {
1907               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1908               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1909                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1910                                            0);
1911               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1912               goto trace00;
1913             }
1914
1915           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1916
1917           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1918             {
1919               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1920               icmp0 = (icmp46_header_t *) udp0;
1921
1922               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
1923                                   rx_fib_index0, node, next0, thread_index,
1924                                   &ses0, &dm0);
1925               goto trace00;
1926             }
1927
1928           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
1929           if (PREDICT_FALSE(!dm0))
1930             {
1931               clib_warning("no match for internal host %U",
1932                            format_ip4_address, &ip0->src_address);
1933               next0 = SNAT_IN2OUT_NEXT_DROP;
1934               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1935               goto trace00;
1936             }
1937
1938           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
1939
1940           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src);
1941           if (PREDICT_FALSE(!ses0))
1942             {
1943               key0.ext_host_addr = ip0->dst_address;
1944               key0.ext_host_port = tcp0->dst;
1945               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
1946                 {
1947                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
1948                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
1949
1950                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
1951                     continue;
1952
1953                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
1954                   break;
1955                 }
1956               if (PREDICT_FALSE(!ses0))
1957                 {
1958                   next0 = SNAT_IN2OUT_NEXT_DROP;
1959                   b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
1960                   goto trace00;
1961                 }
1962             }
1963
1964           new_port0 = ses0->out.out_port;
1965
1966           old_addr0.as_u32 = ip0->src_address.as_u32;
1967           ip0->src_address.as_u32 = new_addr0.as_u32;
1968           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1969
1970           sum0 = ip0->checksum;
1971           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1972                                  ip4_header_t,
1973                                  src_address /* changed member */);
1974           ip0->checksum = ip_csum_fold (sum0);
1975
1976           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1977             {
1978               if (tcp0->flags & TCP_FLAG_SYN)
1979                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
1980               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
1981                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1982               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1983                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
1984               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
1985                 snat_det_ses_close(dm0, ses0);
1986               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1987                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
1988               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
1989                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1990
1991               old_port0 = tcp0->src;
1992               tcp0->src = new_port0;
1993
1994               sum0 = tcp0->checksum;
1995               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1996                                      ip4_header_t,
1997                                      dst_address /* changed member */);
1998               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1999                                      ip4_header_t /* cheat */,
2000                                      length /* changed member */);
2001               tcp0->checksum = ip_csum_fold(sum0);
2002             }
2003           else
2004             {
2005               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2006               old_port0 = udp0->src_port;
2007               udp0->src_port = new_port0;
2008               udp0->checksum = 0;
2009             }
2010
2011           switch(ses0->state)
2012             {
2013             case SNAT_SESSION_UDP_ACTIVE:
2014                 ses0->expire = now + SNAT_UDP_TIMEOUT;
2015                 break;
2016             case SNAT_SESSION_TCP_SYN_SENT:
2017             case SNAT_SESSION_TCP_FIN_WAIT:
2018             case SNAT_SESSION_TCP_CLOSE_WAIT:
2019             case SNAT_SESSION_TCP_LAST_ACK:
2020                 ses0->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT;
2021                 break;
2022             case SNAT_SESSION_TCP_ESTABLISHED:
2023                 ses0->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT;
2024                 break;
2025             }
2026
2027         trace00:
2028           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2029                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2030             {
2031               snat_in2out_trace_t *t =
2032                  vlib_add_trace (vm, node, b0, sizeof (*t));
2033               t->is_slow_path = 0;
2034               t->sw_if_index = sw_if_index0;
2035               t->next_index = next0;
2036               t->session_index = ~0;
2037               if (ses0)
2038                 t->session_index = ses0 - dm0->sessions;
2039             }
2040
2041           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2042
2043           /* verify speculative enqueue, maybe switch current next frame */
2044           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2045                                            to_next, n_left_to_next,
2046                                            bi0, next0);
2047         }
2048
2049       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2050     }
2051
2052   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
2053                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2054                                pkts_processed);
2055   return frame->n_vectors;
2056 }
2057
2058 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
2059   .function = snat_det_in2out_node_fn,
2060   .name = "snat-det-in2out",
2061   .vector_size = sizeof (u32),
2062   .format_trace = format_snat_in2out_trace,
2063   .type = VLIB_NODE_TYPE_INTERNAL,
2064
2065   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2066   .error_strings = snat_in2out_error_strings,
2067
2068   .runtime_data_bytes = sizeof (snat_runtime_t),
2069
2070   .n_next_nodes = 3,
2071
2072   /* edit / add dispositions here */
2073   .next_nodes = {
2074     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2075     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2076     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2077   },
2078 };
2079
2080 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
2081
2082 /**
2083  * Get address and port values to be used for packet SNAT translation
2084  * and create session if needed
2085  *
2086  * @param[in,out] sm             SNAT main
2087  * @param[in,out] node           SNAT node runtime
2088  * @param[in] thread_index       thread index
2089  * @param[in,out] b0             buffer containing packet to be translated
2090  * @param[out] p_proto           protocol used for matching
2091  * @param[out] p_value           address and port after NAT translation
2092  * @param[out] p_dont_translate  if packet should not be translated
2093  * @param d                      optional parameter
2094  * @param e                      optional parameter
2095  */
2096 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
2097                           u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
2098                           snat_session_key_t *p_value,
2099                           u8 *p_dont_translate, void *d, void *e)
2100 {
2101   ip4_header_t *ip0;
2102   icmp46_header_t *icmp0;
2103   u32 sw_if_index0;
2104   u32 rx_fib_index0;
2105   u8 protocol;
2106   snat_det_out_key_t key0;
2107   u8 dont_translate = 0;
2108   u32 next0 = ~0;
2109   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2110   ip4_header_t *inner_ip0;
2111   void *l4_header = 0;
2112   icmp46_header_t *inner_icmp0;
2113   snat_det_map_t * dm0 = 0;
2114   ip4_address_t new_addr0;
2115   u16 lo_port0, i0;
2116   snat_det_session_t * ses0 = 0;
2117   ip4_address_t in_addr;
2118   u16 in_port;
2119
2120   ip0 = vlib_buffer_get_current (b0);
2121   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2122   echo0 = (icmp_echo_header_t *)(icmp0+1);
2123   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2124   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
2125
2126   if (!icmp_is_error_message (icmp0))
2127     {
2128       protocol = SNAT_PROTOCOL_ICMP;
2129       in_addr = ip0->src_address;
2130       in_port = echo0->identifier;
2131     }
2132   else
2133     {
2134       inner_ip0 = (ip4_header_t *)(echo0+1);
2135       l4_header = ip4_next_header (inner_ip0);
2136       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2137       in_addr = inner_ip0->dst_address;
2138       switch (protocol)
2139         {
2140         case SNAT_PROTOCOL_ICMP:
2141           inner_icmp0 = (icmp46_header_t*)l4_header;
2142           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2143           in_port = inner_echo0->identifier;
2144           break;
2145         case SNAT_PROTOCOL_UDP:
2146         case SNAT_PROTOCOL_TCP:
2147           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2148           break;
2149         default:
2150           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
2151           next0 = SNAT_IN2OUT_NEXT_DROP;
2152           goto out;
2153         }
2154     }
2155
2156   dm0 = snat_det_map_by_user(sm, &in_addr);
2157   if (PREDICT_FALSE(!dm0))
2158     {
2159       clib_warning("no match for internal host %U",
2160                    format_ip4_address, &in_addr);
2161       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2162           IP_PROTOCOL_ICMP, rx_fib_index0)))
2163         {
2164           dont_translate = 1;
2165           goto out;
2166         }
2167       next0 = SNAT_IN2OUT_NEXT_DROP;
2168       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2169       goto out;
2170     }
2171
2172   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
2173
2174   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port);
2175   if (PREDICT_FALSE(!ses0))
2176     {
2177       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2178           IP_PROTOCOL_ICMP, rx_fib_index0)))
2179         {
2180           dont_translate = 1;
2181           goto out;
2182         }
2183       if (icmp0->type != ICMP4_echo_request)
2184         {
2185           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2186           next0 = SNAT_IN2OUT_NEXT_DROP;
2187           goto out;
2188         }
2189       key0.ext_host_addr = ip0->dst_address;
2190       key0.ext_host_port = 0;
2191       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2192         {
2193           key0.out_port = clib_host_to_net_u16 (lo_port0 +
2194             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
2195
2196           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
2197             continue;
2198
2199           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
2200           break;
2201         }
2202       if (PREDICT_FALSE(!ses0))
2203         {
2204           next0 = SNAT_IN2OUT_NEXT_DROP;
2205           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
2206           goto out;
2207         }
2208     }
2209
2210   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
2211                     !icmp_is_error_message (icmp0)))
2212     {
2213       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2214       next0 = SNAT_IN2OUT_NEXT_DROP;
2215       goto out;
2216     }
2217
2218 out:
2219   *p_proto = protocol;
2220   if (ses0)
2221     {
2222       p_value->addr = new_addr0;
2223       p_value->fib_index = sm->outside_fib_index;
2224       p_value->port = ses0->out.out_port;
2225     }
2226   *p_dont_translate = dont_translate;
2227   if (d)
2228     *(snat_det_session_t**)d = ses0;
2229   if (e)
2230     *(snat_det_map_t**)e = dm0;
2231   return next0;
2232 }
2233
2234 /**********************/
2235 /*** worker handoff ***/
2236 /**********************/
2237 static uword
2238 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
2239                                vlib_node_runtime_t * node,
2240                                vlib_frame_t * frame)
2241 {
2242   snat_main_t *sm = &snat_main;
2243   vlib_thread_main_t *tm = vlib_get_thread_main ();
2244   u32 n_left_from, *from, *to_next = 0;
2245   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
2246   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
2247     = 0;
2248   vlib_frame_queue_elt_t *hf = 0;
2249   vlib_frame_t *f = 0;
2250   int i;
2251   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
2252   u32 next_worker_index = 0;
2253   u32 current_worker_index = ~0;
2254   u32 thread_index = vlib_get_thread_index ();
2255
2256   ASSERT (vec_len (sm->workers));
2257
2258   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
2259     {
2260       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
2261
2262       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
2263                                sm->first_worker_index + sm->num_workers - 1,
2264                                (vlib_frame_queue_t *) (~0));
2265     }
2266
2267   from = vlib_frame_vector_args (frame);
2268   n_left_from = frame->n_vectors;
2269
2270   while (n_left_from > 0)
2271     {
2272       u32 bi0;
2273       vlib_buffer_t *b0;
2274       u32 sw_if_index0;
2275       u32 rx_fib_index0;
2276       ip4_header_t * ip0;
2277       u8 do_handoff;
2278
2279       bi0 = from[0];
2280       from += 1;
2281       n_left_from -= 1;
2282
2283       b0 = vlib_get_buffer (vm, bi0);
2284
2285       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
2286       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2287
2288       ip0 = vlib_buffer_get_current (b0);
2289
2290       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
2291
2292       if (PREDICT_FALSE (next_worker_index != thread_index))
2293         {
2294           do_handoff = 1;
2295
2296           if (next_worker_index != current_worker_index)
2297             {
2298               if (hf)
2299                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2300
2301               hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
2302                                                       next_worker_index,
2303                                                       handoff_queue_elt_by_worker_index);
2304
2305               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2306               to_next_worker = &hf->buffer_index[hf->n_vectors];
2307               current_worker_index = next_worker_index;
2308             }
2309
2310           /* enqueue to correct worker thread */
2311           to_next_worker[0] = bi0;
2312           to_next_worker++;
2313           n_left_to_next_worker--;
2314
2315           if (n_left_to_next_worker == 0)
2316             {
2317               hf->n_vectors = VLIB_FRAME_SIZE;
2318               vlib_put_frame_queue_elt (hf);
2319               current_worker_index = ~0;
2320               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2321               hf = 0;
2322             }
2323         }
2324       else
2325         {
2326           do_handoff = 0;
2327           /* if this is 1st frame */
2328           if (!f)
2329             {
2330               f = vlib_get_frame_to_node (vm, sm->in2out_node_index);
2331               to_next = vlib_frame_vector_args (f);
2332             }
2333
2334           to_next[0] = bi0;
2335           to_next += 1;
2336           f->n_vectors++;
2337         }
2338
2339       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2340                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2341         {
2342           snat_in2out_worker_handoff_trace_t *t =
2343             vlib_add_trace (vm, node, b0, sizeof (*t));
2344           t->next_worker_index = next_worker_index;
2345           t->do_handoff = do_handoff;
2346         }
2347     }
2348
2349   if (f)
2350     vlib_put_frame_to_node (vm, sm->in2out_node_index, f);
2351
2352   if (hf)
2353     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2354
2355   /* Ship frames to the worker nodes */
2356   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2357     {
2358       if (handoff_queue_elt_by_worker_index[i])
2359         {
2360           hf = handoff_queue_elt_by_worker_index[i];
2361           /*
2362            * It works better to let the handoff node
2363            * rate-adapt, always ship the handoff queue element.
2364            */
2365           if (1 || hf->n_vectors == hf->last_n_vectors)
2366             {
2367               vlib_put_frame_queue_elt (hf);
2368               handoff_queue_elt_by_worker_index[i] = 0;
2369             }
2370           else
2371             hf->last_n_vectors = hf->n_vectors;
2372         }
2373       congested_handoff_queue_by_worker_index[i] =
2374         (vlib_frame_queue_t *) (~0);
2375     }
2376   hf = 0;
2377   current_worker_index = ~0;
2378   return frame->n_vectors;
2379 }
2380
2381 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
2382   .function = snat_in2out_worker_handoff_fn,
2383   .name = "snat-in2out-worker-handoff",
2384   .vector_size = sizeof (u32),
2385   .format_trace = format_snat_in2out_worker_handoff_trace,
2386   .type = VLIB_NODE_TYPE_INTERNAL,
2387   
2388   .n_next_nodes = 1,
2389
2390   .next_nodes = {
2391     [0] = "error-drop",
2392   },
2393 };
2394
2395 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
2396
2397 static uword
2398 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
2399                                 vlib_node_runtime_t * node,
2400                                 vlib_frame_t * frame)
2401 {
2402   u32 n_left_from, * from, * to_next;
2403   snat_in2out_next_t next_index;
2404   u32 pkts_processed = 0;
2405   snat_main_t * sm = &snat_main;
2406   u32 stats_node_index;
2407
2408   stats_node_index = snat_in2out_fast_node.index;
2409
2410   from = vlib_frame_vector_args (frame);
2411   n_left_from = frame->n_vectors;
2412   next_index = node->cached_next_index;
2413
2414   while (n_left_from > 0)
2415     {
2416       u32 n_left_to_next;
2417
2418       vlib_get_next_frame (vm, node, next_index,
2419                            to_next, n_left_to_next);
2420
2421       while (n_left_from > 0 && n_left_to_next > 0)
2422         {
2423           u32 bi0;
2424           vlib_buffer_t * b0;
2425           u32 next0;
2426           u32 sw_if_index0;
2427           ip4_header_t * ip0;
2428           ip_csum_t sum0;
2429           u32 new_addr0, old_addr0;
2430           u16 old_port0, new_port0;
2431           udp_header_t * udp0;
2432           tcp_header_t * tcp0;
2433           icmp46_header_t * icmp0;
2434           snat_session_key_t key0, sm0;
2435           u32 proto0;
2436           u32 rx_fib_index0;
2437
2438           /* speculatively enqueue b0 to the current next frame */
2439           bi0 = from[0];
2440           to_next[0] = bi0;
2441           from += 1;
2442           to_next += 1;
2443           n_left_from -= 1;
2444           n_left_to_next -= 1;
2445
2446           b0 = vlib_get_buffer (vm, bi0);
2447           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2448
2449           ip0 = vlib_buffer_get_current (b0);
2450           udp0 = ip4_next_header (ip0);
2451           tcp0 = (tcp_header_t *) udp0;
2452           icmp0 = (icmp46_header_t *) udp0;
2453
2454           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2455           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2456
2457           if (PREDICT_FALSE(ip0->ttl == 1))
2458             {
2459               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2460               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2461                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2462                                            0);
2463               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2464               goto trace0;
2465             }
2466
2467           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2468
2469           if (PREDICT_FALSE (proto0 == ~0))
2470               goto trace0;
2471
2472           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2473             {
2474               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2475                                   rx_fib_index0, node, next0, ~0, 0, 0);
2476               goto trace0;
2477             }
2478
2479           key0.addr = ip0->src_address;
2480           key0.port = udp0->src_port;
2481           key0.fib_index = rx_fib_index0;
2482
2483           if (snat_static_mapping_match(sm, key0, &sm0, 0))
2484             {
2485               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2486               next0= SNAT_IN2OUT_NEXT_DROP;
2487               goto trace0;
2488             }
2489
2490           new_addr0 = sm0.addr.as_u32;
2491           new_port0 = sm0.port;
2492           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2493           old_addr0 = ip0->src_address.as_u32;
2494           ip0->src_address.as_u32 = new_addr0;
2495
2496           sum0 = ip0->checksum;
2497           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2498                                  ip4_header_t,
2499                                  src_address /* changed member */);
2500           ip0->checksum = ip_csum_fold (sum0);
2501
2502           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
2503             {
2504               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2505                 {
2506                   old_port0 = tcp0->src_port;
2507                   tcp0->src_port = new_port0;
2508
2509                   sum0 = tcp0->checksum;
2510                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2511                                          ip4_header_t,
2512                                          dst_address /* changed member */);
2513                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2514                                          ip4_header_t /* cheat */,
2515                                          length /* changed member */);
2516                   tcp0->checksum = ip_csum_fold(sum0);
2517                 }
2518               else
2519                 {
2520                   old_port0 = udp0->src_port;
2521                   udp0->src_port = new_port0;
2522                   udp0->checksum = 0;
2523                 }
2524             }
2525           else
2526             {
2527               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2528                 {
2529                   sum0 = tcp0->checksum;
2530                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2531                                          ip4_header_t,
2532                                          dst_address /* changed member */);
2533                   tcp0->checksum = ip_csum_fold(sum0);
2534                 }
2535             }
2536
2537           /* Hairpinning */
2538           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
2539
2540         trace0:
2541           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2542                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2543             {
2544               snat_in2out_trace_t *t =
2545                  vlib_add_trace (vm, node, b0, sizeof (*t));
2546               t->sw_if_index = sw_if_index0;
2547               t->next_index = next0;
2548             }
2549
2550           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2551
2552           /* verify speculative enqueue, maybe switch current next frame */
2553           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2554                                            to_next, n_left_to_next,
2555                                            bi0, next0);
2556         }
2557
2558       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2559     }
2560
2561   vlib_node_increment_counter (vm, stats_node_index,
2562                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2563                                pkts_processed);
2564   return frame->n_vectors;
2565 }
2566
2567
2568 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
2569   .function = snat_in2out_fast_static_map_fn,
2570   .name = "snat-in2out-fast",
2571   .vector_size = sizeof (u32),
2572   .format_trace = format_snat_in2out_fast_trace,
2573   .type = VLIB_NODE_TYPE_INTERNAL,
2574   
2575   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2576   .error_strings = snat_in2out_error_strings,
2577
2578   .runtime_data_bytes = sizeof (snat_runtime_t),
2579   
2580   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2581
2582   /* edit / add dispositions here */
2583   .next_nodes = {
2584     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2585     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2586     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
2587     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2588   },
2589 };
2590
2591 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);