Support ping from outside network in 1:1 NAT (VPP-695)
[vpp.git] / src / plugins / snat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <snat/snat.h>
25 #include <snat/snat_ipfix_logging.h>
26 #include <snat/snat_det.h>
27
28 #include <vppinfra/hash.h>
29 #include <vppinfra/error.h>
30 #include <vppinfra/elog.h>
31
32 typedef struct {
33   u32 sw_if_index;
34   u32 next_index;
35   u32 session_index;
36   u32 is_slow_path;
37 } snat_in2out_trace_t;
38
39 typedef struct {
40   u32 next_worker_index;
41   u8 do_handoff;
42 } snat_in2out_worker_handoff_trace_t;
43
44 /* packet trace format function */
45 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
46 {
47   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
50   char * tag;
51
52   tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
53   
54   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
55               t->sw_if_index, t->next_index, t->session_index);
56
57   return s;
58 }
59
60 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
61 {
62   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
63   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
64   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
65
66   s = format (s, "SANT_IN2OUT_FAST: sw_if_index %d, next index %d", 
67               t->sw_if_index, t->next_index);
68
69   return s;
70 }
71
72 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
73 {
74   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
75   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
76   snat_in2out_worker_handoff_trace_t * t =
77     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
78   char * m;
79
80   m = t->do_handoff ? "next worker" : "same worker";
81   s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
82
83   return s;
84 }
85
86 vlib_node_registration_t snat_in2out_node;
87 vlib_node_registration_t snat_in2out_slowpath_node;
88 vlib_node_registration_t snat_in2out_fast_node;
89 vlib_node_registration_t snat_in2out_worker_handoff_node;
90 vlib_node_registration_t snat_det_in2out_node;
91
/*
 * Error counters of the in2out nodes, written as an X-macro list of
 * _(symbol, description) pairs. The enum and the string table below are
 * both expanded from this list, so their indices always agree.
 */
#define foreach_snat_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(IN2OUT_PACKETS, "Good in2out packets processed")      \
_(OUT_OF_PORTS, "Out of ports")                         \
_(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(NO_TRANSLATION, "No translation")

/* One SNAT_IN2OUT_ERROR_<symbol> per list entry, then the entry count. */
typedef enum
{
#define _(name, text) SNAT_IN2OUT_ERROR_##name,
  foreach_snat_in2out_error
#undef _
  SNAT_IN2OUT_N_ERROR,
} snat_in2out_error_t;

/* Description strings, indexed by snat_in2out_error_t. */
static char *snat_in2out_error_strings[] = {
#define _(name, text) text,
  foreach_snat_in2out_error
#undef _
};
112
/*
 * Next-node dispositions used by the in2out nodes. The values are used as
 * next indices, so the order must not change; SNAT_IN2OUT_N_NEXT is the
 * number of dispositions.
 */
typedef enum
{
  SNAT_IN2OUT_NEXT_LOOKUP,
  SNAT_IN2OUT_NEXT_DROP,
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_N_NEXT,
} snat_in2out_next_t;
120
121 /**
122  * @brief Check if packet should be translated
123  *
124  * Packets aimed at outside interface and external addresss with active session
125  * should be translated.
126  *
127  * @param sm            SNAT main
128  * @param rt            SNAT runtime data
129  * @param sw_if_index0  index of the inside interface
130  * @param ip0           IPv4 header
131  * @param proto0        SNAT protocol
132  * @param rx_fib_index0 RX FIB index
133  *
134  * @returns 0 if packet should be translated otherwise 1
135  */
136 static inline int
137 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
138                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
139                          u32 rx_fib_index0)
140 {
141   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
142   fib_prefix_t pfx = {
143     .fp_proto = FIB_PROTOCOL_IP4,
144     .fp_len = 32,
145     .fp_addr = {
146         .ip4.as_u32 = ip0->dst_address.as_u32,
147     },
148   };
149
150   /* Don't NAT packet aimed at the intfc address */
151   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
152                                       ip0->dst_address.as_u32)))
153     return 1;
154
155   fei = fib_table_lookup (rx_fib_index0, &pfx);
156   if (FIB_NODE_INDEX_INVALID != fei)
157     {
158       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
159       if (sw_if_index == ~0)
160         {
161           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
162           if (FIB_NODE_INDEX_INVALID != fei)
163             sw_if_index = fib_entry_get_resolving_interface (fei);
164         }
165       snat_interface_t *i;
166       pool_foreach (i, sm->interfaces,
167       ({
168         /* NAT packet aimed at outside interface */
169         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
170           return 0;
171       }));
172     }
173
174   return 1;
175 }
176
177 static inline int
178 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
179                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
180                     u32 rx_fib_index0)
181 {
182   udp_header_t * udp0 = ip4_next_header (ip0);
183   snat_session_key_t key0, sm0;
184   clib_bihash_kv_8_8_t kv0, value0;
185
186   key0.addr = ip0->dst_address;
187   key0.port = udp0->dst_port;
188   key0.protocol = proto0;
189   key0.fib_index = sm->outside_fib_index;
190   kv0.key = key0.as_u64;
191
192   /* NAT packet aimed at external address if */
193   /* has active sessions */
194   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
195     {
196       /* or is static mappings */
197       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
198         return 0;
199     }
200   else
201     return 0;
202
203   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
204                                  rx_fib_index0);
205 }
206
207 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
208                       ip4_header_t * ip0,
209                       u32 rx_fib_index0,
210                       snat_session_key_t * key0,
211                       snat_session_t ** sessionp,
212                       vlib_node_runtime_t * node,
213                       u32 next0,
214                       u32 thread_index)
215 {
216   snat_user_t *u;
217   snat_user_key_t user_key;
218   snat_session_t *s;
219   clib_bihash_kv_8_8_t kv0, value0;
220   u32 oldest_per_user_translation_list_index;
221   dlist_elt_t * oldest_per_user_translation_list_elt;
222   dlist_elt_t * per_user_translation_list_elt;
223   dlist_elt_t * per_user_list_head_elt;
224   u32 session_index;
225   snat_session_key_t key1;
226   u32 address_index = ~0;
227   u32 outside_fib_index;
228   uword * p;
229   snat_worker_key_t worker_by_out_key;
230
231   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
232   if (! p)
233     {
234       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
235       return SNAT_IN2OUT_NEXT_DROP;
236     }
237   outside_fib_index = p[0];
238
239   key1.protocol = key0->protocol;
240   user_key.addr = ip0->src_address;
241   user_key.fib_index = rx_fib_index0;
242   kv0.key = user_key.as_u64;
243   
244   /* Ever heard of the "user" = src ip4 address before? */
245   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
246     {
247       /* no, make a new one */
248       pool_get (sm->per_thread_data[thread_index].users, u);
249       memset (u, 0, sizeof (*u));
250       u->addr = ip0->src_address;
251       u->fib_index = rx_fib_index0;
252
253       pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);
254
255       u->sessions_per_user_list_head_index = per_user_list_head_elt -
256         sm->per_thread_data[thread_index].list_pool;
257
258       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
259                        u->sessions_per_user_list_head_index);
260
261       kv0.value = u - sm->per_thread_data[thread_index].users;
262
263       /* add user */
264       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
265     }
266   else
267     {
268       u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
269                              value0.value);
270     }
271
272   /* Over quota? Recycle the least recently used dynamic translation */
273   if (u->nsessions >= sm->max_translations_per_user)
274     {
275       /* Remove the oldest dynamic translation */
276       do {
277           oldest_per_user_translation_list_index =
278             clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
279                                     u->sessions_per_user_list_head_index);
280
281           ASSERT (oldest_per_user_translation_list_index != ~0);
282
283           /* add it back to the end of the LRU list */
284           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
285                               u->sessions_per_user_list_head_index,
286                               oldest_per_user_translation_list_index);
287           /* Get the list element */
288           oldest_per_user_translation_list_elt =
289             pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
290                                oldest_per_user_translation_list_index);
291
292           /* Get the session index from the list element */
293           session_index = oldest_per_user_translation_list_elt->value;
294
295           /* Get the session */
296           s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
297                                  session_index);
298       } while (snat_is_session_static (s));
299
300       /* Remove in2out, out2in keys */
301       kv0.key = s->in2out.as_u64;
302       if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
303           clib_warning ("in2out key delete failed");
304       kv0.key = s->out2in.as_u64;
305       if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
306           clib_warning ("out2in key delete failed");
307
308       /* log NAT event */
309       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
310                                           s->out2in.addr.as_u32,
311                                           s->in2out.protocol,
312                                           s->in2out.port,
313                                           s->out2in.port,
314                                           s->in2out.fib_index);
315
316       snat_free_outside_address_and_port 
317         (sm, &s->out2in, s->outside_address_index);
318       s->outside_address_index = ~0;
319
320       if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
321                                                &address_index))
322         {
323           ASSERT(0);
324
325           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
326           return SNAT_IN2OUT_NEXT_DROP;
327         }
328       s->outside_address_index = address_index;
329     }
330   else
331     {
332       u8 static_mapping = 1;
333
334       /* First try to match static mapping by local address and port */
335       if (snat_static_mapping_match (sm, *key0, &key1, 0, 0))
336         {
337           static_mapping = 0;
338           /* Try to create dynamic translation */
339           if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
340                                                    &address_index))
341             {
342               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
343               return SNAT_IN2OUT_NEXT_DROP;
344             }
345         }
346
347       /* Create a new session */
348       pool_get (sm->per_thread_data[thread_index].sessions, s);
349       memset (s, 0, sizeof (*s));
350       
351       s->outside_address_index = address_index;
352
353       if (static_mapping)
354         {
355           u->nstaticsessions++;
356           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
357         }
358       else
359         {
360           u->nsessions++;
361         }
362
363       /* Create list elts */
364       pool_get (sm->per_thread_data[thread_index].list_pool,
365                 per_user_translation_list_elt);
366       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
367                        per_user_translation_list_elt -
368                        sm->per_thread_data[thread_index].list_pool);
369
370       per_user_translation_list_elt->value =
371         s - sm->per_thread_data[thread_index].sessions;
372       s->per_user_index = per_user_translation_list_elt -
373                           sm->per_thread_data[thread_index].list_pool;
374       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
375
376       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
377                           s->per_user_list_head_index,
378                           per_user_translation_list_elt -
379                           sm->per_thread_data[thread_index].list_pool);
380    }
381   
382   s->in2out = *key0;
383   s->out2in = key1;
384   s->out2in.protocol = key0->protocol;
385   s->out2in.fib_index = outside_fib_index;
386   *sessionp = s;
387
388   /* Add to translation hashes */
389   kv0.key = s->in2out.as_u64;
390   kv0.value = s - sm->per_thread_data[thread_index].sessions;
391   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
392       clib_warning ("in2out key add failed");
393   
394   kv0.key = s->out2in.as_u64;
395   kv0.value = s - sm->per_thread_data[thread_index].sessions;
396   
397   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
398       clib_warning ("out2in key add failed");
399
400   /* Add to translated packets worker lookup */
401   worker_by_out_key.addr = s->out2in.addr;
402   worker_by_out_key.port = s->out2in.port;
403   worker_by_out_key.fib_index = s->out2in.fib_index;
404   kv0.key = worker_by_out_key.as_u64;
405   kv0.value = thread_index;
406   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
407
408   /* log NAT event */
409   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
410                                       s->out2in.addr.as_u32,
411                                       s->in2out.protocol,
412                                       s->in2out.port,
413                                       s->out2in.port,
414                                       s->in2out.fib_index);
415   return next0;
416 }
417
418 static_always_inline
419 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
420                                  snat_session_key_t *p_key0)
421 {
422   icmp46_header_t *icmp0;
423   snat_session_key_t key0;
424   icmp_echo_header_t *echo0, *inner_echo0 = 0;
425   ip4_header_t *inner_ip0 = 0;
426   void *l4_header = 0;
427   icmp46_header_t *inner_icmp0;
428
429   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
430   echo0 = (icmp_echo_header_t *)(icmp0+1);
431
432   if (!icmp_is_error_message (icmp0))
433     {
434       key0.protocol = SNAT_PROTOCOL_ICMP;
435       key0.addr = ip0->src_address;
436       key0.port = echo0->identifier;
437     }
438   else
439     {
440       inner_ip0 = (ip4_header_t *)(echo0+1);
441       l4_header = ip4_next_header (inner_ip0);
442       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
443       key0.addr = inner_ip0->dst_address;
444       switch (key0.protocol)
445         {
446         case SNAT_PROTOCOL_ICMP:
447           inner_icmp0 = (icmp46_header_t*)l4_header;
448           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
449           key0.port = inner_echo0->identifier;
450           break;
451         case SNAT_PROTOCOL_UDP:
452         case SNAT_PROTOCOL_TCP:
453           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
454           break;
455         default:
456           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
457         }
458     }
459   *p_key0 = key0;
460   return -1; /* success */
461 }
462
463 /**
464  * Get address and port values to be used for packet SNAT translation
465  * and create session if needed
466  *
467  * @param[in,out] sm             SNAT main
468  * @param[in,out] node           SNAT node runtime
469  * @param[in] thread_index       thread index
470  * @param[in,out] b0             buffer containing packet to be translated
471  * @param[out] p_proto           protocol used for matching
472  * @param[out] p_value           address and port after NAT translation
473  * @param[out] p_dont_translate  if packet should not be translated
474  * @param d                      optional parameter
475  * @param e                      optional parameter
476  */
477 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
478                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
479                            snat_session_key_t *p_value,
480                            u8 *p_dont_translate, void *d, void *e)
481 {
482   ip4_header_t *ip0;
483   icmp46_header_t *icmp0;
484   u32 sw_if_index0;
485   u32 rx_fib_index0;
486   snat_session_key_t key0;
487   snat_session_t *s0 = 0;
488   u8 dont_translate = 0;
489   clib_bihash_kv_8_8_t kv0, value0;
490   u32 next0 = ~0;
491   int err;
492
493   ip0 = vlib_buffer_get_current (b0);
494   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
495   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
496   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
497
498   err = icmp_get_key (ip0, &key0);
499   if (err != -1)
500     {
501       b0->error = node->errors[err];
502       next0 = SNAT_IN2OUT_NEXT_DROP;
503       goto out;
504     }
505   key0.fib_index = rx_fib_index0;
506
507   kv0.key = key0.as_u64;
508
509   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
510     {
511       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
512           IP_PROTOCOL_ICMP, rx_fib_index0)))
513         {
514           dont_translate = 1;
515           goto out;
516         }
517
518       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request))
519         {
520           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
521           next0 = SNAT_IN2OUT_NEXT_DROP;
522           goto out;
523         }
524
525       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
526                          &s0, node, next0, thread_index);
527
528       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
529         goto out;
530     }
531   else
532     {
533       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
534                         icmp0->type != ICMP4_echo_reply &&
535                         !icmp_is_error_message (icmp0)))
536         {
537           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
538           next0 = SNAT_IN2OUT_NEXT_DROP;
539           goto out;
540         }
541
542       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
543                               value0.value);
544     }
545
546 out:
547   *p_proto = key0.protocol;
548   if (s0)
549     *p_value = s0->out2in;
550   *p_dont_translate = dont_translate;
551   if (d)
552     *(snat_session_t**)d = s0;
553   return next0;
554 }
555
556 /**
557  * Get address and port values to be used for packet SNAT translation
558  *
559  * @param[in] sm                 SNAT main
560  * @param[in,out] node           SNAT node runtime
561  * @param[in] thread_index       thread index
562  * @param[in,out] b0             buffer containing packet to be translated
563  * @param[out] p_proto           protocol used for matching
564  * @param[out] p_value           address and port after NAT translation
565  * @param[out] p_dont_translate  if packet should not be translated
566  * @param d                      optional parameter
567  * @param e                      optional parameter
568  */
569 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
570                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
571                            snat_session_key_t *p_value,
572                            u8 *p_dont_translate, void *d, void *e)
573 {
574   ip4_header_t *ip0;
575   icmp46_header_t *icmp0;
576   u32 sw_if_index0;
577   u32 rx_fib_index0;
578   snat_session_key_t key0;
579   snat_session_key_t sm0;
580   u8 dont_translate = 0;
581   u8 is_addr_only;
582   u32 next0 = ~0;
583   int err;
584
585   ip0 = vlib_buffer_get_current (b0);
586   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
587   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
588   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
589
590   err = icmp_get_key (ip0, &key0);
591   if (err != -1)
592     {
593       b0->error = node->errors[err];
594       next0 = SNAT_IN2OUT_NEXT_DROP;
595       goto out2;
596     }
597   key0.fib_index = rx_fib_index0;
598
599   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only))
600     {
601       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
602           IP_PROTOCOL_ICMP, rx_fib_index0)))
603         {
604           dont_translate = 1;
605           goto out;
606         }
607
608       if (icmp_is_error_message (icmp0))
609         {
610           next0 = SNAT_IN2OUT_NEXT_DROP;
611           goto out;
612         }
613
614       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
615       next0 = SNAT_IN2OUT_NEXT_DROP;
616       goto out;
617     }
618
619   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
620                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
621                     !icmp_is_error_message (icmp0)))
622     {
623       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
624       next0 = SNAT_IN2OUT_NEXT_DROP;
625       goto out;
626     }
627
628 out:
629   *p_value = sm0;
630 out2:
631   *p_proto = key0.protocol;
632   *p_dont_translate = dont_translate;
633   return next0;
634 }
635
636 static inline u32 icmp_in2out (snat_main_t *sm,
637                                vlib_buffer_t * b0,
638                                ip4_header_t * ip0,
639                                icmp46_header_t * icmp0,
640                                u32 sw_if_index0,
641                                u32 rx_fib_index0,
642                                vlib_node_runtime_t * node,
643                                u32 next0,
644                                u32 thread_index,
645                                void *d,
646                                void *e)
647 {
648   snat_session_key_t sm0;
649   u8 protocol;
650   icmp_echo_header_t *echo0, *inner_echo0 = 0;
651   ip4_header_t *inner_ip0;
652   void *l4_header = 0;
653   icmp46_header_t *inner_icmp0;
654   u8 dont_translate;
655   u32 new_addr0, old_addr0;
656   u16 old_id0, new_id0;
657   ip_csum_t sum0;
658   u16 checksum0;
659   u32 next0_tmp;
660
661   echo0 = (icmp_echo_header_t *)(icmp0+1);
662
663   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0,
664                                        &protocol, &sm0, &dont_translate, d, e);
665   if (next0_tmp != ~0)
666     next0 = next0_tmp;
667   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
668     goto out;
669
670   sum0 = ip_incremental_checksum (0, icmp0,
671                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
672   checksum0 = ~ip_csum_fold (sum0);
673   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
674     {
675       next0 = SNAT_IN2OUT_NEXT_DROP;
676       goto out;
677     }
678
679   old_addr0 = ip0->src_address.as_u32;
680   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
681   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
682
683   sum0 = ip0->checksum;
684   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
685                          src_address /* changed member */);
686   ip0->checksum = ip_csum_fold (sum0);
687   
688   if (!icmp_is_error_message (icmp0))
689     {
690       new_id0 = sm0.port;
691       if (PREDICT_FALSE(new_id0 != echo0->identifier))
692         {
693           old_id0 = echo0->identifier;
694           new_id0 = sm0.port;
695           echo0->identifier = new_id0;
696
697           sum0 = icmp0->checksum;
698           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
699                                  identifier);
700           icmp0->checksum = ip_csum_fold (sum0);
701         }
702     }
703   else
704     {
705       inner_ip0 = (ip4_header_t *)(echo0+1);
706       l4_header = ip4_next_header (inner_ip0);
707
708       if (!ip4_header_checksum_is_valid (inner_ip0))
709         {
710           next0 = SNAT_IN2OUT_NEXT_DROP;
711           goto out;
712         }
713
714       old_addr0 = inner_ip0->dst_address.as_u32;
715       inner_ip0->dst_address = sm0.addr;
716       new_addr0 = inner_ip0->dst_address.as_u32;
717
718       sum0 = icmp0->checksum;
719       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
720                              dst_address /* changed member */);
721       icmp0->checksum = ip_csum_fold (sum0);
722
723       switch (protocol)
724         {
725           case SNAT_PROTOCOL_ICMP:
726             inner_icmp0 = (icmp46_header_t*)l4_header;
727             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
728
729             old_id0 = inner_echo0->identifier;
730             new_id0 = sm0.port;
731             inner_echo0->identifier = new_id0;
732
733             sum0 = icmp0->checksum;
734             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
735                                    identifier);
736             icmp0->checksum = ip_csum_fold (sum0);
737             break;
738           case SNAT_PROTOCOL_UDP:
739           case SNAT_PROTOCOL_TCP:
740             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
741             new_id0 = sm0.port;
742             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
743
744             sum0 = icmp0->checksum;
745             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
746                                    dst_port);
747             icmp0->checksum = ip_csum_fold (sum0);
748             break;
749           default:
750             ASSERT(0);
751         }
752     }
753
754 out:
755   return next0;
756 }
757
758 /**
759  * @brief Hairpinning
760  *
761  * Hairpinning allows two endpoints on the internal side of the NAT to
762  * communicate even if they only use each other's external IP addresses
763  * and ports.
764  *
765  * @param sm     SNAT main.
766  * @param b0     Vlib buffer.
767  * @param ip0    IP header.
768  * @param udp0   UDP header.
769  * @param tcp0   TCP header.
770  * @param proto0 SNAT protocol.
771  */
772 static inline void
773 snat_hairpinning (snat_main_t *sm,
774                   vlib_buffer_t * b0,
775                   ip4_header_t * ip0,
776                   udp_header_t * udp0,
777                   tcp_header_t * tcp0,
778                   u32 proto0)
779 {
780   snat_session_key_t key0, sm0;
781   snat_worker_key_t k0;
782   snat_session_t * s0;
783   clib_bihash_kv_8_8_t kv0, value0;
784   ip_csum_t sum0;
785   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
786   u16 new_dst_port0, old_dst_port0;
787
788   key0.addr = ip0->dst_address;
789   key0.port = udp0->dst_port;
790   key0.protocol = proto0;
791   key0.fib_index = sm->outside_fib_index;
792   kv0.key = key0.as_u64;
793
794   /* Check if destination is in active sessions */
795   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
796     {
797       /* or static mappings */
798       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
799         {
800           new_dst_addr0 = sm0.addr.as_u32;
801           new_dst_port0 = sm0.port;
802           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
803         }
804     }
805   else
806     {
807       si = value0.value;
808       if (sm->num_workers > 1)
809         {
810           k0.addr = ip0->dst_address;
811           k0.port = udp0->dst_port;
812           k0.fib_index = sm->outside_fib_index;
813           kv0.key = k0.as_u64;
814           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
815             ASSERT(0);
816           else
817             ti = value0.value;
818         }
819       else
820         ti = sm->num_workers;
821
822       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
823       new_dst_addr0 = s0->in2out.addr.as_u32;
824       new_dst_port0 = s0->in2out.port;
825       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
826     }
827
828   /* Destination is behind the same NAT, use internal address and port */
829   if (new_dst_addr0)
830     {
831       old_dst_addr0 = ip0->dst_address.as_u32;
832       ip0->dst_address.as_u32 = new_dst_addr0;
833       sum0 = ip0->checksum;
834       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
835                              ip4_header_t, dst_address);
836       ip0->checksum = ip_csum_fold (sum0);
837
838       old_dst_port0 = tcp0->dst;
839       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
840         {
841           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
842             {
843               tcp0->dst = new_dst_port0;
844               sum0 = tcp0->checksum;
845               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
846                                      ip4_header_t, dst_address);
847               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
848                                      ip4_header_t /* cheat */, length);
849               tcp0->checksum = ip_csum_fold(sum0);
850             }
851           else
852             {
853               udp0->dst_port = new_dst_port0;
854               udp0->checksum = 0;
855             }
856         }
857     }
858 }
859
860 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
861                                          vlib_buffer_t * b0,
862                                          ip4_header_t * ip0,
863                                          icmp46_header_t * icmp0,
864                                          u32 sw_if_index0,
865                                          u32 rx_fib_index0,
866                                          vlib_node_runtime_t * node,
867                                          u32 next0,
868                                          f64 now,
869                                          u32 thread_index,
870                                          snat_session_t ** p_s0)
871 {
872   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
873                       next0, thread_index, p_s0, 0);
874   snat_session_t * s0 = *p_s0;
875   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
876     {
877       /* Accounting */
878       s0->last_heard = now;
879       s0->total_pkts++;
880       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
881       /* Per-user LRU list maintenance for dynamic translations */
882       if (!snat_is_session_static (s0))
883         {
884           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
885                              s0->per_user_index);
886           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
887                               s0->per_user_list_head_index,
888                               s0->per_user_index);
889         }
890     }
891   return next0;
892 }
893
894 static inline uword
895 snat_in2out_node_fn_inline (vlib_main_t * vm,
896                             vlib_node_runtime_t * node,
897                             vlib_frame_t * frame, int is_slow_path)
898 {
899   u32 n_left_from, * from, * to_next;
900   snat_in2out_next_t next_index;
901   u32 pkts_processed = 0;
902   snat_main_t * sm = &snat_main;
903   f64 now = vlib_time_now (vm);
904   u32 stats_node_index;
905   u32 thread_index = vlib_get_thread_index ();
906
907   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
908     snat_in2out_node.index;
909
910   from = vlib_frame_vector_args (frame);
911   n_left_from = frame->n_vectors;
912   next_index = node->cached_next_index;
913
914   while (n_left_from > 0)
915     {
916       u32 n_left_to_next;
917
918       vlib_get_next_frame (vm, node, next_index,
919                            to_next, n_left_to_next);
920
921       while (n_left_from >= 4 && n_left_to_next >= 2)
922         {
923           u32 bi0, bi1;
924           vlib_buffer_t * b0, * b1;
925           u32 next0, next1;
926           u32 sw_if_index0, sw_if_index1;
927           ip4_header_t * ip0, * ip1;
928           ip_csum_t sum0, sum1;
929           u32 new_addr0, old_addr0, new_addr1, old_addr1;
930           u16 old_port0, new_port0, old_port1, new_port1;
931           udp_header_t * udp0, * udp1;
932           tcp_header_t * tcp0, * tcp1;
933           icmp46_header_t * icmp0, * icmp1;
934           snat_session_key_t key0, key1;
935           u32 rx_fib_index0, rx_fib_index1;
936           u32 proto0, proto1;
937           snat_session_t * s0 = 0, * s1 = 0;
938           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
939           
940           /* Prefetch next iteration. */
941           {
942             vlib_buffer_t * p2, * p3;
943             
944             p2 = vlib_get_buffer (vm, from[2]);
945             p3 = vlib_get_buffer (vm, from[3]);
946             
947             vlib_prefetch_buffer_header (p2, LOAD);
948             vlib_prefetch_buffer_header (p3, LOAD);
949
950             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
951             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
952           }
953
954           /* speculatively enqueue b0 and b1 to the current next frame */
955           to_next[0] = bi0 = from[0];
956           to_next[1] = bi1 = from[1];
957           from += 2;
958           to_next += 2;
959           n_left_from -= 2;
960           n_left_to_next -= 2;
961           
962           b0 = vlib_get_buffer (vm, bi0);
963           b1 = vlib_get_buffer (vm, bi1);
964
965           ip0 = vlib_buffer_get_current (b0);
966           udp0 = ip4_next_header (ip0);
967           tcp0 = (tcp_header_t *) udp0;
968           icmp0 = (icmp46_header_t *) udp0;
969
970           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
971           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
972                                    sw_if_index0);
973
974           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
975
976           if (PREDICT_FALSE(ip0->ttl == 1))
977             {
978               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
979               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
980                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
981                                            0);
982               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
983               goto trace00;
984             }
985
986           proto0 = ip_proto_to_snat_proto (ip0->protocol);
987
988           /* Next configured feature, probably ip4-lookup */
989           if (is_slow_path)
990             {
991               if (PREDICT_FALSE (proto0 == ~0))
992                 goto trace00;
993               
994               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
995                 {
996                   next0 = icmp_in2out_slow_path 
997                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
998                      node, next0, now, thread_index, &s0);
999                   goto trace00;
1000                 }
1001             }
1002           else
1003             {
1004               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1005                 {
1006                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1007                   goto trace00;
1008                 }
1009             }
1010
1011           key0.addr = ip0->src_address;
1012           key0.port = udp0->src_port;
1013           key0.protocol = proto0;
1014           key0.fib_index = rx_fib_index0;
1015           
1016           kv0.key = key0.as_u64;
1017
1018           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
1019             {
1020               if (is_slow_path)
1021                 {
1022                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
1023                       proto0, rx_fib_index0)))
1024                     goto trace00;
1025
1026                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1027                                      &s0, node, next0, thread_index);
1028                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1029                     goto trace00;
1030                 }
1031               else
1032                 {
1033                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1034                   goto trace00;
1035                 }
1036             }
1037           else
1038             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1039                                     value0.value);
1040
1041           old_addr0 = ip0->src_address.as_u32;
1042           ip0->src_address = s0->out2in.addr;
1043           new_addr0 = ip0->src_address.as_u32;
1044           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1045
1046           sum0 = ip0->checksum;
1047           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1048                                  ip4_header_t,
1049                                  src_address /* changed member */);
1050           ip0->checksum = ip_csum_fold (sum0);
1051
1052           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1053             {
1054               old_port0 = tcp0->src_port;
1055               tcp0->src_port = s0->out2in.port;
1056               new_port0 = tcp0->src_port;
1057
1058               sum0 = tcp0->checksum;
1059               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1060                                      ip4_header_t,
1061                                      dst_address /* changed member */);
1062               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1063                                      ip4_header_t /* cheat */,
1064                                      length /* changed member */);
1065               tcp0->checksum = ip_csum_fold(sum0);
1066             }
1067           else
1068             {
1069               old_port0 = udp0->src_port;
1070               udp0->src_port = s0->out2in.port;
1071               udp0->checksum = 0;
1072             }
1073
1074           /* Hairpinning */
1075           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1076
1077           /* Accounting */
1078           s0->last_heard = now;
1079           s0->total_pkts++;
1080           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1081           /* Per-user LRU list maintenance for dynamic translation */
1082           if (!snat_is_session_static (s0))
1083             {
1084               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1085                                  s0->per_user_index);
1086               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1087                                   s0->per_user_list_head_index,
1088                                   s0->per_user_index);
1089             }
1090         trace00:
1091
1092           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1093                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1094             {
1095               snat_in2out_trace_t *t = 
1096                  vlib_add_trace (vm, node, b0, sizeof (*t));
1097               t->is_slow_path = is_slow_path;
1098               t->sw_if_index = sw_if_index0;
1099               t->next_index = next0;
1100                   t->session_index = ~0;
1101               if (s0)
1102                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1103             }
1104
1105           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1106
1107           ip1 = vlib_buffer_get_current (b1);
1108           udp1 = ip4_next_header (ip1);
1109           tcp1 = (tcp_header_t *) udp1;
1110           icmp1 = (icmp46_header_t *) udp1;
1111
1112           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1113           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1114                                    sw_if_index1);
1115
1116           if (PREDICT_FALSE(ip1->ttl == 1))
1117             {
1118               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1119               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1120                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1121                                            0);
1122               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1123               goto trace01;
1124             }
1125
1126           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1127
1128           /* Next configured feature, probably ip4-lookup */
1129           if (is_slow_path)
1130             {
1131               if (PREDICT_FALSE (proto1 == ~0))
1132                 goto trace01;
1133               
1134               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1135                 {
1136                   next1 = icmp_in2out_slow_path 
1137                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1138                      next1, now, thread_index, &s1);
1139                   goto trace01;
1140                 }
1141             }
1142           else
1143             {
1144               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1145                 {
1146                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1147                   goto trace01;
1148                 }
1149             }
1150
1151           key1.addr = ip1->src_address;
1152           key1.port = udp1->src_port;
1153           key1.protocol = proto1;
1154           key1.fib_index = rx_fib_index1;
1155           
1156           kv1.key = key1.as_u64;
1157
1158             if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
1159             {
1160               if (is_slow_path)
1161                 {
1162                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1, ip1,
1163                       proto1, rx_fib_index1)))
1164                     goto trace01;
1165
1166                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1167                                      &s1, node, next1, thread_index);
1168                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1169                     goto trace01;
1170                 }
1171               else
1172                 {
1173                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1174                   goto trace01;
1175                 }
1176             }
1177           else
1178             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1179                                     value1.value);
1180
1181           old_addr1 = ip1->src_address.as_u32;
1182           ip1->src_address = s1->out2in.addr;
1183           new_addr1 = ip1->src_address.as_u32;
1184           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1185
1186           sum1 = ip1->checksum;
1187           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1188                                  ip4_header_t,
1189                                  src_address /* changed member */);
1190           ip1->checksum = ip_csum_fold (sum1);
1191
1192           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1193             {
1194               old_port1 = tcp1->src_port;
1195               tcp1->src_port = s1->out2in.port;
1196               new_port1 = tcp1->src_port;
1197
1198               sum1 = tcp1->checksum;
1199               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1200                                      ip4_header_t,
1201                                      dst_address /* changed member */);
1202               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1203                                      ip4_header_t /* cheat */,
1204                                      length /* changed member */);
1205               tcp1->checksum = ip_csum_fold(sum1);
1206             }
1207           else
1208             {
1209               old_port1 = udp1->src_port;
1210               udp1->src_port = s1->out2in.port;
1211               udp1->checksum = 0;
1212             }
1213
1214           /* Hairpinning */
1215           snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
1216
1217           /* Accounting */
1218           s1->last_heard = now;
1219           s1->total_pkts++;
1220           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1221           /* Per-user LRU list maintenance for dynamic translation */
1222           if (!snat_is_session_static (s1))
1223             {
1224               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1225                                  s1->per_user_index);
1226               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1227                                   s1->per_user_list_head_index,
1228                                   s1->per_user_index);
1229             }
1230         trace01:
1231
1232           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1233                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
1234             {
1235               snat_in2out_trace_t *t = 
1236                  vlib_add_trace (vm, node, b1, sizeof (*t));
1237               t->sw_if_index = sw_if_index1;
1238               t->next_index = next1;
1239               t->session_index = ~0;
1240               if (s1)
1241                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1242             }
1243
1244           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1245
1246           /* verify speculative enqueues, maybe switch current next frame */
1247           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1248                                            to_next, n_left_to_next,
1249                                            bi0, bi1, next0, next1);
1250         }
1251
1252       while (n_left_from > 0 && n_left_to_next > 0)
1253         {
1254           u32 bi0;
1255           vlib_buffer_t * b0;
1256           u32 next0;
1257           u32 sw_if_index0;
1258           ip4_header_t * ip0;
1259           ip_csum_t sum0;
1260           u32 new_addr0, old_addr0;
1261           u16 old_port0, new_port0;
1262           udp_header_t * udp0;
1263           tcp_header_t * tcp0;
1264           icmp46_header_t * icmp0;
1265           snat_session_key_t key0;
1266           u32 rx_fib_index0;
1267           u32 proto0;
1268           snat_session_t * s0 = 0;
1269           clib_bihash_kv_8_8_t kv0, value0;
1270           
1271           /* speculatively enqueue b0 to the current next frame */
1272           bi0 = from[0];
1273           to_next[0] = bi0;
1274           from += 1;
1275           to_next += 1;
1276           n_left_from -= 1;
1277           n_left_to_next -= 1;
1278
1279           b0 = vlib_get_buffer (vm, bi0);
1280           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1281
1282           ip0 = vlib_buffer_get_current (b0);
1283           udp0 = ip4_next_header (ip0);
1284           tcp0 = (tcp_header_t *) udp0;
1285           icmp0 = (icmp46_header_t *) udp0;
1286
1287           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1288           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1289                                    sw_if_index0);
1290
1291           if (PREDICT_FALSE(ip0->ttl == 1))
1292             {
1293               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1294               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1295                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1296                                            0);
1297               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1298               goto trace0;
1299             }
1300
1301           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1302
1303           /* Next configured feature, probably ip4-lookup */
1304           if (is_slow_path)
1305             {
1306               if (PREDICT_FALSE (proto0 == ~0))
1307                 goto trace0;
1308               
1309               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1310                 {
1311                   next0 = icmp_in2out_slow_path 
1312                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1313                      next0, now, thread_index, &s0);
1314                   goto trace0;
1315                 }
1316             }
1317           else
1318             {
1319               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1320                 {
1321                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1322                   goto trace0;
1323                 }
1324             }
1325
1326           key0.addr = ip0->src_address;
1327           key0.port = udp0->src_port;
1328           key0.protocol = proto0;
1329           key0.fib_index = rx_fib_index0;
1330           
1331           kv0.key = key0.as_u64;
1332
1333           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
1334             {
1335               if (is_slow_path)
1336                 {
1337                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
1338                       proto0, rx_fib_index0)))
1339                     goto trace0;
1340
1341                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1342                                      &s0, node, next0, thread_index);
1343
1344                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1345                     goto trace0;
1346                 }
1347               else
1348                 {
1349                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1350                   goto trace0;
1351                 }
1352             }
1353           else
1354             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1355                                     value0.value);
1356
1357           old_addr0 = ip0->src_address.as_u32;
1358           ip0->src_address = s0->out2in.addr;
1359           new_addr0 = ip0->src_address.as_u32;
1360           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1361
1362           sum0 = ip0->checksum;
1363           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1364                                  ip4_header_t,
1365                                  src_address /* changed member */);
1366           ip0->checksum = ip_csum_fold (sum0);
1367
1368           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1369             {
1370               old_port0 = tcp0->src_port;
1371               tcp0->src_port = s0->out2in.port;
1372               new_port0 = tcp0->src_port;
1373
1374               sum0 = tcp0->checksum;
1375               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1376                                      ip4_header_t,
1377                                      dst_address /* changed member */);
1378               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1379                                      ip4_header_t /* cheat */,
1380                                      length /* changed member */);
1381               tcp0->checksum = ip_csum_fold(sum0);
1382             }
1383           else
1384             {
1385               old_port0 = udp0->src_port;
1386               udp0->src_port = s0->out2in.port;
1387               udp0->checksum = 0;
1388             }
1389
1390           /* Hairpinning */
1391           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1392
1393           /* Accounting */
1394           s0->last_heard = now;
1395           s0->total_pkts++;
1396           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1397           /* Per-user LRU list maintenance for dynamic translation */
1398           if (!snat_is_session_static (s0))
1399             {
1400               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1401                                  s0->per_user_index);
1402               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1403                                   s0->per_user_list_head_index,
1404                                   s0->per_user_index);
1405             }
1406
1407         trace0:
1408           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1409                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1410             {
1411               snat_in2out_trace_t *t = 
1412                  vlib_add_trace (vm, node, b0, sizeof (*t));
1413               t->is_slow_path = is_slow_path;
1414               t->sw_if_index = sw_if_index0;
1415               t->next_index = next0;
1416                   t->session_index = ~0;
1417               if (s0)
1418                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1419             }
1420
1421           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1422
1423           /* verify speculative enqueue, maybe switch current next frame */
1424           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1425                                            to_next, n_left_to_next,
1426                                            bi0, next0);
1427         }
1428
1429       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1430     }
1431
1432   vlib_node_increment_counter (vm, stats_node_index, 
1433                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, 
1434                                pkts_processed);
1435   return frame->n_vectors;
1436 }
1437
1438 static uword
1439 snat_in2out_fast_path_fn (vlib_main_t * vm,
1440                           vlib_node_runtime_t * node,
1441                           vlib_frame_t * frame)
1442 {
1443   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */);
1444 }
1445
1446 VLIB_REGISTER_NODE (snat_in2out_node) = {
1447   .function = snat_in2out_fast_path_fn,
1448   .name = "snat-in2out",
1449   .vector_size = sizeof (u32),
1450   .format_trace = format_snat_in2out_trace,
1451   .type = VLIB_NODE_TYPE_INTERNAL,
1452   
1453   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1454   .error_strings = snat_in2out_error_strings,
1455
1456   .runtime_data_bytes = sizeof (snat_runtime_t),
1457   
1458   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1459
1460   /* edit / add dispositions here */
1461   .next_nodes = {
1462     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1463     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1464     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1465     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1466   },
1467 };
1468
1469 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1470
1471 static uword
1472 snat_in2out_slow_path_fn (vlib_main_t * vm,
1473                           vlib_node_runtime_t * node,
1474                           vlib_frame_t * frame)
1475 {
1476   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */);
1477 }
1478
1479 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1480   .function = snat_in2out_slow_path_fn,
1481   .name = "snat-in2out-slowpath",
1482   .vector_size = sizeof (u32),
1483   .format_trace = format_snat_in2out_trace,
1484   .type = VLIB_NODE_TYPE_INTERNAL,
1485   
1486   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1487   .error_strings = snat_in2out_error_strings,
1488
1489   .runtime_data_bytes = sizeof (snat_runtime_t),
1490   
1491   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1492
1493   /* edit / add dispositions here */
1494   .next_nodes = {
1495     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1496     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1497     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1498     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1499   },
1500 };
1501
1502 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
1503
1504 /**************************/
1505 /*** deterministic mode ***/
1506 /**************************/
1507 static uword
1508 snat_det_in2out_node_fn (vlib_main_t * vm,
1509                          vlib_node_runtime_t * node,
1510                          vlib_frame_t * frame)
1511 {
1512   u32 n_left_from, * from, * to_next;
1513   snat_in2out_next_t next_index;
1514   u32 pkts_processed = 0;
1515   snat_main_t * sm = &snat_main;
1516   u32 now = (u32) vlib_time_now (vm);
1517   u32 thread_index = os_get_cpu_number ();
1518
1519   from = vlib_frame_vector_args (frame);
1520   n_left_from = frame->n_vectors;
1521   next_index = node->cached_next_index;
1522
1523   while (n_left_from > 0)
1524     {
1525       u32 n_left_to_next;
1526
1527       vlib_get_next_frame (vm, node, next_index,
1528                            to_next, n_left_to_next);
1529
1530       while (n_left_from >= 4 && n_left_to_next >= 2)
1531         {
1532           u32 bi0, bi1;
1533           vlib_buffer_t * b0, * b1;
1534           u32 next0, next1;
1535           u32 sw_if_index0, sw_if_index1;
1536           ip4_header_t * ip0, * ip1;
1537           ip_csum_t sum0, sum1;
1538           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
1539           u16 old_port0, new_port0, lo_port0, i0;
1540           u16 old_port1, new_port1, lo_port1, i1;
1541           udp_header_t * udp0, * udp1;
1542           tcp_header_t * tcp0, * tcp1;
1543           u32 proto0, proto1;
1544           snat_det_out_key_t key0, key1;
1545           snat_det_map_t * dm0, * dm1;
1546           snat_det_session_t * ses0 = 0, * ses1 = 0;
1547           u32 rx_fib_index0, rx_fib_index1;
1548           icmp46_header_t * icmp0, * icmp1;
1549
1550           /* Prefetch next iteration. */
1551           {
1552             vlib_buffer_t * p2, * p3;
1553
1554             p2 = vlib_get_buffer (vm, from[2]);
1555             p3 = vlib_get_buffer (vm, from[3]);
1556
1557             vlib_prefetch_buffer_header (p2, LOAD);
1558             vlib_prefetch_buffer_header (p3, LOAD);
1559
1560             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1561             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1562           }
1563
1564           /* speculatively enqueue b0 and b1 to the current next frame */
1565           to_next[0] = bi0 = from[0];
1566           to_next[1] = bi1 = from[1];
1567           from += 2;
1568           to_next += 2;
1569           n_left_from -= 2;
1570           n_left_to_next -= 2;
1571
1572           b0 = vlib_get_buffer (vm, bi0);
1573           b1 = vlib_get_buffer (vm, bi1);
1574
1575           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1576           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1577
1578           ip0 = vlib_buffer_get_current (b0);
1579           udp0 = ip4_next_header (ip0);
1580           tcp0 = (tcp_header_t *) udp0;
1581
1582           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1583
1584           if (PREDICT_FALSE(ip0->ttl == 1))
1585             {
1586               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1587               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1588                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1589                                            0);
1590               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1591               goto trace0;
1592             }
1593
1594           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1595
1596           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1597             {
1598               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1599               icmp0 = (icmp46_header_t *) udp0;
1600
1601               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
1602                                   rx_fib_index0, node, next0, thread_index,
1603                                   &ses0, &dm0);
1604               goto trace0;
1605             }
1606
1607           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
1608           if (PREDICT_FALSE(!dm0))
1609             {
1610               clib_warning("no match for internal host %U",
1611                            format_ip4_address, &ip0->src_address);
1612               next0 = SNAT_IN2OUT_NEXT_DROP;
1613               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1614               goto trace0;
1615             }
1616
1617           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
1618
1619           key0.ext_host_addr = ip0->dst_address;
1620           key0.ext_host_port = tcp0->dst;
1621
1622           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
1623           if (PREDICT_FALSE(!ses0))
1624             {
1625               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
1626                 {
1627                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
1628                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
1629
1630                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
1631                     continue;
1632
1633                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
1634                   break;
1635                 }
1636               if (PREDICT_FALSE(!ses0))
1637                 {
1638                   next0 = SNAT_IN2OUT_NEXT_DROP;
1639                   b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
1640                   goto trace0;
1641                 }
1642             }
1643
1644           new_port0 = ses0->out.out_port;
1645
1646           old_addr0.as_u32 = ip0->src_address.as_u32;
1647           ip0->src_address.as_u32 = new_addr0.as_u32;
1648           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1649
1650           sum0 = ip0->checksum;
1651           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1652                                  ip4_header_t,
1653                                  src_address /* changed member */);
1654           ip0->checksum = ip_csum_fold (sum0);
1655
1656           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1657             {
1658               if (tcp0->flags & TCP_FLAG_SYN)
1659                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
1660               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
1661                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1662               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1663                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
1664               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
1665                 snat_det_ses_close(dm0, ses0);
1666               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1667                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
1668               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
1669                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1670
1671               old_port0 = tcp0->src;
1672               tcp0->src = new_port0;
1673
1674               sum0 = tcp0->checksum;
1675               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1676                                      ip4_header_t,
1677                                      dst_address /* changed member */);
1678               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1679                                      ip4_header_t /* cheat */,
1680                                      length /* changed member */);
1681               tcp0->checksum = ip_csum_fold(sum0);
1682             }
1683           else
1684             {
1685               ses0->state = SNAT_SESSION_UDP_ACTIVE;
1686               old_port0 = udp0->src_port;
1687               udp0->src_port = new_port0;
1688               udp0->checksum = 0;
1689             }
1690
1691           switch(ses0->state)
1692             {
1693             case SNAT_SESSION_UDP_ACTIVE:
1694                 ses0->expire = now + sm->udp_timeout;
1695                 break;
1696             case SNAT_SESSION_TCP_SYN_SENT:
1697             case SNAT_SESSION_TCP_FIN_WAIT:
1698             case SNAT_SESSION_TCP_CLOSE_WAIT:
1699             case SNAT_SESSION_TCP_LAST_ACK:
1700                 ses0->expire = now + sm->tcp_transitory_timeout;
1701                 break;
1702             case SNAT_SESSION_TCP_ESTABLISHED:
1703                 ses0->expire = now + sm->tcp_established_timeout;
1704                 break;
1705             }
1706
1707         trace0:
1708           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1709                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1710             {
1711               snat_in2out_trace_t *t =
1712                  vlib_add_trace (vm, node, b0, sizeof (*t));
1713               t->is_slow_path = 0;
1714               t->sw_if_index = sw_if_index0;
1715               t->next_index = next0;
1716               t->session_index = ~0;
1717               if (ses0)
1718                 t->session_index = ses0 - dm0->sessions;
1719             }
1720
1721           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1722
1723           ip1 = vlib_buffer_get_current (b1);
1724           udp1 = ip4_next_header (ip1);
1725           tcp1 = (tcp_header_t *) udp1;
1726
1727           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1728
1729           if (PREDICT_FALSE(ip1->ttl == 1))
1730             {
1731               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1732               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1733                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1734                                            0);
1735               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1736               goto trace1;
1737             }
1738
1739           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1740
1741           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
1742             {
1743               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
1744               icmp1 = (icmp46_header_t *) udp1;
1745
1746               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
1747                                   rx_fib_index1, node, next1, thread_index,
1748                                   &ses1, &dm1);
1749               goto trace1;
1750             }
1751
1752           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
1753           if (PREDICT_FALSE(!dm1))
1754             {
1755               clib_warning("no match for internal host %U",
1756                            format_ip4_address, &ip0->src_address);
1757               next1 = SNAT_IN2OUT_NEXT_DROP;
1758               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1759               goto trace1;
1760             }
1761
1762           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
1763
1764           key1.ext_host_addr = ip1->dst_address;
1765           key1.ext_host_port = tcp1->dst;
1766
1767           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
1768           if (PREDICT_FALSE(!ses1))
1769             {
1770               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
1771                 {
1772                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
1773                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
1774
1775                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
1776                     continue;
1777
1778                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
1779                   break;
1780                 }
1781               if (PREDICT_FALSE(!ses1))
1782                 {
1783                   next1 = SNAT_IN2OUT_NEXT_DROP;
1784                   b1->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
1785                   goto trace1;
1786                 }
1787             }
1788
1789           new_port1 = ses1->out.out_port;
1790
1791           old_addr1.as_u32 = ip1->src_address.as_u32;
1792           ip1->src_address.as_u32 = new_addr1.as_u32;
1793           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1794
1795           sum1 = ip1->checksum;
1796           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1797                                  ip4_header_t,
1798                                  src_address /* changed member */);
1799           ip1->checksum = ip_csum_fold (sum1);
1800
1801           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1802             {
1803               if (tcp1->flags & TCP_FLAG_SYN)
1804                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
1805               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
1806                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
1807               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
1808                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
1809               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
1810                 snat_det_ses_close(dm1, ses1);
1811               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1812                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
1813               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
1814                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
1815
1816               old_port1 = tcp1->src;
1817               tcp1->src = new_port1;
1818
1819               sum1 = tcp1->checksum;
1820               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1821                                      ip4_header_t,
1822                                      dst_address /* changed member */);
1823               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1824                                      ip4_header_t /* cheat */,
1825                                      length /* changed member */);
1826               tcp1->checksum = ip_csum_fold(sum1);
1827             }
1828           else
1829             {
1830               ses1->state = SNAT_SESSION_UDP_ACTIVE;
1831               old_port1 = udp1->src_port;
1832               udp1->src_port = new_port1;
1833               udp1->checksum = 0;
1834             }
1835
1836           switch(ses1->state)
1837             {
1838             case SNAT_SESSION_UDP_ACTIVE:
1839                 ses1->expire = now + sm->udp_timeout;
1840                 break;
1841             case SNAT_SESSION_TCP_SYN_SENT:
1842             case SNAT_SESSION_TCP_FIN_WAIT:
1843             case SNAT_SESSION_TCP_CLOSE_WAIT:
1844             case SNAT_SESSION_TCP_LAST_ACK:
1845                 ses1->expire = now + sm->tcp_transitory_timeout;
1846                 break;
1847             case SNAT_SESSION_TCP_ESTABLISHED:
1848                 ses1->expire = now + sm->tcp_established_timeout;
1849                 break;
1850             }
1851
1852         trace1:
1853           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1854                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1855             {
1856               snat_in2out_trace_t *t =
1857                  vlib_add_trace (vm, node, b1, sizeof (*t));
1858               t->is_slow_path = 0;
1859               t->sw_if_index = sw_if_index1;
1860               t->next_index = next1;
1861               t->session_index = ~0;
1862               if (ses1)
1863                 t->session_index = ses1 - dm1->sessions;
1864             }
1865
1866           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1867
1868           /* verify speculative enqueues, maybe switch current next frame */
1869           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1870                                            to_next, n_left_to_next,
1871                                            bi0, bi1, next0, next1);
1872          }
1873
1874       while (n_left_from > 0 && n_left_to_next > 0)
1875         {
1876           u32 bi0;
1877           vlib_buffer_t * b0;
1878           u32 next0;
1879           u32 sw_if_index0;
1880           ip4_header_t * ip0;
1881           ip_csum_t sum0;
1882           ip4_address_t new_addr0, old_addr0;
1883           u16 old_port0, new_port0, lo_port0, i0;
1884           udp_header_t * udp0;
1885           tcp_header_t * tcp0;
1886           u32 proto0;
1887           snat_det_out_key_t key0;
1888           snat_det_map_t * dm0;
1889           snat_det_session_t * ses0 = 0;
1890           u32 rx_fib_index0;
1891           icmp46_header_t * icmp0;
1892
1893           /* speculatively enqueue b0 to the current next frame */
1894           bi0 = from[0];
1895           to_next[0] = bi0;
1896           from += 1;
1897           to_next += 1;
1898           n_left_from -= 1;
1899           n_left_to_next -= 1;
1900
1901           b0 = vlib_get_buffer (vm, bi0);
1902           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1903
1904           ip0 = vlib_buffer_get_current (b0);
1905           udp0 = ip4_next_header (ip0);
1906           tcp0 = (tcp_header_t *) udp0;
1907
1908           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1909
1910           if (PREDICT_FALSE(ip0->ttl == 1))
1911             {
1912               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1913               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1914                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1915                                            0);
1916               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1917               goto trace00;
1918             }
1919
1920           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1921
1922           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1923             {
1924               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1925               icmp0 = (icmp46_header_t *) udp0;
1926
1927               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
1928                                   rx_fib_index0, node, next0, thread_index,
1929                                   &ses0, &dm0);
1930               goto trace00;
1931             }
1932
1933           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
1934           if (PREDICT_FALSE(!dm0))
1935             {
1936               clib_warning("no match for internal host %U",
1937                            format_ip4_address, &ip0->src_address);
1938               next0 = SNAT_IN2OUT_NEXT_DROP;
1939               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1940               goto trace00;
1941             }
1942
1943           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
1944
1945           key0.ext_host_addr = ip0->dst_address;
1946           key0.ext_host_port = tcp0->dst;
1947
1948           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
1949           if (PREDICT_FALSE(!ses0))
1950             {
1951               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
1952                 {
1953                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
1954                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
1955
1956                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
1957                     continue;
1958
1959                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
1960                   break;
1961                 }
1962               if (PREDICT_FALSE(!ses0))
1963                 {
1964                   next0 = SNAT_IN2OUT_NEXT_DROP;
1965                   b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
1966                   goto trace00;
1967                 }
1968             }
1969
1970           new_port0 = ses0->out.out_port;
1971
1972           old_addr0.as_u32 = ip0->src_address.as_u32;
1973           ip0->src_address.as_u32 = new_addr0.as_u32;
1974           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1975
1976           sum0 = ip0->checksum;
1977           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1978                                  ip4_header_t,
1979                                  src_address /* changed member */);
1980           ip0->checksum = ip_csum_fold (sum0);
1981
1982           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1983             {
1984               if (tcp0->flags & TCP_FLAG_SYN)
1985                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
1986               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
1987                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1988               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1989                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
1990               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
1991                 snat_det_ses_close(dm0, ses0);
1992               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1993                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
1994               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
1995                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1996
1997               old_port0 = tcp0->src;
1998               tcp0->src = new_port0;
1999
2000               sum0 = tcp0->checksum;
2001               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2002                                      ip4_header_t,
2003                                      dst_address /* changed member */);
2004               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2005                                      ip4_header_t /* cheat */,
2006                                      length /* changed member */);
2007               tcp0->checksum = ip_csum_fold(sum0);
2008             }
2009           else
2010             {
2011               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2012               old_port0 = udp0->src_port;
2013               udp0->src_port = new_port0;
2014               udp0->checksum = 0;
2015             }
2016
2017           switch(ses0->state)
2018             {
2019             case SNAT_SESSION_UDP_ACTIVE:
2020                 ses0->expire = now + sm->udp_timeout;
2021                 break;
2022             case SNAT_SESSION_TCP_SYN_SENT:
2023             case SNAT_SESSION_TCP_FIN_WAIT:
2024             case SNAT_SESSION_TCP_CLOSE_WAIT:
2025             case SNAT_SESSION_TCP_LAST_ACK:
2026                 ses0->expire = now + sm->tcp_transitory_timeout;
2027                 break;
2028             case SNAT_SESSION_TCP_ESTABLISHED:
2029                 ses0->expire = now + sm->tcp_established_timeout;
2030                 break;
2031             }
2032
2033         trace00:
2034           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2035                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2036             {
2037               snat_in2out_trace_t *t =
2038                  vlib_add_trace (vm, node, b0, sizeof (*t));
2039               t->is_slow_path = 0;
2040               t->sw_if_index = sw_if_index0;
2041               t->next_index = next0;
2042               t->session_index = ~0;
2043               if (ses0)
2044                 t->session_index = ses0 - dm0->sessions;
2045             }
2046
2047           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2048
2049           /* verify speculative enqueue, maybe switch current next frame */
2050           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2051                                            to_next, n_left_to_next,
2052                                            bi0, next0);
2053         }
2054
2055       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2056     }
2057
2058   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
2059                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2060                                pkts_processed);
2061   return frame->n_vectors;
2062 }
2063
/*
 * Registration of the "snat-det-in2out" graph node.
 * Binds snat_det_in2out_node_fn to the node, reuses the in2out trace
 * formatter and error strings, and declares three next nodes.
 */
2064 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
2065   .function = snat_det_in2out_node_fn,
2066   .name = "snat-det-in2out",
       /* frame elements are u32; the node function reads them as buffer indices */
2067   .vector_size = sizeof (u32),
2068   .format_trace = format_snat_in2out_trace,
2069   .type = VLIB_NODE_TYPE_INTERNAL,
2070
       /* error counters are indexed by the SNAT_IN2OUT_ERROR_* values used in the node function */
2071   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2072   .error_strings = snat_in2out_error_strings,
2073
2074   .runtime_data_bytes = sizeof (snat_runtime_t),
2075
       /* must match the number of entries in .next_nodes below */
2076   .n_next_nodes = 3,
2077
2078   /* next-node dispositions, indexed by SNAT_IN2OUT_NEXT_*; edit / add here */
2079   .next_nodes = {
2080     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2081     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2082     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2083   },
2084 };
2085
/* NOTE(review): presumably emits per-CPU-architecture variants of the node
 * function -- confirm against the macro definition. */
2086 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
2087
2088 /**
2089  * Get address and port values to be used for packet SNAT translation
2090  * and create session if needed (deterministic NAT, in2out direction, ICMP).
2091  *
       * For non-error ICMP messages the lookup uses the outer source address and
       * the echo identifier; for ICMP error messages it uses the destination
       * address and destination port / echo identifier of the embedded packet.
       * A new session is only created for echo requests.
       *
2092  * @param[in,out] sm             SNAT main
2093  * @param[in,out] node           SNAT node runtime
2094  * @param[in] thread_index       thread index (not referenced in this function)
2095  * @param[in,out] b0             buffer containing packet to be translated
2096  * @param[out] p_proto           protocol used for matching
2097  * @param[out] p_value           address and port after NAT translation;
       *                               only written when a session was found/created
2098  * @param[out] p_dont_translate  if packet should not be translated
2099  * @param[out] d                 optional; if non-NULL it is written as a
       *                               snat_det_session_t ** with the session (or 0)
2100  * @param[out] e                 optional; if non-NULL it is written as a
       *                               snat_det_map_t ** with the matched map (or 0)
       *
       * @return SNAT_IN2OUT_NEXT_DROP when the packet must be dropped (b0->error
       *         is set on those paths), otherwise ~0.
2101  */
2102 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
2103                           u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
2104                           snat_session_key_t *p_value,
2105                           u8 *p_dont_translate, void *d, void *e)
2106 {
2107   ip4_header_t *ip0;
2108   icmp46_header_t *icmp0;
2109   u32 sw_if_index0;
2110   u32 rx_fib_index0;
2111   u8 protocol;
2112   snat_det_out_key_t key0;
2113   u8 dont_translate = 0;
2114   u32 next0 = ~0;
2115   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2116   ip4_header_t *inner_ip0;
2117   void *l4_header = 0;
2118   icmp46_header_t *inner_icmp0;
2119   snat_det_map_t * dm0 = 0;
2120   ip4_address_t new_addr0;
2121   u16 lo_port0, i0;
2122   snat_det_session_t * ses0 = 0;
2123   ip4_address_t in_addr;
2124   u16 in_port;
2125
       /* Locate the outer IP, ICMP and echo headers and the RX FIB. */
2126   ip0 = vlib_buffer_get_current (b0);
2127   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2128   echo0 = (icmp_echo_header_t *)(icmp0+1);
2129   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2130   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
2131
       /* Choose the inside address/port used as the session lookup key. */
2132   if (!icmp_is_error_message (icmp0))
2133     {
           /* Non-error ICMP: key on outer source address and echo identifier. */
2134       protocol = SNAT_PROTOCOL_ICMP;
2135       in_addr = ip0->src_address;
2136       in_port = echo0->identifier;
2137     }
2138   else
2139     {
           /* ICMP error: the embedded IP header is taken to follow the echo
            * header; key on the embedded packet's destination address/port. */
2140       inner_ip0 = (ip4_header_t *)(echo0+1);
2141       l4_header = ip4_next_header (inner_ip0);
2142       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2143       in_addr = inner_ip0->dst_address;
2144       switch (protocol)
2145         {
2146         case SNAT_PROTOCOL_ICMP:
2147           inner_icmp0 = (icmp46_header_t*)l4_header;
2148           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2149           in_port = inner_echo0->identifier;
2150           break;
2151         case SNAT_PROTOCOL_UDP:
2152         case SNAT_PROTOCOL_TCP:
2153           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2154           break;
2155         default:
               /* Embedded protocol is none of ICMP/UDP/TCP: drop. */
2156           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
2157           next0 = SNAT_IN2OUT_NEXT_DROP;
2158           goto out;
2159         }
2160     }
2161
       /* Find the deterministic map that covers the inside address. */
2162   dm0 = snat_det_map_by_user(sm, &in_addr);
2163   if (PREDICT_FALSE(!dm0))
2164     {
2165       clib_warning("no match for internal host %U",
2166                    format_ip4_address, &in_addr);
           /* No map: either pass the packet through untranslated or drop it. */
2167       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2168           IP_PROTOCOL_ICMP, rx_fib_index0)))
2169         {
2170           dont_translate = 1;
2171           goto out;
2172         }
2173       next0 = SNAT_IN2OUT_NEXT_DROP;
2174       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2175       goto out;
2176     }
2177
       /* new_addr0 is later reported as the translated address; lo_port0 is the
        * base of the outside-port search below. */
2178   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
2179
       /* External endpoint: outer destination address, port 0 for ICMP. */
2180   key0.ext_host_addr = ip0->dst_address;
2181   key0.ext_host_port = 0;
2182
       /* NOTE(review): key0.out_port is not yet assigned when key0 is passed by
        * value here -- confirm snat_det_find_ses_by_in ignores that field. */
2183   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
2184   if (PREDICT_FALSE(!ses0))
2185     {
2186       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2187           IP_PROTOCOL_ICMP, rx_fib_index0)))
2188         {
2189           dont_translate = 1;
2190           goto out;
2191         }
           /* Only an echo request is allowed to create a new session. */
2192       if (icmp0->type != ICMP4_echo_request)
2193         {
2194           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2195           next0 = SNAT_IN2OUT_NEXT_DROP;
2196           goto out;
2197         }
           /* Try up to ports_per_host candidate outside ports, starting at the
            * offset (identifier % ports_per_host) and wrapping around; take the
            * first one for which no session is returned by the out-key lookup. */
2198       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2199         {
2200           key0.out_port = clib_host_to_net_u16 (lo_port0 +
2201             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
2202
2203           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
2204             continue;
2205
2206           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
2207           break;
2208         }
           /* Still no session after the search: report out-of-ports and drop. */
2209       if (PREDICT_FALSE(!ses0))
2210         {
2211           next0 = SNAT_IN2OUT_NEXT_DROP;
2212           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
2213           goto out;
2214         }
2215     }
2216
       /* With a session in hand, anything that is neither an echo request nor an
        * ICMP error message is dropped. ses0 stays set, so p_value is still
        * filled in at "out" on this path. */
2217   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
2218                     !icmp_is_error_message (icmp0)))
2219     {
2220       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2221       next0 = SNAT_IN2OUT_NEXT_DROP;
2222       goto out;
2223     }
2224
2225   u32 now = (u32) vlib_time_now (sm->vlib_main);
2226
       /* Mark the session active and push its expiry out by the ICMP timeout. */
2227   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
2228   ses0->expire = now + sm->icmp_timeout;
2229
2230 out:
       /* Publish results; every path through the function ends here. */
2231   *p_proto = protocol;
2232   if (ses0)
2233     {
2234       p_value->addr = new_addr0;
2235       p_value->fib_index = sm->outside_fib_index;
2236       p_value->port = ses0->out.out_port;
2237     }
2238   *p_dont_translate = dont_translate;
2239   if (d)
2240     *(snat_det_session_t**)d = ses0;
2241   if (e)
2242     *(snat_det_map_t**)e = dm0;
2243   return next0;
2244 }
2245
2246 /**********************/
2247 /*** worker handoff ***/
2248 /**********************/
/*
 * Worker handoff node function for the in2out direction.
 *
 * For every buffer in the incoming frame, sm->worker_in2out_cb is asked which
 * thread index should process it.  Buffers for the calling thread are collected
 * into a frame for sm->in2out_node_index; all others are appended to a handoff
 * queue element for the selected worker.  Always returns frame->n_vectors.
 */
2249 static uword
2250 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
2251                                vlib_node_runtime_t * node,
2252                                vlib_frame_t * frame)
2253 {
2254   snat_main_t *sm = &snat_main;
2255   vlib_thread_main_t *tm = vlib_get_thread_main ();
2256   u32 n_left_from, *from, *to_next = 0;
       /* __thread statics: each calling thread keeps its own two vectors. */
2257   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
2258   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
2259     = 0;
2260   vlib_frame_queue_elt_t *hf = 0;
2261   vlib_frame_t *f = 0;
2262   int i;
2263   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
2264   u32 next_worker_index = 0;
2265   u32 current_worker_index = ~0;
2266   u32 thread_index = vlib_get_thread_index ();
2267
2268   ASSERT (vec_len (sm->workers));
2269
       /* First call on this thread: size the two per-worker vectors. */
2270   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
2271     {
2272       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
2273
2274       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
2275                                sm->first_worker_index + sm->num_workers - 1,
2276                                (vlib_frame_queue_t *) (~0));
2277     }
2278
2279   from = vlib_frame_vector_args (frame);
2280   n_left_from = frame->n_vectors;
2281
       /* One buffer per iteration. */
2282   while (n_left_from > 0)
2283     {
2284       u32 bi0;
2285       vlib_buffer_t *b0;
2286       u32 sw_if_index0;
2287       u32 rx_fib_index0;
2288       ip4_header_t * ip0;
2289       u8 do_handoff;
2290
2291       bi0 = from[0];
2292       from += 1;
2293       n_left_from -= 1;
2294
2295       b0 = vlib_get_buffer (vm, bi0);
2296
2297       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
2298       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2299
2300       ip0 = vlib_buffer_get_current (b0);
2301
           /* The callback selects the thread index for this packet. */
2302       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
2303
2304       if (PREDICT_FALSE (next_worker_index != thread_index))
2305         {
2306           do_handoff = 1;
2307
               /* Target worker changed: record the fill level of the previous
                * element, then fetch the element for the new target and resume
                * appending after the entries it already holds. */
2308           if (next_worker_index != current_worker_index)
2309             {
2310               if (hf)
2311                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2312
2313               hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
2314                                                       next_worker_index,
2315                                                       handoff_queue_elt_by_worker_index);
2316
2317               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2318               to_next_worker = &hf->buffer_index[hf->n_vectors];
2319               current_worker_index = next_worker_index;
2320             }
2321
2322           /* enqueue to correct worker thread */
2323           to_next_worker[0] = bi0;
2324           to_next_worker++;
2325           n_left_to_next_worker--;
2326
               /* Element is full: ship it now and forget it, so the next buffer
                * for this worker fetches a fresh element. */
2327           if (n_left_to_next_worker == 0)
2328             {
2329               hf->n_vectors = VLIB_FRAME_SIZE;
2330               vlib_put_frame_queue_elt (hf);
2331               current_worker_index = ~0;
2332               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2333               hf = 0;
2334             }
2335         }
2336       else
2337         {
               /* Packet belongs to this thread: append it to a local frame for
                * the in2out node. */
2338           do_handoff = 0;
2339           /* if this is 1st frame */
2340           if (!f)
2341             {
2342               f = vlib_get_frame_to_node (vm, sm->in2out_node_index);
2343               to_next = vlib_frame_vector_args (f);
2344             }
2345
2346           to_next[0] = bi0;
2347           to_next += 1;
2348           f->n_vectors++;
2349         }
2350
           /* Record the handoff decision for traced buffers. */
2351       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2352                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2353         {
2354           snat_in2out_worker_handoff_trace_t *t =
2355             vlib_add_trace (vm, node, b0, sizeof (*t));
2356           t->next_worker_index = next_worker_index;
2357           t->do_handoff = do_handoff;
2358         }
2359     }
2360
       /* Hand the locally processed buffers, if any, to the in2out node. */
2361   if (f)
2362     vlib_put_frame_to_node (vm, sm->in2out_node_index, f);
2363
       /* Record the fill level of the element that was being written last. */
2364   if (hf)
2365     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2366
2367   /* Ship frames to the worker nodes */
       /* NOTE(review): i is bounded by handoff_queue_elt_by_worker_index (sized
        * from tm->n_vlib_mains) but also indexes
        * congested_handoff_queue_by_worker_index (sized from
        * sm->first_worker_index + sm->num_workers) -- confirm the second vector
        * is at least as long as the first. */
2368   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2369     {
2370       if (handoff_queue_elt_by_worker_index[i])
2371         {
2372           hf = handoff_queue_elt_by_worker_index[i];
2373           /*
2374            * It works better to let the handoff node
2375            * rate-adapt, always ship the handoff queue element.
2376            */
               /* The leading "1 ||" makes this condition always true, so the
                * else branch below is never taken. */
2377           if (1 || hf->n_vectors == hf->last_n_vectors)
2378             {
2379               vlib_put_frame_queue_elt (hf);
2380               handoff_queue_elt_by_worker_index[i] = 0;
2381             }
2382           else
2383             hf->last_n_vectors = hf->n_vectors;
2384         }
           /* Reset the congestion marker; this function never reads it back. */
2385       congested_handoff_queue_by_worker_index[i] =
2386         (vlib_frame_queue_t *) (~0);
2387     }
       /* Stores to locals right before returning; they have no further use. */
2388   hf = 0;
2389   current_worker_index = ~0;
2390   return frame->n_vectors;
2391 }
2392
/*
 * Registration of the "snat-in2out-worker-handoff" graph node.
 * snat_in2out_worker_handoff_fn delivers buffers through
 * vlib_put_frame_to_node and vlib_put_frame_queue_elt and never selects a next
 * index itself; the only next node declared here is "error-drop".
 */
2393 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
2394   .function = snat_in2out_worker_handoff_fn,
2395   .name = "snat-in2out-worker-handoff",
       /* frame elements are u32; the node function reads them as buffer indices */
2396   .vector_size = sizeof (u32),
2397   .format_trace = format_snat_in2out_worker_handoff_trace,
2398   .type = VLIB_NODE_TYPE_INTERNAL,
2399   
       /* must match the number of entries in .next_nodes below */
2400   .n_next_nodes = 1,
2401
2402   .next_nodes = {
2403     [0] = "error-drop",
2404   },
2405 };
2406
/* NOTE(review): presumably emits per-CPU-architecture variants of the node
 * function -- confirm against the macro definition. */
2407 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
2408
2409 static uword
2410 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
2411                                 vlib_node_runtime_t * node,
2412                                 vlib_frame_t * frame)
2413 {
2414   u32 n_left_from, * from, * to_next;
2415   snat_in2out_next_t next_index;
2416   u32 pkts_processed = 0;
2417   snat_main_t * sm = &snat_main;
2418   u32 stats_node_index;
2419
2420   stats_node_index = snat_in2out_fast_node.index;
2421
2422   from = vlib_frame_vector_args (frame);
2423   n_left_from = frame->n_vectors;
2424   next_index = node->cached_next_index;
2425
2426   while (n_left_from > 0)
2427     {
2428       u32 n_left_to_next;
2429
2430       vlib_get_next_frame (vm, node, next_index,
2431                            to_next, n_left_to_next);
2432
2433       while (n_left_from > 0 && n_left_to_next > 0)
2434         {
2435           u32 bi0;
2436           vlib_buffer_t * b0;
2437           u32 next0;
2438           u32 sw_if_index0;
2439           ip4_header_t * ip0;
2440           ip_csum_t sum0;
2441           u32 new_addr0, old_addr0;
2442           u16 old_port0, new_port0;
2443           udp_header_t * udp0;
2444           tcp_header_t * tcp0;
2445           icmp46_header_t * icmp0;
2446           snat_session_key_t key0, sm0;
2447           u32 proto0;
2448           u32 rx_fib_index0;
2449
2450           /* speculatively enqueue b0 to the current next frame */
2451           bi0 = from[0];
2452           to_next[0] = bi0;
2453           from += 1;
2454           to_next += 1;
2455           n_left_from -= 1;
2456           n_left_to_next -= 1;
2457
2458           b0 = vlib_get_buffer (vm, bi0);
2459           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2460
2461           ip0 = vlib_buffer_get_current (b0);
2462           udp0 = ip4_next_header (ip0);
2463           tcp0 = (tcp_header_t *) udp0;
2464           icmp0 = (icmp46_header_t *) udp0;
2465
2466           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2467           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2468
2469           if (PREDICT_FALSE(ip0->ttl == 1))
2470             {
2471               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2472               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2473                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2474                                            0);
2475               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2476               goto trace0;
2477             }
2478
2479           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2480
2481           if (PREDICT_FALSE (proto0 == ~0))
2482               goto trace0;
2483
2484           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2485             {
2486               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2487                                   rx_fib_index0, node, next0, ~0, 0, 0);
2488               goto trace0;
2489             }
2490
2491           key0.addr = ip0->src_address;
2492           key0.port = udp0->src_port;
2493           key0.fib_index = rx_fib_index0;
2494
2495           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0))
2496             {
2497               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2498               next0= SNAT_IN2OUT_NEXT_DROP;
2499               goto trace0;
2500             }
2501
2502           new_addr0 = sm0.addr.as_u32;
2503           new_port0 = sm0.port;
2504           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2505           old_addr0 = ip0->src_address.as_u32;
2506           ip0->src_address.as_u32 = new_addr0;
2507
2508           sum0 = ip0->checksum;
2509           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2510                                  ip4_header_t,
2511                                  src_address /* changed member */);
2512           ip0->checksum = ip_csum_fold (sum0);
2513
2514           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
2515             {
2516               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2517                 {
2518                   old_port0 = tcp0->src_port;
2519                   tcp0->src_port = new_port0;
2520
2521                   sum0 = tcp0->checksum;
2522                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2523                                          ip4_header_t,
2524                                          dst_address /* changed member */);
2525                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2526                                          ip4_header_t /* cheat */,
2527                                          length /* changed member */);
2528                   tcp0->checksum = ip_csum_fold(sum0);
2529                 }
2530               else
2531                 {
2532                   old_port0 = udp0->src_port;
2533                   udp0->src_port = new_port0;
2534                   udp0->checksum = 0;
2535                 }
2536             }
2537           else
2538             {
2539               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2540                 {
2541                   sum0 = tcp0->checksum;
2542                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2543                                          ip4_header_t,
2544                                          dst_address /* changed member */);
2545                   tcp0->checksum = ip_csum_fold(sum0);
2546                 }
2547             }
2548
2549           /* Hairpinning */
2550           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
2551
2552         trace0:
2553           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2554                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2555             {
2556               snat_in2out_trace_t *t =
2557                  vlib_add_trace (vm, node, b0, sizeof (*t));
2558               t->sw_if_index = sw_if_index0;
2559               t->next_index = next0;
2560             }
2561
2562           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2563
2564           /* verify speculative enqueue, maybe switch current next frame */
2565           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2566                                            to_next, n_left_to_next,
2567                                            bi0, next0);
2568         }
2569
2570       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2571     }
2572
2573   vlib_node_increment_counter (vm, stats_node_index,
2574                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2575                                pkts_processed);
2576   return frame->n_vectors;
2577 }
2578
2579
2580 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
2581   .function = snat_in2out_fast_static_map_fn,
2582   .name = "snat-in2out-fast",
2583   .vector_size = sizeof (u32),
2584   .format_trace = format_snat_in2out_fast_trace,
2585   .type = VLIB_NODE_TYPE_INTERNAL,
2586   
2587   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2588   .error_strings = snat_in2out_error_strings,
2589
2590   .runtime_data_bytes = sizeof (snat_runtime_t),
2591   
2592   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2593
2594   /* edit / add dispositions here */
2595   .next_nodes = {
2596     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2597     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2598     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
2599     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2600   },
2601 };
2602
2603 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);