acl-plugin: bihash-based ACL lookup
[vpp.git] / src / plugins / snat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <snat/snat.h>
25 #include <snat/snat_ipfix_logging.h>
26 #include <snat/snat_det.h>
27
28 #include <vppinfra/hash.h>
29 #include <vppinfra/error.h>
30 #include <vppinfra/elog.h>
31
32 typedef struct {
33   u32 sw_if_index;
34   u32 next_index;
35   u32 session_index;
36   u32 is_slow_path;
37 } snat_in2out_trace_t;
38
39 typedef struct {
40   u32 next_worker_index;
41   u8 do_handoff;
42 } snat_in2out_worker_handoff_trace_t;
43
44 /* packet trace format function */
45 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
46 {
47   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
50   char * tag;
51
52   tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
53   
54   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
55               t->sw_if_index, t->next_index, t->session_index);
56
57   return s;
58 }
59
60 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
61 {
62   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
63   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
64   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
65
66   s = format (s, "SANT_IN2OUT_FAST: sw_if_index %d, next index %d", 
67               t->sw_if_index, t->next_index);
68
69   return s;
70 }
71
72 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
73 {
74   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
75   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
76   snat_in2out_worker_handoff_trace_t * t =
77     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
78   char * m;
79
80   m = t->do_handoff ? "next worker" : "same worker";
81   s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
82
83   return s;
84 }
85
86 vlib_node_registration_t snat_in2out_node;
87 vlib_node_registration_t snat_in2out_slowpath_node;
88 vlib_node_registration_t snat_in2out_fast_node;
89 vlib_node_registration_t snat_in2out_worker_handoff_node;
90 vlib_node_registration_t snat_det_in2out_node;
91
/*
 * X-macro listing every in2out error: (symbol suffix, counter description).
 * It is expanded twice below, once for the enum and once for the strings,
 * so the two always stay in the same order.
 */
#define foreach_snat_in2out_error                         \
  _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
  _(IN2OUT_PACKETS, "Good in2out packets processed")      \
  _(OUT_OF_PORTS, "Out of ports")                         \
  _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
  _(BAD_ICMP_TYPE, "unsupported ICMP type")               \
  _(NO_TRANSLATION, "No translation")

/* Error codes; SNAT_IN2OUT_N_ERROR is the number of entries. */
typedef enum
{
#define _(name, text) SNAT_IN2OUT_ERROR_##name,
  foreach_snat_in2out_error
#undef _
  SNAT_IN2OUT_N_ERROR,
} snat_in2out_error_t;

/* Human readable descriptions, indexed by snat_in2out_error_t. */
static char * snat_in2out_error_strings[] = {
#define _(name, text) text,
  foreach_snat_in2out_error
#undef _
};
112
/* Next-node dispositions used by the in2out code in this file. */
typedef enum
{
  SNAT_IN2OUT_NEXT_LOOKUP,
  SNAT_IN2OUT_NEXT_DROP,
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_N_NEXT,  /* number of dispositions, keep last */
} snat_in2out_next_t;
120
121 /**
122  * @brief Check if packet should be translated
123  *
124  * Packets aimed at outside interface and external addresss with active session
125  * should be translated.
126  *
127  * @param sm            SNAT main
128  * @param rt            SNAT runtime data
129  * @param sw_if_index0  index of the inside interface
130  * @param ip0           IPv4 header
131  * @param proto0        SNAT protocol
132  * @param rx_fib_index0 RX FIB index
133  *
134  * @returns 0 if packet should be translated otherwise 1
135  */
136 static inline int
137 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
138                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
139                          u32 rx_fib_index0)
140 {
141   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
142   fib_prefix_t pfx = {
143     .fp_proto = FIB_PROTOCOL_IP4,
144     .fp_len = 32,
145     .fp_addr = {
146         .ip4.as_u32 = ip0->dst_address.as_u32,
147     },
148   };
149
150   /* Don't NAT packet aimed at the intfc address */
151   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
152                                       ip0->dst_address.as_u32)))
153     return 1;
154
155   fei = fib_table_lookup (rx_fib_index0, &pfx);
156   if (FIB_NODE_INDEX_INVALID != fei)
157     {
158       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
159       if (sw_if_index == ~0)
160         {
161           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
162           if (FIB_NODE_INDEX_INVALID != fei)
163             sw_if_index = fib_entry_get_resolving_interface (fei);
164         }
165       snat_interface_t *i;
166       pool_foreach (i, sm->interfaces,
167       ({
168         /* NAT packet aimed at outside interface */
169         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
170           return 0;
171       }));
172     }
173
174   return 1;
175 }
176
177 static inline int
178 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
179                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
180                     u32 rx_fib_index0)
181 {
182   udp_header_t * udp0 = ip4_next_header (ip0);
183   snat_session_key_t key0, sm0;
184   clib_bihash_kv_8_8_t kv0, value0;
185
186   key0.addr = ip0->dst_address;
187   key0.port = udp0->dst_port;
188   key0.protocol = proto0;
189   key0.fib_index = sm->outside_fib_index;
190   kv0.key = key0.as_u64;
191
192   /* NAT packet aimed at external address if */
193   /* has active sessions */
194   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
195     {
196       /* or is static mappings */
197       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
198         return 0;
199     }
200   else
201     return 0;
202
203   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
204                                  rx_fib_index0);
205 }
206
/*
 * Slow path: set up a session for an in2out flow that has no translation yet.
 *
 * Steps, in order:
 *  - resolve sm->outside_vrf_id to a FIB index (drop with BAD_OUTSIDE_FIB
 *    if the table id is unknown);
 *  - find or create the per-thread "user" keyed by source address and RX FIB;
 *  - if the user already holds max_translations_per_user dynamic sessions,
 *    recycle the least recently used non-static session, otherwise create a
 *    new session (static mapping first, dynamic allocation as fallback);
 *  - install the in2out / out2in / worker_by_out hash entries and log the
 *    session creation.
 *
 * On success *sessionp points at the session and next0 is returned
 * unchanged.  On failure SNAT_IN2OUT_NEXT_DROP is returned with b0->error
 * set and *sessionp is left untouched.
 */
207 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
208                       ip4_header_t * ip0,
209                       u32 rx_fib_index0,
210                       snat_session_key_t * key0,
211                       snat_session_t ** sessionp,
212                       vlib_node_runtime_t * node,
213                       u32 next0,
214                       u32 thread_index)
215 {
216   snat_user_t *u;
217   snat_user_key_t user_key;
218   snat_session_t *s;
219   clib_bihash_kv_8_8_t kv0, value0;
220   u32 oldest_per_user_translation_list_index;
221   dlist_elt_t * oldest_per_user_translation_list_elt;
222   dlist_elt_t * per_user_translation_list_elt;
223   dlist_elt_t * per_user_list_head_elt;
224   u32 session_index;
225   snat_session_key_t key1;
226   u32 address_index = ~0;
227   u32 outside_fib_index;
228   uword * p;
229   snat_worker_key_t worker_by_out_key;
230
      /* Map the configured outside VRF id to its FIB index */
231   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
232   if (! p)
233     {
234       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
235       return SNAT_IN2OUT_NEXT_DROP;
236     }
237   outside_fib_index = p[0];
238
239   key1.protocol = key0->protocol;
240   user_key.addr = ip0->src_address;
241   user_key.fib_index = rx_fib_index0;
242   kv0.key = user_key.as_u64;
243   
244   /* Ever heard of the "user" = src ip4 address before? */
245   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
246     {
247       /* no, make a new one */
248       pool_get (sm->per_thread_data[thread_index].users, u);
249       memset (u, 0, sizeof (*u));
250       u->addr = ip0->src_address;
251       u->fib_index = rx_fib_index0;
252
          /* Allocate the head element of this user's session list */
253       pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);
254
255       u->sessions_per_user_list_head_index = per_user_list_head_elt -
256         sm->per_thread_data[thread_index].list_pool;
257
258       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
259                        u->sessions_per_user_list_head_index);
260
          /* The hash value is the user's index in this thread's user pool */
261       kv0.value = u - sm->per_thread_data[thread_index].users;
262
263       /* add user */
264       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
265     }
266   else
267     {
268       u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
269                              value0.value);
270     }
271
272   /* Over quota? Recycle the least recently used dynamic translation */
273   if (u->nsessions >= sm->max_translations_per_user)
274     {
275       /* Remove the oldest dynamic translation */
          /* Each pass rotates the list head to the tail; static sessions are
             skipped, so the loop stops at the oldest dynamic one */
276       do {
277           oldest_per_user_translation_list_index =
278             clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
279                                     u->sessions_per_user_list_head_index);
280
281           ASSERT (oldest_per_user_translation_list_index != ~0);
282
283           /* add it back to the end of the LRU list */
284           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
285                               u->sessions_per_user_list_head_index,
286                               oldest_per_user_translation_list_index);
287           /* Get the list element */
288           oldest_per_user_translation_list_elt =
289             pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
290                                oldest_per_user_translation_list_index);
291
292           /* Get the session index from the list element */
293           session_index = oldest_per_user_translation_list_elt->value;
294
295           /* Get the session */
296           s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
297                                  session_index);
298       } while (snat_is_session_static (s));
299
300       /* Remove in2out, out2in keys */
          /* NOTE(review): the worker_by_out entry of the recycled session is
             not removed here - confirm whether that is intended */
301       kv0.key = s->in2out.as_u64;
302       if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
303           clib_warning ("in2out key delete failed");
304       kv0.key = s->out2in.as_u64;
305       if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
306           clib_warning ("out2in key delete failed");
307
308       /* log NAT event */
309       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
310                                           s->out2in.addr.as_u32,
311                                           s->in2out.protocol,
312                                           s->in2out.port,
313                                           s->out2in.port,
314                                           s->in2out.fib_index);
315
316       snat_free_outside_address_and_port 
317         (sm, &s->out2in, s->outside_address_index);
318       s->outside_address_index = ~0;
319
          /* An address/port was released just above, so a failure here is
             treated as a programming error (ASSERT) before dropping */
320       if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
321                                                &address_index))
322         {
323           ASSERT(0);
324
325           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
326           return SNAT_IN2OUT_NEXT_DROP;
327         }
328       s->outside_address_index = address_index;
329     }
330   else
331     {
332       u8 static_mapping = 1;
333
334       /* First try to match static mapping by local address and port */
335       if (snat_static_mapping_match (sm, *key0, &key1, 0, 0))
336         {
337           static_mapping = 0;
338           /* Try to create dynamic translation */
339           if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
340                                                    &address_index))
341             {
342               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
343               return SNAT_IN2OUT_NEXT_DROP;
344             }
345         }
346
347       /* Create a new session */
348       pool_get (sm->per_thread_data[thread_index].sessions, s);
349       memset (s, 0, sizeof (*s));
350       
          /* Stays ~0 when the session comes from a static mapping */
351       s->outside_address_index = address_index;
352
353       if (static_mapping)
354         {
355           u->nstaticsessions++;
356           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
357         }
358       else
359         {
360           u->nsessions++;
361         }
362
363       /* Create list elts */
364       pool_get (sm->per_thread_data[thread_index].list_pool,
365                 per_user_translation_list_elt);
366       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
367                        per_user_translation_list_elt -
368                        sm->per_thread_data[thread_index].list_pool);
369
          /* Cross-link: list element -> session index, session -> list element */
370       per_user_translation_list_elt->value =
371         s - sm->per_thread_data[thread_index].sessions;
372       s->per_user_index = per_user_translation_list_elt -
373                           sm->per_thread_data[thread_index].list_pool;
374       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
375
376       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
377                           s->per_user_list_head_index,
378                           per_user_translation_list_elt -
379                           sm->per_thread_data[thread_index].list_pool);
380    }
381   
      /* key1 presumably carries the outside address/port chosen by the static
         mapping match or the allocator (TODO confirm); protocol and FIB index
         are overwritten explicitly below */
382   s->in2out = *key0;
383   s->out2in = key1;
384   s->out2in.protocol = key0->protocol;
385   s->out2in.fib_index = outside_fib_index;
386   *sessionp = s;
387
388   /* Add to translation hashes */
389   kv0.key = s->in2out.as_u64;
390   kv0.value = s - sm->per_thread_data[thread_index].sessions;
391   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
392       clib_warning ("in2out key add failed");
393   
394   kv0.key = s->out2in.as_u64;
395   kv0.value = s - sm->per_thread_data[thread_index].sessions;
396   
397   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
398       clib_warning ("out2in key add failed");
399
400   /* Add to translated packets worker lookup */
      /* Maps the outside address/port/FIB to the thread that owns the session */
401   worker_by_out_key.addr = s->out2in.addr;
402   worker_by_out_key.port = s->out2in.port;
403   worker_by_out_key.fib_index = s->out2in.fib_index;
404   kv0.key = worker_by_out_key.as_u64;
405   kv0.value = thread_index;
406   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
407
408   /* log NAT event */
409   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
410                                       s->out2in.addr.as_u32,
411                                       s->in2out.protocol,
412                                       s->in2out.port,
413                                       s->out2in.port,
414                                       s->in2out.fib_index);
415   return next0;
416 }
417
418 static_always_inline
419 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
420                                  snat_session_key_t *p_key0)
421 {
422   icmp46_header_t *icmp0;
423   snat_session_key_t key0;
424   icmp_echo_header_t *echo0, *inner_echo0 = 0;
425   ip4_header_t *inner_ip0 = 0;
426   void *l4_header = 0;
427   icmp46_header_t *inner_icmp0;
428
429   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
430   echo0 = (icmp_echo_header_t *)(icmp0+1);
431
432   if (!icmp_is_error_message (icmp0))
433     {
434       key0.protocol = SNAT_PROTOCOL_ICMP;
435       key0.addr = ip0->src_address;
436       key0.port = echo0->identifier;
437     }
438   else
439     {
440       inner_ip0 = (ip4_header_t *)(echo0+1);
441       l4_header = ip4_next_header (inner_ip0);
442       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
443       key0.addr = inner_ip0->dst_address;
444       switch (key0.protocol)
445         {
446         case SNAT_PROTOCOL_ICMP:
447           inner_icmp0 = (icmp46_header_t*)l4_header;
448           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
449           key0.port = inner_echo0->identifier;
450           break;
451         case SNAT_PROTOCOL_UDP:
452         case SNAT_PROTOCOL_TCP:
453           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
454           break;
455         default:
456           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
457         }
458     }
459   *p_key0 = key0;
460   return -1; /* success */
461 }
462
463 /**
464  * Get address and port values to be used for packet SNAT translation
465  * and create session if needed
466  *
467  * @param[in,out] sm             SNAT main
468  * @param[in,out] node           SNAT node runtime
469  * @param[in] thread_index       thread index
470  * @param[in,out] b0             buffer containing packet to be translated
471  * @param[out] p_proto           protocol used for matching
472  * @param[out] p_value           address and port after NAT translation
473  * @param[out] p_dont_translate  if packet should not be translated
474  * @param d                      optional parameter
475  * @param e                      optional parameter
476  */
477 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
478                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
479                            snat_session_key_t *p_value,
480                            u8 *p_dont_translate, void *d, void *e)
481 {
482   ip4_header_t *ip0;
483   icmp46_header_t *icmp0;
484   u32 sw_if_index0;
485   u32 rx_fib_index0;
486   snat_session_key_t key0;
487   snat_session_t *s0 = 0;
488   u8 dont_translate = 0;
489   clib_bihash_kv_8_8_t kv0, value0;
490   u32 next0 = ~0;
491   int err;
492
493   ip0 = vlib_buffer_get_current (b0);
494   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
495   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
496   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
497
498   err = icmp_get_key (ip0, &key0);
499   if (err != -1)
500     {
501       b0->error = node->errors[err];
502       next0 = SNAT_IN2OUT_NEXT_DROP;
503       goto out;
504     }
505   key0.fib_index = rx_fib_index0;
506
507   kv0.key = key0.as_u64;
508
509   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
510     {
511       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
512           IP_PROTOCOL_ICMP, rx_fib_index0)))
513         {
514           dont_translate = 1;
515           goto out;
516         }
517
518       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
519         {
520           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
521           next0 = SNAT_IN2OUT_NEXT_DROP;
522           goto out;
523         }
524
525       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
526                          &s0, node, next0, thread_index);
527
528       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
529         goto out;
530     }
531   else
532     {
533       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
534                         icmp0->type != ICMP4_echo_reply &&
535                         !icmp_is_error_message (icmp0)))
536         {
537           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
538           next0 = SNAT_IN2OUT_NEXT_DROP;
539           goto out;
540         }
541
542       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
543                               value0.value);
544     }
545
546 out:
547   *p_proto = key0.protocol;
548   if (s0)
549     *p_value = s0->out2in;
550   *p_dont_translate = dont_translate;
551   if (d)
552     *(snat_session_t**)d = s0;
553   return next0;
554 }
555
556 /**
557  * Get address and port values to be used for packet SNAT translation
558  *
559  * @param[in] sm                 SNAT main
560  * @param[in,out] node           SNAT node runtime
561  * @param[in] thread_index       thread index
562  * @param[in,out] b0             buffer containing packet to be translated
563  * @param[out] p_proto           protocol used for matching
564  * @param[out] p_value           address and port after NAT translation
565  * @param[out] p_dont_translate  if packet should not be translated
566  * @param d                      optional parameter
567  * @param e                      optional parameter
568  */
569 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
570                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
571                            snat_session_key_t *p_value,
572                            u8 *p_dont_translate, void *d, void *e)
573 {
574   ip4_header_t *ip0;
575   icmp46_header_t *icmp0;
576   u32 sw_if_index0;
577   u32 rx_fib_index0;
578   snat_session_key_t key0;
579   snat_session_key_t sm0;
580   u8 dont_translate = 0;
581   u8 is_addr_only;
582   u32 next0 = ~0;
583   int err;
584
585   ip0 = vlib_buffer_get_current (b0);
586   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
587   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
588   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
589
590   err = icmp_get_key (ip0, &key0);
591   if (err != -1)
592     {
593       b0->error = node->errors[err];
594       next0 = SNAT_IN2OUT_NEXT_DROP;
595       goto out2;
596     }
597   key0.fib_index = rx_fib_index0;
598
599   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only))
600     {
601       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
602           IP_PROTOCOL_ICMP, rx_fib_index0)))
603         {
604           dont_translate = 1;
605           goto out;
606         }
607
608       if (icmp_is_error_message (icmp0))
609         {
610           next0 = SNAT_IN2OUT_NEXT_DROP;
611           goto out;
612         }
613
614       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
615       next0 = SNAT_IN2OUT_NEXT_DROP;
616       goto out;
617     }
618
619   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
620                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
621                     !icmp_is_error_message (icmp0)))
622     {
623       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
624       next0 = SNAT_IN2OUT_NEXT_DROP;
625       goto out;
626     }
627
628 out:
629   *p_value = sm0;
630 out2:
631   *p_proto = key0.protocol;
632   *p_dont_translate = dont_translate;
633   return next0;
634 }
635
/*
 * Translate an ICMP packet travelling in the in2out direction.
 *
 * The translated address/port is obtained from sm->icmp_match_in2out_cb.
 * If the callback asks for a drop or says the packet must not be
 * translated, the packet is left untouched.  Otherwise the ICMP checksum is
 * verified, the outer source address is rewritten, and then either the echo
 * identifier (ordinary messages) or the embedded packet's destination
 * address and identifier/port (ICMP error messages) are rewritten, with the
 * affected checksums updated incrementally.
 *
 * d and e are passed through to the callback unchanged.
 * Returns the next index to use for the packet.
 */
636 static inline u32 icmp_in2out (snat_main_t *sm,
637                                vlib_buffer_t * b0,
638                                ip4_header_t * ip0,
639                                icmp46_header_t * icmp0,
640                                u32 sw_if_index0,
641                                u32 rx_fib_index0,
642                                vlib_node_runtime_t * node,
643                                u32 next0,
644                                u32 thread_index,
645                                void *d,
646                                void *e)
647 {
648   snat_session_key_t sm0;
649   u8 protocol;
650   icmp_echo_header_t *echo0, *inner_echo0 = 0;
651   ip4_header_t *inner_ip0;
652   void *l4_header = 0;
653   icmp46_header_t *inner_icmp0;
654   u8 dont_translate;
655   u32 new_addr0, old_addr0;
656   u16 old_id0, new_id0;
657   ip_csum_t sum0;
658   u16 checksum0;
659   u32 next0_tmp;
660
661   echo0 = (icmp_echo_header_t *)(icmp0+1);
662
      /* A callback result of ~0 means "keep the caller's next0" */
663   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0,
664                                        &protocol, &sm0, &dont_translate, d, e);
665   if (next0_tmp != ~0)
666     next0 = next0_tmp;
667   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
668     goto out;
669
      /* Checksum the whole ICMP message (IP total length minus IP header);
         drop when the folded complement is neither 0 nor 0xffff */
670   sum0 = ip_incremental_checksum (0, icmp0,
671                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
672   checksum0 = ~ip_csum_fold (sum0);
673   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
674     {
675       next0 = SNAT_IN2OUT_NEXT_DROP;
676       goto out;
677     }
678
      /* Rewrite the outer source address and set the TX FIB index */
679   old_addr0 = ip0->src_address.as_u32;
680   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
681   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
682
683   sum0 = ip0->checksum;
684   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
685                          src_address /* changed member */);
686   ip0->checksum = ip_csum_fold (sum0);
687   
688   if (!icmp_is_error_message (icmp0))
689     {
          /* Ordinary message: only the echo identifier may need rewriting */
690       new_id0 = sm0.port;
691       if (PREDICT_FALSE(new_id0 != echo0->identifier))
692         {
693           old_id0 = echo0->identifier;
694           new_id0 = sm0.port;
695           echo0->identifier = new_id0;
696
697           sum0 = icmp0->checksum;
698           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
699                                  identifier);
700           icmp0->checksum = ip_csum_fold (sum0);
701         }
702     }
703   else
704     {
          /* Error message: the offending packet is embedded after the
             ICMP header and the following echo-header-sized field */
705       inner_ip0 = (ip4_header_t *)(echo0+1);
706       l4_header = ip4_next_header (inner_ip0);
707
708       if (!ip4_header_checksum_is_valid (inner_ip0))
709         {
710           next0 = SNAT_IN2OUT_NEXT_DROP;
711           goto out;
712         }
713
          /* NOTE(review): the embedded IP header's own checksum is not
             updated after its dst_address changes; only the outer ICMP
             checksum is adjusted - confirm this is intended */
714       old_addr0 = inner_ip0->dst_address.as_u32;
715       inner_ip0->dst_address = sm0.addr;
716       new_addr0 = inner_ip0->dst_address.as_u32;
717
718       sum0 = icmp0->checksum;
719       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
720                              dst_address /* changed member */);
721       icmp0->checksum = ip_csum_fold (sum0);
722
          /* protocol was reported by the match callback */
723       switch (protocol)
724         {
725           case SNAT_PROTOCOL_ICMP:
726             inner_icmp0 = (icmp46_header_t*)l4_header;
727             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
728
729             old_id0 = inner_echo0->identifier;
730             new_id0 = sm0.port;
731             inner_echo0->identifier = new_id0;
732
733             sum0 = icmp0->checksum;
734             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
735                                    identifier);
736             icmp0->checksum = ip_csum_fold (sum0);
737             break;
738           case SNAT_PROTOCOL_UDP:
739           case SNAT_PROTOCOL_TCP:
740             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
741             new_id0 = sm0.port;
742             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
743
744             sum0 = icmp0->checksum;
745             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
746                                    dst_port);
747             icmp0->checksum = ip_csum_fold (sum0);
748             break;
749           default:
750             ASSERT(0);
751         }
752     }
753
754 out:
755   return next0;
756 }
757
758 /**
759  * @brief Hairpinning
760  *
761  * Hairpinning allows two endpoints on the internal side of the NAT to
762  * communicate even if they only use each other's external IP addresses
763  * and ports.
764  *
765  * @param sm     SNAT main.
766  * @param b0     Vlib buffer.
767  * @param ip0    IP header.
768  * @param udp0   UDP header.
769  * @param tcp0   TCP header.
770  * @param proto0 SNAT protocol.
771  */
/*
 * Rewrites the destination address (and port) of b0 in place when the
 * destination matches an active out2in session or a static mapping;
 * otherwise the packet is left unchanged.  Nothing is returned.
 */
772 static inline void
773 snat_hairpinning (snat_main_t *sm,
774                   vlib_buffer_t * b0,
775                   ip4_header_t * ip0,
776                   udp_header_t * udp0,
777                   tcp_header_t * tcp0,
778                   u32 proto0)
779 {
780   snat_session_key_t key0, sm0;
781   snat_worker_key_t k0;
782   snat_session_t * s0;
783   clib_bihash_kv_8_8_t kv0, value0;
784   ip_csum_t sum0;
      /* new_dst_addr0 == 0 doubles as the "no hairpinning needed" marker */
785   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
786   u16 new_dst_port0, old_dst_port0;
787
      /* Lookup key: packet destination as seen in the outside FIB */
788   key0.addr = ip0->dst_address;
789   key0.port = udp0->dst_port;
790   key0.protocol = proto0;
791   key0.fib_index = sm->outside_fib_index;
792   kv0.key = key0.as_u64;
793
794   /* Check if destination is in active sessions */
795   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
796     {
797       /* or static mappings */
798       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
799         {
800           new_dst_addr0 = sm0.addr.as_u32;
801           new_dst_port0 = sm0.port;
802           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
803         }
804     }
805   else
806     {
          /* Active session found: value is the session index; the owning
             thread is looked up in worker_by_out when several workers run */
807       si = value0.value;
808       if (sm->num_workers > 1)
809         {
810           k0.addr = ip0->dst_address;
811           k0.port = udp0->dst_port;
812           k0.fib_index = sm->outside_fib_index;
813           kv0.key = k0.as_u64;
814           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
815             ASSERT(0);
816           else
817             ti = value0.value;
818         }
819       else
820         ti = sm->num_workers;
821
822       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
823       new_dst_addr0 = s0->in2out.addr.as_u32;
824       new_dst_port0 = s0->in2out.port;
825       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
826     }
827
828   /* Destination is behind the same NAT, use internal address and port */
829   if (new_dst_addr0)
830     {
831       old_dst_addr0 = ip0->dst_address.as_u32;
832       ip0->dst_address.as_u32 = new_dst_addr0;
833       sum0 = ip0->checksum;
834       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
835                              ip4_header_t, dst_address);
836       ip0->checksum = ip_csum_fold (sum0);
837
          /* The old port is read through tcp0 for both protocols; presumably
             udp0 and tcp0 point at the same L4 header - TODO confirm */
838       old_dst_port0 = tcp0->dst;
839       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
840         {
841           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
842             {
                  /* TCP checksum is adjusted for both address and port */
843               tcp0->dst = new_dst_port0;
844               sum0 = tcp0->checksum;
845               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
846                                      ip4_header_t, dst_address);
847               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
848                                      ip4_header_t /* cheat */, length);
849               tcp0->checksum = ip_csum_fold(sum0);
850             }
851           else
852             {
                  /* Non-TCP: the checksum is zeroed rather than updated */
853               udp0->dst_port = new_dst_port0;
854               udp0->checksum = 0;
855             }
856         }
857       else
858         {
              /* Port unchanged: only TCP gets a checksum fix-up for the new
                 address.  NOTE(review): the UDP checksum is left as-is here -
                 confirm callers have already cleared it */
859           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
860             {
861               sum0 = tcp0->checksum;
862               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
863                                      ip4_header_t, dst_address);
864               tcp0->checksum = ip_csum_fold(sum0);
865             }
866         }
867     }
868 }
869
870 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
871                                          vlib_buffer_t * b0,
872                                          ip4_header_t * ip0,
873                                          icmp46_header_t * icmp0,
874                                          u32 sw_if_index0,
875                                          u32 rx_fib_index0,
876                                          vlib_node_runtime_t * node,
877                                          u32 next0,
878                                          f64 now,
879                                          u32 thread_index,
880                                          snat_session_t ** p_s0)
881 {
882   snat_session_key_t key0, sm0;
883   clib_bihash_kv_8_8_t kv0, value0;
884   snat_worker_key_t k0;
885   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
886   ip_csum_t sum0;
887
888   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
889                       next0, thread_index, p_s0, 0);
890   snat_session_t * s0 = *p_s0;
891   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
892     {
893       /* Hairpinning */
894       if (!icmp_is_error_message (icmp0))
895         {
896           icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
897           u16 icmp_id0 = echo0->identifier;
898           key0.addr = ip0->dst_address;
899           key0.port = icmp_id0;
900           key0.protocol = SNAT_PROTOCOL_ICMP;
901           key0.fib_index = sm->outside_fib_index;
902           kv0.key = key0.as_u64;
903
904           /* Check if destination is in active sessions */
905           if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
906             {
907               /* or static mappings */
908               if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
909                 {
910                   new_dst_addr0 = sm0.addr.as_u32;
911                   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
912                 }
913             }
914           else
915             {
916               si = value0.value;
917               if (sm->num_workers > 1)
918                 {
919                   k0.addr = ip0->dst_address;
920                   k0.port = icmp_id0;
921                   k0.fib_index = sm->outside_fib_index;
922                   kv0.key = k0.as_u64;
923                   if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
924                     ASSERT(0);
925                   else
926                     ti = value0.value;
927                 }
928               else
929                 ti = sm->num_workers;
930
931               s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
932               new_dst_addr0 = s0->in2out.addr.as_u32;
933               vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
934               echo0->identifier = s0->in2out.port;
935               sum0 = icmp0->checksum;
936               sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
937                                      icmp_echo_header_t, identifier);
938               icmp0->checksum = ip_csum_fold (sum0);
939             }
940
941           /* Destination is behind the same NAT, use internal address and port */
942           if (new_dst_addr0)
943             {
944               old_dst_addr0 = ip0->dst_address.as_u32;
945               ip0->dst_address.as_u32 = new_dst_addr0;
946               sum0 = ip0->checksum;
947               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
948                                      ip4_header_t, dst_address);
949               ip0->checksum = ip_csum_fold (sum0);
950             }
951         }
952
953       /* Accounting */
954       s0->last_heard = now;
955       s0->total_pkts++;
956       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
957       /* Per-user LRU list maintenance for dynamic translations */
958       if (!snat_is_session_static (s0))
959         {
960           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
961                              s0->per_user_index);
962           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
963                               s0->per_user_list_head_index,
964                               s0->per_user_index);
965         }
966     }
967   return next0;
968 }
969
970 static inline uword
971 snat_in2out_node_fn_inline (vlib_main_t * vm,
972                             vlib_node_runtime_t * node,
973                             vlib_frame_t * frame, int is_slow_path)
974 {
975   u32 n_left_from, * from, * to_next;
976   snat_in2out_next_t next_index;
977   u32 pkts_processed = 0;
978   snat_main_t * sm = &snat_main;
979   f64 now = vlib_time_now (vm);
980   u32 stats_node_index;
981   u32 thread_index = vlib_get_thread_index ();
982
983   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
984     snat_in2out_node.index;
985
986   from = vlib_frame_vector_args (frame);
987   n_left_from = frame->n_vectors;
988   next_index = node->cached_next_index;
989
990   while (n_left_from > 0)
991     {
992       u32 n_left_to_next;
993
994       vlib_get_next_frame (vm, node, next_index,
995                            to_next, n_left_to_next);
996
997       while (n_left_from >= 4 && n_left_to_next >= 2)
998         {
999           u32 bi0, bi1;
1000           vlib_buffer_t * b0, * b1;
1001           u32 next0, next1;
1002           u32 sw_if_index0, sw_if_index1;
1003           ip4_header_t * ip0, * ip1;
1004           ip_csum_t sum0, sum1;
1005           u32 new_addr0, old_addr0, new_addr1, old_addr1;
1006           u16 old_port0, new_port0, old_port1, new_port1;
1007           udp_header_t * udp0, * udp1;
1008           tcp_header_t * tcp0, * tcp1;
1009           icmp46_header_t * icmp0, * icmp1;
1010           snat_session_key_t key0, key1;
1011           u32 rx_fib_index0, rx_fib_index1;
1012           u32 proto0, proto1;
1013           snat_session_t * s0 = 0, * s1 = 0;
1014           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1015           
1016           /* Prefetch next iteration. */
1017           {
1018             vlib_buffer_t * p2, * p3;
1019             
1020             p2 = vlib_get_buffer (vm, from[2]);
1021             p3 = vlib_get_buffer (vm, from[3]);
1022             
1023             vlib_prefetch_buffer_header (p2, LOAD);
1024             vlib_prefetch_buffer_header (p3, LOAD);
1025
1026             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1027             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1028           }
1029
1030           /* speculatively enqueue b0 and b1 to the current next frame */
1031           to_next[0] = bi0 = from[0];
1032           to_next[1] = bi1 = from[1];
1033           from += 2;
1034           to_next += 2;
1035           n_left_from -= 2;
1036           n_left_to_next -= 2;
1037           
1038           b0 = vlib_get_buffer (vm, bi0);
1039           b1 = vlib_get_buffer (vm, bi1);
1040
1041           ip0 = vlib_buffer_get_current (b0);
1042           udp0 = ip4_next_header (ip0);
1043           tcp0 = (tcp_header_t *) udp0;
1044           icmp0 = (icmp46_header_t *) udp0;
1045
1046           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1047           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1048                                    sw_if_index0);
1049
1050           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1051
1052           if (PREDICT_FALSE(ip0->ttl == 1))
1053             {
1054               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1055               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1056                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1057                                            0);
1058               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1059               goto trace00;
1060             }
1061
1062           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1063
1064           /* Next configured feature, probably ip4-lookup */
1065           if (is_slow_path)
1066             {
1067               if (PREDICT_FALSE (proto0 == ~0))
1068                 goto trace00;
1069               
1070               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1071                 {
1072                   next0 = icmp_in2out_slow_path 
1073                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
1074                      node, next0, now, thread_index, &s0);
1075                   goto trace00;
1076                 }
1077             }
1078           else
1079             {
1080               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1081                 {
1082                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1083                   goto trace00;
1084                 }
1085             }
1086
1087           key0.addr = ip0->src_address;
1088           key0.port = udp0->src_port;
1089           key0.protocol = proto0;
1090           key0.fib_index = rx_fib_index0;
1091           
1092           kv0.key = key0.as_u64;
1093
1094           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
1095             {
1096               if (is_slow_path)
1097                 {
1098                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
1099                       proto0, rx_fib_index0)))
1100                     goto trace00;
1101
1102                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1103                                      &s0, node, next0, thread_index);
1104                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1105                     goto trace00;
1106                 }
1107               else
1108                 {
1109                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1110                   goto trace00;
1111                 }
1112             }
1113           else
1114             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1115                                     value0.value);
1116
1117           old_addr0 = ip0->src_address.as_u32;
1118           ip0->src_address = s0->out2in.addr;
1119           new_addr0 = ip0->src_address.as_u32;
1120           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1121
1122           sum0 = ip0->checksum;
1123           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1124                                  ip4_header_t,
1125                                  src_address /* changed member */);
1126           ip0->checksum = ip_csum_fold (sum0);
1127
1128           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1129             {
1130               old_port0 = tcp0->src_port;
1131               tcp0->src_port = s0->out2in.port;
1132               new_port0 = tcp0->src_port;
1133
1134               sum0 = tcp0->checksum;
1135               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1136                                      ip4_header_t,
1137                                      dst_address /* changed member */);
1138               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1139                                      ip4_header_t /* cheat */,
1140                                      length /* changed member */);
1141               tcp0->checksum = ip_csum_fold(sum0);
1142             }
1143           else
1144             {
1145               old_port0 = udp0->src_port;
1146               udp0->src_port = s0->out2in.port;
1147               udp0->checksum = 0;
1148             }
1149
1150           /* Hairpinning */
1151           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1152
1153           /* Accounting */
1154           s0->last_heard = now;
1155           s0->total_pkts++;
1156           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1157           /* Per-user LRU list maintenance for dynamic translation */
1158           if (!snat_is_session_static (s0))
1159             {
1160               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1161                                  s0->per_user_index);
1162               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1163                                   s0->per_user_list_head_index,
1164                                   s0->per_user_index);
1165             }
1166         trace00:
1167
1168           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1169                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1170             {
1171               snat_in2out_trace_t *t = 
1172                  vlib_add_trace (vm, node, b0, sizeof (*t));
1173               t->is_slow_path = is_slow_path;
1174               t->sw_if_index = sw_if_index0;
1175               t->next_index = next0;
1176                   t->session_index = ~0;
1177               if (s0)
1178                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1179             }
1180
1181           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1182
1183           ip1 = vlib_buffer_get_current (b1);
1184           udp1 = ip4_next_header (ip1);
1185           tcp1 = (tcp_header_t *) udp1;
1186           icmp1 = (icmp46_header_t *) udp1;
1187
1188           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1189           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1190                                    sw_if_index1);
1191
1192           if (PREDICT_FALSE(ip1->ttl == 1))
1193             {
1194               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1195               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1196                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1197                                            0);
1198               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1199               goto trace01;
1200             }
1201
1202           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1203
1204           /* Next configured feature, probably ip4-lookup */
1205           if (is_slow_path)
1206             {
1207               if (PREDICT_FALSE (proto1 == ~0))
1208                 goto trace01;
1209               
1210               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1211                 {
1212                   next1 = icmp_in2out_slow_path 
1213                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1214                      next1, now, thread_index, &s1);
1215                   goto trace01;
1216                 }
1217             }
1218           else
1219             {
1220               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1221                 {
1222                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1223                   goto trace01;
1224                 }
1225             }
1226
1227           key1.addr = ip1->src_address;
1228           key1.port = udp1->src_port;
1229           key1.protocol = proto1;
1230           key1.fib_index = rx_fib_index1;
1231           
1232           kv1.key = key1.as_u64;
1233
1234             if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
1235             {
1236               if (is_slow_path)
1237                 {
1238                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1, ip1,
1239                       proto1, rx_fib_index1)))
1240                     goto trace01;
1241
1242                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1243                                      &s1, node, next1, thread_index);
1244                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1245                     goto trace01;
1246                 }
1247               else
1248                 {
1249                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1250                   goto trace01;
1251                 }
1252             }
1253           else
1254             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1255                                     value1.value);
1256
1257           old_addr1 = ip1->src_address.as_u32;
1258           ip1->src_address = s1->out2in.addr;
1259           new_addr1 = ip1->src_address.as_u32;
1260           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1261
1262           sum1 = ip1->checksum;
1263           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1264                                  ip4_header_t,
1265                                  src_address /* changed member */);
1266           ip1->checksum = ip_csum_fold (sum1);
1267
1268           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1269             {
1270               old_port1 = tcp1->src_port;
1271               tcp1->src_port = s1->out2in.port;
1272               new_port1 = tcp1->src_port;
1273
1274               sum1 = tcp1->checksum;
1275               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1276                                      ip4_header_t,
1277                                      dst_address /* changed member */);
1278               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1279                                      ip4_header_t /* cheat */,
1280                                      length /* changed member */);
1281               tcp1->checksum = ip_csum_fold(sum1);
1282             }
1283           else
1284             {
1285               old_port1 = udp1->src_port;
1286               udp1->src_port = s1->out2in.port;
1287               udp1->checksum = 0;
1288             }
1289
1290           /* Hairpinning */
1291           snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
1292
1293           /* Accounting */
1294           s1->last_heard = now;
1295           s1->total_pkts++;
1296           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1297           /* Per-user LRU list maintenance for dynamic translation */
1298           if (!snat_is_session_static (s1))
1299             {
1300               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1301                                  s1->per_user_index);
1302               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1303                                   s1->per_user_list_head_index,
1304                                   s1->per_user_index);
1305             }
1306         trace01:
1307
1308           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1309                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
1310             {
1311               snat_in2out_trace_t *t = 
1312                  vlib_add_trace (vm, node, b1, sizeof (*t));
1313               t->sw_if_index = sw_if_index1;
1314               t->next_index = next1;
1315               t->session_index = ~0;
1316               if (s1)
1317                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1318             }
1319
1320           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1321
1322           /* verify speculative enqueues, maybe switch current next frame */
1323           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1324                                            to_next, n_left_to_next,
1325                                            bi0, bi1, next0, next1);
1326         }
1327
1328       while (n_left_from > 0 && n_left_to_next > 0)
1329         {
1330           u32 bi0;
1331           vlib_buffer_t * b0;
1332           u32 next0;
1333           u32 sw_if_index0;
1334           ip4_header_t * ip0;
1335           ip_csum_t sum0;
1336           u32 new_addr0, old_addr0;
1337           u16 old_port0, new_port0;
1338           udp_header_t * udp0;
1339           tcp_header_t * tcp0;
1340           icmp46_header_t * icmp0;
1341           snat_session_key_t key0;
1342           u32 rx_fib_index0;
1343           u32 proto0;
1344           snat_session_t * s0 = 0;
1345           clib_bihash_kv_8_8_t kv0, value0;
1346           
1347           /* speculatively enqueue b0 to the current next frame */
1348           bi0 = from[0];
1349           to_next[0] = bi0;
1350           from += 1;
1351           to_next += 1;
1352           n_left_from -= 1;
1353           n_left_to_next -= 1;
1354
1355           b0 = vlib_get_buffer (vm, bi0);
1356           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1357
1358           ip0 = vlib_buffer_get_current (b0);
1359           udp0 = ip4_next_header (ip0);
1360           tcp0 = (tcp_header_t *) udp0;
1361           icmp0 = (icmp46_header_t *) udp0;
1362
1363           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1364           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1365                                    sw_if_index0);
1366
1367           if (PREDICT_FALSE(ip0->ttl == 1))
1368             {
1369               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1370               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1371                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1372                                            0);
1373               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1374               goto trace0;
1375             }
1376
1377           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1378
1379           /* Next configured feature, probably ip4-lookup */
1380           if (is_slow_path)
1381             {
1382               if (PREDICT_FALSE (proto0 == ~0))
1383                 goto trace0;
1384               
1385               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1386                 {
1387                   next0 = icmp_in2out_slow_path 
1388                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1389                      next0, now, thread_index, &s0);
1390                   goto trace0;
1391                 }
1392             }
1393           else
1394             {
1395               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1396                 {
1397                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1398                   goto trace0;
1399                 }
1400             }
1401
1402           key0.addr = ip0->src_address;
1403           key0.port = udp0->src_port;
1404           key0.protocol = proto0;
1405           key0.fib_index = rx_fib_index0;
1406           
1407           kv0.key = key0.as_u64;
1408
1409           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
1410             {
1411               if (is_slow_path)
1412                 {
1413                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
1414                       proto0, rx_fib_index0)))
1415                     goto trace0;
1416
1417                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1418                                      &s0, node, next0, thread_index);
1419
1420                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1421                     goto trace0;
1422                 }
1423               else
1424                 {
1425                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1426                   goto trace0;
1427                 }
1428             }
1429           else
1430             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1431                                     value0.value);
1432
1433           old_addr0 = ip0->src_address.as_u32;
1434           ip0->src_address = s0->out2in.addr;
1435           new_addr0 = ip0->src_address.as_u32;
1436           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1437
1438           sum0 = ip0->checksum;
1439           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1440                                  ip4_header_t,
1441                                  src_address /* changed member */);
1442           ip0->checksum = ip_csum_fold (sum0);
1443
1444           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1445             {
1446               old_port0 = tcp0->src_port;
1447               tcp0->src_port = s0->out2in.port;
1448               new_port0 = tcp0->src_port;
1449
1450               sum0 = tcp0->checksum;
1451               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1452                                      ip4_header_t,
1453                                      dst_address /* changed member */);
1454               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1455                                      ip4_header_t /* cheat */,
1456                                      length /* changed member */);
1457               tcp0->checksum = ip_csum_fold(sum0);
1458             }
1459           else
1460             {
1461               old_port0 = udp0->src_port;
1462               udp0->src_port = s0->out2in.port;
1463               udp0->checksum = 0;
1464             }
1465
1466           /* Hairpinning */
1467           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1468
1469           /* Accounting */
1470           s0->last_heard = now;
1471           s0->total_pkts++;
1472           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1473           /* Per-user LRU list maintenance for dynamic translation */
1474           if (!snat_is_session_static (s0))
1475             {
1476               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1477                                  s0->per_user_index);
1478               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1479                                   s0->per_user_list_head_index,
1480                                   s0->per_user_index);
1481             }
1482
1483         trace0:
1484           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1485                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1486             {
1487               snat_in2out_trace_t *t = 
1488                  vlib_add_trace (vm, node, b0, sizeof (*t));
1489               t->is_slow_path = is_slow_path;
1490               t->sw_if_index = sw_if_index0;
1491               t->next_index = next0;
1492                   t->session_index = ~0;
1493               if (s0)
1494                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1495             }
1496
1497           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1498
1499           /* verify speculative enqueue, maybe switch current next frame */
1500           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1501                                            to_next, n_left_to_next,
1502                                            bi0, next0);
1503         }
1504
1505       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1506     }
1507
1508   vlib_node_increment_counter (vm, stats_node_index, 
1509                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, 
1510                                pkts_processed);
1511   return frame->n_vectors;
1512 }
1513
1514 static uword
1515 snat_in2out_fast_path_fn (vlib_main_t * vm,
1516                           vlib_node_runtime_t * node,
1517                           vlib_frame_t * frame)
1518 {
1519   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */);
1520 }
1521
1522 VLIB_REGISTER_NODE (snat_in2out_node) = {
1523   .function = snat_in2out_fast_path_fn,
1524   .name = "snat-in2out",
1525   .vector_size = sizeof (u32),
1526   .format_trace = format_snat_in2out_trace,
1527   .type = VLIB_NODE_TYPE_INTERNAL,
1528   
1529   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1530   .error_strings = snat_in2out_error_strings,
1531
1532   .runtime_data_bytes = sizeof (snat_runtime_t),
1533   
1534   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1535
1536   /* edit / add dispositions here */
1537   .next_nodes = {
1538     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1539     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1540     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1541     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1542   },
1543 };
1544
1545 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1546
1547 static uword
1548 snat_in2out_slow_path_fn (vlib_main_t * vm,
1549                           vlib_node_runtime_t * node,
1550                           vlib_frame_t * frame)
1551 {
1552   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */);
1553 }
1554
1555 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1556   .function = snat_in2out_slow_path_fn,
1557   .name = "snat-in2out-slowpath",
1558   .vector_size = sizeof (u32),
1559   .format_trace = format_snat_in2out_trace,
1560   .type = VLIB_NODE_TYPE_INTERNAL,
1561   
1562   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1563   .error_strings = snat_in2out_error_strings,
1564
1565   .runtime_data_bytes = sizeof (snat_runtime_t),
1566   
1567   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1568
1569   /* edit / add dispositions here */
1570   .next_nodes = {
1571     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1572     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1573     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1574     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1575   },
1576 };
1577
1578 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
1579
1580 /**************************/
1581 /*** deterministic mode ***/
1582 /**************************/
1583 static uword
1584 snat_det_in2out_node_fn (vlib_main_t * vm,
1585                          vlib_node_runtime_t * node,
1586                          vlib_frame_t * frame)
1587 {
1588   u32 n_left_from, * from, * to_next;
1589   snat_in2out_next_t next_index;
1590   u32 pkts_processed = 0;
1591   snat_main_t * sm = &snat_main;
1592   u32 now = (u32) vlib_time_now (vm);
1593   u32 thread_index = vlib_get_thread_index ();
1594
1595   from = vlib_frame_vector_args (frame);
1596   n_left_from = frame->n_vectors;
1597   next_index = node->cached_next_index;
1598
1599   while (n_left_from > 0)
1600     {
1601       u32 n_left_to_next;
1602
1603       vlib_get_next_frame (vm, node, next_index,
1604                            to_next, n_left_to_next);
1605
1606       while (n_left_from >= 4 && n_left_to_next >= 2)
1607         {
1608           u32 bi0, bi1;
1609           vlib_buffer_t * b0, * b1;
1610           u32 next0, next1;
1611           u32 sw_if_index0, sw_if_index1;
1612           ip4_header_t * ip0, * ip1;
1613           ip_csum_t sum0, sum1;
1614           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
1615           u16 old_port0, new_port0, lo_port0, i0;
1616           u16 old_port1, new_port1, lo_port1, i1;
1617           udp_header_t * udp0, * udp1;
1618           tcp_header_t * tcp0, * tcp1;
1619           u32 proto0, proto1;
1620           snat_det_out_key_t key0, key1;
1621           snat_det_map_t * dm0, * dm1;
1622           snat_det_session_t * ses0 = 0, * ses1 = 0;
1623           u32 rx_fib_index0, rx_fib_index1;
1624           icmp46_header_t * icmp0, * icmp1;
1625
1626           /* Prefetch next iteration. */
1627           {
1628             vlib_buffer_t * p2, * p3;
1629
1630             p2 = vlib_get_buffer (vm, from[2]);
1631             p3 = vlib_get_buffer (vm, from[3]);
1632
1633             vlib_prefetch_buffer_header (p2, LOAD);
1634             vlib_prefetch_buffer_header (p3, LOAD);
1635
1636             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1637             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1638           }
1639
1640           /* speculatively enqueue b0 and b1 to the current next frame */
1641           to_next[0] = bi0 = from[0];
1642           to_next[1] = bi1 = from[1];
1643           from += 2;
1644           to_next += 2;
1645           n_left_from -= 2;
1646           n_left_to_next -= 2;
1647
1648           b0 = vlib_get_buffer (vm, bi0);
1649           b1 = vlib_get_buffer (vm, bi1);
1650
1651           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1652           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1653
1654           ip0 = vlib_buffer_get_current (b0);
1655           udp0 = ip4_next_header (ip0);
1656           tcp0 = (tcp_header_t *) udp0;
1657
1658           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1659
1660           if (PREDICT_FALSE(ip0->ttl == 1))
1661             {
1662               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1663               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1664                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1665                                            0);
1666               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1667               goto trace0;
1668             }
1669
1670           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1671
1672           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1673             {
1674               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1675               icmp0 = (icmp46_header_t *) udp0;
1676
1677               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
1678                                   rx_fib_index0, node, next0, thread_index,
1679                                   &ses0, &dm0);
1680               goto trace0;
1681             }
1682
1683           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
1684           if (PREDICT_FALSE(!dm0))
1685             {
1686               clib_warning("no match for internal host %U",
1687                            format_ip4_address, &ip0->src_address);
1688               next0 = SNAT_IN2OUT_NEXT_DROP;
1689               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1690               goto trace0;
1691             }
1692
1693           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
1694
1695           key0.ext_host_addr = ip0->dst_address;
1696           key0.ext_host_port = tcp0->dst;
1697
1698           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
1699           if (PREDICT_FALSE(!ses0))
1700             {
1701               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
1702                 {
1703                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
1704                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
1705
1706                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
1707                     continue;
1708
1709                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
1710                   break;
1711                 }
1712               if (PREDICT_FALSE(!ses0))
1713                 {
1714                   /* too many sessions for user, send ICMP error packet */
1715
1716                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1717                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
1718                                                ICMP4_destination_unreachable_destination_unreachable_host,
1719                                                0);
1720                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1721                   goto trace0;
1722                 }
1723             }
1724
1725           new_port0 = ses0->out.out_port;
1726
1727           old_addr0.as_u32 = ip0->src_address.as_u32;
1728           ip0->src_address.as_u32 = new_addr0.as_u32;
1729           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1730
1731           sum0 = ip0->checksum;
1732           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1733                                  ip4_header_t,
1734                                  src_address /* changed member */);
1735           ip0->checksum = ip_csum_fold (sum0);
1736
1737           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1738             {
1739               if (tcp0->flags & TCP_FLAG_SYN)
1740                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
1741               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
1742                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1743               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1744                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
1745               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
1746                 snat_det_ses_close(dm0, ses0);
1747               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1748                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
1749               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
1750                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1751
1752               old_port0 = tcp0->src;
1753               tcp0->src = new_port0;
1754
1755               sum0 = tcp0->checksum;
1756               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1757                                      ip4_header_t,
1758                                      dst_address /* changed member */);
1759               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1760                                      ip4_header_t /* cheat */,
1761                                      length /* changed member */);
1762               tcp0->checksum = ip_csum_fold(sum0);
1763             }
1764           else
1765             {
1766               ses0->state = SNAT_SESSION_UDP_ACTIVE;
1767               old_port0 = udp0->src_port;
1768               udp0->src_port = new_port0;
1769               udp0->checksum = 0;
1770             }
1771
1772           switch(ses0->state)
1773             {
1774             case SNAT_SESSION_UDP_ACTIVE:
1775                 ses0->expire = now + sm->udp_timeout;
1776                 break;
1777             case SNAT_SESSION_TCP_SYN_SENT:
1778             case SNAT_SESSION_TCP_FIN_WAIT:
1779             case SNAT_SESSION_TCP_CLOSE_WAIT:
1780             case SNAT_SESSION_TCP_LAST_ACK:
1781                 ses0->expire = now + sm->tcp_transitory_timeout;
1782                 break;
1783             case SNAT_SESSION_TCP_ESTABLISHED:
1784                 ses0->expire = now + sm->tcp_established_timeout;
1785                 break;
1786             }
1787
1788         trace0:
1789           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1790                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1791             {
1792               snat_in2out_trace_t *t =
1793                  vlib_add_trace (vm, node, b0, sizeof (*t));
1794               t->is_slow_path = 0;
1795               t->sw_if_index = sw_if_index0;
1796               t->next_index = next0;
1797               t->session_index = ~0;
1798               if (ses0)
1799                 t->session_index = ses0 - dm0->sessions;
1800             }
1801
1802           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1803
1804           ip1 = vlib_buffer_get_current (b1);
1805           udp1 = ip4_next_header (ip1);
1806           tcp1 = (tcp_header_t *) udp1;
1807
1808           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1809
1810           if (PREDICT_FALSE(ip1->ttl == 1))
1811             {
1812               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1813               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1814                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1815                                            0);
1816               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1817               goto trace1;
1818             }
1819
1820           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1821
1822           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
1823             {
1824               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
1825               icmp1 = (icmp46_header_t *) udp1;
1826
1827               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
1828                                   rx_fib_index1, node, next1, thread_index,
1829                                   &ses1, &dm1);
1830               goto trace1;
1831             }
1832
1833           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
1834           if (PREDICT_FALSE(!dm1))
1835             {
1836               clib_warning("no match for internal host %U",
1837                            format_ip4_address, &ip0->src_address);
1838               next1 = SNAT_IN2OUT_NEXT_DROP;
1839               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1840               goto trace1;
1841             }
1842
1843           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
1844
1845           key1.ext_host_addr = ip1->dst_address;
1846           key1.ext_host_port = tcp1->dst;
1847
1848           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
1849           if (PREDICT_FALSE(!ses1))
1850             {
1851               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
1852                 {
1853                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
1854                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
1855
1856                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
1857                     continue;
1858
1859                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
1860                   break;
1861                 }
1862               if (PREDICT_FALSE(!ses1))
1863                 {
1864                   /* too many sessions for user, send ICMP error packet */
1865
1866                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1867                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
1868                                                ICMP4_destination_unreachable_destination_unreachable_host,
1869                                                0);
1870                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1871                   goto trace1;
1872                 }
1873             }
1874
1875           new_port1 = ses1->out.out_port;
1876
1877           old_addr1.as_u32 = ip1->src_address.as_u32;
1878           ip1->src_address.as_u32 = new_addr1.as_u32;
1879           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1880
1881           sum1 = ip1->checksum;
1882           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1883                                  ip4_header_t,
1884                                  src_address /* changed member */);
1885           ip1->checksum = ip_csum_fold (sum1);
1886
1887           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1888             {
1889               if (tcp1->flags & TCP_FLAG_SYN)
1890                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
1891               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
1892                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
1893               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
1894                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
1895               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
1896                 snat_det_ses_close(dm1, ses1);
1897               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1898                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
1899               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
1900                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
1901
1902               old_port1 = tcp1->src;
1903               tcp1->src = new_port1;
1904
1905               sum1 = tcp1->checksum;
1906               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1907                                      ip4_header_t,
1908                                      dst_address /* changed member */);
1909               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1910                                      ip4_header_t /* cheat */,
1911                                      length /* changed member */);
1912               tcp1->checksum = ip_csum_fold(sum1);
1913             }
1914           else
1915             {
1916               ses1->state = SNAT_SESSION_UDP_ACTIVE;
1917               old_port1 = udp1->src_port;
1918               udp1->src_port = new_port1;
1919               udp1->checksum = 0;
1920             }
1921
1922           switch(ses1->state)
1923             {
1924             case SNAT_SESSION_UDP_ACTIVE:
1925                 ses1->expire = now + sm->udp_timeout;
1926                 break;
1927             case SNAT_SESSION_TCP_SYN_SENT:
1928             case SNAT_SESSION_TCP_FIN_WAIT:
1929             case SNAT_SESSION_TCP_CLOSE_WAIT:
1930             case SNAT_SESSION_TCP_LAST_ACK:
1931                 ses1->expire = now + sm->tcp_transitory_timeout;
1932                 break;
1933             case SNAT_SESSION_TCP_ESTABLISHED:
1934                 ses1->expire = now + sm->tcp_established_timeout;
1935                 break;
1936             }
1937
1938         trace1:
1939           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1940                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1941             {
1942               snat_in2out_trace_t *t =
1943                  vlib_add_trace (vm, node, b1, sizeof (*t));
1944               t->is_slow_path = 0;
1945               t->sw_if_index = sw_if_index1;
1946               t->next_index = next1;
1947               t->session_index = ~0;
1948               if (ses1)
1949                 t->session_index = ses1 - dm1->sessions;
1950             }
1951
1952           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1953
1954           /* verify speculative enqueues, maybe switch current next frame */
1955           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1956                                            to_next, n_left_to_next,
1957                                            bi0, bi1, next0, next1);
1958          }
1959
1960       while (n_left_from > 0 && n_left_to_next > 0)
1961         {
1962           u32 bi0;
1963           vlib_buffer_t * b0;
1964           u32 next0;
1965           u32 sw_if_index0;
1966           ip4_header_t * ip0;
1967           ip_csum_t sum0;
1968           ip4_address_t new_addr0, old_addr0;
1969           u16 old_port0, new_port0, lo_port0, i0;
1970           udp_header_t * udp0;
1971           tcp_header_t * tcp0;
1972           u32 proto0;
1973           snat_det_out_key_t key0;
1974           snat_det_map_t * dm0;
1975           snat_det_session_t * ses0 = 0;
1976           u32 rx_fib_index0;
1977           icmp46_header_t * icmp0;
1978
1979           /* speculatively enqueue b0 to the current next frame */
1980           bi0 = from[0];
1981           to_next[0] = bi0;
1982           from += 1;
1983           to_next += 1;
1984           n_left_from -= 1;
1985           n_left_to_next -= 1;
1986
1987           b0 = vlib_get_buffer (vm, bi0);
1988           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1989
1990           ip0 = vlib_buffer_get_current (b0);
1991           udp0 = ip4_next_header (ip0);
1992           tcp0 = (tcp_header_t *) udp0;
1993
1994           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1995
1996           if (PREDICT_FALSE(ip0->ttl == 1))
1997             {
1998               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1999               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2000                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2001                                            0);
2002               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2003               goto trace00;
2004             }
2005
2006           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2007
2008           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2009             {
2010               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2011               icmp0 = (icmp46_header_t *) udp0;
2012
2013               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2014                                   rx_fib_index0, node, next0, thread_index,
2015                                   &ses0, &dm0);
2016               goto trace00;
2017             }
2018
2019           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2020           if (PREDICT_FALSE(!dm0))
2021             {
2022               clib_warning("no match for internal host %U",
2023                            format_ip4_address, &ip0->src_address);
2024               next0 = SNAT_IN2OUT_NEXT_DROP;
2025               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2026               goto trace00;
2027             }
2028
2029           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2030
2031           key0.ext_host_addr = ip0->dst_address;
2032           key0.ext_host_port = tcp0->dst;
2033
2034           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2035           if (PREDICT_FALSE(!ses0))
2036             {
2037               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2038                 {
2039                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2040                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2041
2042                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2043                     continue;
2044
2045                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2046                   break;
2047                 }
2048               if (PREDICT_FALSE(!ses0))
2049                 {
2050                   /* too many sessions for user, send ICMP error packet */
2051
2052                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2053                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2054                                                ICMP4_destination_unreachable_destination_unreachable_host,
2055                                                0);
2056                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2057                   goto trace00;
2058                 }
2059             }
2060
2061           new_port0 = ses0->out.out_port;
2062
2063           old_addr0.as_u32 = ip0->src_address.as_u32;
2064           ip0->src_address.as_u32 = new_addr0.as_u32;
2065           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2066
2067           sum0 = ip0->checksum;
2068           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2069                                  ip4_header_t,
2070                                  src_address /* changed member */);
2071           ip0->checksum = ip_csum_fold (sum0);
2072
2073           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2074             {
2075               if (tcp0->flags & TCP_FLAG_SYN)
2076                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2077               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2078                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2079               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2080                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2081               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2082                 snat_det_ses_close(dm0, ses0);
2083               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2084                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2085               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2086                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2087
2088               old_port0 = tcp0->src;
2089               tcp0->src = new_port0;
2090
2091               sum0 = tcp0->checksum;
2092               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2093                                      ip4_header_t,
2094                                      dst_address /* changed member */);
2095               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2096                                      ip4_header_t /* cheat */,
2097                                      length /* changed member */);
2098               tcp0->checksum = ip_csum_fold(sum0);
2099             }
2100           else
2101             {
2102               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2103               old_port0 = udp0->src_port;
2104               udp0->src_port = new_port0;
2105               udp0->checksum = 0;
2106             }
2107
2108           switch(ses0->state)
2109             {
2110             case SNAT_SESSION_UDP_ACTIVE:
2111                 ses0->expire = now + sm->udp_timeout;
2112                 break;
2113             case SNAT_SESSION_TCP_SYN_SENT:
2114             case SNAT_SESSION_TCP_FIN_WAIT:
2115             case SNAT_SESSION_TCP_CLOSE_WAIT:
2116             case SNAT_SESSION_TCP_LAST_ACK:
2117                 ses0->expire = now + sm->tcp_transitory_timeout;
2118                 break;
2119             case SNAT_SESSION_TCP_ESTABLISHED:
2120                 ses0->expire = now + sm->tcp_established_timeout;
2121                 break;
2122             }
2123
2124         trace00:
2125           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2126                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2127             {
2128               snat_in2out_trace_t *t =
2129                  vlib_add_trace (vm, node, b0, sizeof (*t));
2130               t->is_slow_path = 0;
2131               t->sw_if_index = sw_if_index0;
2132               t->next_index = next0;
2133               t->session_index = ~0;
2134               if (ses0)
2135                 t->session_index = ses0 - dm0->sessions;
2136             }
2137
2138           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2139
2140           /* verify speculative enqueue, maybe switch current next frame */
2141           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2142                                            to_next, n_left_to_next,
2143                                            bi0, next0);
2144         }
2145
2146       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2147     }
2148
2149   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
2150                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2151                                pkts_processed);
2152   return frame->n_vectors;
2153 }
2154
2155 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
2156   .function = snat_det_in2out_node_fn,
2157   .name = "snat-det-in2out",
2158   .vector_size = sizeof (u32),
2159   .format_trace = format_snat_in2out_trace,
2160   .type = VLIB_NODE_TYPE_INTERNAL,
2161
2162   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2163   .error_strings = snat_in2out_error_strings,
2164
2165   .runtime_data_bytes = sizeof (snat_runtime_t),
2166
2167   .n_next_nodes = 3,
2168
2169   /* edit / add dispositions here */
2170   .next_nodes = {
2171     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2172     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2173     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2174   },
2175 };
2176
2177 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
2178
2179 /**
2180  * Get address and port values to be used for packet SNAT translation
2181  * and create session if needed
2182  *
2183  * @param[in,out] sm             SNAT main
2184  * @param[in,out] node           SNAT node runtime
2185  * @param[in] thread_index       thread index
2186  * @param[in,out] b0             buffer containing packet to be translated
2187  * @param[out] p_proto           protocol used for matching
2188  * @param[out] p_value           address and port after NAT translation
2189  * @param[out] p_dont_translate  if packet should not be translated
2190  * @param d                      optional parameter
2191  * @param e                      optional parameter
2192  */
2193 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
2194                           u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
2195                           snat_session_key_t *p_value,
2196                           u8 *p_dont_translate, void *d, void *e)
2197 {
2198   ip4_header_t *ip0;
2199   icmp46_header_t *icmp0;
2200   u32 sw_if_index0;
2201   u32 rx_fib_index0;
2202   u8 protocol;
2203   snat_det_out_key_t key0;
2204   u8 dont_translate = 0;
2205   u32 next0 = ~0;
2206   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2207   ip4_header_t *inner_ip0;
2208   void *l4_header = 0;
2209   icmp46_header_t *inner_icmp0;
2210   snat_det_map_t * dm0 = 0;
2211   ip4_address_t new_addr0;
2212   u16 lo_port0, i0;
2213   snat_det_session_t * ses0 = 0;
2214   ip4_address_t in_addr;
2215   u16 in_port;
2216
2217   ip0 = vlib_buffer_get_current (b0);
2218   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2219   echo0 = (icmp_echo_header_t *)(icmp0+1);
2220   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2221   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
2222
2223   if (!icmp_is_error_message (icmp0))
2224     {
2225       protocol = SNAT_PROTOCOL_ICMP;
2226       in_addr = ip0->src_address;
2227       in_port = echo0->identifier;
2228     }
2229   else
2230     {
2231       inner_ip0 = (ip4_header_t *)(echo0+1);
2232       l4_header = ip4_next_header (inner_ip0);
2233       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2234       in_addr = inner_ip0->dst_address;
2235       switch (protocol)
2236         {
2237         case SNAT_PROTOCOL_ICMP:
2238           inner_icmp0 = (icmp46_header_t*)l4_header;
2239           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2240           in_port = inner_echo0->identifier;
2241           break;
2242         case SNAT_PROTOCOL_UDP:
2243         case SNAT_PROTOCOL_TCP:
2244           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2245           break;
2246         default:
2247           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
2248           next0 = SNAT_IN2OUT_NEXT_DROP;
2249           goto out;
2250         }
2251     }
2252
2253   dm0 = snat_det_map_by_user(sm, &in_addr);
2254   if (PREDICT_FALSE(!dm0))
2255     {
2256       clib_warning("no match for internal host %U",
2257                    format_ip4_address, &in_addr);
2258       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2259           IP_PROTOCOL_ICMP, rx_fib_index0)))
2260         {
2261           dont_translate = 1;
2262           goto out;
2263         }
2264       next0 = SNAT_IN2OUT_NEXT_DROP;
2265       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2266       goto out;
2267     }
2268
2269   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
2270
2271   key0.ext_host_addr = ip0->dst_address;
2272   key0.ext_host_port = 0;
2273
2274   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
2275   if (PREDICT_FALSE(!ses0))
2276     {
2277       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2278           IP_PROTOCOL_ICMP, rx_fib_index0)))
2279         {
2280           dont_translate = 1;
2281           goto out;
2282         }
2283       if (icmp0->type != ICMP4_echo_request)
2284         {
2285           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2286           next0 = SNAT_IN2OUT_NEXT_DROP;
2287           goto out;
2288         }
2289       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2290         {
2291           key0.out_port = clib_host_to_net_u16 (lo_port0 +
2292             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
2293
2294           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
2295             continue;
2296
2297           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
2298           break;
2299         }
2300       if (PREDICT_FALSE(!ses0))
2301         {
2302           next0 = SNAT_IN2OUT_NEXT_DROP;
2303           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
2304           goto out;
2305         }
2306     }
2307
2308   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
2309                     !icmp_is_error_message (icmp0)))
2310     {
2311       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2312       next0 = SNAT_IN2OUT_NEXT_DROP;
2313       goto out;
2314     }
2315
2316   u32 now = (u32) vlib_time_now (sm->vlib_main);
2317
2318   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
2319   ses0->expire = now + sm->icmp_timeout;
2320
2321 out:
2322   *p_proto = protocol;
2323   if (ses0)
2324     {
2325       p_value->addr = new_addr0;
2326       p_value->fib_index = sm->outside_fib_index;
2327       p_value->port = ses0->out.out_port;
2328     }
2329   *p_dont_translate = dont_translate;
2330   if (d)
2331     *(snat_det_session_t**)d = ses0;
2332   if (e)
2333     *(snat_det_map_t**)e = dm0;
2334   return next0;
2335 }
2336
2337 /**********************/
2338 /*** worker handoff ***/
2339 /**********************/
2340 static uword
2341 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
2342                                vlib_node_runtime_t * node,
2343                                vlib_frame_t * frame)
2344 {
2345   snat_main_t *sm = &snat_main;
2346   vlib_thread_main_t *tm = vlib_get_thread_main ();
2347   u32 n_left_from, *from, *to_next = 0;
2348   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
2349   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
2350     = 0;
2351   vlib_frame_queue_elt_t *hf = 0;
2352   vlib_frame_t *f = 0;
2353   int i;
2354   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
2355   u32 next_worker_index = 0;
2356   u32 current_worker_index = ~0;
2357   u32 thread_index = vlib_get_thread_index ();
2358
2359   ASSERT (vec_len (sm->workers));
2360
2361   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
2362     {
2363       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
2364
2365       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
2366                                sm->first_worker_index + sm->num_workers - 1,
2367                                (vlib_frame_queue_t *) (~0));
2368     }
2369
2370   from = vlib_frame_vector_args (frame);
2371   n_left_from = frame->n_vectors;
2372
2373   while (n_left_from > 0)
2374     {
2375       u32 bi0;
2376       vlib_buffer_t *b0;
2377       u32 sw_if_index0;
2378       u32 rx_fib_index0;
2379       ip4_header_t * ip0;
2380       u8 do_handoff;
2381
2382       bi0 = from[0];
2383       from += 1;
2384       n_left_from -= 1;
2385
2386       b0 = vlib_get_buffer (vm, bi0);
2387
2388       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
2389       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2390
2391       ip0 = vlib_buffer_get_current (b0);
2392
2393       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
2394
2395       if (PREDICT_FALSE (next_worker_index != thread_index))
2396         {
2397           do_handoff = 1;
2398
2399           if (next_worker_index != current_worker_index)
2400             {
2401               if (hf)
2402                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2403
2404               hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
2405                                                       next_worker_index,
2406                                                       handoff_queue_elt_by_worker_index);
2407
2408               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2409               to_next_worker = &hf->buffer_index[hf->n_vectors];
2410               current_worker_index = next_worker_index;
2411             }
2412
2413           /* enqueue to correct worker thread */
2414           to_next_worker[0] = bi0;
2415           to_next_worker++;
2416           n_left_to_next_worker--;
2417
2418           if (n_left_to_next_worker == 0)
2419             {
2420               hf->n_vectors = VLIB_FRAME_SIZE;
2421               vlib_put_frame_queue_elt (hf);
2422               current_worker_index = ~0;
2423               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2424               hf = 0;
2425             }
2426         }
2427       else
2428         {
2429           do_handoff = 0;
2430           /* if this is 1st frame */
2431           if (!f)
2432             {
2433               f = vlib_get_frame_to_node (vm, sm->in2out_node_index);
2434               to_next = vlib_frame_vector_args (f);
2435             }
2436
2437           to_next[0] = bi0;
2438           to_next += 1;
2439           f->n_vectors++;
2440         }
2441
2442       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2443                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2444         {
2445           snat_in2out_worker_handoff_trace_t *t =
2446             vlib_add_trace (vm, node, b0, sizeof (*t));
2447           t->next_worker_index = next_worker_index;
2448           t->do_handoff = do_handoff;
2449         }
2450     }
2451
2452   if (f)
2453     vlib_put_frame_to_node (vm, sm->in2out_node_index, f);
2454
2455   if (hf)
2456     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2457
2458   /* Ship frames to the worker nodes */
2459   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2460     {
2461       if (handoff_queue_elt_by_worker_index[i])
2462         {
2463           hf = handoff_queue_elt_by_worker_index[i];
2464           /*
2465            * It works better to let the handoff node
2466            * rate-adapt, always ship the handoff queue element.
2467            */
2468           if (1 || hf->n_vectors == hf->last_n_vectors)
2469             {
2470               vlib_put_frame_queue_elt (hf);
2471               handoff_queue_elt_by_worker_index[i] = 0;
2472             }
2473           else
2474             hf->last_n_vectors = hf->n_vectors;
2475         }
2476       congested_handoff_queue_by_worker_index[i] =
2477         (vlib_frame_queue_t *) (~0);
2478     }
2479   hf = 0;
2480   current_worker_index = ~0;
2481   return frame->n_vectors;
2482 }
2483
2484 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
2485   .function = snat_in2out_worker_handoff_fn,
2486   .name = "snat-in2out-worker-handoff",
2487   .vector_size = sizeof (u32),
2488   .format_trace = format_snat_in2out_worker_handoff_trace,
2489   .type = VLIB_NODE_TYPE_INTERNAL,
2490   
2491   .n_next_nodes = 1,
2492
2493   .next_nodes = {
2494     [0] = "error-drop",
2495   },
2496 };
2497
2498 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
2499
2500 static uword
2501 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
2502                                 vlib_node_runtime_t * node,
2503                                 vlib_frame_t * frame)
2504 {
2505   u32 n_left_from, * from, * to_next;
2506   snat_in2out_next_t next_index;
2507   u32 pkts_processed = 0;
2508   snat_main_t * sm = &snat_main;
2509   u32 stats_node_index;
2510
2511   stats_node_index = snat_in2out_fast_node.index;
2512
2513   from = vlib_frame_vector_args (frame);
2514   n_left_from = frame->n_vectors;
2515   next_index = node->cached_next_index;
2516
2517   while (n_left_from > 0)
2518     {
2519       u32 n_left_to_next;
2520
2521       vlib_get_next_frame (vm, node, next_index,
2522                            to_next, n_left_to_next);
2523
2524       while (n_left_from > 0 && n_left_to_next > 0)
2525         {
2526           u32 bi0;
2527           vlib_buffer_t * b0;
2528           u32 next0;
2529           u32 sw_if_index0;
2530           ip4_header_t * ip0;
2531           ip_csum_t sum0;
2532           u32 new_addr0, old_addr0;
2533           u16 old_port0, new_port0;
2534           udp_header_t * udp0;
2535           tcp_header_t * tcp0;
2536           icmp46_header_t * icmp0;
2537           snat_session_key_t key0, sm0;
2538           u32 proto0;
2539           u32 rx_fib_index0;
2540
2541           /* speculatively enqueue b0 to the current next frame */
2542           bi0 = from[0];
2543           to_next[0] = bi0;
2544           from += 1;
2545           to_next += 1;
2546           n_left_from -= 1;
2547           n_left_to_next -= 1;
2548
2549           b0 = vlib_get_buffer (vm, bi0);
2550           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2551
2552           ip0 = vlib_buffer_get_current (b0);
2553           udp0 = ip4_next_header (ip0);
2554           tcp0 = (tcp_header_t *) udp0;
2555           icmp0 = (icmp46_header_t *) udp0;
2556
2557           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2558           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2559
2560           if (PREDICT_FALSE(ip0->ttl == 1))
2561             {
2562               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2563               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2564                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2565                                            0);
2566               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2567               goto trace0;
2568             }
2569
2570           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2571
2572           if (PREDICT_FALSE (proto0 == ~0))
2573               goto trace0;
2574
2575           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2576             {
2577               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2578                                   rx_fib_index0, node, next0, ~0, 0, 0);
2579               goto trace0;
2580             }
2581
2582           key0.addr = ip0->src_address;
2583           key0.port = udp0->src_port;
2584           key0.fib_index = rx_fib_index0;
2585
2586           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0))
2587             {
2588               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2589               next0= SNAT_IN2OUT_NEXT_DROP;
2590               goto trace0;
2591             }
2592
2593           new_addr0 = sm0.addr.as_u32;
2594           new_port0 = sm0.port;
2595           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2596           old_addr0 = ip0->src_address.as_u32;
2597           ip0->src_address.as_u32 = new_addr0;
2598
2599           sum0 = ip0->checksum;
2600           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2601                                  ip4_header_t,
2602                                  src_address /* changed member */);
2603           ip0->checksum = ip_csum_fold (sum0);
2604
2605           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
2606             {
2607               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2608                 {
2609                   old_port0 = tcp0->src_port;
2610                   tcp0->src_port = new_port0;
2611
2612                   sum0 = tcp0->checksum;
2613                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2614                                          ip4_header_t,
2615                                          dst_address /* changed member */);
2616                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2617                                          ip4_header_t /* cheat */,
2618                                          length /* changed member */);
2619                   tcp0->checksum = ip_csum_fold(sum0);
2620                 }
2621               else
2622                 {
2623                   old_port0 = udp0->src_port;
2624                   udp0->src_port = new_port0;
2625                   udp0->checksum = 0;
2626                 }
2627             }
2628           else
2629             {
2630               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2631                 {
2632                   sum0 = tcp0->checksum;
2633                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2634                                          ip4_header_t,
2635                                          dst_address /* changed member */);
2636                   tcp0->checksum = ip_csum_fold(sum0);
2637                 }
2638             }
2639
2640           /* Hairpinning */
2641           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
2642
2643         trace0:
2644           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2645                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2646             {
2647               snat_in2out_trace_t *t =
2648                  vlib_add_trace (vm, node, b0, sizeof (*t));
2649               t->sw_if_index = sw_if_index0;
2650               t->next_index = next0;
2651             }
2652
2653           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2654
2655           /* verify speculative enqueue, maybe switch current next frame */
2656           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2657                                            to_next, n_left_to_next,
2658                                            bi0, next0);
2659         }
2660
2661       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2662     }
2663
2664   vlib_node_increment_counter (vm, stats_node_index,
2665                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2666                                pkts_processed);
2667   return frame->n_vectors;
2668 }
2669
2670
2671 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
2672   .function = snat_in2out_fast_static_map_fn,
2673   .name = "snat-in2out-fast",
2674   .vector_size = sizeof (u32),
2675   .format_trace = format_snat_in2out_fast_trace,
2676   .type = VLIB_NODE_TYPE_INTERNAL,
2677   
2678   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2679   .error_strings = snat_in2out_error_strings,
2680
2681   .runtime_data_bytes = sizeof (snat_runtime_t),
2682   
2683   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2684
2685   /* edit / add dispositions here */
2686   .next_nodes = {
2687     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2688     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2689     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
2690     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2691   },
2692 };
2693
2694 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);