CGN: Send ICMP error packet if user is out of sessions available
[vpp.git] / src / plugins / snat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <snat/snat.h>
25 #include <snat/snat_ipfix_logging.h>
26 #include <snat/snat_det.h>
27
28 #include <vppinfra/hash.h>
29 #include <vppinfra/error.h>
30 #include <vppinfra/elog.h>
31
/* Trace record rendered by format_snat_in2out_trace() and
 * format_snat_in2out_fast_trace(). */
typedef struct {
  /* printed as "sw_if_index" */
  u32 sw_if_index;
  /* printed as "next index" */
  u32 next_index;
  /* printed as "session"; the fast trace formatter does not print it */
  u32 session_index;
  /* non-zero selects the SLOW_PATH tag, zero the FAST_PATH tag */
  u32 is_slow_path;
} snat_in2out_trace_t;
38
/* Trace record rendered by format_snat_in2out_worker_handoff_trace(). */
typedef struct {
  /* worker index printed after the "next worker"/"same worker" label */
  u32 next_worker_index;
  /* non-zero prints "next worker", zero prints "same worker" */
  u8 do_handoff;
} snat_in2out_worker_handoff_trace_t;
43
44 /* packet trace format function */
45 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
46 {
47   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
50   char * tag;
51
52   tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
53   
54   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
55               t->sw_if_index, t->next_index, t->session_index);
56
57   return s;
58 }
59
60 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
61 {
62   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
63   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
64   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
65
66   s = format (s, "SANT_IN2OUT_FAST: sw_if_index %d, next index %d", 
67               t->sw_if_index, t->next_index);
68
69   return s;
70 }
71
72 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
73 {
74   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
75   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
76   snat_in2out_worker_handoff_trace_t * t =
77     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
78   char * m;
79
80   m = t->do_handoff ? "next worker" : "same worker";
81   s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
82
83   return s;
84 }
85
/*
 * Node registrations declared up front so code in this file can refer to
 * them (e.g. snat_in2out_node_fn_inline reads the .index of
 * snat_in2out_node and snat_in2out_slowpath_node).
 */
vlib_node_registration_t snat_in2out_node;
vlib_node_registration_t snat_in2out_slowpath_node;
vlib_node_registration_t snat_in2out_fast_node;
vlib_node_registration_t snat_in2out_worker_handoff_node;
vlib_node_registration_t snat_det_in2out_node;
91
/*
 * X-macro list of in2out errors: _(symbol, description).
 * It is expanded twice below, once into the snat_in2out_error_t enum and
 * once into snat_in2out_error_strings[], so both keep the same order.
 * The enum values are used in this file as indices into node->errors[].
 */
#define foreach_snat_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(IN2OUT_PACKETS, "Good in2out packets processed")      \
_(OUT_OF_PORTS, "Out of ports")                         \
_(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(NO_TRANSLATION, "No translation")
  
/* Error codes: SNAT_IN2OUT_ERROR_<symbol>, terminated by the count. */
typedef enum {
#define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
  foreach_snat_in2out_error
#undef _
  SNAT_IN2OUT_N_ERROR,
} snat_in2out_error_t;

/* Human-readable descriptions, indexed by snat_in2out_error_t. */
static char * snat_in2out_error_strings[] = {
#define _(sym,string) string,
  foreach_snat_in2out_error
#undef _
};
112
/*
 * Next-node indices used by the in2out code in this file.
 * NOTE(review): the node each index maps to is set in the node
 * registrations, which are outside this part of the file.
 */
typedef enum {
  SNAT_IN2OUT_NEXT_LOOKUP,
  /* returned by the helpers in this file whenever a packet must be dropped */
  SNAT_IN2OUT_NEXT_DROP,
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  /* number of next indices; keep last */
  SNAT_IN2OUT_N_NEXT,
} snat_in2out_next_t;
120
121 /**
122  * @brief Check if packet should be translated
123  *
124  * Packets aimed at outside interface and external addresss with active session
125  * should be translated.
126  *
127  * @param sm            SNAT main
128  * @param rt            SNAT runtime data
129  * @param sw_if_index0  index of the inside interface
130  * @param ip0           IPv4 header
131  * @param proto0        SNAT protocol
132  * @param rx_fib_index0 RX FIB index
133  *
134  * @returns 0 if packet should be translated otherwise 1
135  */
136 static inline int
137 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
138                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
139                          u32 rx_fib_index0)
140 {
141   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
142   fib_prefix_t pfx = {
143     .fp_proto = FIB_PROTOCOL_IP4,
144     .fp_len = 32,
145     .fp_addr = {
146         .ip4.as_u32 = ip0->dst_address.as_u32,
147     },
148   };
149
150   /* Don't NAT packet aimed at the intfc address */
151   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
152                                       ip0->dst_address.as_u32)))
153     return 1;
154
155   fei = fib_table_lookup (rx_fib_index0, &pfx);
156   if (FIB_NODE_INDEX_INVALID != fei)
157     {
158       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
159       if (sw_if_index == ~0)
160         {
161           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
162           if (FIB_NODE_INDEX_INVALID != fei)
163             sw_if_index = fib_entry_get_resolving_interface (fei);
164         }
165       snat_interface_t *i;
166       pool_foreach (i, sm->interfaces,
167       ({
168         /* NAT packet aimed at outside interface */
169         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
170           return 0;
171       }));
172     }
173
174   return 1;
175 }
176
177 static inline int
178 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
179                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
180                     u32 rx_fib_index0)
181 {
182   udp_header_t * udp0 = ip4_next_header (ip0);
183   snat_session_key_t key0, sm0;
184   clib_bihash_kv_8_8_t kv0, value0;
185
186   key0.addr = ip0->dst_address;
187   key0.port = udp0->dst_port;
188   key0.protocol = proto0;
189   key0.fib_index = sm->outside_fib_index;
190   kv0.key = key0.as_u64;
191
192   /* NAT packet aimed at external address if */
193   /* has active sessions */
194   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
195     {
196       /* or is static mappings */
197       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
198         return 0;
199     }
200   else
201     return 0;
202
203   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
204                                  rx_fib_index0);
205 }
206
207 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
208                       ip4_header_t * ip0,
209                       u32 rx_fib_index0,
210                       snat_session_key_t * key0,
211                       snat_session_t ** sessionp,
212                       vlib_node_runtime_t * node,
213                       u32 next0,
214                       u32 thread_index)
215 {
216   snat_user_t *u;
217   snat_user_key_t user_key;
218   snat_session_t *s;
219   clib_bihash_kv_8_8_t kv0, value0;
220   u32 oldest_per_user_translation_list_index;
221   dlist_elt_t * oldest_per_user_translation_list_elt;
222   dlist_elt_t * per_user_translation_list_elt;
223   dlist_elt_t * per_user_list_head_elt;
224   u32 session_index;
225   snat_session_key_t key1;
226   u32 address_index = ~0;
227   u32 outside_fib_index;
228   uword * p;
229   snat_worker_key_t worker_by_out_key;
230
231   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
232   if (! p)
233     {
234       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
235       return SNAT_IN2OUT_NEXT_DROP;
236     }
237   outside_fib_index = p[0];
238
239   key1.protocol = key0->protocol;
240   user_key.addr = ip0->src_address;
241   user_key.fib_index = rx_fib_index0;
242   kv0.key = user_key.as_u64;
243   
244   /* Ever heard of the "user" = src ip4 address before? */
245   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
246     {
247       /* no, make a new one */
248       pool_get (sm->per_thread_data[thread_index].users, u);
249       memset (u, 0, sizeof (*u));
250       u->addr = ip0->src_address;
251       u->fib_index = rx_fib_index0;
252
253       pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);
254
255       u->sessions_per_user_list_head_index = per_user_list_head_elt -
256         sm->per_thread_data[thread_index].list_pool;
257
258       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
259                        u->sessions_per_user_list_head_index);
260
261       kv0.value = u - sm->per_thread_data[thread_index].users;
262
263       /* add user */
264       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
265     }
266   else
267     {
268       u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
269                              value0.value);
270     }
271
272   /* Over quota? Recycle the least recently used dynamic translation */
273   if (u->nsessions >= sm->max_translations_per_user)
274     {
275       /* Remove the oldest dynamic translation */
276       do {
277           oldest_per_user_translation_list_index =
278             clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
279                                     u->sessions_per_user_list_head_index);
280
281           ASSERT (oldest_per_user_translation_list_index != ~0);
282
283           /* add it back to the end of the LRU list */
284           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
285                               u->sessions_per_user_list_head_index,
286                               oldest_per_user_translation_list_index);
287           /* Get the list element */
288           oldest_per_user_translation_list_elt =
289             pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
290                                oldest_per_user_translation_list_index);
291
292           /* Get the session index from the list element */
293           session_index = oldest_per_user_translation_list_elt->value;
294
295           /* Get the session */
296           s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
297                                  session_index);
298       } while (snat_is_session_static (s));
299
300       /* Remove in2out, out2in keys */
301       kv0.key = s->in2out.as_u64;
302       if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
303           clib_warning ("in2out key delete failed");
304       kv0.key = s->out2in.as_u64;
305       if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
306           clib_warning ("out2in key delete failed");
307
308       /* log NAT event */
309       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
310                                           s->out2in.addr.as_u32,
311                                           s->in2out.protocol,
312                                           s->in2out.port,
313                                           s->out2in.port,
314                                           s->in2out.fib_index);
315
316       snat_free_outside_address_and_port 
317         (sm, &s->out2in, s->outside_address_index);
318       s->outside_address_index = ~0;
319
320       if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
321                                                &address_index))
322         {
323           ASSERT(0);
324
325           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
326           return SNAT_IN2OUT_NEXT_DROP;
327         }
328       s->outside_address_index = address_index;
329     }
330   else
331     {
332       u8 static_mapping = 1;
333
334       /* First try to match static mapping by local address and port */
335       if (snat_static_mapping_match (sm, *key0, &key1, 0, 0))
336         {
337           static_mapping = 0;
338           /* Try to create dynamic translation */
339           if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
340                                                    &address_index))
341             {
342               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
343               return SNAT_IN2OUT_NEXT_DROP;
344             }
345         }
346
347       /* Create a new session */
348       pool_get (sm->per_thread_data[thread_index].sessions, s);
349       memset (s, 0, sizeof (*s));
350       
351       s->outside_address_index = address_index;
352
353       if (static_mapping)
354         {
355           u->nstaticsessions++;
356           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
357         }
358       else
359         {
360           u->nsessions++;
361         }
362
363       /* Create list elts */
364       pool_get (sm->per_thread_data[thread_index].list_pool,
365                 per_user_translation_list_elt);
366       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
367                        per_user_translation_list_elt -
368                        sm->per_thread_data[thread_index].list_pool);
369
370       per_user_translation_list_elt->value =
371         s - sm->per_thread_data[thread_index].sessions;
372       s->per_user_index = per_user_translation_list_elt -
373                           sm->per_thread_data[thread_index].list_pool;
374       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
375
376       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
377                           s->per_user_list_head_index,
378                           per_user_translation_list_elt -
379                           sm->per_thread_data[thread_index].list_pool);
380    }
381   
382   s->in2out = *key0;
383   s->out2in = key1;
384   s->out2in.protocol = key0->protocol;
385   s->out2in.fib_index = outside_fib_index;
386   *sessionp = s;
387
388   /* Add to translation hashes */
389   kv0.key = s->in2out.as_u64;
390   kv0.value = s - sm->per_thread_data[thread_index].sessions;
391   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
392       clib_warning ("in2out key add failed");
393   
394   kv0.key = s->out2in.as_u64;
395   kv0.value = s - sm->per_thread_data[thread_index].sessions;
396   
397   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
398       clib_warning ("out2in key add failed");
399
400   /* Add to translated packets worker lookup */
401   worker_by_out_key.addr = s->out2in.addr;
402   worker_by_out_key.port = s->out2in.port;
403   worker_by_out_key.fib_index = s->out2in.fib_index;
404   kv0.key = worker_by_out_key.as_u64;
405   kv0.value = thread_index;
406   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
407
408   /* log NAT event */
409   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
410                                       s->out2in.addr.as_u32,
411                                       s->in2out.protocol,
412                                       s->in2out.port,
413                                       s->out2in.port,
414                                       s->in2out.fib_index);
415   return next0;
416 }
417
418 static_always_inline
419 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
420                                  snat_session_key_t *p_key0)
421 {
422   icmp46_header_t *icmp0;
423   snat_session_key_t key0;
424   icmp_echo_header_t *echo0, *inner_echo0 = 0;
425   ip4_header_t *inner_ip0 = 0;
426   void *l4_header = 0;
427   icmp46_header_t *inner_icmp0;
428
429   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
430   echo0 = (icmp_echo_header_t *)(icmp0+1);
431
432   if (!icmp_is_error_message (icmp0))
433     {
434       key0.protocol = SNAT_PROTOCOL_ICMP;
435       key0.addr = ip0->src_address;
436       key0.port = echo0->identifier;
437     }
438   else
439     {
440       inner_ip0 = (ip4_header_t *)(echo0+1);
441       l4_header = ip4_next_header (inner_ip0);
442       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
443       key0.addr = inner_ip0->dst_address;
444       switch (key0.protocol)
445         {
446         case SNAT_PROTOCOL_ICMP:
447           inner_icmp0 = (icmp46_header_t*)l4_header;
448           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
449           key0.port = inner_echo0->identifier;
450           break;
451         case SNAT_PROTOCOL_UDP:
452         case SNAT_PROTOCOL_TCP:
453           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
454           break;
455         default:
456           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
457         }
458     }
459   *p_key0 = key0;
460   return -1; /* success */
461 }
462
463 /**
464  * Get address and port values to be used for packet SNAT translation
465  * and create session if needed
466  *
467  * @param[in,out] sm             SNAT main
468  * @param[in,out] node           SNAT node runtime
469  * @param[in] thread_index       thread index
470  * @param[in,out] b0             buffer containing packet to be translated
471  * @param[out] p_proto           protocol used for matching
472  * @param[out] p_value           address and port after NAT translation
473  * @param[out] p_dont_translate  if packet should not be translated
474  * @param d                      optional parameter
475  * @param e                      optional parameter
476  */
477 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
478                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
479                            snat_session_key_t *p_value,
480                            u8 *p_dont_translate, void *d, void *e)
481 {
482   ip4_header_t *ip0;
483   icmp46_header_t *icmp0;
484   u32 sw_if_index0;
485   u32 rx_fib_index0;
486   snat_session_key_t key0;
487   snat_session_t *s0 = 0;
488   u8 dont_translate = 0;
489   clib_bihash_kv_8_8_t kv0, value0;
490   u32 next0 = ~0;
491   int err;
492
493   ip0 = vlib_buffer_get_current (b0);
494   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
495   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
496   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
497
498   err = icmp_get_key (ip0, &key0);
499   if (err != -1)
500     {
501       b0->error = node->errors[err];
502       next0 = SNAT_IN2OUT_NEXT_DROP;
503       goto out;
504     }
505   key0.fib_index = rx_fib_index0;
506
507   kv0.key = key0.as_u64;
508
509   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
510     {
511       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
512           IP_PROTOCOL_ICMP, rx_fib_index0)))
513         {
514           dont_translate = 1;
515           goto out;
516         }
517
518       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request))
519         {
520           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
521           next0 = SNAT_IN2OUT_NEXT_DROP;
522           goto out;
523         }
524
525       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
526                          &s0, node, next0, thread_index);
527
528       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
529         goto out;
530     }
531   else
532     {
533       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
534                         icmp0->type != ICMP4_echo_reply &&
535                         !icmp_is_error_message (icmp0)))
536         {
537           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
538           next0 = SNAT_IN2OUT_NEXT_DROP;
539           goto out;
540         }
541
542       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
543                               value0.value);
544     }
545
546 out:
547   *p_proto = key0.protocol;
548   if (s0)
549     *p_value = s0->out2in;
550   *p_dont_translate = dont_translate;
551   if (d)
552     *(snat_session_t**)d = s0;
553   return next0;
554 }
555
556 /**
557  * Get address and port values to be used for packet SNAT translation
558  *
559  * @param[in] sm                 SNAT main
560  * @param[in,out] node           SNAT node runtime
561  * @param[in] thread_index       thread index
562  * @param[in,out] b0             buffer containing packet to be translated
563  * @param[out] p_proto           protocol used for matching
564  * @param[out] p_value           address and port after NAT translation
565  * @param[out] p_dont_translate  if packet should not be translated
566  * @param d                      optional parameter
567  * @param e                      optional parameter
568  */
569 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
570                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
571                            snat_session_key_t *p_value,
572                            u8 *p_dont_translate, void *d, void *e)
573 {
574   ip4_header_t *ip0;
575   icmp46_header_t *icmp0;
576   u32 sw_if_index0;
577   u32 rx_fib_index0;
578   snat_session_key_t key0;
579   snat_session_key_t sm0;
580   u8 dont_translate = 0;
581   u8 is_addr_only;
582   u32 next0 = ~0;
583   int err;
584
585   ip0 = vlib_buffer_get_current (b0);
586   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
587   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
588   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
589
590   err = icmp_get_key (ip0, &key0);
591   if (err != -1)
592     {
593       b0->error = node->errors[err];
594       next0 = SNAT_IN2OUT_NEXT_DROP;
595       goto out2;
596     }
597   key0.fib_index = rx_fib_index0;
598
599   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only))
600     {
601       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
602           IP_PROTOCOL_ICMP, rx_fib_index0)))
603         {
604           dont_translate = 1;
605           goto out;
606         }
607
608       if (icmp_is_error_message (icmp0))
609         {
610           next0 = SNAT_IN2OUT_NEXT_DROP;
611           goto out;
612         }
613
614       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
615       next0 = SNAT_IN2OUT_NEXT_DROP;
616       goto out;
617     }
618
619   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
620                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
621                     !icmp_is_error_message (icmp0)))
622     {
623       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
624       next0 = SNAT_IN2OUT_NEXT_DROP;
625       goto out;
626     }
627
628 out:
629   *p_value = sm0;
630 out2:
631   *p_proto = key0.protocol;
632   *p_dont_translate = dont_translate;
633   return next0;
634 }
635
636 static inline u32 icmp_in2out (snat_main_t *sm,
637                                vlib_buffer_t * b0,
638                                ip4_header_t * ip0,
639                                icmp46_header_t * icmp0,
640                                u32 sw_if_index0,
641                                u32 rx_fib_index0,
642                                vlib_node_runtime_t * node,
643                                u32 next0,
644                                u32 thread_index,
645                                void *d,
646                                void *e)
647 {
648   snat_session_key_t sm0;
649   u8 protocol;
650   icmp_echo_header_t *echo0, *inner_echo0 = 0;
651   ip4_header_t *inner_ip0;
652   void *l4_header = 0;
653   icmp46_header_t *inner_icmp0;
654   u8 dont_translate;
655   u32 new_addr0, old_addr0;
656   u16 old_id0, new_id0;
657   ip_csum_t sum0;
658   u16 checksum0;
659   u32 next0_tmp;
660
661   echo0 = (icmp_echo_header_t *)(icmp0+1);
662
663   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0,
664                                        &protocol, &sm0, &dont_translate, d, e);
665   if (next0_tmp != ~0)
666     next0 = next0_tmp;
667   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
668     goto out;
669
670   sum0 = ip_incremental_checksum (0, icmp0,
671                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
672   checksum0 = ~ip_csum_fold (sum0);
673   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
674     {
675       next0 = SNAT_IN2OUT_NEXT_DROP;
676       goto out;
677     }
678
679   old_addr0 = ip0->src_address.as_u32;
680   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
681   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
682
683   sum0 = ip0->checksum;
684   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
685                          src_address /* changed member */);
686   ip0->checksum = ip_csum_fold (sum0);
687   
688   if (!icmp_is_error_message (icmp0))
689     {
690       new_id0 = sm0.port;
691       if (PREDICT_FALSE(new_id0 != echo0->identifier))
692         {
693           old_id0 = echo0->identifier;
694           new_id0 = sm0.port;
695           echo0->identifier = new_id0;
696
697           sum0 = icmp0->checksum;
698           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
699                                  identifier);
700           icmp0->checksum = ip_csum_fold (sum0);
701         }
702     }
703   else
704     {
705       inner_ip0 = (ip4_header_t *)(echo0+1);
706       l4_header = ip4_next_header (inner_ip0);
707
708       if (!ip4_header_checksum_is_valid (inner_ip0))
709         {
710           next0 = SNAT_IN2OUT_NEXT_DROP;
711           goto out;
712         }
713
714       old_addr0 = inner_ip0->dst_address.as_u32;
715       inner_ip0->dst_address = sm0.addr;
716       new_addr0 = inner_ip0->dst_address.as_u32;
717
718       sum0 = icmp0->checksum;
719       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
720                              dst_address /* changed member */);
721       icmp0->checksum = ip_csum_fold (sum0);
722
723       switch (protocol)
724         {
725           case SNAT_PROTOCOL_ICMP:
726             inner_icmp0 = (icmp46_header_t*)l4_header;
727             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
728
729             old_id0 = inner_echo0->identifier;
730             new_id0 = sm0.port;
731             inner_echo0->identifier = new_id0;
732
733             sum0 = icmp0->checksum;
734             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
735                                    identifier);
736             icmp0->checksum = ip_csum_fold (sum0);
737             break;
738           case SNAT_PROTOCOL_UDP:
739           case SNAT_PROTOCOL_TCP:
740             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
741             new_id0 = sm0.port;
742             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
743
744             sum0 = icmp0->checksum;
745             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
746                                    dst_port);
747             icmp0->checksum = ip_csum_fold (sum0);
748             break;
749           default:
750             ASSERT(0);
751         }
752     }
753
754 out:
755   return next0;
756 }
757
758 /**
759  * @brief Hairpinning
760  *
761  * Hairpinning allows two endpoints on the internal side of the NAT to
762  * communicate even if they only use each other's external IP addresses
763  * and ports.
764  *
765  * @param sm     SNAT main.
766  * @param b0     Vlib buffer.
767  * @param ip0    IP header.
768  * @param udp0   UDP header.
769  * @param tcp0   TCP header.
770  * @param proto0 SNAT protocol.
771  */
772 static inline void
773 snat_hairpinning (snat_main_t *sm,
774                   vlib_buffer_t * b0,
775                   ip4_header_t * ip0,
776                   udp_header_t * udp0,
777                   tcp_header_t * tcp0,
778                   u32 proto0)
779 {
780   snat_session_key_t key0, sm0;
781   snat_worker_key_t k0;
782   snat_session_t * s0;
783   clib_bihash_kv_8_8_t kv0, value0;
784   ip_csum_t sum0;
785   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
786   u16 new_dst_port0, old_dst_port0;
787
788   key0.addr = ip0->dst_address;
789   key0.port = udp0->dst_port;
790   key0.protocol = proto0;
791   key0.fib_index = sm->outside_fib_index;
792   kv0.key = key0.as_u64;
793
794   /* Check if destination is in active sessions */
795   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
796     {
797       /* or static mappings */
798       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
799         {
800           new_dst_addr0 = sm0.addr.as_u32;
801           new_dst_port0 = sm0.port;
802           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
803         }
804     }
805   else
806     {
807       si = value0.value;
808       if (sm->num_workers > 1)
809         {
810           k0.addr = ip0->dst_address;
811           k0.port = udp0->dst_port;
812           k0.fib_index = sm->outside_fib_index;
813           kv0.key = k0.as_u64;
814           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
815             ASSERT(0);
816           else
817             ti = value0.value;
818         }
819       else
820         ti = sm->num_workers;
821
822       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
823       new_dst_addr0 = s0->in2out.addr.as_u32;
824       new_dst_port0 = s0->in2out.port;
825       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
826     }
827
828   /* Destination is behind the same NAT, use internal address and port */
829   if (new_dst_addr0)
830     {
831       old_dst_addr0 = ip0->dst_address.as_u32;
832       ip0->dst_address.as_u32 = new_dst_addr0;
833       sum0 = ip0->checksum;
834       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
835                              ip4_header_t, dst_address);
836       ip0->checksum = ip_csum_fold (sum0);
837
838       old_dst_port0 = tcp0->dst;
839       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
840         {
841           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
842             {
843               tcp0->dst = new_dst_port0;
844               sum0 = tcp0->checksum;
845               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
846                                      ip4_header_t, dst_address);
847               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
848                                      ip4_header_t /* cheat */, length);
849               tcp0->checksum = ip_csum_fold(sum0);
850             }
851           else
852             {
853               udp0->dst_port = new_dst_port0;
854               udp0->checksum = 0;
855             }
856         }
857     }
858 }
859
860 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
861                                          vlib_buffer_t * b0,
862                                          ip4_header_t * ip0,
863                                          icmp46_header_t * icmp0,
864                                          u32 sw_if_index0,
865                                          u32 rx_fib_index0,
866                                          vlib_node_runtime_t * node,
867                                          u32 next0,
868                                          f64 now,
869                                          u32 thread_index,
870                                          snat_session_t ** p_s0)
871 {
872   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
873                       next0, thread_index, p_s0, 0);
874   snat_session_t * s0 = *p_s0;
875   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
876     {
877       /* Accounting */
878       s0->last_heard = now;
879       s0->total_pkts++;
880       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
881       /* Per-user LRU list maintenance for dynamic translations */
882       if (!snat_is_session_static (s0))
883         {
884           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
885                              s0->per_user_index);
886           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
887                               s0->per_user_list_head_index,
888                               s0->per_user_index);
889         }
890     }
891   return next0;
892 }
893
894 static inline uword
895 snat_in2out_node_fn_inline (vlib_main_t * vm,
896                             vlib_node_runtime_t * node,
897                             vlib_frame_t * frame, int is_slow_path)
898 {
899   u32 n_left_from, * from, * to_next;
900   snat_in2out_next_t next_index;
901   u32 pkts_processed = 0;
902   snat_main_t * sm = &snat_main;
903   f64 now = vlib_time_now (vm);
904   u32 stats_node_index;
905   u32 thread_index = vlib_get_thread_index ();
906
907   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
908     snat_in2out_node.index;
909
910   from = vlib_frame_vector_args (frame);
911   n_left_from = frame->n_vectors;
912   next_index = node->cached_next_index;
913
914   while (n_left_from > 0)
915     {
916       u32 n_left_to_next;
917
918       vlib_get_next_frame (vm, node, next_index,
919                            to_next, n_left_to_next);
920
921       while (n_left_from >= 4 && n_left_to_next >= 2)
922         {
923           u32 bi0, bi1;
924           vlib_buffer_t * b0, * b1;
925           u32 next0, next1;
926           u32 sw_if_index0, sw_if_index1;
927           ip4_header_t * ip0, * ip1;
928           ip_csum_t sum0, sum1;
929           u32 new_addr0, old_addr0, new_addr1, old_addr1;
930           u16 old_port0, new_port0, old_port1, new_port1;
931           udp_header_t * udp0, * udp1;
932           tcp_header_t * tcp0, * tcp1;
933           icmp46_header_t * icmp0, * icmp1;
934           snat_session_key_t key0, key1;
935           u32 rx_fib_index0, rx_fib_index1;
936           u32 proto0, proto1;
937           snat_session_t * s0 = 0, * s1 = 0;
938           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
939           
940           /* Prefetch next iteration. */
941           {
942             vlib_buffer_t * p2, * p3;
943             
944             p2 = vlib_get_buffer (vm, from[2]);
945             p3 = vlib_get_buffer (vm, from[3]);
946             
947             vlib_prefetch_buffer_header (p2, LOAD);
948             vlib_prefetch_buffer_header (p3, LOAD);
949
950             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
951             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
952           }
953
954           /* speculatively enqueue b0 and b1 to the current next frame */
955           to_next[0] = bi0 = from[0];
956           to_next[1] = bi1 = from[1];
957           from += 2;
958           to_next += 2;
959           n_left_from -= 2;
960           n_left_to_next -= 2;
961           
962           b0 = vlib_get_buffer (vm, bi0);
963           b1 = vlib_get_buffer (vm, bi1);
964
965           ip0 = vlib_buffer_get_current (b0);
966           udp0 = ip4_next_header (ip0);
967           tcp0 = (tcp_header_t *) udp0;
968           icmp0 = (icmp46_header_t *) udp0;
969
970           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
971           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
972                                    sw_if_index0);
973
974           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
975
976           if (PREDICT_FALSE(ip0->ttl == 1))
977             {
978               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
979               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
980                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
981                                            0);
982               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
983               goto trace00;
984             }
985
986           proto0 = ip_proto_to_snat_proto (ip0->protocol);
987
988           /* Next configured feature, probably ip4-lookup */
989           if (is_slow_path)
990             {
991               if (PREDICT_FALSE (proto0 == ~0))
992                 goto trace00;
993               
994               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
995                 {
996                   next0 = icmp_in2out_slow_path 
997                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
998                      node, next0, now, thread_index, &s0);
999                   goto trace00;
1000                 }
1001             }
1002           else
1003             {
1004               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1005                 {
1006                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1007                   goto trace00;
1008                 }
1009             }
1010
1011           key0.addr = ip0->src_address;
1012           key0.port = udp0->src_port;
1013           key0.protocol = proto0;
1014           key0.fib_index = rx_fib_index0;
1015           
1016           kv0.key = key0.as_u64;
1017
1018           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
1019             {
1020               if (is_slow_path)
1021                 {
1022                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
1023                       proto0, rx_fib_index0)))
1024                     goto trace00;
1025
1026                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1027                                      &s0, node, next0, thread_index);
1028                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1029                     goto trace00;
1030                 }
1031               else
1032                 {
1033                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1034                   goto trace00;
1035                 }
1036             }
1037           else
1038             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1039                                     value0.value);
1040
1041           old_addr0 = ip0->src_address.as_u32;
1042           ip0->src_address = s0->out2in.addr;
1043           new_addr0 = ip0->src_address.as_u32;
1044           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1045
1046           sum0 = ip0->checksum;
1047           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1048                                  ip4_header_t,
1049                                  src_address /* changed member */);
1050           ip0->checksum = ip_csum_fold (sum0);
1051
1052           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1053             {
1054               old_port0 = tcp0->src_port;
1055               tcp0->src_port = s0->out2in.port;
1056               new_port0 = tcp0->src_port;
1057
1058               sum0 = tcp0->checksum;
1059               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1060                                      ip4_header_t,
1061                                      dst_address /* changed member */);
1062               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1063                                      ip4_header_t /* cheat */,
1064                                      length /* changed member */);
1065               tcp0->checksum = ip_csum_fold(sum0);
1066             }
1067           else
1068             {
1069               old_port0 = udp0->src_port;
1070               udp0->src_port = s0->out2in.port;
1071               udp0->checksum = 0;
1072             }
1073
1074           /* Hairpinning */
1075           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1076
1077           /* Accounting */
1078           s0->last_heard = now;
1079           s0->total_pkts++;
1080           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1081           /* Per-user LRU list maintenance for dynamic translation */
1082           if (!snat_is_session_static (s0))
1083             {
1084               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1085                                  s0->per_user_index);
1086               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1087                                   s0->per_user_list_head_index,
1088                                   s0->per_user_index);
1089             }
1090         trace00:
1091
1092           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1093                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1094             {
1095               snat_in2out_trace_t *t = 
1096                  vlib_add_trace (vm, node, b0, sizeof (*t));
1097               t->is_slow_path = is_slow_path;
1098               t->sw_if_index = sw_if_index0;
1099               t->next_index = next0;
1100                   t->session_index = ~0;
1101               if (s0)
1102                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1103             }
1104
1105           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1106
1107           ip1 = vlib_buffer_get_current (b1);
1108           udp1 = ip4_next_header (ip1);
1109           tcp1 = (tcp_header_t *) udp1;
1110           icmp1 = (icmp46_header_t *) udp1;
1111
1112           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1113           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1114                                    sw_if_index1);
1115
1116           if (PREDICT_FALSE(ip1->ttl == 1))
1117             {
1118               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1119               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1120                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1121                                            0);
1122               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1123               goto trace01;
1124             }
1125
1126           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1127
1128           /* Next configured feature, probably ip4-lookup */
1129           if (is_slow_path)
1130             {
1131               if (PREDICT_FALSE (proto1 == ~0))
1132                 goto trace01;
1133               
1134               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1135                 {
1136                   next1 = icmp_in2out_slow_path 
1137                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1138                      next1, now, thread_index, &s1);
1139                   goto trace01;
1140                 }
1141             }
1142           else
1143             {
1144               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1145                 {
1146                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1147                   goto trace01;
1148                 }
1149             }
1150
1151           key1.addr = ip1->src_address;
1152           key1.port = udp1->src_port;
1153           key1.protocol = proto1;
1154           key1.fib_index = rx_fib_index1;
1155           
1156           kv1.key = key1.as_u64;
1157
1158             if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
1159             {
1160               if (is_slow_path)
1161                 {
1162                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1, ip1,
1163                       proto1, rx_fib_index1)))
1164                     goto trace01;
1165
1166                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1167                                      &s1, node, next1, thread_index);
1168                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1169                     goto trace01;
1170                 }
1171               else
1172                 {
1173                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1174                   goto trace01;
1175                 }
1176             }
1177           else
1178             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1179                                     value1.value);
1180
1181           old_addr1 = ip1->src_address.as_u32;
1182           ip1->src_address = s1->out2in.addr;
1183           new_addr1 = ip1->src_address.as_u32;
1184           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1185
1186           sum1 = ip1->checksum;
1187           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1188                                  ip4_header_t,
1189                                  src_address /* changed member */);
1190           ip1->checksum = ip_csum_fold (sum1);
1191
1192           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1193             {
1194               old_port1 = tcp1->src_port;
1195               tcp1->src_port = s1->out2in.port;
1196               new_port1 = tcp1->src_port;
1197
1198               sum1 = tcp1->checksum;
1199               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1200                                      ip4_header_t,
1201                                      dst_address /* changed member */);
1202               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1203                                      ip4_header_t /* cheat */,
1204                                      length /* changed member */);
1205               tcp1->checksum = ip_csum_fold(sum1);
1206             }
1207           else
1208             {
1209               old_port1 = udp1->src_port;
1210               udp1->src_port = s1->out2in.port;
1211               udp1->checksum = 0;
1212             }
1213
1214           /* Hairpinning */
1215           snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
1216
1217           /* Accounting */
1218           s1->last_heard = now;
1219           s1->total_pkts++;
1220           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1221           /* Per-user LRU list maintenance for dynamic translation */
1222           if (!snat_is_session_static (s1))
1223             {
1224               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1225                                  s1->per_user_index);
1226               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1227                                   s1->per_user_list_head_index,
1228                                   s1->per_user_index);
1229             }
1230         trace01:
1231
1232           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1233                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
1234             {
1235               snat_in2out_trace_t *t = 
1236                  vlib_add_trace (vm, node, b1, sizeof (*t));
1237               t->sw_if_index = sw_if_index1;
1238               t->next_index = next1;
1239               t->session_index = ~0;
1240               if (s1)
1241                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1242             }
1243
1244           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1245
1246           /* verify speculative enqueues, maybe switch current next frame */
1247           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1248                                            to_next, n_left_to_next,
1249                                            bi0, bi1, next0, next1);
1250         }
1251
1252       while (n_left_from > 0 && n_left_to_next > 0)
1253         {
1254           u32 bi0;
1255           vlib_buffer_t * b0;
1256           u32 next0;
1257           u32 sw_if_index0;
1258           ip4_header_t * ip0;
1259           ip_csum_t sum0;
1260           u32 new_addr0, old_addr0;
1261           u16 old_port0, new_port0;
1262           udp_header_t * udp0;
1263           tcp_header_t * tcp0;
1264           icmp46_header_t * icmp0;
1265           snat_session_key_t key0;
1266           u32 rx_fib_index0;
1267           u32 proto0;
1268           snat_session_t * s0 = 0;
1269           clib_bihash_kv_8_8_t kv0, value0;
1270           
1271           /* speculatively enqueue b0 to the current next frame */
1272           bi0 = from[0];
1273           to_next[0] = bi0;
1274           from += 1;
1275           to_next += 1;
1276           n_left_from -= 1;
1277           n_left_to_next -= 1;
1278
1279           b0 = vlib_get_buffer (vm, bi0);
1280           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1281
1282           ip0 = vlib_buffer_get_current (b0);
1283           udp0 = ip4_next_header (ip0);
1284           tcp0 = (tcp_header_t *) udp0;
1285           icmp0 = (icmp46_header_t *) udp0;
1286
1287           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1288           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1289                                    sw_if_index0);
1290
1291           if (PREDICT_FALSE(ip0->ttl == 1))
1292             {
1293               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1294               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1295                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1296                                            0);
1297               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1298               goto trace0;
1299             }
1300
1301           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1302
1303           /* Next configured feature, probably ip4-lookup */
1304           if (is_slow_path)
1305             {
1306               if (PREDICT_FALSE (proto0 == ~0))
1307                 goto trace0;
1308               
1309               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1310                 {
1311                   next0 = icmp_in2out_slow_path 
1312                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1313                      next0, now, thread_index, &s0);
1314                   goto trace0;
1315                 }
1316             }
1317           else
1318             {
1319               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1320                 {
1321                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1322                   goto trace0;
1323                 }
1324             }
1325
1326           key0.addr = ip0->src_address;
1327           key0.port = udp0->src_port;
1328           key0.protocol = proto0;
1329           key0.fib_index = rx_fib_index0;
1330           
1331           kv0.key = key0.as_u64;
1332
1333           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
1334             {
1335               if (is_slow_path)
1336                 {
1337                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
1338                       proto0, rx_fib_index0)))
1339                     goto trace0;
1340
1341                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1342                                      &s0, node, next0, thread_index);
1343
1344                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1345                     goto trace0;
1346                 }
1347               else
1348                 {
1349                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1350                   goto trace0;
1351                 }
1352             }
1353           else
1354             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1355                                     value0.value);
1356
1357           old_addr0 = ip0->src_address.as_u32;
1358           ip0->src_address = s0->out2in.addr;
1359           new_addr0 = ip0->src_address.as_u32;
1360           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1361
1362           sum0 = ip0->checksum;
1363           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1364                                  ip4_header_t,
1365                                  src_address /* changed member */);
1366           ip0->checksum = ip_csum_fold (sum0);
1367
1368           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1369             {
1370               old_port0 = tcp0->src_port;
1371               tcp0->src_port = s0->out2in.port;
1372               new_port0 = tcp0->src_port;
1373
1374               sum0 = tcp0->checksum;
1375               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1376                                      ip4_header_t,
1377                                      dst_address /* changed member */);
1378               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1379                                      ip4_header_t /* cheat */,
1380                                      length /* changed member */);
1381               tcp0->checksum = ip_csum_fold(sum0);
1382             }
1383           else
1384             {
1385               old_port0 = udp0->src_port;
1386               udp0->src_port = s0->out2in.port;
1387               udp0->checksum = 0;
1388             }
1389
1390           /* Hairpinning */
1391           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1392
1393           /* Accounting */
1394           s0->last_heard = now;
1395           s0->total_pkts++;
1396           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1397           /* Per-user LRU list maintenance for dynamic translation */
1398           if (!snat_is_session_static (s0))
1399             {
1400               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1401                                  s0->per_user_index);
1402               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1403                                   s0->per_user_list_head_index,
1404                                   s0->per_user_index);
1405             }
1406
1407         trace0:
1408           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1409                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1410             {
1411               snat_in2out_trace_t *t = 
1412                  vlib_add_trace (vm, node, b0, sizeof (*t));
1413               t->is_slow_path = is_slow_path;
1414               t->sw_if_index = sw_if_index0;
1415               t->next_index = next0;
1416                   t->session_index = ~0;
1417               if (s0)
1418                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1419             }
1420
1421           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1422
1423           /* verify speculative enqueue, maybe switch current next frame */
1424           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1425                                            to_next, n_left_to_next,
1426                                            bi0, next0);
1427         }
1428
1429       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1430     }
1431
1432   vlib_node_increment_counter (vm, stats_node_index, 
1433                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, 
1434                                pkts_processed);
1435   return frame->n_vectors;
1436 }
1437
1438 static uword
1439 snat_in2out_fast_path_fn (vlib_main_t * vm,
1440                           vlib_node_runtime_t * node,
1441                           vlib_frame_t * frame)
1442 {
1443   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */);
1444 }
1445
1446 VLIB_REGISTER_NODE (snat_in2out_node) = {
1447   .function = snat_in2out_fast_path_fn,
1448   .name = "snat-in2out",
1449   .vector_size = sizeof (u32),
1450   .format_trace = format_snat_in2out_trace,
1451   .type = VLIB_NODE_TYPE_INTERNAL,
1452   
1453   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1454   .error_strings = snat_in2out_error_strings,
1455
1456   .runtime_data_bytes = sizeof (snat_runtime_t),
1457   
1458   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1459
1460   /* edit / add dispositions here */
1461   .next_nodes = {
1462     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1463     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1464     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1465     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1466   },
1467 };
1468
1469 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1470
1471 static uword
1472 snat_in2out_slow_path_fn (vlib_main_t * vm,
1473                           vlib_node_runtime_t * node,
1474                           vlib_frame_t * frame)
1475 {
1476   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */);
1477 }
1478
1479 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1480   .function = snat_in2out_slow_path_fn,
1481   .name = "snat-in2out-slowpath",
1482   .vector_size = sizeof (u32),
1483   .format_trace = format_snat_in2out_trace,
1484   .type = VLIB_NODE_TYPE_INTERNAL,
1485   
1486   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1487   .error_strings = snat_in2out_error_strings,
1488
1489   .runtime_data_bytes = sizeof (snat_runtime_t),
1490   
1491   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1492
1493   /* edit / add dispositions here */
1494   .next_nodes = {
1495     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1496     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1497     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1498     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1499   },
1500 };
1501
1502 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
1503
1504 /**************************/
1505 /*** deterministic mode ***/
1506 /**************************/
1507 static uword
1508 snat_det_in2out_node_fn (vlib_main_t * vm,
1509                          vlib_node_runtime_t * node,
1510                          vlib_frame_t * frame)
1511 {
1512   u32 n_left_from, * from, * to_next;
1513   snat_in2out_next_t next_index;
1514   u32 pkts_processed = 0;
1515   snat_main_t * sm = &snat_main;
1516   u32 now = (u32) vlib_time_now (vm);
1517   u32 thread_index = os_get_cpu_number ();
1518
1519   from = vlib_frame_vector_args (frame);
1520   n_left_from = frame->n_vectors;
1521   next_index = node->cached_next_index;
1522
1523   while (n_left_from > 0)
1524     {
1525       u32 n_left_to_next;
1526
1527       vlib_get_next_frame (vm, node, next_index,
1528                            to_next, n_left_to_next);
1529
1530       while (n_left_from >= 4 && n_left_to_next >= 2)
1531         {
1532           u32 bi0, bi1;
1533           vlib_buffer_t * b0, * b1;
1534           u32 next0, next1;
1535           u32 sw_if_index0, sw_if_index1;
1536           ip4_header_t * ip0, * ip1;
1537           ip_csum_t sum0, sum1;
1538           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
1539           u16 old_port0, new_port0, lo_port0, i0;
1540           u16 old_port1, new_port1, lo_port1, i1;
1541           udp_header_t * udp0, * udp1;
1542           tcp_header_t * tcp0, * tcp1;
1543           u32 proto0, proto1;
1544           snat_det_out_key_t key0, key1;
1545           snat_det_map_t * dm0, * dm1;
1546           snat_det_session_t * ses0 = 0, * ses1 = 0;
1547           u32 rx_fib_index0, rx_fib_index1;
1548           icmp46_header_t * icmp0, * icmp1;
1549
1550           /* Prefetch next iteration. */
1551           {
1552             vlib_buffer_t * p2, * p3;
1553
1554             p2 = vlib_get_buffer (vm, from[2]);
1555             p3 = vlib_get_buffer (vm, from[3]);
1556
1557             vlib_prefetch_buffer_header (p2, LOAD);
1558             vlib_prefetch_buffer_header (p3, LOAD);
1559
1560             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1561             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1562           }
1563
1564           /* speculatively enqueue b0 and b1 to the current next frame */
1565           to_next[0] = bi0 = from[0];
1566           to_next[1] = bi1 = from[1];
1567           from += 2;
1568           to_next += 2;
1569           n_left_from -= 2;
1570           n_left_to_next -= 2;
1571
1572           b0 = vlib_get_buffer (vm, bi0);
1573           b1 = vlib_get_buffer (vm, bi1);
1574
1575           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1576           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1577
1578           ip0 = vlib_buffer_get_current (b0);
1579           udp0 = ip4_next_header (ip0);
1580           tcp0 = (tcp_header_t *) udp0;
1581
1582           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1583
1584           if (PREDICT_FALSE(ip0->ttl == 1))
1585             {
1586               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1587               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1588                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1589                                            0);
1590               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1591               goto trace0;
1592             }
1593
1594           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1595
1596           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1597             {
1598               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1599               icmp0 = (icmp46_header_t *) udp0;
1600
1601               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
1602                                   rx_fib_index0, node, next0, thread_index,
1603                                   &ses0, &dm0);
1604               goto trace0;
1605             }
1606
1607           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
1608           if (PREDICT_FALSE(!dm0))
1609             {
1610               clib_warning("no match for internal host %U",
1611                            format_ip4_address, &ip0->src_address);
1612               next0 = SNAT_IN2OUT_NEXT_DROP;
1613               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1614               goto trace0;
1615             }
1616
1617           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
1618
1619           key0.ext_host_addr = ip0->dst_address;
1620           key0.ext_host_port = tcp0->dst;
1621
1622           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
1623           if (PREDICT_FALSE(!ses0))
1624             {
1625               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
1626                 {
1627                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
1628                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
1629
1630                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
1631                     continue;
1632
1633                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
1634                   break;
1635                 }
1636               if (PREDICT_FALSE(!ses0))
1637                 {
1638                   /* too many sessions for user, send ICMP error packet */
1639
1640                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1641                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
1642                                                ICMP4_destination_unreachable_destination_unreachable_host,
1643                                                0);
1644                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1645                   goto trace0;
1646                 }
1647             }
1648
1649           new_port0 = ses0->out.out_port;
1650
1651           old_addr0.as_u32 = ip0->src_address.as_u32;
1652           ip0->src_address.as_u32 = new_addr0.as_u32;
1653           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1654
1655           sum0 = ip0->checksum;
1656           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1657                                  ip4_header_t,
1658                                  src_address /* changed member */);
1659           ip0->checksum = ip_csum_fold (sum0);
1660
1661           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1662             {
1663               if (tcp0->flags & TCP_FLAG_SYN)
1664                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
1665               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
1666                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1667               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1668                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
1669               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
1670                 snat_det_ses_close(dm0, ses0);
1671               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1672                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
1673               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
1674                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1675
1676               old_port0 = tcp0->src;
1677               tcp0->src = new_port0;
1678
1679               sum0 = tcp0->checksum;
1680               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1681                                      ip4_header_t,
1682                                      dst_address /* changed member */);
1683               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1684                                      ip4_header_t /* cheat */,
1685                                      length /* changed member */);
1686               tcp0->checksum = ip_csum_fold(sum0);
1687             }
1688           else
1689             {
1690               ses0->state = SNAT_SESSION_UDP_ACTIVE;
1691               old_port0 = udp0->src_port;
1692               udp0->src_port = new_port0;
1693               udp0->checksum = 0;
1694             }
1695
1696           switch(ses0->state)
1697             {
1698             case SNAT_SESSION_UDP_ACTIVE:
1699                 ses0->expire = now + sm->udp_timeout;
1700                 break;
1701             case SNAT_SESSION_TCP_SYN_SENT:
1702             case SNAT_SESSION_TCP_FIN_WAIT:
1703             case SNAT_SESSION_TCP_CLOSE_WAIT:
1704             case SNAT_SESSION_TCP_LAST_ACK:
1705                 ses0->expire = now + sm->tcp_transitory_timeout;
1706                 break;
1707             case SNAT_SESSION_TCP_ESTABLISHED:
1708                 ses0->expire = now + sm->tcp_established_timeout;
1709                 break;
1710             }
1711
1712         trace0:
1713           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1714                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1715             {
1716               snat_in2out_trace_t *t =
1717                  vlib_add_trace (vm, node, b0, sizeof (*t));
1718               t->is_slow_path = 0;
1719               t->sw_if_index = sw_if_index0;
1720               t->next_index = next0;
1721               t->session_index = ~0;
1722               if (ses0)
1723                 t->session_index = ses0 - dm0->sessions;
1724             }
1725
1726           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1727
1728           ip1 = vlib_buffer_get_current (b1);
1729           udp1 = ip4_next_header (ip1);
1730           tcp1 = (tcp_header_t *) udp1;
1731
1732           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1733
1734           if (PREDICT_FALSE(ip1->ttl == 1))
1735             {
1736               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1737               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1738                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1739                                            0);
1740               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1741               goto trace1;
1742             }
1743
1744           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1745
1746           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
1747             {
1748               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
1749               icmp1 = (icmp46_header_t *) udp1;
1750
1751               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
1752                                   rx_fib_index1, node, next1, thread_index,
1753                                   &ses1, &dm1);
1754               goto trace1;
1755             }
1756
1757           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
1758           if (PREDICT_FALSE(!dm1))
1759             {
1760               clib_warning("no match for internal host %U",
1761                            format_ip4_address, &ip0->src_address);
1762               next1 = SNAT_IN2OUT_NEXT_DROP;
1763               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1764               goto trace1;
1765             }
1766
1767           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
1768
1769           key1.ext_host_addr = ip1->dst_address;
1770           key1.ext_host_port = tcp1->dst;
1771
1772           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
1773           if (PREDICT_FALSE(!ses1))
1774             {
1775               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
1776                 {
1777                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
1778                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
1779
1780                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
1781                     continue;
1782
1783                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
1784                   break;
1785                 }
1786               if (PREDICT_FALSE(!ses1))
1787                 {
1788                   /* too many sessions for user, send ICMP error packet */
1789
1790                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1791                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
1792                                                ICMP4_destination_unreachable_destination_unreachable_host,
1793                                                0);
1794                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1795                   goto trace1;
1796                 }
1797             }
1798
1799           new_port1 = ses1->out.out_port;
1800
1801           old_addr1.as_u32 = ip1->src_address.as_u32;
1802           ip1->src_address.as_u32 = new_addr1.as_u32;
1803           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1804
1805           sum1 = ip1->checksum;
1806           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1807                                  ip4_header_t,
1808                                  src_address /* changed member */);
1809           ip1->checksum = ip_csum_fold (sum1);
1810
1811           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1812             {
1813               if (tcp1->flags & TCP_FLAG_SYN)
1814                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
1815               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
1816                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
1817               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
1818                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
1819               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
1820                 snat_det_ses_close(dm1, ses1);
1821               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1822                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
1823               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
1824                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
1825
1826               old_port1 = tcp1->src;
1827               tcp1->src = new_port1;
1828
1829               sum1 = tcp1->checksum;
1830               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1831                                      ip4_header_t,
1832                                      dst_address /* changed member */);
1833               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1834                                      ip4_header_t /* cheat */,
1835                                      length /* changed member */);
1836               tcp1->checksum = ip_csum_fold(sum1);
1837             }
1838           else
1839             {
1840               ses1->state = SNAT_SESSION_UDP_ACTIVE;
1841               old_port1 = udp1->src_port;
1842               udp1->src_port = new_port1;
1843               udp1->checksum = 0;
1844             }
1845
1846           switch(ses1->state)
1847             {
1848             case SNAT_SESSION_UDP_ACTIVE:
1849                 ses1->expire = now + sm->udp_timeout;
1850                 break;
1851             case SNAT_SESSION_TCP_SYN_SENT:
1852             case SNAT_SESSION_TCP_FIN_WAIT:
1853             case SNAT_SESSION_TCP_CLOSE_WAIT:
1854             case SNAT_SESSION_TCP_LAST_ACK:
1855                 ses1->expire = now + sm->tcp_transitory_timeout;
1856                 break;
1857             case SNAT_SESSION_TCP_ESTABLISHED:
1858                 ses1->expire = now + sm->tcp_established_timeout;
1859                 break;
1860             }
1861
1862         trace1:
1863           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1864                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1865             {
1866               snat_in2out_trace_t *t =
1867                  vlib_add_trace (vm, node, b1, sizeof (*t));
1868               t->is_slow_path = 0;
1869               t->sw_if_index = sw_if_index1;
1870               t->next_index = next1;
1871               t->session_index = ~0;
1872               if (ses1)
1873                 t->session_index = ses1 - dm1->sessions;
1874             }
1875
1876           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1877
1878           /* verify speculative enqueues, maybe switch current next frame */
1879           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1880                                            to_next, n_left_to_next,
1881                                            bi0, bi1, next0, next1);
1882          }
1883
1884       while (n_left_from > 0 && n_left_to_next > 0)
1885         {
1886           u32 bi0;
1887           vlib_buffer_t * b0;
1888           u32 next0;
1889           u32 sw_if_index0;
1890           ip4_header_t * ip0;
1891           ip_csum_t sum0;
1892           ip4_address_t new_addr0, old_addr0;
1893           u16 old_port0, new_port0, lo_port0, i0;
1894           udp_header_t * udp0;
1895           tcp_header_t * tcp0;
1896           u32 proto0;
1897           snat_det_out_key_t key0;
1898           snat_det_map_t * dm0;
1899           snat_det_session_t * ses0 = 0;
1900           u32 rx_fib_index0;
1901           icmp46_header_t * icmp0;
1902
1903           /* speculatively enqueue b0 to the current next frame */
1904           bi0 = from[0];
1905           to_next[0] = bi0;
1906           from += 1;
1907           to_next += 1;
1908           n_left_from -= 1;
1909           n_left_to_next -= 1;
1910
1911           b0 = vlib_get_buffer (vm, bi0);
1912           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1913
1914           ip0 = vlib_buffer_get_current (b0);
1915           udp0 = ip4_next_header (ip0);
1916           tcp0 = (tcp_header_t *) udp0;
1917
1918           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1919
1920           if (PREDICT_FALSE(ip0->ttl == 1))
1921             {
1922               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1923               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1924                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1925                                            0);
1926               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1927               goto trace00;
1928             }
1929
1930           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1931
1932           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1933             {
1934               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1935               icmp0 = (icmp46_header_t *) udp0;
1936
1937               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
1938                                   rx_fib_index0, node, next0, thread_index,
1939                                   &ses0, &dm0);
1940               goto trace00;
1941             }
1942
1943           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
1944           if (PREDICT_FALSE(!dm0))
1945             {
1946               clib_warning("no match for internal host %U",
1947                            format_ip4_address, &ip0->src_address);
1948               next0 = SNAT_IN2OUT_NEXT_DROP;
1949               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1950               goto trace00;
1951             }
1952
1953           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
1954
1955           key0.ext_host_addr = ip0->dst_address;
1956           key0.ext_host_port = tcp0->dst;
1957
1958           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
1959           if (PREDICT_FALSE(!ses0))
1960             {
1961               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
1962                 {
1963                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
1964                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
1965
1966                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
1967                     continue;
1968
1969                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
1970                   break;
1971                 }
1972               if (PREDICT_FALSE(!ses0))
1973                 {
1974                   /* too many sessions for user, send ICMP error packet */
1975
1976                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1977                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
1978                                                ICMP4_destination_unreachable_destination_unreachable_host,
1979                                                0);
1980                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1981                   goto trace00;
1982                 }
1983             }
1984
1985           new_port0 = ses0->out.out_port;
1986
1987           old_addr0.as_u32 = ip0->src_address.as_u32;
1988           ip0->src_address.as_u32 = new_addr0.as_u32;
1989           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1990
1991           sum0 = ip0->checksum;
1992           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1993                                  ip4_header_t,
1994                                  src_address /* changed member */);
1995           ip0->checksum = ip_csum_fold (sum0);
1996
1997           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1998             {
1999               if (tcp0->flags & TCP_FLAG_SYN)
2000                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2001               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2002                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2003               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2004                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2005               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2006                 snat_det_ses_close(dm0, ses0);
2007               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2008                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2009               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2010                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2011
2012               old_port0 = tcp0->src;
2013               tcp0->src = new_port0;
2014
2015               sum0 = tcp0->checksum;
2016               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2017                                      ip4_header_t,
2018                                      dst_address /* changed member */);
2019               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2020                                      ip4_header_t /* cheat */,
2021                                      length /* changed member */);
2022               tcp0->checksum = ip_csum_fold(sum0);
2023             }
2024           else
2025             {
2026               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2027               old_port0 = udp0->src_port;
2028               udp0->src_port = new_port0;
2029               udp0->checksum = 0;
2030             }
2031
2032           switch(ses0->state)
2033             {
2034             case SNAT_SESSION_UDP_ACTIVE:
2035                 ses0->expire = now + sm->udp_timeout;
2036                 break;
2037             case SNAT_SESSION_TCP_SYN_SENT:
2038             case SNAT_SESSION_TCP_FIN_WAIT:
2039             case SNAT_SESSION_TCP_CLOSE_WAIT:
2040             case SNAT_SESSION_TCP_LAST_ACK:
2041                 ses0->expire = now + sm->tcp_transitory_timeout;
2042                 break;
2043             case SNAT_SESSION_TCP_ESTABLISHED:
2044                 ses0->expire = now + sm->tcp_established_timeout;
2045                 break;
2046             }
2047
2048         trace00:
2049           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2050                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2051             {
2052               snat_in2out_trace_t *t =
2053                  vlib_add_trace (vm, node, b0, sizeof (*t));
2054               t->is_slow_path = 0;
2055               t->sw_if_index = sw_if_index0;
2056               t->next_index = next0;
2057               t->session_index = ~0;
2058               if (ses0)
2059                 t->session_index = ses0 - dm0->sessions;
2060             }
2061
2062           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2063
2064           /* verify speculative enqueue, maybe switch current next frame */
2065           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2066                                            to_next, n_left_to_next,
2067                                            bi0, next0);
2068         }
2069
2070       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2071     }
2072
2073   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
2074                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2075                                pkts_processed);
2076   return frame->n_vectors;
2077 }
2078
2079 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
2080   .function = snat_det_in2out_node_fn,
2081   .name = "snat-det-in2out",
2082   .vector_size = sizeof (u32),
2083   .format_trace = format_snat_in2out_trace,
2084   .type = VLIB_NODE_TYPE_INTERNAL,
2085
2086   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2087   .error_strings = snat_in2out_error_strings,
2088
2089   .runtime_data_bytes = sizeof (snat_runtime_t),
2090
2091   .n_next_nodes = 3,
2092
2093   /* edit / add dispositions here */
2094   .next_nodes = {
2095     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2096     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2097     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2098   },
2099 };
2100
2101 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
2102
2103 /**
2104  * Get address and port values to be used for packet SNAT translation
2105  * and create session if needed
2106  *
2107  * @param[in,out] sm             SNAT main
2108  * @param[in,out] node           SNAT node runtime
2109  * @param[in] thread_index       thread index
2110  * @param[in,out] b0             buffer containing packet to be translated
2111  * @param[out] p_proto           protocol used for matching
2112  * @param[out] p_value           address and port after NAT translation
2113  * @param[out] p_dont_translate  if packet should not be translated
2114  * @param d                      optional parameter
2115  * @param e                      optional parameter
2116  */
2117 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
2118                           u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
2119                           snat_session_key_t *p_value,
2120                           u8 *p_dont_translate, void *d, void *e)
2121 {
2122   ip4_header_t *ip0;
2123   icmp46_header_t *icmp0;
2124   u32 sw_if_index0;
2125   u32 rx_fib_index0;
2126   u8 protocol;
2127   snat_det_out_key_t key0;
2128   u8 dont_translate = 0;
2129   u32 next0 = ~0;
2130   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2131   ip4_header_t *inner_ip0;
2132   void *l4_header = 0;
2133   icmp46_header_t *inner_icmp0;
2134   snat_det_map_t * dm0 = 0;
2135   ip4_address_t new_addr0;
2136   u16 lo_port0, i0;
2137   snat_det_session_t * ses0 = 0;
2138   ip4_address_t in_addr;
2139   u16 in_port;
2140
2141   ip0 = vlib_buffer_get_current (b0);
2142   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2143   echo0 = (icmp_echo_header_t *)(icmp0+1);
2144   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2145   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
2146
2147   if (!icmp_is_error_message (icmp0))
2148     {
2149       protocol = SNAT_PROTOCOL_ICMP;
2150       in_addr = ip0->src_address;
2151       in_port = echo0->identifier;
2152     }
2153   else
2154     {
2155       inner_ip0 = (ip4_header_t *)(echo0+1);
2156       l4_header = ip4_next_header (inner_ip0);
2157       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2158       in_addr = inner_ip0->dst_address;
2159       switch (protocol)
2160         {
2161         case SNAT_PROTOCOL_ICMP:
2162           inner_icmp0 = (icmp46_header_t*)l4_header;
2163           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2164           in_port = inner_echo0->identifier;
2165           break;
2166         case SNAT_PROTOCOL_UDP:
2167         case SNAT_PROTOCOL_TCP:
2168           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2169           break;
2170         default:
2171           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
2172           next0 = SNAT_IN2OUT_NEXT_DROP;
2173           goto out;
2174         }
2175     }
2176
2177   dm0 = snat_det_map_by_user(sm, &in_addr);
2178   if (PREDICT_FALSE(!dm0))
2179     {
2180       clib_warning("no match for internal host %U",
2181                    format_ip4_address, &in_addr);
2182       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2183           IP_PROTOCOL_ICMP, rx_fib_index0)))
2184         {
2185           dont_translate = 1;
2186           goto out;
2187         }
2188       next0 = SNAT_IN2OUT_NEXT_DROP;
2189       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2190       goto out;
2191     }
2192
2193   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
2194
2195   key0.ext_host_addr = ip0->dst_address;
2196   key0.ext_host_port = 0;
2197
2198   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
2199   if (PREDICT_FALSE(!ses0))
2200     {
2201       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2202           IP_PROTOCOL_ICMP, rx_fib_index0)))
2203         {
2204           dont_translate = 1;
2205           goto out;
2206         }
2207       if (icmp0->type != ICMP4_echo_request)
2208         {
2209           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2210           next0 = SNAT_IN2OUT_NEXT_DROP;
2211           goto out;
2212         }
2213       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2214         {
2215           key0.out_port = clib_host_to_net_u16 (lo_port0 +
2216             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
2217
2218           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
2219             continue;
2220
2221           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
2222           break;
2223         }
2224       if (PREDICT_FALSE(!ses0))
2225         {
2226           next0 = SNAT_IN2OUT_NEXT_DROP;
2227           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
2228           goto out;
2229         }
2230     }
2231
2232   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
2233                     !icmp_is_error_message (icmp0)))
2234     {
2235       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2236       next0 = SNAT_IN2OUT_NEXT_DROP;
2237       goto out;
2238     }
2239
2240   u32 now = (u32) vlib_time_now (sm->vlib_main);
2241
2242   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
2243   ses0->expire = now + sm->icmp_timeout;
2244
2245 out:
2246   *p_proto = protocol;
2247   if (ses0)
2248     {
2249       p_value->addr = new_addr0;
2250       p_value->fib_index = sm->outside_fib_index;
2251       p_value->port = ses0->out.out_port;
2252     }
2253   *p_dont_translate = dont_translate;
2254   if (d)
2255     *(snat_det_session_t**)d = ses0;
2256   if (e)
2257     *(snat_det_map_t**)e = dm0;
2258   return next0;
2259 }
2260
2261 /**********************/
2262 /*** worker handoff ***/
2263 /**********************/
2264 static uword
2265 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
2266                                vlib_node_runtime_t * node,
2267                                vlib_frame_t * frame)
2268 {
2269   snat_main_t *sm = &snat_main;
2270   vlib_thread_main_t *tm = vlib_get_thread_main ();
2271   u32 n_left_from, *from, *to_next = 0;
2272   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
2273   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
2274     = 0;
2275   vlib_frame_queue_elt_t *hf = 0;
2276   vlib_frame_t *f = 0;
2277   int i;
2278   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
2279   u32 next_worker_index = 0;
2280   u32 current_worker_index = ~0;
2281   u32 thread_index = vlib_get_thread_index ();
2282
2283   ASSERT (vec_len (sm->workers));
2284
2285   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
2286     {
2287       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
2288
2289       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
2290                                sm->first_worker_index + sm->num_workers - 1,
2291                                (vlib_frame_queue_t *) (~0));
2292     }
2293
2294   from = vlib_frame_vector_args (frame);
2295   n_left_from = frame->n_vectors;
2296
2297   while (n_left_from > 0)
2298     {
2299       u32 bi0;
2300       vlib_buffer_t *b0;
2301       u32 sw_if_index0;
2302       u32 rx_fib_index0;
2303       ip4_header_t * ip0;
2304       u8 do_handoff;
2305
2306       bi0 = from[0];
2307       from += 1;
2308       n_left_from -= 1;
2309
2310       b0 = vlib_get_buffer (vm, bi0);
2311
2312       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
2313       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2314
2315       ip0 = vlib_buffer_get_current (b0);
2316
2317       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
2318
2319       if (PREDICT_FALSE (next_worker_index != thread_index))
2320         {
2321           do_handoff = 1;
2322
2323           if (next_worker_index != current_worker_index)
2324             {
2325               if (hf)
2326                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2327
2328               hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
2329                                                       next_worker_index,
2330                                                       handoff_queue_elt_by_worker_index);
2331
2332               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2333               to_next_worker = &hf->buffer_index[hf->n_vectors];
2334               current_worker_index = next_worker_index;
2335             }
2336
2337           /* enqueue to correct worker thread */
2338           to_next_worker[0] = bi0;
2339           to_next_worker++;
2340           n_left_to_next_worker--;
2341
2342           if (n_left_to_next_worker == 0)
2343             {
2344               hf->n_vectors = VLIB_FRAME_SIZE;
2345               vlib_put_frame_queue_elt (hf);
2346               current_worker_index = ~0;
2347               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2348               hf = 0;
2349             }
2350         }
2351       else
2352         {
2353           do_handoff = 0;
2354           /* if this is 1st frame */
2355           if (!f)
2356             {
2357               f = vlib_get_frame_to_node (vm, sm->in2out_node_index);
2358               to_next = vlib_frame_vector_args (f);
2359             }
2360
2361           to_next[0] = bi0;
2362           to_next += 1;
2363           f->n_vectors++;
2364         }
2365
2366       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2367                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2368         {
2369           snat_in2out_worker_handoff_trace_t *t =
2370             vlib_add_trace (vm, node, b0, sizeof (*t));
2371           t->next_worker_index = next_worker_index;
2372           t->do_handoff = do_handoff;
2373         }
2374     }
2375
2376   if (f)
2377     vlib_put_frame_to_node (vm, sm->in2out_node_index, f);
2378
2379   if (hf)
2380     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2381
2382   /* Ship frames to the worker nodes */
2383   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2384     {
2385       if (handoff_queue_elt_by_worker_index[i])
2386         {
2387           hf = handoff_queue_elt_by_worker_index[i];
2388           /*
2389            * It works better to let the handoff node
2390            * rate-adapt, always ship the handoff queue element.
2391            */
2392           if (1 || hf->n_vectors == hf->last_n_vectors)
2393             {
2394               vlib_put_frame_queue_elt (hf);
2395               handoff_queue_elt_by_worker_index[i] = 0;
2396             }
2397           else
2398             hf->last_n_vectors = hf->n_vectors;
2399         }
2400       congested_handoff_queue_by_worker_index[i] =
2401         (vlib_frame_queue_t *) (~0);
2402     }
2403   hf = 0;
2404   current_worker_index = ~0;
2405   return frame->n_vectors;
2406 }
2407
2408 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
2409   .function = snat_in2out_worker_handoff_fn,
2410   .name = "snat-in2out-worker-handoff",
2411   .vector_size = sizeof (u32),
2412   .format_trace = format_snat_in2out_worker_handoff_trace,
2413   .type = VLIB_NODE_TYPE_INTERNAL,
2414   
2415   .n_next_nodes = 1,
2416
2417   .next_nodes = {
2418     [0] = "error-drop",
2419   },
2420 };
2421
2422 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
2423
2424 static uword
2425 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
2426                                 vlib_node_runtime_t * node,
2427                                 vlib_frame_t * frame)
2428 {
2429   u32 n_left_from, * from, * to_next;
2430   snat_in2out_next_t next_index;
2431   u32 pkts_processed = 0;
2432   snat_main_t * sm = &snat_main;
2433   u32 stats_node_index;
2434
2435   stats_node_index = snat_in2out_fast_node.index;
2436
2437   from = vlib_frame_vector_args (frame);
2438   n_left_from = frame->n_vectors;
2439   next_index = node->cached_next_index;
2440
2441   while (n_left_from > 0)
2442     {
2443       u32 n_left_to_next;
2444
2445       vlib_get_next_frame (vm, node, next_index,
2446                            to_next, n_left_to_next);
2447
2448       while (n_left_from > 0 && n_left_to_next > 0)
2449         {
2450           u32 bi0;
2451           vlib_buffer_t * b0;
2452           u32 next0;
2453           u32 sw_if_index0;
2454           ip4_header_t * ip0;
2455           ip_csum_t sum0;
2456           u32 new_addr0, old_addr0;
2457           u16 old_port0, new_port0;
2458           udp_header_t * udp0;
2459           tcp_header_t * tcp0;
2460           icmp46_header_t * icmp0;
2461           snat_session_key_t key0, sm0;
2462           u32 proto0;
2463           u32 rx_fib_index0;
2464
2465           /* speculatively enqueue b0 to the current next frame */
2466           bi0 = from[0];
2467           to_next[0] = bi0;
2468           from += 1;
2469           to_next += 1;
2470           n_left_from -= 1;
2471           n_left_to_next -= 1;
2472
2473           b0 = vlib_get_buffer (vm, bi0);
2474           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2475
2476           ip0 = vlib_buffer_get_current (b0);
2477           udp0 = ip4_next_header (ip0);
2478           tcp0 = (tcp_header_t *) udp0;
2479           icmp0 = (icmp46_header_t *) udp0;
2480
2481           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2482           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2483
2484           if (PREDICT_FALSE(ip0->ttl == 1))
2485             {
2486               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2487               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2488                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2489                                            0);
2490               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2491               goto trace0;
2492             }
2493
2494           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2495
2496           if (PREDICT_FALSE (proto0 == ~0))
2497               goto trace0;
2498
2499           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2500             {
2501               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2502                                   rx_fib_index0, node, next0, ~0, 0, 0);
2503               goto trace0;
2504             }
2505
2506           key0.addr = ip0->src_address;
2507           key0.port = udp0->src_port;
2508           key0.fib_index = rx_fib_index0;
2509
2510           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0))
2511             {
2512               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2513               next0= SNAT_IN2OUT_NEXT_DROP;
2514               goto trace0;
2515             }
2516
2517           new_addr0 = sm0.addr.as_u32;
2518           new_port0 = sm0.port;
2519           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2520           old_addr0 = ip0->src_address.as_u32;
2521           ip0->src_address.as_u32 = new_addr0;
2522
2523           sum0 = ip0->checksum;
2524           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2525                                  ip4_header_t,
2526                                  src_address /* changed member */);
2527           ip0->checksum = ip_csum_fold (sum0);
2528
2529           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
2530             {
2531               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2532                 {
2533                   old_port0 = tcp0->src_port;
2534                   tcp0->src_port = new_port0;
2535
2536                   sum0 = tcp0->checksum;
2537                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2538                                          ip4_header_t,
2539                                          dst_address /* changed member */);
2540                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2541                                          ip4_header_t /* cheat */,
2542                                          length /* changed member */);
2543                   tcp0->checksum = ip_csum_fold(sum0);
2544                 }
2545               else
2546                 {
2547                   old_port0 = udp0->src_port;
2548                   udp0->src_port = new_port0;
2549                   udp0->checksum = 0;
2550                 }
2551             }
2552           else
2553             {
2554               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2555                 {
2556                   sum0 = tcp0->checksum;
2557                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2558                                          ip4_header_t,
2559                                          dst_address /* changed member */);
2560                   tcp0->checksum = ip_csum_fold(sum0);
2561                 }
2562             }
2563
2564           /* Hairpinning */
2565           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
2566
2567         trace0:
2568           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2569                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2570             {
2571               snat_in2out_trace_t *t =
2572                  vlib_add_trace (vm, node, b0, sizeof (*t));
2573               t->sw_if_index = sw_if_index0;
2574               t->next_index = next0;
2575             }
2576
2577           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2578
2579           /* verify speculative enqueue, maybe switch current next frame */
2580           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2581                                            to_next, n_left_to_next,
2582                                            bi0, next0);
2583         }
2584
2585       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2586     }
2587
2588   vlib_node_increment_counter (vm, stats_node_index,
2589                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2590                                pkts_processed);
2591   return frame->n_vectors;
2592 }
2593
2594
2595 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
2596   .function = snat_in2out_fast_static_map_fn,
2597   .name = "snat-in2out-fast",
2598   .vector_size = sizeof (u32),
2599   .format_trace = format_snat_in2out_fast_trace,
2600   .type = VLIB_NODE_TYPE_INTERNAL,
2601   
2602   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2603   .error_strings = snat_in2out_error_strings,
2604
2605   .runtime_data_bytes = sizeof (snat_runtime_t),
2606   
2607   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2608
2609   /* edit / add dispositions here */
2610   .next_nodes = {
2611     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2612     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2613     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
2614     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2615   },
2616 };
2617
2618 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);