acl-plugin: remove clib_warnings on plugin init
[vpp.git] / src / plugins / snat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <snat/snat.h>
25 #include <snat/snat_ipfix_logging.h>
26 #include <snat/snat_det.h>
27
28 #include <vppinfra/hash.h>
29 #include <vppinfra/error.h>
30 #include <vppinfra/elog.h>
31
32 typedef struct {
33   u32 sw_if_index;
34   u32 next_index;
35   u32 session_index;
36   u32 is_slow_path;
37 } snat_in2out_trace_t;
38
39 typedef struct {
40   u32 next_worker_index;
41   u8 do_handoff;
42 } snat_in2out_worker_handoff_trace_t;
43
44 /* packet trace format function */
45 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
46 {
47   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
50   char * tag;
51
52   tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
53   
54   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
55               t->sw_if_index, t->next_index, t->session_index);
56
57   return s;
58 }
59
60 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
61 {
62   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
63   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
64   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
65
66   s = format (s, "SANT_IN2OUT_FAST: sw_if_index %d, next index %d", 
67               t->sw_if_index, t->next_index);
68
69   return s;
70 }
71
72 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
73 {
74   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
75   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
76   snat_in2out_worker_handoff_trace_t * t =
77     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
78   char * m;
79
80   m = t->do_handoff ? "next worker" : "same worker";
81   s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
82
83   return s;
84 }
85
86 vlib_node_registration_t snat_in2out_node;
87 vlib_node_registration_t snat_in2out_slowpath_node;
88 vlib_node_registration_t snat_in2out_fast_node;
89 vlib_node_registration_t snat_in2out_worker_handoff_node;
90 vlib_node_registration_t snat_det_in2out_node;
91
/*
 * Error counters of the in2out nodes, as _(SYMBOL, "description") pairs.
 * The enum and the string table below are both generated from this list,
 * so an enum value is always a valid index into the string table.
 */
#define foreach_snat_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(IN2OUT_PACKETS, "Good in2out packets processed")      \
_(OUT_OF_PORTS, "Out of ports")                         \
_(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(NO_TRANSLATION, "No translation")

/* SNAT_IN2OUT_ERROR_<SYMBOL> constants; SNAT_IN2OUT_N_ERROR is the count. */
typedef enum
{
#define _(name, text) SNAT_IN2OUT_ERROR_##name,
  foreach_snat_in2out_error
#undef _
  SNAT_IN2OUT_N_ERROR,
} snat_in2out_error_t;

/* Descriptions, indexed by snat_in2out_error_t. */
static char *snat_in2out_error_strings[] = {
#define _(name, text) text,
  foreach_snat_in2out_error
#undef _
};
112
/* Next-node indices used by the in2out nodes; N_NEXT is the count. */
typedef enum
{
  SNAT_IN2OUT_NEXT_LOOKUP = 0,
  SNAT_IN2OUT_NEXT_DROP = 1,
  SNAT_IN2OUT_NEXT_ICMP_ERROR = 2,
  SNAT_IN2OUT_NEXT_SLOW_PATH = 3,
  SNAT_IN2OUT_N_NEXT = 4,
} snat_in2out_next_t;
120
121 /**
122  * @brief Check if packet should be translated
123  *
124  * Packets aimed at outside interface and external addresss with active session
125  * should be translated.
126  *
127  * @param sm            SNAT main
128  * @param rt            SNAT runtime data
129  * @param sw_if_index0  index of the inside interface
130  * @param ip0           IPv4 header
131  * @param proto0        SNAT protocol
132  * @param rx_fib_index0 RX FIB index
133  *
134  * @returns 0 if packet should be translated otherwise 1
135  */
136 static inline int
137 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
138                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
139                          u32 rx_fib_index0)
140 {
141   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
142   fib_prefix_t pfx = {
143     .fp_proto = FIB_PROTOCOL_IP4,
144     .fp_len = 32,
145     .fp_addr = {
146         .ip4.as_u32 = ip0->dst_address.as_u32,
147     },
148   };
149
150   /* Don't NAT packet aimed at the intfc address */
151   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
152                                       ip0->dst_address.as_u32)))
153     return 1;
154
155   fei = fib_table_lookup (rx_fib_index0, &pfx);
156   if (FIB_NODE_INDEX_INVALID != fei)
157     {
158       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
159       if (sw_if_index == ~0)
160         {
161           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
162           if (FIB_NODE_INDEX_INVALID != fei)
163             sw_if_index = fib_entry_get_resolving_interface (fei);
164         }
165       snat_interface_t *i;
166       pool_foreach (i, sm->interfaces,
167       ({
168         /* NAT packet aimed at outside interface */
169         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
170           return 0;
171       }));
172     }
173
174   return 1;
175 }
176
177 static inline int
178 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
179                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
180                     u32 rx_fib_index0)
181 {
182   udp_header_t * udp0 = ip4_next_header (ip0);
183   snat_session_key_t key0, sm0;
184   clib_bihash_kv_8_8_t kv0, value0;
185
186   key0.addr = ip0->dst_address;
187   key0.port = udp0->dst_port;
188   key0.protocol = proto0;
189   key0.fib_index = sm->outside_fib_index;
190   kv0.key = key0.as_u64;
191
192   /* NAT packet aimed at external address if */
193   /* has active sessions */
194   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
195     {
196       /* or is static mappings */
197       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
198         return 0;
199     }
200   else
201     return 0;
202
203   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
204                                  rx_fib_index0);
205 }
206
207 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
208                       ip4_header_t * ip0,
209                       u32 rx_fib_index0,
210                       snat_session_key_t * key0,
211                       snat_session_t ** sessionp,
212                       vlib_node_runtime_t * node,
213                       u32 next0,
214                       u32 thread_index)
215 {
216   snat_user_t *u;
217   snat_user_key_t user_key;
218   snat_session_t *s;
219   clib_bihash_kv_8_8_t kv0, value0;
220   u32 oldest_per_user_translation_list_index;
221   dlist_elt_t * oldest_per_user_translation_list_elt;
222   dlist_elt_t * per_user_translation_list_elt;
223   dlist_elt_t * per_user_list_head_elt;
224   u32 session_index;
225   snat_session_key_t key1;
226   u32 address_index = ~0;
227   u32 outside_fib_index;
228   uword * p;
229   snat_worker_key_t worker_by_out_key;
230
231   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
232   if (! p)
233     {
234       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
235       return SNAT_IN2OUT_NEXT_DROP;
236     }
237   outside_fib_index = p[0];
238
239   key1.protocol = key0->protocol;
240   user_key.addr = ip0->src_address;
241   user_key.fib_index = rx_fib_index0;
242   kv0.key = user_key.as_u64;
243   
244   /* Ever heard of the "user" = src ip4 address before? */
245   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
246     {
247       /* no, make a new one */
248       pool_get (sm->per_thread_data[thread_index].users, u);
249       memset (u, 0, sizeof (*u));
250       u->addr = ip0->src_address;
251       u->fib_index = rx_fib_index0;
252
253       pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);
254
255       u->sessions_per_user_list_head_index = per_user_list_head_elt -
256         sm->per_thread_data[thread_index].list_pool;
257
258       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
259                        u->sessions_per_user_list_head_index);
260
261       kv0.value = u - sm->per_thread_data[thread_index].users;
262
263       /* add user */
264       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
265     }
266   else
267     {
268       u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
269                              value0.value);
270     }
271
272   /* Over quota? Recycle the least recently used dynamic translation */
273   if (u->nsessions >= sm->max_translations_per_user)
274     {
275       /* Remove the oldest dynamic translation */
276       do {
277           oldest_per_user_translation_list_index =
278             clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
279                                     u->sessions_per_user_list_head_index);
280
281           ASSERT (oldest_per_user_translation_list_index != ~0);
282
283           /* add it back to the end of the LRU list */
284           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
285                               u->sessions_per_user_list_head_index,
286                               oldest_per_user_translation_list_index);
287           /* Get the list element */
288           oldest_per_user_translation_list_elt =
289             pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
290                                oldest_per_user_translation_list_index);
291
292           /* Get the session index from the list element */
293           session_index = oldest_per_user_translation_list_elt->value;
294
295           /* Get the session */
296           s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
297                                  session_index);
298       } while (snat_is_session_static (s));
299
300       /* Remove in2out, out2in keys */
301       kv0.key = s->in2out.as_u64;
302       if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
303           clib_warning ("in2out key delete failed");
304       kv0.key = s->out2in.as_u64;
305       if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
306           clib_warning ("out2in key delete failed");
307
308       /* log NAT event */
309       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
310                                           s->out2in.addr.as_u32,
311                                           s->in2out.protocol,
312                                           s->in2out.port,
313                                           s->out2in.port,
314                                           s->in2out.fib_index);
315
316       snat_free_outside_address_and_port 
317         (sm, &s->out2in, s->outside_address_index);
318       s->outside_address_index = ~0;
319
320       if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
321                                                &address_index))
322         {
323           ASSERT(0);
324
325           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
326           return SNAT_IN2OUT_NEXT_DROP;
327         }
328       s->outside_address_index = address_index;
329     }
330   else
331     {
332       u8 static_mapping = 1;
333
334       /* First try to match static mapping by local address and port */
335       if (snat_static_mapping_match (sm, *key0, &key1, 0, 0))
336         {
337           static_mapping = 0;
338           /* Try to create dynamic translation */
339           if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
340                                                    &address_index))
341             {
342               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
343               return SNAT_IN2OUT_NEXT_DROP;
344             }
345         }
346
347       /* Create a new session */
348       pool_get (sm->per_thread_data[thread_index].sessions, s);
349       memset (s, 0, sizeof (*s));
350       
351       s->outside_address_index = address_index;
352
353       if (static_mapping)
354         {
355           u->nstaticsessions++;
356           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
357         }
358       else
359         {
360           u->nsessions++;
361         }
362
363       /* Create list elts */
364       pool_get (sm->per_thread_data[thread_index].list_pool,
365                 per_user_translation_list_elt);
366       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
367                        per_user_translation_list_elt -
368                        sm->per_thread_data[thread_index].list_pool);
369
370       per_user_translation_list_elt->value =
371         s - sm->per_thread_data[thread_index].sessions;
372       s->per_user_index = per_user_translation_list_elt -
373                           sm->per_thread_data[thread_index].list_pool;
374       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
375
376       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
377                           s->per_user_list_head_index,
378                           per_user_translation_list_elt -
379                           sm->per_thread_data[thread_index].list_pool);
380    }
381   
382   s->in2out = *key0;
383   s->out2in = key1;
384   s->out2in.protocol = key0->protocol;
385   s->out2in.fib_index = outside_fib_index;
386   *sessionp = s;
387
388   /* Add to translation hashes */
389   kv0.key = s->in2out.as_u64;
390   kv0.value = s - sm->per_thread_data[thread_index].sessions;
391   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
392       clib_warning ("in2out key add failed");
393   
394   kv0.key = s->out2in.as_u64;
395   kv0.value = s - sm->per_thread_data[thread_index].sessions;
396   
397   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
398       clib_warning ("out2in key add failed");
399
400   /* Add to translated packets worker lookup */
401   worker_by_out_key.addr = s->out2in.addr;
402   worker_by_out_key.port = s->out2in.port;
403   worker_by_out_key.fib_index = s->out2in.fib_index;
404   kv0.key = worker_by_out_key.as_u64;
405   kv0.value = thread_index;
406   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
407
408   /* log NAT event */
409   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
410                                       s->out2in.addr.as_u32,
411                                       s->in2out.protocol,
412                                       s->in2out.port,
413                                       s->out2in.port,
414                                       s->in2out.fib_index);
415   return next0;
416 }
417
418 static_always_inline
419 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
420                                  snat_session_key_t *p_key0)
421 {
422   icmp46_header_t *icmp0;
423   snat_session_key_t key0;
424   icmp_echo_header_t *echo0, *inner_echo0 = 0;
425   ip4_header_t *inner_ip0 = 0;
426   void *l4_header = 0;
427   icmp46_header_t *inner_icmp0;
428
429   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
430   echo0 = (icmp_echo_header_t *)(icmp0+1);
431
432   if (!icmp_is_error_message (icmp0))
433     {
434       key0.protocol = SNAT_PROTOCOL_ICMP;
435       key0.addr = ip0->src_address;
436       key0.port = echo0->identifier;
437     }
438   else
439     {
440       inner_ip0 = (ip4_header_t *)(echo0+1);
441       l4_header = ip4_next_header (inner_ip0);
442       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
443       key0.addr = inner_ip0->dst_address;
444       switch (key0.protocol)
445         {
446         case SNAT_PROTOCOL_ICMP:
447           inner_icmp0 = (icmp46_header_t*)l4_header;
448           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
449           key0.port = inner_echo0->identifier;
450           break;
451         case SNAT_PROTOCOL_UDP:
452         case SNAT_PROTOCOL_TCP:
453           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
454           break;
455         default:
456           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
457         }
458     }
459   *p_key0 = key0;
460   return -1; /* success */
461 }
462
463 /**
464  * Get address and port values to be used for packet SNAT translation
465  * and create session if needed
466  *
467  * @param[in,out] sm             SNAT main
468  * @param[in,out] node           SNAT node runtime
469  * @param[in] thread_index       thread index
470  * @param[in,out] b0             buffer containing packet to be translated
471  * @param[out] p_proto           protocol used for matching
472  * @param[out] p_value           address and port after NAT translation
473  * @param[out] p_dont_translate  if packet should not be translated
474  * @param d                      optional parameter
475  * @param e                      optional parameter
476  */
477 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
478                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
479                            snat_session_key_t *p_value,
480                            u8 *p_dont_translate, void *d, void *e)
481 {
482   ip4_header_t *ip0;
483   icmp46_header_t *icmp0;
484   u32 sw_if_index0;
485   u32 rx_fib_index0;
486   snat_session_key_t key0;
487   snat_session_t *s0 = 0;
488   u8 dont_translate = 0;
489   clib_bihash_kv_8_8_t kv0, value0;
490   u32 next0 = ~0;
491   int err;
492
493   ip0 = vlib_buffer_get_current (b0);
494   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
495   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
496   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
497
498   err = icmp_get_key (ip0, &key0);
499   if (err != -1)
500     {
501       b0->error = node->errors[err];
502       next0 = SNAT_IN2OUT_NEXT_DROP;
503       goto out;
504     }
505   key0.fib_index = rx_fib_index0;
506
507   kv0.key = key0.as_u64;
508
509   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
510     {
511       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
512           IP_PROTOCOL_ICMP, rx_fib_index0)))
513         {
514           dont_translate = 1;
515           goto out;
516         }
517
518       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
519         {
520           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
521           next0 = SNAT_IN2OUT_NEXT_DROP;
522           goto out;
523         }
524
525       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
526                          &s0, node, next0, thread_index);
527
528       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
529         goto out;
530     }
531   else
532     {
533       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
534                         icmp0->type != ICMP4_echo_reply &&
535                         !icmp_is_error_message (icmp0)))
536         {
537           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
538           next0 = SNAT_IN2OUT_NEXT_DROP;
539           goto out;
540         }
541
542       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
543                               value0.value);
544     }
545
546 out:
547   *p_proto = key0.protocol;
548   if (s0)
549     *p_value = s0->out2in;
550   *p_dont_translate = dont_translate;
551   if (d)
552     *(snat_session_t**)d = s0;
553   return next0;
554 }
555
556 /**
557  * Get address and port values to be used for packet SNAT translation
558  *
559  * @param[in] sm                 SNAT main
560  * @param[in,out] node           SNAT node runtime
561  * @param[in] thread_index       thread index
562  * @param[in,out] b0             buffer containing packet to be translated
563  * @param[out] p_proto           protocol used for matching
564  * @param[out] p_value           address and port after NAT translation
565  * @param[out] p_dont_translate  if packet should not be translated
566  * @param d                      optional parameter
567  * @param e                      optional parameter
568  */
569 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
570                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
571                            snat_session_key_t *p_value,
572                            u8 *p_dont_translate, void *d, void *e)
573 {
574   ip4_header_t *ip0;
575   icmp46_header_t *icmp0;
576   u32 sw_if_index0;
577   u32 rx_fib_index0;
578   snat_session_key_t key0;
579   snat_session_key_t sm0;
580   u8 dont_translate = 0;
581   u8 is_addr_only;
582   u32 next0 = ~0;
583   int err;
584
585   ip0 = vlib_buffer_get_current (b0);
586   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
587   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
588   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
589
590   err = icmp_get_key (ip0, &key0);
591   if (err != -1)
592     {
593       b0->error = node->errors[err];
594       next0 = SNAT_IN2OUT_NEXT_DROP;
595       goto out2;
596     }
597   key0.fib_index = rx_fib_index0;
598
599   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only))
600     {
601       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
602           IP_PROTOCOL_ICMP, rx_fib_index0)))
603         {
604           dont_translate = 1;
605           goto out;
606         }
607
608       if (icmp_is_error_message (icmp0))
609         {
610           next0 = SNAT_IN2OUT_NEXT_DROP;
611           goto out;
612         }
613
614       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
615       next0 = SNAT_IN2OUT_NEXT_DROP;
616       goto out;
617     }
618
619   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
620                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
621                     !icmp_is_error_message (icmp0)))
622     {
623       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
624       next0 = SNAT_IN2OUT_NEXT_DROP;
625       goto out;
626     }
627
628 out:
629   *p_value = sm0;
630 out2:
631   *p_proto = key0.protocol;
632   *p_dont_translate = dont_translate;
633   return next0;
634 }
635
636 static inline u32 icmp_in2out (snat_main_t *sm,
637                                vlib_buffer_t * b0,
638                                ip4_header_t * ip0,
639                                icmp46_header_t * icmp0,
640                                u32 sw_if_index0,
641                                u32 rx_fib_index0,
642                                vlib_node_runtime_t * node,
643                                u32 next0,
644                                u32 thread_index,
645                                void *d,
646                                void *e)
647 {
648   snat_session_key_t sm0;
649   u8 protocol;
650   icmp_echo_header_t *echo0, *inner_echo0 = 0;
651   ip4_header_t *inner_ip0;
652   void *l4_header = 0;
653   icmp46_header_t *inner_icmp0;
654   u8 dont_translate;
655   u32 new_addr0, old_addr0;
656   u16 old_id0, new_id0;
657   ip_csum_t sum0;
658   u16 checksum0;
659   u32 next0_tmp;
660
661   echo0 = (icmp_echo_header_t *)(icmp0+1);
662
663   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0,
664                                        &protocol, &sm0, &dont_translate, d, e);
665   if (next0_tmp != ~0)
666     next0 = next0_tmp;
667   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
668     goto out;
669
670   sum0 = ip_incremental_checksum (0, icmp0,
671                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
672   checksum0 = ~ip_csum_fold (sum0);
673   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
674     {
675       next0 = SNAT_IN2OUT_NEXT_DROP;
676       goto out;
677     }
678
679   old_addr0 = ip0->src_address.as_u32;
680   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
681   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
682
683   sum0 = ip0->checksum;
684   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
685                          src_address /* changed member */);
686   ip0->checksum = ip_csum_fold (sum0);
687   
688   if (!icmp_is_error_message (icmp0))
689     {
690       new_id0 = sm0.port;
691       if (PREDICT_FALSE(new_id0 != echo0->identifier))
692         {
693           old_id0 = echo0->identifier;
694           new_id0 = sm0.port;
695           echo0->identifier = new_id0;
696
697           sum0 = icmp0->checksum;
698           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
699                                  identifier);
700           icmp0->checksum = ip_csum_fold (sum0);
701         }
702     }
703   else
704     {
705       inner_ip0 = (ip4_header_t *)(echo0+1);
706       l4_header = ip4_next_header (inner_ip0);
707
708       if (!ip4_header_checksum_is_valid (inner_ip0))
709         {
710           next0 = SNAT_IN2OUT_NEXT_DROP;
711           goto out;
712         }
713
714       old_addr0 = inner_ip0->dst_address.as_u32;
715       inner_ip0->dst_address = sm0.addr;
716       new_addr0 = inner_ip0->dst_address.as_u32;
717
718       sum0 = icmp0->checksum;
719       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
720                              dst_address /* changed member */);
721       icmp0->checksum = ip_csum_fold (sum0);
722
723       switch (protocol)
724         {
725           case SNAT_PROTOCOL_ICMP:
726             inner_icmp0 = (icmp46_header_t*)l4_header;
727             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
728
729             old_id0 = inner_echo0->identifier;
730             new_id0 = sm0.port;
731             inner_echo0->identifier = new_id0;
732
733             sum0 = icmp0->checksum;
734             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
735                                    identifier);
736             icmp0->checksum = ip_csum_fold (sum0);
737             break;
738           case SNAT_PROTOCOL_UDP:
739           case SNAT_PROTOCOL_TCP:
740             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
741             new_id0 = sm0.port;
742             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
743
744             sum0 = icmp0->checksum;
745             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
746                                    dst_port);
747             icmp0->checksum = ip_csum_fold (sum0);
748             break;
749           default:
750             ASSERT(0);
751         }
752     }
753
754 out:
755   return next0;
756 }
757
758 /**
759  * @brief Hairpinning
760  *
761  * Hairpinning allows two endpoints on the internal side of the NAT to
762  * communicate even if they only use each other's external IP addresses
763  * and ports.
764  *
765  * @param sm     SNAT main.
766  * @param b0     Vlib buffer.
767  * @param ip0    IP header.
768  * @param udp0   UDP header.
769  * @param tcp0   TCP header.
770  * @param proto0 SNAT protocol.
771  */
772 static inline void
773 snat_hairpinning (snat_main_t *sm,
774                   vlib_buffer_t * b0,
775                   ip4_header_t * ip0,
776                   udp_header_t * udp0,
777                   tcp_header_t * tcp0,
778                   u32 proto0)
779 {
780   snat_session_key_t key0, sm0;
781   snat_worker_key_t k0;
782   snat_session_t * s0;
783   clib_bihash_kv_8_8_t kv0, value0;
784   ip_csum_t sum0;
785   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
786   u16 new_dst_port0, old_dst_port0;
787
788   key0.addr = ip0->dst_address;
789   key0.port = udp0->dst_port;
790   key0.protocol = proto0;
791   key0.fib_index = sm->outside_fib_index;
792   kv0.key = key0.as_u64;
793
794   /* Check if destination is in active sessions */
795   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
796     {
797       /* or static mappings */
798       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
799         {
800           new_dst_addr0 = sm0.addr.as_u32;
801           new_dst_port0 = sm0.port;
802           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
803         }
804     }
805   else
806     {
807       si = value0.value;
808       if (sm->num_workers > 1)
809         {
810           k0.addr = ip0->dst_address;
811           k0.port = udp0->dst_port;
812           k0.fib_index = sm->outside_fib_index;
813           kv0.key = k0.as_u64;
814           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
815             ASSERT(0);
816           else
817             ti = value0.value;
818         }
819       else
820         ti = sm->num_workers;
821
822       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
823       new_dst_addr0 = s0->in2out.addr.as_u32;
824       new_dst_port0 = s0->in2out.port;
825       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
826     }
827
828   /* Destination is behind the same NAT, use internal address and port */
829   if (new_dst_addr0)
830     {
831       old_dst_addr0 = ip0->dst_address.as_u32;
832       ip0->dst_address.as_u32 = new_dst_addr0;
833       sum0 = ip0->checksum;
834       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
835                              ip4_header_t, dst_address);
836       ip0->checksum = ip_csum_fold (sum0);
837
838       old_dst_port0 = tcp0->dst;
839       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
840         {
841           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
842             {
843               tcp0->dst = new_dst_port0;
844               sum0 = tcp0->checksum;
845               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
846                                      ip4_header_t, dst_address);
847               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
848                                      ip4_header_t /* cheat */, length);
849               tcp0->checksum = ip_csum_fold(sum0);
850             }
851           else
852             {
853               udp0->dst_port = new_dst_port0;
854               udp0->checksum = 0;
855             }
856         }
857     }
858 }
859
860 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
861                                          vlib_buffer_t * b0,
862                                          ip4_header_t * ip0,
863                                          icmp46_header_t * icmp0,
864                                          u32 sw_if_index0,
865                                          u32 rx_fib_index0,
866                                          vlib_node_runtime_t * node,
867                                          u32 next0,
868                                          f64 now,
869                                          u32 thread_index,
870                                          snat_session_t ** p_s0)
871 {
872   snat_session_key_t key0, sm0;
873   clib_bihash_kv_8_8_t kv0, value0;
874   snat_worker_key_t k0;
875   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
876   ip_csum_t sum0;
877
878   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
879                       next0, thread_index, p_s0, 0);
880   snat_session_t * s0 = *p_s0;
881   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
882     {
883       /* Hairpinning */
884       if (!icmp_is_error_message (icmp0))
885         {
886           icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
887           u16 icmp_id0 = echo0->identifier;
888           key0.addr = ip0->dst_address;
889           key0.port = icmp_id0;
890           key0.protocol = SNAT_PROTOCOL_ICMP;
891           key0.fib_index = sm->outside_fib_index;
892           kv0.key = key0.as_u64;
893
894           /* Check if destination is in active sessions */
895           if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
896             {
897               /* or static mappings */
898               if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
899                 {
900                   new_dst_addr0 = sm0.addr.as_u32;
901                   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
902                 }
903             }
904           else
905             {
906               si = value0.value;
907               if (sm->num_workers > 1)
908                 {
909                   k0.addr = ip0->dst_address;
910                   k0.port = icmp_id0;
911                   k0.fib_index = sm->outside_fib_index;
912                   kv0.key = k0.as_u64;
913                   if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
914                     ASSERT(0);
915                   else
916                     ti = value0.value;
917                 }
918               else
919                 ti = sm->num_workers;
920
921               s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
922               new_dst_addr0 = s0->in2out.addr.as_u32;
923               vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
924               echo0->identifier = s0->in2out.port;
925               sum0 = icmp0->checksum;
926               sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
927                                      icmp_echo_header_t, identifier);
928               icmp0->checksum = ip_csum_fold (sum0);
929             }
930
931           /* Destination is behind the same NAT, use internal address and port */
932           if (new_dst_addr0)
933             {
934               old_dst_addr0 = ip0->dst_address.as_u32;
935               ip0->dst_address.as_u32 = new_dst_addr0;
936               sum0 = ip0->checksum;
937               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
938                                      ip4_header_t, dst_address);
939               ip0->checksum = ip_csum_fold (sum0);
940             }
941         }
942
943       /* Accounting */
944       s0->last_heard = now;
945       s0->total_pkts++;
946       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
947       /* Per-user LRU list maintenance for dynamic translations */
948       if (!snat_is_session_static (s0))
949         {
950           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
951                              s0->per_user_index);
952           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
953                               s0->per_user_list_head_index,
954                               s0->per_user_index);
955         }
956     }
957   return next0;
958 }
959
960 static inline uword
961 snat_in2out_node_fn_inline (vlib_main_t * vm,
962                             vlib_node_runtime_t * node,
963                             vlib_frame_t * frame, int is_slow_path)
964 {
965   u32 n_left_from, * from, * to_next;
966   snat_in2out_next_t next_index;
967   u32 pkts_processed = 0;
968   snat_main_t * sm = &snat_main;
969   f64 now = vlib_time_now (vm);
970   u32 stats_node_index;
971   u32 thread_index = vlib_get_thread_index ();
972
973   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
974     snat_in2out_node.index;
975
976   from = vlib_frame_vector_args (frame);
977   n_left_from = frame->n_vectors;
978   next_index = node->cached_next_index;
979
980   while (n_left_from > 0)
981     {
982       u32 n_left_to_next;
983
984       vlib_get_next_frame (vm, node, next_index,
985                            to_next, n_left_to_next);
986
987       while (n_left_from >= 4 && n_left_to_next >= 2)
988         {
989           u32 bi0, bi1;
990           vlib_buffer_t * b0, * b1;
991           u32 next0, next1;
992           u32 sw_if_index0, sw_if_index1;
993           ip4_header_t * ip0, * ip1;
994           ip_csum_t sum0, sum1;
995           u32 new_addr0, old_addr0, new_addr1, old_addr1;
996           u16 old_port0, new_port0, old_port1, new_port1;
997           udp_header_t * udp0, * udp1;
998           tcp_header_t * tcp0, * tcp1;
999           icmp46_header_t * icmp0, * icmp1;
1000           snat_session_key_t key0, key1;
1001           u32 rx_fib_index0, rx_fib_index1;
1002           u32 proto0, proto1;
1003           snat_session_t * s0 = 0, * s1 = 0;
1004           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1005           
1006           /* Prefetch next iteration. */
1007           {
1008             vlib_buffer_t * p2, * p3;
1009             
1010             p2 = vlib_get_buffer (vm, from[2]);
1011             p3 = vlib_get_buffer (vm, from[3]);
1012             
1013             vlib_prefetch_buffer_header (p2, LOAD);
1014             vlib_prefetch_buffer_header (p3, LOAD);
1015
1016             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1017             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1018           }
1019
1020           /* speculatively enqueue b0 and b1 to the current next frame */
1021           to_next[0] = bi0 = from[0];
1022           to_next[1] = bi1 = from[1];
1023           from += 2;
1024           to_next += 2;
1025           n_left_from -= 2;
1026           n_left_to_next -= 2;
1027           
1028           b0 = vlib_get_buffer (vm, bi0);
1029           b1 = vlib_get_buffer (vm, bi1);
1030
1031           ip0 = vlib_buffer_get_current (b0);
1032           udp0 = ip4_next_header (ip0);
1033           tcp0 = (tcp_header_t *) udp0;
1034           icmp0 = (icmp46_header_t *) udp0;
1035
1036           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1037           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1038                                    sw_if_index0);
1039
1040           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1041
1042           if (PREDICT_FALSE(ip0->ttl == 1))
1043             {
1044               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1045               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1046                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1047                                            0);
1048               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1049               goto trace00;
1050             }
1051
1052           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1053
1054           /* Next configured feature, probably ip4-lookup */
1055           if (is_slow_path)
1056             {
1057               if (PREDICT_FALSE (proto0 == ~0))
1058                 goto trace00;
1059               
1060               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1061                 {
1062                   next0 = icmp_in2out_slow_path 
1063                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
1064                      node, next0, now, thread_index, &s0);
1065                   goto trace00;
1066                 }
1067             }
1068           else
1069             {
1070               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1071                 {
1072                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1073                   goto trace00;
1074                 }
1075             }
1076
1077           key0.addr = ip0->src_address;
1078           key0.port = udp0->src_port;
1079           key0.protocol = proto0;
1080           key0.fib_index = rx_fib_index0;
1081           
1082           kv0.key = key0.as_u64;
1083
1084           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
1085             {
1086               if (is_slow_path)
1087                 {
1088                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
1089                       proto0, rx_fib_index0)))
1090                     goto trace00;
1091
1092                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1093                                      &s0, node, next0, thread_index);
1094                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1095                     goto trace00;
1096                 }
1097               else
1098                 {
1099                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1100                   goto trace00;
1101                 }
1102             }
1103           else
1104             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1105                                     value0.value);
1106
1107           old_addr0 = ip0->src_address.as_u32;
1108           ip0->src_address = s0->out2in.addr;
1109           new_addr0 = ip0->src_address.as_u32;
1110           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1111
1112           sum0 = ip0->checksum;
1113           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1114                                  ip4_header_t,
1115                                  src_address /* changed member */);
1116           ip0->checksum = ip_csum_fold (sum0);
1117
1118           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1119             {
1120               old_port0 = tcp0->src_port;
1121               tcp0->src_port = s0->out2in.port;
1122               new_port0 = tcp0->src_port;
1123
1124               sum0 = tcp0->checksum;
1125               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1126                                      ip4_header_t,
1127                                      dst_address /* changed member */);
1128               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1129                                      ip4_header_t /* cheat */,
1130                                      length /* changed member */);
1131               tcp0->checksum = ip_csum_fold(sum0);
1132             }
1133           else
1134             {
1135               old_port0 = udp0->src_port;
1136               udp0->src_port = s0->out2in.port;
1137               udp0->checksum = 0;
1138             }
1139
1140           /* Hairpinning */
1141           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1142
1143           /* Accounting */
1144           s0->last_heard = now;
1145           s0->total_pkts++;
1146           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1147           /* Per-user LRU list maintenance for dynamic translation */
1148           if (!snat_is_session_static (s0))
1149             {
1150               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1151                                  s0->per_user_index);
1152               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1153                                   s0->per_user_list_head_index,
1154                                   s0->per_user_index);
1155             }
1156         trace00:
1157
1158           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1159                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1160             {
1161               snat_in2out_trace_t *t = 
1162                  vlib_add_trace (vm, node, b0, sizeof (*t));
1163               t->is_slow_path = is_slow_path;
1164               t->sw_if_index = sw_if_index0;
1165               t->next_index = next0;
1166                   t->session_index = ~0;
1167               if (s0)
1168                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1169             }
1170
1171           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1172
1173           ip1 = vlib_buffer_get_current (b1);
1174           udp1 = ip4_next_header (ip1);
1175           tcp1 = (tcp_header_t *) udp1;
1176           icmp1 = (icmp46_header_t *) udp1;
1177
1178           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1179           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1180                                    sw_if_index1);
1181
1182           if (PREDICT_FALSE(ip1->ttl == 1))
1183             {
1184               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1185               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1186                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1187                                            0);
1188               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1189               goto trace01;
1190             }
1191
1192           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1193
1194           /* Next configured feature, probably ip4-lookup */
1195           if (is_slow_path)
1196             {
1197               if (PREDICT_FALSE (proto1 == ~0))
1198                 goto trace01;
1199               
1200               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1201                 {
1202                   next1 = icmp_in2out_slow_path 
1203                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1204                      next1, now, thread_index, &s1);
1205                   goto trace01;
1206                 }
1207             }
1208           else
1209             {
1210               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1211                 {
1212                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1213                   goto trace01;
1214                 }
1215             }
1216
1217           key1.addr = ip1->src_address;
1218           key1.port = udp1->src_port;
1219           key1.protocol = proto1;
1220           key1.fib_index = rx_fib_index1;
1221           
1222           kv1.key = key1.as_u64;
1223
1224             if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
1225             {
1226               if (is_slow_path)
1227                 {
1228                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1, ip1,
1229                       proto1, rx_fib_index1)))
1230                     goto trace01;
1231
1232                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1233                                      &s1, node, next1, thread_index);
1234                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1235                     goto trace01;
1236                 }
1237               else
1238                 {
1239                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1240                   goto trace01;
1241                 }
1242             }
1243           else
1244             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1245                                     value1.value);
1246
1247           old_addr1 = ip1->src_address.as_u32;
1248           ip1->src_address = s1->out2in.addr;
1249           new_addr1 = ip1->src_address.as_u32;
1250           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1251
1252           sum1 = ip1->checksum;
1253           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1254                                  ip4_header_t,
1255                                  src_address /* changed member */);
1256           ip1->checksum = ip_csum_fold (sum1);
1257
1258           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1259             {
1260               old_port1 = tcp1->src_port;
1261               tcp1->src_port = s1->out2in.port;
1262               new_port1 = tcp1->src_port;
1263
1264               sum1 = tcp1->checksum;
1265               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1266                                      ip4_header_t,
1267                                      dst_address /* changed member */);
1268               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1269                                      ip4_header_t /* cheat */,
1270                                      length /* changed member */);
1271               tcp1->checksum = ip_csum_fold(sum1);
1272             }
1273           else
1274             {
1275               old_port1 = udp1->src_port;
1276               udp1->src_port = s1->out2in.port;
1277               udp1->checksum = 0;
1278             }
1279
1280           /* Hairpinning */
1281           snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
1282
1283           /* Accounting */
1284           s1->last_heard = now;
1285           s1->total_pkts++;
1286           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1287           /* Per-user LRU list maintenance for dynamic translation */
1288           if (!snat_is_session_static (s1))
1289             {
1290               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1291                                  s1->per_user_index);
1292               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1293                                   s1->per_user_list_head_index,
1294                                   s1->per_user_index);
1295             }
1296         trace01:
1297
1298           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1299                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
1300             {
1301               snat_in2out_trace_t *t = 
1302                  vlib_add_trace (vm, node, b1, sizeof (*t));
1303               t->sw_if_index = sw_if_index1;
1304               t->next_index = next1;
1305               t->session_index = ~0;
1306               if (s1)
1307                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1308             }
1309
1310           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1311
1312           /* verify speculative enqueues, maybe switch current next frame */
1313           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1314                                            to_next, n_left_to_next,
1315                                            bi0, bi1, next0, next1);
1316         }
1317
1318       while (n_left_from > 0 && n_left_to_next > 0)
1319         {
1320           u32 bi0;
1321           vlib_buffer_t * b0;
1322           u32 next0;
1323           u32 sw_if_index0;
1324           ip4_header_t * ip0;
1325           ip_csum_t sum0;
1326           u32 new_addr0, old_addr0;
1327           u16 old_port0, new_port0;
1328           udp_header_t * udp0;
1329           tcp_header_t * tcp0;
1330           icmp46_header_t * icmp0;
1331           snat_session_key_t key0;
1332           u32 rx_fib_index0;
1333           u32 proto0;
1334           snat_session_t * s0 = 0;
1335           clib_bihash_kv_8_8_t kv0, value0;
1336           
1337           /* speculatively enqueue b0 to the current next frame */
1338           bi0 = from[0];
1339           to_next[0] = bi0;
1340           from += 1;
1341           to_next += 1;
1342           n_left_from -= 1;
1343           n_left_to_next -= 1;
1344
1345           b0 = vlib_get_buffer (vm, bi0);
1346           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1347
1348           ip0 = vlib_buffer_get_current (b0);
1349           udp0 = ip4_next_header (ip0);
1350           tcp0 = (tcp_header_t *) udp0;
1351           icmp0 = (icmp46_header_t *) udp0;
1352
1353           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1354           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1355                                    sw_if_index0);
1356
1357           if (PREDICT_FALSE(ip0->ttl == 1))
1358             {
1359               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1360               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1361                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1362                                            0);
1363               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1364               goto trace0;
1365             }
1366
1367           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1368
1369           /* Next configured feature, probably ip4-lookup */
1370           if (is_slow_path)
1371             {
1372               if (PREDICT_FALSE (proto0 == ~0))
1373                 goto trace0;
1374               
1375               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1376                 {
1377                   next0 = icmp_in2out_slow_path 
1378                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1379                      next0, now, thread_index, &s0);
1380                   goto trace0;
1381                 }
1382             }
1383           else
1384             {
1385               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1386                 {
1387                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1388                   goto trace0;
1389                 }
1390             }
1391
1392           key0.addr = ip0->src_address;
1393           key0.port = udp0->src_port;
1394           key0.protocol = proto0;
1395           key0.fib_index = rx_fib_index0;
1396           
1397           kv0.key = key0.as_u64;
1398
1399           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
1400             {
1401               if (is_slow_path)
1402                 {
1403                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
1404                       proto0, rx_fib_index0)))
1405                     goto trace0;
1406
1407                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1408                                      &s0, node, next0, thread_index);
1409
1410                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1411                     goto trace0;
1412                 }
1413               else
1414                 {
1415                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1416                   goto trace0;
1417                 }
1418             }
1419           else
1420             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1421                                     value0.value);
1422
1423           old_addr0 = ip0->src_address.as_u32;
1424           ip0->src_address = s0->out2in.addr;
1425           new_addr0 = ip0->src_address.as_u32;
1426           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1427
1428           sum0 = ip0->checksum;
1429           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1430                                  ip4_header_t,
1431                                  src_address /* changed member */);
1432           ip0->checksum = ip_csum_fold (sum0);
1433
1434           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1435             {
1436               old_port0 = tcp0->src_port;
1437               tcp0->src_port = s0->out2in.port;
1438               new_port0 = tcp0->src_port;
1439
1440               sum0 = tcp0->checksum;
1441               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1442                                      ip4_header_t,
1443                                      dst_address /* changed member */);
1444               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1445                                      ip4_header_t /* cheat */,
1446                                      length /* changed member */);
1447               tcp0->checksum = ip_csum_fold(sum0);
1448             }
1449           else
1450             {
1451               old_port0 = udp0->src_port;
1452               udp0->src_port = s0->out2in.port;
1453               udp0->checksum = 0;
1454             }
1455
1456           /* Hairpinning */
1457           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1458
1459           /* Accounting */
1460           s0->last_heard = now;
1461           s0->total_pkts++;
1462           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1463           /* Per-user LRU list maintenance for dynamic translation */
1464           if (!snat_is_session_static (s0))
1465             {
1466               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1467                                  s0->per_user_index);
1468               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1469                                   s0->per_user_list_head_index,
1470                                   s0->per_user_index);
1471             }
1472
1473         trace0:
1474           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1475                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1476             {
1477               snat_in2out_trace_t *t = 
1478                  vlib_add_trace (vm, node, b0, sizeof (*t));
1479               t->is_slow_path = is_slow_path;
1480               t->sw_if_index = sw_if_index0;
1481               t->next_index = next0;
1482                   t->session_index = ~0;
1483               if (s0)
1484                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1485             }
1486
1487           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1488
1489           /* verify speculative enqueue, maybe switch current next frame */
1490           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1491                                            to_next, n_left_to_next,
1492                                            bi0, next0);
1493         }
1494
1495       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1496     }
1497
1498   vlib_node_increment_counter (vm, stats_node_index, 
1499                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, 
1500                                pkts_processed);
1501   return frame->n_vectors;
1502 }
1503
1504 static uword
1505 snat_in2out_fast_path_fn (vlib_main_t * vm,
1506                           vlib_node_runtime_t * node,
1507                           vlib_frame_t * frame)
1508 {
1509   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */);
1510 }
1511
1512 VLIB_REGISTER_NODE (snat_in2out_node) = {
1513   .function = snat_in2out_fast_path_fn,
1514   .name = "snat-in2out",
1515   .vector_size = sizeof (u32),
1516   .format_trace = format_snat_in2out_trace,
1517   .type = VLIB_NODE_TYPE_INTERNAL,
1518   
1519   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1520   .error_strings = snat_in2out_error_strings,
1521
1522   .runtime_data_bytes = sizeof (snat_runtime_t),
1523   
1524   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1525
1526   /* edit / add dispositions here */
1527   .next_nodes = {
1528     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1529     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1530     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1531     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1532   },
1533 };
1534
1535 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1536
1537 static uword
1538 snat_in2out_slow_path_fn (vlib_main_t * vm,
1539                           vlib_node_runtime_t * node,
1540                           vlib_frame_t * frame)
1541 {
1542   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */);
1543 }
1544
1545 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1546   .function = snat_in2out_slow_path_fn,
1547   .name = "snat-in2out-slowpath",
1548   .vector_size = sizeof (u32),
1549   .format_trace = format_snat_in2out_trace,
1550   .type = VLIB_NODE_TYPE_INTERNAL,
1551   
1552   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1553   .error_strings = snat_in2out_error_strings,
1554
1555   .runtime_data_bytes = sizeof (snat_runtime_t),
1556   
1557   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1558
1559   /* edit / add dispositions here */
1560   .next_nodes = {
1561     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1562     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1563     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1564     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1565   },
1566 };
1567
1568 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
1569
1570 /**************************/
1571 /*** deterministic mode ***/
1572 /**************************/
1573 static uword
1574 snat_det_in2out_node_fn (vlib_main_t * vm,
1575                          vlib_node_runtime_t * node,
1576                          vlib_frame_t * frame)
1577 {
1578   u32 n_left_from, * from, * to_next;
1579   snat_in2out_next_t next_index;
1580   u32 pkts_processed = 0;
1581   snat_main_t * sm = &snat_main;
1582   u32 now = (u32) vlib_time_now (vm);
1583   u32 thread_index = vlib_get_thread_index ();
1584
1585   from = vlib_frame_vector_args (frame);
1586   n_left_from = frame->n_vectors;
1587   next_index = node->cached_next_index;
1588
1589   while (n_left_from > 0)
1590     {
1591       u32 n_left_to_next;
1592
1593       vlib_get_next_frame (vm, node, next_index,
1594                            to_next, n_left_to_next);
1595
1596       while (n_left_from >= 4 && n_left_to_next >= 2)
1597         {
1598           u32 bi0, bi1;
1599           vlib_buffer_t * b0, * b1;
1600           u32 next0, next1;
1601           u32 sw_if_index0, sw_if_index1;
1602           ip4_header_t * ip0, * ip1;
1603           ip_csum_t sum0, sum1;
1604           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
1605           u16 old_port0, new_port0, lo_port0, i0;
1606           u16 old_port1, new_port1, lo_port1, i1;
1607           udp_header_t * udp0, * udp1;
1608           tcp_header_t * tcp0, * tcp1;
1609           u32 proto0, proto1;
1610           snat_det_out_key_t key0, key1;
1611           snat_det_map_t * dm0, * dm1;
1612           snat_det_session_t * ses0 = 0, * ses1 = 0;
1613           u32 rx_fib_index0, rx_fib_index1;
1614           icmp46_header_t * icmp0, * icmp1;
1615
1616           /* Prefetch next iteration. */
1617           {
1618             vlib_buffer_t * p2, * p3;
1619
1620             p2 = vlib_get_buffer (vm, from[2]);
1621             p3 = vlib_get_buffer (vm, from[3]);
1622
1623             vlib_prefetch_buffer_header (p2, LOAD);
1624             vlib_prefetch_buffer_header (p3, LOAD);
1625
1626             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1627             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1628           }
1629
1630           /* speculatively enqueue b0 and b1 to the current next frame */
1631           to_next[0] = bi0 = from[0];
1632           to_next[1] = bi1 = from[1];
1633           from += 2;
1634           to_next += 2;
1635           n_left_from -= 2;
1636           n_left_to_next -= 2;
1637
1638           b0 = vlib_get_buffer (vm, bi0);
1639           b1 = vlib_get_buffer (vm, bi1);
1640
1641           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1642           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1643
1644           ip0 = vlib_buffer_get_current (b0);
1645           udp0 = ip4_next_header (ip0);
1646           tcp0 = (tcp_header_t *) udp0;
1647
1648           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1649
1650           if (PREDICT_FALSE(ip0->ttl == 1))
1651             {
1652               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1653               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1654                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1655                                            0);
1656               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1657               goto trace0;
1658             }
1659
1660           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1661
1662           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1663             {
1664               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1665               icmp0 = (icmp46_header_t *) udp0;
1666
1667               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
1668                                   rx_fib_index0, node, next0, thread_index,
1669                                   &ses0, &dm0);
1670               goto trace0;
1671             }
1672
1673           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
1674           if (PREDICT_FALSE(!dm0))
1675             {
1676               clib_warning("no match for internal host %U",
1677                            format_ip4_address, &ip0->src_address);
1678               next0 = SNAT_IN2OUT_NEXT_DROP;
1679               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1680               goto trace0;
1681             }
1682
1683           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
1684
1685           key0.ext_host_addr = ip0->dst_address;
1686           key0.ext_host_port = tcp0->dst;
1687
1688           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
1689           if (PREDICT_FALSE(!ses0))
1690             {
1691               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
1692                 {
1693                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
1694                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
1695
1696                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
1697                     continue;
1698
1699                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
1700                   break;
1701                 }
1702               if (PREDICT_FALSE(!ses0))
1703                 {
1704                   /* too many sessions for user, send ICMP error packet */
1705
1706                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1707                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
1708                                                ICMP4_destination_unreachable_destination_unreachable_host,
1709                                                0);
1710                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1711                   goto trace0;
1712                 }
1713             }
1714
1715           new_port0 = ses0->out.out_port;
1716
1717           old_addr0.as_u32 = ip0->src_address.as_u32;
1718           ip0->src_address.as_u32 = new_addr0.as_u32;
1719           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1720
1721           sum0 = ip0->checksum;
1722           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1723                                  ip4_header_t,
1724                                  src_address /* changed member */);
1725           ip0->checksum = ip_csum_fold (sum0);
1726
1727           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1728             {
1729               if (tcp0->flags & TCP_FLAG_SYN)
1730                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
1731               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
1732                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1733               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1734                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
1735               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
1736                 snat_det_ses_close(dm0, ses0);
1737               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1738                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
1739               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
1740                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1741
1742               old_port0 = tcp0->src;
1743               tcp0->src = new_port0;
1744
1745               sum0 = tcp0->checksum;
1746               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1747                                      ip4_header_t,
1748                                      dst_address /* changed member */);
1749               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1750                                      ip4_header_t /* cheat */,
1751                                      length /* changed member */);
1752               tcp0->checksum = ip_csum_fold(sum0);
1753             }
1754           else
1755             {
1756               ses0->state = SNAT_SESSION_UDP_ACTIVE;
1757               old_port0 = udp0->src_port;
1758               udp0->src_port = new_port0;
1759               udp0->checksum = 0;
1760             }
1761
1762           switch(ses0->state)
1763             {
1764             case SNAT_SESSION_UDP_ACTIVE:
1765                 ses0->expire = now + sm->udp_timeout;
1766                 break;
1767             case SNAT_SESSION_TCP_SYN_SENT:
1768             case SNAT_SESSION_TCP_FIN_WAIT:
1769             case SNAT_SESSION_TCP_CLOSE_WAIT:
1770             case SNAT_SESSION_TCP_LAST_ACK:
1771                 ses0->expire = now + sm->tcp_transitory_timeout;
1772                 break;
1773             case SNAT_SESSION_TCP_ESTABLISHED:
1774                 ses0->expire = now + sm->tcp_established_timeout;
1775                 break;
1776             }
1777
1778         trace0:
1779           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1780                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1781             {
1782               snat_in2out_trace_t *t =
1783                  vlib_add_trace (vm, node, b0, sizeof (*t));
1784               t->is_slow_path = 0;
1785               t->sw_if_index = sw_if_index0;
1786               t->next_index = next0;
1787               t->session_index = ~0;
1788               if (ses0)
1789                 t->session_index = ses0 - dm0->sessions;
1790             }
1791
1792           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1793
1794           ip1 = vlib_buffer_get_current (b1);
1795           udp1 = ip4_next_header (ip1);
1796           tcp1 = (tcp_header_t *) udp1;
1797
1798           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1799
1800           if (PREDICT_FALSE(ip1->ttl == 1))
1801             {
1802               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1803               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1804                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1805                                            0);
1806               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1807               goto trace1;
1808             }
1809
1810           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1811
1812           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
1813             {
1814               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
1815               icmp1 = (icmp46_header_t *) udp1;
1816
1817               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
1818                                   rx_fib_index1, node, next1, thread_index,
1819                                   &ses1, &dm1);
1820               goto trace1;
1821             }
1822
1823           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
1824           if (PREDICT_FALSE(!dm1))
1825             {
1826               clib_warning("no match for internal host %U",
1827                            format_ip4_address, &ip0->src_address);
1828               next1 = SNAT_IN2OUT_NEXT_DROP;
1829               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1830               goto trace1;
1831             }
1832
1833           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
1834
1835           key1.ext_host_addr = ip1->dst_address;
1836           key1.ext_host_port = tcp1->dst;
1837
1838           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
1839           if (PREDICT_FALSE(!ses1))
1840             {
1841               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
1842                 {
1843                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
1844                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
1845
1846                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
1847                     continue;
1848
1849                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
1850                   break;
1851                 }
1852               if (PREDICT_FALSE(!ses1))
1853                 {
1854                   /* too many sessions for user, send ICMP error packet */
1855
1856                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1857                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
1858                                                ICMP4_destination_unreachable_destination_unreachable_host,
1859                                                0);
1860                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1861                   goto trace1;
1862                 }
1863             }
1864
1865           new_port1 = ses1->out.out_port;
1866
1867           old_addr1.as_u32 = ip1->src_address.as_u32;
1868           ip1->src_address.as_u32 = new_addr1.as_u32;
1869           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1870
1871           sum1 = ip1->checksum;
1872           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1873                                  ip4_header_t,
1874                                  src_address /* changed member */);
1875           ip1->checksum = ip_csum_fold (sum1);
1876
1877           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1878             {
1879               if (tcp1->flags & TCP_FLAG_SYN)
1880                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
1881               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
1882                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
1883               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
1884                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
1885               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
1886                 snat_det_ses_close(dm1, ses1);
1887               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1888                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
1889               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
1890                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
1891
1892               old_port1 = tcp1->src;
1893               tcp1->src = new_port1;
1894
1895               sum1 = tcp1->checksum;
1896               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1897                                      ip4_header_t,
1898                                      dst_address /* changed member */);
1899               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1900                                      ip4_header_t /* cheat */,
1901                                      length /* changed member */);
1902               tcp1->checksum = ip_csum_fold(sum1);
1903             }
1904           else
1905             {
1906               ses1->state = SNAT_SESSION_UDP_ACTIVE;
1907               old_port1 = udp1->src_port;
1908               udp1->src_port = new_port1;
1909               udp1->checksum = 0;
1910             }
1911
1912           switch(ses1->state)
1913             {
1914             case SNAT_SESSION_UDP_ACTIVE:
1915                 ses1->expire = now + sm->udp_timeout;
1916                 break;
1917             case SNAT_SESSION_TCP_SYN_SENT:
1918             case SNAT_SESSION_TCP_FIN_WAIT:
1919             case SNAT_SESSION_TCP_CLOSE_WAIT:
1920             case SNAT_SESSION_TCP_LAST_ACK:
1921                 ses1->expire = now + sm->tcp_transitory_timeout;
1922                 break;
1923             case SNAT_SESSION_TCP_ESTABLISHED:
1924                 ses1->expire = now + sm->tcp_established_timeout;
1925                 break;
1926             }
1927
1928         trace1:
1929           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1930                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1931             {
1932               snat_in2out_trace_t *t =
1933                  vlib_add_trace (vm, node, b1, sizeof (*t));
1934               t->is_slow_path = 0;
1935               t->sw_if_index = sw_if_index1;
1936               t->next_index = next1;
1937               t->session_index = ~0;
1938               if (ses1)
1939                 t->session_index = ses1 - dm1->sessions;
1940             }
1941
1942           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1943
1944           /* verify speculative enqueues, maybe switch current next frame */
1945           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1946                                            to_next, n_left_to_next,
1947                                            bi0, bi1, next0, next1);
1948          }
1949
1950       while (n_left_from > 0 && n_left_to_next > 0)
1951         {
1952           u32 bi0;
1953           vlib_buffer_t * b0;
1954           u32 next0;
1955           u32 sw_if_index0;
1956           ip4_header_t * ip0;
1957           ip_csum_t sum0;
1958           ip4_address_t new_addr0, old_addr0;
1959           u16 old_port0, new_port0, lo_port0, i0;
1960           udp_header_t * udp0;
1961           tcp_header_t * tcp0;
1962           u32 proto0;
1963           snat_det_out_key_t key0;
1964           snat_det_map_t * dm0;
1965           snat_det_session_t * ses0 = 0;
1966           u32 rx_fib_index0;
1967           icmp46_header_t * icmp0;
1968
1969           /* speculatively enqueue b0 to the current next frame */
1970           bi0 = from[0];
1971           to_next[0] = bi0;
1972           from += 1;
1973           to_next += 1;
1974           n_left_from -= 1;
1975           n_left_to_next -= 1;
1976
1977           b0 = vlib_get_buffer (vm, bi0);
1978           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1979
1980           ip0 = vlib_buffer_get_current (b0);
1981           udp0 = ip4_next_header (ip0);
1982           tcp0 = (tcp_header_t *) udp0;
1983
1984           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1985
1986           if (PREDICT_FALSE(ip0->ttl == 1))
1987             {
1988               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1989               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1990                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1991                                            0);
1992               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1993               goto trace00;
1994             }
1995
1996           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1997
1998           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1999             {
2000               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2001               icmp0 = (icmp46_header_t *) udp0;
2002
2003               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2004                                   rx_fib_index0, node, next0, thread_index,
2005                                   &ses0, &dm0);
2006               goto trace00;
2007             }
2008
2009           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2010           if (PREDICT_FALSE(!dm0))
2011             {
2012               clib_warning("no match for internal host %U",
2013                            format_ip4_address, &ip0->src_address);
2014               next0 = SNAT_IN2OUT_NEXT_DROP;
2015               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2016               goto trace00;
2017             }
2018
2019           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2020
2021           key0.ext_host_addr = ip0->dst_address;
2022           key0.ext_host_port = tcp0->dst;
2023
2024           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2025           if (PREDICT_FALSE(!ses0))
2026             {
2027               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2028                 {
2029                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2030                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2031
2032                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2033                     continue;
2034
2035                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2036                   break;
2037                 }
2038               if (PREDICT_FALSE(!ses0))
2039                 {
2040                   /* too many sessions for user, send ICMP error packet */
2041
2042                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2043                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2044                                                ICMP4_destination_unreachable_destination_unreachable_host,
2045                                                0);
2046                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2047                   goto trace00;
2048                 }
2049             }
2050
2051           new_port0 = ses0->out.out_port;
2052
2053           old_addr0.as_u32 = ip0->src_address.as_u32;
2054           ip0->src_address.as_u32 = new_addr0.as_u32;
2055           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2056
2057           sum0 = ip0->checksum;
2058           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2059                                  ip4_header_t,
2060                                  src_address /* changed member */);
2061           ip0->checksum = ip_csum_fold (sum0);
2062
2063           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2064             {
2065               if (tcp0->flags & TCP_FLAG_SYN)
2066                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2067               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2068                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2069               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2070                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2071               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2072                 snat_det_ses_close(dm0, ses0);
2073               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2074                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2075               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2076                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2077
2078               old_port0 = tcp0->src;
2079               tcp0->src = new_port0;
2080
2081               sum0 = tcp0->checksum;
2082               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2083                                      ip4_header_t,
2084                                      dst_address /* changed member */);
2085               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2086                                      ip4_header_t /* cheat */,
2087                                      length /* changed member */);
2088               tcp0->checksum = ip_csum_fold(sum0);
2089             }
2090           else
2091             {
2092               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2093               old_port0 = udp0->src_port;
2094               udp0->src_port = new_port0;
2095               udp0->checksum = 0;
2096             }
2097
2098           switch(ses0->state)
2099             {
2100             case SNAT_SESSION_UDP_ACTIVE:
2101                 ses0->expire = now + sm->udp_timeout;
2102                 break;
2103             case SNAT_SESSION_TCP_SYN_SENT:
2104             case SNAT_SESSION_TCP_FIN_WAIT:
2105             case SNAT_SESSION_TCP_CLOSE_WAIT:
2106             case SNAT_SESSION_TCP_LAST_ACK:
2107                 ses0->expire = now + sm->tcp_transitory_timeout;
2108                 break;
2109             case SNAT_SESSION_TCP_ESTABLISHED:
2110                 ses0->expire = now + sm->tcp_established_timeout;
2111                 break;
2112             }
2113
2114         trace00:
2115           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2116                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2117             {
2118               snat_in2out_trace_t *t =
2119                  vlib_add_trace (vm, node, b0, sizeof (*t));
2120               t->is_slow_path = 0;
2121               t->sw_if_index = sw_if_index0;
2122               t->next_index = next0;
2123               t->session_index = ~0;
2124               if (ses0)
2125                 t->session_index = ses0 - dm0->sessions;
2126             }
2127
2128           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2129
2130           /* verify speculative enqueue, maybe switch current next frame */
2131           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2132                                            to_next, n_left_to_next,
2133                                            bi0, next0);
2134         }
2135
2136       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2137     }
2138
2139   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
2140                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2141                                pkts_processed);
2142   return frame->n_vectors;
2143 }
2144
2145 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
2146   .function = snat_det_in2out_node_fn,
2147   .name = "snat-det-in2out",
2148   .vector_size = sizeof (u32),
2149   .format_trace = format_snat_in2out_trace,
2150   .type = VLIB_NODE_TYPE_INTERNAL,
2151
2152   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2153   .error_strings = snat_in2out_error_strings,
2154
2155   .runtime_data_bytes = sizeof (snat_runtime_t),
2156
2157   .n_next_nodes = 3,
2158
2159   /* edit / add dispositions here */
2160   .next_nodes = {
2161     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2162     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2163     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2164   },
2165 };
2166
2167 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
2168
2169 /**
2170  * Get address and port values to be used for packet SNAT translation
2171  * and create session if needed
2172  *
2173  * @param[in,out] sm             SNAT main
2174  * @param[in,out] node           SNAT node runtime
2175  * @param[in] thread_index       thread index
2176  * @param[in,out] b0             buffer containing packet to be translated
2177  * @param[out] p_proto           protocol used for matching
2178  * @param[out] p_value           address and port after NAT translation
2179  * @param[out] p_dont_translate  if packet should not be translated
2180  * @param d                      optional parameter
2181  * @param e                      optional parameter
2182  */
2183 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
2184                           u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
2185                           snat_session_key_t *p_value,
2186                           u8 *p_dont_translate, void *d, void *e)
2187 {
2188   ip4_header_t *ip0;
2189   icmp46_header_t *icmp0;
2190   u32 sw_if_index0;
2191   u32 rx_fib_index0;
2192   u8 protocol;
2193   snat_det_out_key_t key0;
2194   u8 dont_translate = 0;
2195   u32 next0 = ~0;
2196   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2197   ip4_header_t *inner_ip0;
2198   void *l4_header = 0;
2199   icmp46_header_t *inner_icmp0;
2200   snat_det_map_t * dm0 = 0;
2201   ip4_address_t new_addr0;
2202   u16 lo_port0, i0;
2203   snat_det_session_t * ses0 = 0;
2204   ip4_address_t in_addr;
2205   u16 in_port;
2206
2207   ip0 = vlib_buffer_get_current (b0);
2208   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2209   echo0 = (icmp_echo_header_t *)(icmp0+1);
2210   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2211   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
2212
2213   if (!icmp_is_error_message (icmp0))
2214     {
2215       protocol = SNAT_PROTOCOL_ICMP;
2216       in_addr = ip0->src_address;
2217       in_port = echo0->identifier;
2218     }
2219   else
2220     {
2221       inner_ip0 = (ip4_header_t *)(echo0+1);
2222       l4_header = ip4_next_header (inner_ip0);
2223       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2224       in_addr = inner_ip0->dst_address;
2225       switch (protocol)
2226         {
2227         case SNAT_PROTOCOL_ICMP:
2228           inner_icmp0 = (icmp46_header_t*)l4_header;
2229           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2230           in_port = inner_echo0->identifier;
2231           break;
2232         case SNAT_PROTOCOL_UDP:
2233         case SNAT_PROTOCOL_TCP:
2234           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2235           break;
2236         default:
2237           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
2238           next0 = SNAT_IN2OUT_NEXT_DROP;
2239           goto out;
2240         }
2241     }
2242
2243   dm0 = snat_det_map_by_user(sm, &in_addr);
2244   if (PREDICT_FALSE(!dm0))
2245     {
2246       clib_warning("no match for internal host %U",
2247                    format_ip4_address, &in_addr);
2248       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2249           IP_PROTOCOL_ICMP, rx_fib_index0)))
2250         {
2251           dont_translate = 1;
2252           goto out;
2253         }
2254       next0 = SNAT_IN2OUT_NEXT_DROP;
2255       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2256       goto out;
2257     }
2258
2259   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
2260
2261   key0.ext_host_addr = ip0->dst_address;
2262   key0.ext_host_port = 0;
2263
2264   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
2265   if (PREDICT_FALSE(!ses0))
2266     {
2267       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2268           IP_PROTOCOL_ICMP, rx_fib_index0)))
2269         {
2270           dont_translate = 1;
2271           goto out;
2272         }
2273       if (icmp0->type != ICMP4_echo_request)
2274         {
2275           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2276           next0 = SNAT_IN2OUT_NEXT_DROP;
2277           goto out;
2278         }
2279       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2280         {
2281           key0.out_port = clib_host_to_net_u16 (lo_port0 +
2282             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
2283
2284           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
2285             continue;
2286
2287           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
2288           break;
2289         }
2290       if (PREDICT_FALSE(!ses0))
2291         {
2292           next0 = SNAT_IN2OUT_NEXT_DROP;
2293           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
2294           goto out;
2295         }
2296     }
2297
2298   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
2299                     !icmp_is_error_message (icmp0)))
2300     {
2301       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2302       next0 = SNAT_IN2OUT_NEXT_DROP;
2303       goto out;
2304     }
2305
2306   u32 now = (u32) vlib_time_now (sm->vlib_main);
2307
2308   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
2309   ses0->expire = now + sm->icmp_timeout;
2310
2311 out:
2312   *p_proto = protocol;
2313   if (ses0)
2314     {
2315       p_value->addr = new_addr0;
2316       p_value->fib_index = sm->outside_fib_index;
2317       p_value->port = ses0->out.out_port;
2318     }
2319   *p_dont_translate = dont_translate;
2320   if (d)
2321     *(snat_det_session_t**)d = ses0;
2322   if (e)
2323     *(snat_det_map_t**)e = dm0;
2324   return next0;
2325 }
2326
2327 /**********************/
2328 /*** worker handoff ***/
2329 /**********************/
2330 static uword
2331 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
2332                                vlib_node_runtime_t * node,
2333                                vlib_frame_t * frame)
2334 {
2335   snat_main_t *sm = &snat_main;
2336   vlib_thread_main_t *tm = vlib_get_thread_main ();
2337   u32 n_left_from, *from, *to_next = 0;
2338   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
2339   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
2340     = 0;
2341   vlib_frame_queue_elt_t *hf = 0;
2342   vlib_frame_t *f = 0;
2343   int i;
2344   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
2345   u32 next_worker_index = 0;
2346   u32 current_worker_index = ~0;
2347   u32 thread_index = vlib_get_thread_index ();
2348
2349   ASSERT (vec_len (sm->workers));
2350
2351   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
2352     {
2353       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
2354
2355       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
2356                                sm->first_worker_index + sm->num_workers - 1,
2357                                (vlib_frame_queue_t *) (~0));
2358     }
2359
2360   from = vlib_frame_vector_args (frame);
2361   n_left_from = frame->n_vectors;
2362
2363   while (n_left_from > 0)
2364     {
2365       u32 bi0;
2366       vlib_buffer_t *b0;
2367       u32 sw_if_index0;
2368       u32 rx_fib_index0;
2369       ip4_header_t * ip0;
2370       u8 do_handoff;
2371
2372       bi0 = from[0];
2373       from += 1;
2374       n_left_from -= 1;
2375
2376       b0 = vlib_get_buffer (vm, bi0);
2377
2378       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
2379       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2380
2381       ip0 = vlib_buffer_get_current (b0);
2382
2383       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
2384
2385       if (PREDICT_FALSE (next_worker_index != thread_index))
2386         {
2387           do_handoff = 1;
2388
2389           if (next_worker_index != current_worker_index)
2390             {
2391               if (hf)
2392                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2393
2394               hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
2395                                                       next_worker_index,
2396                                                       handoff_queue_elt_by_worker_index);
2397
2398               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2399               to_next_worker = &hf->buffer_index[hf->n_vectors];
2400               current_worker_index = next_worker_index;
2401             }
2402
2403           /* enqueue to correct worker thread */
2404           to_next_worker[0] = bi0;
2405           to_next_worker++;
2406           n_left_to_next_worker--;
2407
2408           if (n_left_to_next_worker == 0)
2409             {
2410               hf->n_vectors = VLIB_FRAME_SIZE;
2411               vlib_put_frame_queue_elt (hf);
2412               current_worker_index = ~0;
2413               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2414               hf = 0;
2415             }
2416         }
2417       else
2418         {
2419           do_handoff = 0;
2420           /* if this is 1st frame */
2421           if (!f)
2422             {
2423               f = vlib_get_frame_to_node (vm, sm->in2out_node_index);
2424               to_next = vlib_frame_vector_args (f);
2425             }
2426
2427           to_next[0] = bi0;
2428           to_next += 1;
2429           f->n_vectors++;
2430         }
2431
2432       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2433                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2434         {
2435           snat_in2out_worker_handoff_trace_t *t =
2436             vlib_add_trace (vm, node, b0, sizeof (*t));
2437           t->next_worker_index = next_worker_index;
2438           t->do_handoff = do_handoff;
2439         }
2440     }
2441
2442   if (f)
2443     vlib_put_frame_to_node (vm, sm->in2out_node_index, f);
2444
2445   if (hf)
2446     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2447
2448   /* Ship frames to the worker nodes */
2449   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2450     {
2451       if (handoff_queue_elt_by_worker_index[i])
2452         {
2453           hf = handoff_queue_elt_by_worker_index[i];
2454           /*
2455            * It works better to let the handoff node
2456            * rate-adapt, always ship the handoff queue element.
2457            */
2458           if (1 || hf->n_vectors == hf->last_n_vectors)
2459             {
2460               vlib_put_frame_queue_elt (hf);
2461               handoff_queue_elt_by_worker_index[i] = 0;
2462             }
2463           else
2464             hf->last_n_vectors = hf->n_vectors;
2465         }
2466       congested_handoff_queue_by_worker_index[i] =
2467         (vlib_frame_queue_t *) (~0);
2468     }
2469   hf = 0;
2470   current_worker_index = ~0;
2471   return frame->n_vectors;
2472 }
2473
2474 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
2475   .function = snat_in2out_worker_handoff_fn,
2476   .name = "snat-in2out-worker-handoff",
2477   .vector_size = sizeof (u32),
2478   .format_trace = format_snat_in2out_worker_handoff_trace,
2479   .type = VLIB_NODE_TYPE_INTERNAL,
2480   
2481   .n_next_nodes = 1,
2482
2483   .next_nodes = {
2484     [0] = "error-drop",
2485   },
2486 };
2487
2488 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
2489
2490 static uword
2491 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
2492                                 vlib_node_runtime_t * node,
2493                                 vlib_frame_t * frame)
2494 {
2495   u32 n_left_from, * from, * to_next;
2496   snat_in2out_next_t next_index;
2497   u32 pkts_processed = 0;
2498   snat_main_t * sm = &snat_main;
2499   u32 stats_node_index;
2500
2501   stats_node_index = snat_in2out_fast_node.index;
2502
2503   from = vlib_frame_vector_args (frame);
2504   n_left_from = frame->n_vectors;
2505   next_index = node->cached_next_index;
2506
2507   while (n_left_from > 0)
2508     {
2509       u32 n_left_to_next;
2510
2511       vlib_get_next_frame (vm, node, next_index,
2512                            to_next, n_left_to_next);
2513
2514       while (n_left_from > 0 && n_left_to_next > 0)
2515         {
2516           u32 bi0;
2517           vlib_buffer_t * b0;
2518           u32 next0;
2519           u32 sw_if_index0;
2520           ip4_header_t * ip0;
2521           ip_csum_t sum0;
2522           u32 new_addr0, old_addr0;
2523           u16 old_port0, new_port0;
2524           udp_header_t * udp0;
2525           tcp_header_t * tcp0;
2526           icmp46_header_t * icmp0;
2527           snat_session_key_t key0, sm0;
2528           u32 proto0;
2529           u32 rx_fib_index0;
2530
2531           /* speculatively enqueue b0 to the current next frame */
2532           bi0 = from[0];
2533           to_next[0] = bi0;
2534           from += 1;
2535           to_next += 1;
2536           n_left_from -= 1;
2537           n_left_to_next -= 1;
2538
2539           b0 = vlib_get_buffer (vm, bi0);
2540           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2541
2542           ip0 = vlib_buffer_get_current (b0);
2543           udp0 = ip4_next_header (ip0);
2544           tcp0 = (tcp_header_t *) udp0;
2545           icmp0 = (icmp46_header_t *) udp0;
2546
2547           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2548           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2549
2550           if (PREDICT_FALSE(ip0->ttl == 1))
2551             {
2552               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2553               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2554                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2555                                            0);
2556               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2557               goto trace0;
2558             }
2559
2560           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2561
2562           if (PREDICT_FALSE (proto0 == ~0))
2563               goto trace0;
2564
2565           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2566             {
2567               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2568                                   rx_fib_index0, node, next0, ~0, 0, 0);
2569               goto trace0;
2570             }
2571
2572           key0.addr = ip0->src_address;
2573           key0.port = udp0->src_port;
2574           key0.fib_index = rx_fib_index0;
2575
2576           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0))
2577             {
2578               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2579               next0= SNAT_IN2OUT_NEXT_DROP;
2580               goto trace0;
2581             }
2582
2583           new_addr0 = sm0.addr.as_u32;
2584           new_port0 = sm0.port;
2585           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2586           old_addr0 = ip0->src_address.as_u32;
2587           ip0->src_address.as_u32 = new_addr0;
2588
2589           sum0 = ip0->checksum;
2590           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2591                                  ip4_header_t,
2592                                  src_address /* changed member */);
2593           ip0->checksum = ip_csum_fold (sum0);
2594
2595           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
2596             {
2597               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2598                 {
2599                   old_port0 = tcp0->src_port;
2600                   tcp0->src_port = new_port0;
2601
2602                   sum0 = tcp0->checksum;
2603                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2604                                          ip4_header_t,
2605                                          dst_address /* changed member */);
2606                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2607                                          ip4_header_t /* cheat */,
2608                                          length /* changed member */);
2609                   tcp0->checksum = ip_csum_fold(sum0);
2610                 }
2611               else
2612                 {
2613                   old_port0 = udp0->src_port;
2614                   udp0->src_port = new_port0;
2615                   udp0->checksum = 0;
2616                 }
2617             }
2618           else
2619             {
2620               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2621                 {
2622                   sum0 = tcp0->checksum;
2623                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2624                                          ip4_header_t,
2625                                          dst_address /* changed member */);
2626                   tcp0->checksum = ip_csum_fold(sum0);
2627                 }
2628             }
2629
2630           /* Hairpinning */
2631           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
2632
2633         trace0:
2634           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2635                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2636             {
2637               snat_in2out_trace_t *t =
2638                  vlib_add_trace (vm, node, b0, sizeof (*t));
2639               t->sw_if_index = sw_if_index0;
2640               t->next_index = next0;
2641             }
2642
2643           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2644
2645           /* verify speculative enqueue, maybe switch current next frame */
2646           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2647                                            to_next, n_left_to_next,
2648                                            bi0, next0);
2649         }
2650
2651       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2652     }
2653
2654   vlib_node_increment_counter (vm, stats_node_index,
2655                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2656                                pkts_processed);
2657   return frame->n_vectors;
2658 }
2659
2660
2661 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
2662   .function = snat_in2out_fast_static_map_fn,
2663   .name = "snat-in2out-fast",
2664   .vector_size = sizeof (u32),
2665   .format_trace = format_snat_in2out_fast_trace,
2666   .type = VLIB_NODE_TYPE_INTERNAL,
2667   
2668   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2669   .error_strings = snat_in2out_error_strings,
2670
2671   .runtime_data_bytes = sizeof (snat_runtime_t),
2672   
2673   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2674
2675   /* edit / add dispositions here */
2676   .next_nodes = {
2677     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2678     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2679     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
2680     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2681   },
2682 };
2683
2684 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);