Use thread local storage for thread index
[vpp.git] / src / plugins / snat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <snat/snat.h>
25 #include <snat/snat_ipfix_logging.h>
26 #include <snat/snat_det.h>
27
28 #include <vppinfra/hash.h>
29 #include <vppinfra/error.h>
30 #include <vppinfra/elog.h>
31
/* Per-packet trace record; rendered by format_snat_in2out_trace() and
 * format_snat_in2out_fast_trace() in this file. */
32 typedef struct {
33   u32 sw_if_index;   /* printed as "sw_if_index" */
34   u32 next_index;    /* printed as "next index" */
35   u32 session_index; /* printed as "session" by format_snat_in2out_trace() only */
36   u32 is_slow_path;  /* non-zero selects the SLOW_PATH tag, zero the FAST_PATH tag */
37 } snat_in2out_trace_t;
38
/* Trace record rendered by format_snat_in2out_worker_handoff_trace(). */
39 typedef struct {
40   u32 next_worker_index; /* worker index printed at the end of the trace line */
41   u8 do_handoff;         /* non-zero prints "next worker", zero prints "same worker" */
42 } snat_in2out_worker_handoff_trace_t;
43
44 /* packet trace format function */
45 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
46 {
47   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
50   char * tag;
51
52   tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
53   
54   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
55               t->sw_if_index, t->next_index, t->session_index);
56
57   return s;
58 }
59
60 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
61 {
62   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
63   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
64   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
65
66   s = format (s, "SANT_IN2OUT_FAST: sw_if_index %d, next index %d", 
67               t->sw_if_index, t->next_index);
68
69   return s;
70 }
71
72 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
73 {
74   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
75   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
76   snat_in2out_worker_handoff_trace_t * t =
77     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
78   char * m;
79
80   m = t->do_handoff ? "next worker" : "same worker";
81   s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
82
83   return s;
84 }
85
/* Node registration objects referenced in this file.
 * snat_in2out_node.index and snat_in2out_slowpath_node.index are read by
 * snat_in2out_node_fn_inline() to select the stats node index.
 * NOTE(review): the matching node registrations are presumably defined
 * further down the file - confirm. */
86 vlib_node_registration_t snat_in2out_node;
87 vlib_node_registration_t snat_in2out_slowpath_node;
88 vlib_node_registration_t snat_in2out_fast_node;
89 vlib_node_registration_t snat_in2out_worker_handoff_node;
90 vlib_node_registration_t snat_det_in2out_node;
91
/* X-macro list of in2out error counters, one _(SYMBOL, "description") per
 * entry. It is expanded once into the snat_in2out_error_t enumerators and
 * once into snat_in2out_error_strings[], so both stay in the same order. */
92 #define foreach_snat_in2out_error                       \
93 _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
94 _(IN2OUT_PACKETS, "Good in2out packets processed")      \
95 _(OUT_OF_PORTS, "Out of ports")                         \
96 _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
97 _(BAD_ICMP_TYPE, "icmp type not echo-request")          \
98 _(NO_TRANSLATION, "No translation")
99   
/* Error codes: one SNAT_IN2OUT_ERROR_<SYMBOL> per foreach_snat_in2out_error
 * entry, followed by SNAT_IN2OUT_N_ERROR as the entry count. The values are
 * used in this file as indices into node->errors[]. */
100 typedef enum {
101 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
102   foreach_snat_in2out_error
103 #undef _
104   SNAT_IN2OUT_N_ERROR,
105 } snat_in2out_error_t;
106
/* Description strings, in the same order as the snat_in2out_error_t
 * enumerators (both are generated from foreach_snat_in2out_error). */
107 static char * snat_in2out_error_strings[] = {
108 #define _(sym,string) string,
109   foreach_snat_in2out_error
110 #undef _
111 };
112
/* Next-node indices used as the "next0" values of the in2out helpers. */
113 typedef enum {
114   SNAT_IN2OUT_NEXT_LOOKUP,
115   SNAT_IN2OUT_NEXT_DROP,       /* returned by the helpers when a packet must be dropped */
116   SNAT_IN2OUT_NEXT_SLOW_PATH,
117   SNAT_IN2OUT_NEXT_ICMP_ERROR,
118   SNAT_IN2OUT_N_NEXT,          /* last enumerator, i.e. the number of next indices */
119 } snat_in2out_next_t;
120
121 /**
122  * @brief Check if packet should be translated
123  *
124  * Packets aimed at outside interface and external addresss with active session
125  * should be translated.
126  *
127  * @param sm            SNAT main
128  * @param rt            SNAT runtime data
129  * @param sw_if_index0  index of the inside interface
130  * @param ip0           IPv4 header
131  * @param proto0        SNAT protocol
132  * @param rx_fib_index0 RX FIB index
133  *
134  * @returns 0 if packet should be translated otherwise 1
135  */
136 static inline int
137 snat_not_translate (snat_main_t * sm, snat_runtime_t * rt, u32 sw_if_index0,
138                    ip4_header_t * ip0, u32 proto0, u32 rx_fib_index0)
139 {
140   ip4_address_t * first_int_addr;
141   udp_header_t * udp0 = ip4_next_header (ip0);
142   snat_session_key_t key0, sm0;
143   clib_bihash_kv_8_8_t kv0, value0;
144   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
145   fib_prefix_t pfx = {
146     .fp_proto = FIB_PROTOCOL_IP4,
147     .fp_len = 32,
148     .fp_addr = {
149         .ip4.as_u32 = ip0->dst_address.as_u32,
150     },
151   };
152
153   if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
154     {
155       first_int_addr =
156         ip4_interface_first_address (sm->ip4_main, sw_if_index0,
157                                      0 /* just want the address */);
158       rt->cached_sw_if_index = sw_if_index0;
159       if (first_int_addr)
160         rt->cached_ip4_address = first_int_addr->as_u32;
161       else
162         rt->cached_ip4_address = 0;
163     }
164
165   /* Don't NAT packet aimed at the intfc address */
166   if (PREDICT_FALSE(ip0->dst_address.as_u32 == rt->cached_ip4_address))
167     return 1;
168
169   key0.addr = ip0->dst_address;
170   key0.port = udp0->dst_port;
171   key0.protocol = proto0;
172   key0.fib_index = sm->outside_fib_index;
173   kv0.key = key0.as_u64;
174
175   /* NAT packet aimed at external address if */
176   /* has active sessions */
177   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
178     {
179       /* or is static mappings */
180       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
181         return 0;
182     }
183   else
184     return 0;
185
186   fei = fib_table_lookup (rx_fib_index0, &pfx);
187   if (FIB_NODE_INDEX_INVALID != fei)
188     {
189       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
190       if (sw_if_index == ~0)
191         {
192           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
193           if (FIB_NODE_INDEX_INVALID != fei)
194             sw_if_index = fib_entry_get_resolving_interface (fei);
195         }
196       snat_interface_t *i;
197       pool_foreach (i, sm->interfaces,
198       ({
199         /* NAT packet aimed at outside interface */
200         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
201           return 0;
202       }));
203     }
204
205   return 1;
206 }
207
/**
 * Create (or recycle) the session used to translate an in2out packet and
 * register it in the lookup tables.
 *
 * Steps, in order:
 *  - resolve the outside FIB index from sm->outside_vrf_id;
 *  - find the user keyed by (source address, RX FIB index) in sm->user_hash,
 *    creating the user and its per-user session list head if missing;
 *  - if the user already has sm->max_translations_per_user dynamic sessions,
 *    reuse the least recently used non-static session with a newly
 *    allocated outside address/port; otherwise create a new session from a
 *    static mapping or from a newly allocated outside address/port;
 *  - add the session to the in2out and out2in hashes, record the owning
 *    thread in worker_by_out and log the session creation.
 *
 * @param sm            SNAT main
 * @param b0            buffer; only b0->error is written, on failure
 * @param ip0           IPv4 header of the packet
 * @param rx_fib_index0 RX FIB index
 * @param key0          in2out key of the packet
 * @param sessionp      [out] session that was created or recycled
 * @param node          node runtime, source of the error counters
 * @param next0         next index returned unchanged on success
 * @param thread_index  index into sm->per_thread_data
 *
 * @returns next0 on success, SNAT_IN2OUT_NEXT_DROP when the outside VRF is
 *          unknown or no outside address/port could be allocated
 */
208 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
209                       ip4_header_t * ip0,
210                       u32 rx_fib_index0,
211                       snat_session_key_t * key0,
212                       snat_session_t ** sessionp,
213                       vlib_node_runtime_t * node,
214                       u32 next0,
215                       u32 thread_index)
216 {
217   snat_user_t *u;
218   snat_user_key_t user_key;
219   snat_session_t *s;
220   clib_bihash_kv_8_8_t kv0, value0;
221   u32 oldest_per_user_translation_list_index;
222   dlist_elt_t * oldest_per_user_translation_list_elt;
223   dlist_elt_t * per_user_translation_list_elt;
224   dlist_elt_t * per_user_list_head_elt;
225   u32 session_index;
226   snat_session_key_t key1;
227   u32 address_index = ~0;
228   u32 outside_fib_index;
229   uword * p;
230   snat_worker_key_t worker_by_out_key;
231
      /* Map the configured outside VRF id to its FIB index */
232   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
233   if (! p)
234     {
235       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
236       return SNAT_IN2OUT_NEXT_DROP;
237     }
238   outside_fib_index = p[0];
239
240   key1.protocol = key0->protocol;
241   user_key.addr = ip0->src_address;
242   user_key.fib_index = rx_fib_index0;
243   kv0.key = user_key.as_u64;
244   
245   /* Ever heard of the "user" = src ip4 address before? */
246   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
247     {
248       /* no, make a new one */
249       pool_get (sm->per_thread_data[thread_index].users, u);
250       memset (u, 0, sizeof (*u));
251       u->addr = ip0->src_address;
252       u->fib_index = rx_fib_index0;
253
254       pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);
255
256       u->sessions_per_user_list_head_index = per_user_list_head_elt -
257         sm->per_thread_data[thread_index].list_pool;
258
259       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
260                        u->sessions_per_user_list_head_index);
261
262       kv0.value = u - sm->per_thread_data[thread_index].users;
263
264       /* add user */
265       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
266     }
267   else
268     {
269       u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
270                              value0.value);
271     }
272
273   /* Over quota? Recycle the least recently used dynamic translation */
274   if (u->nsessions >= sm->max_translations_per_user)
275     {
276       /* Remove the oldest dynamic translation */
          /* Static sessions are rotated to the tail and skipped */
277       do {
278           oldest_per_user_translation_list_index =
279             clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
280                                     u->sessions_per_user_list_head_index);
281
282           ASSERT (oldest_per_user_translation_list_index != ~0);
283
284           /* add it back to the end of the LRU list */
285           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
286                               u->sessions_per_user_list_head_index,
287                               oldest_per_user_translation_list_index);
288           /* Get the list element */
289           oldest_per_user_translation_list_elt =
290             pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
291                                oldest_per_user_translation_list_index);
292
293           /* Get the session index from the list element */
294           session_index = oldest_per_user_translation_list_elt->value;
295
296           /* Get the session */
297           s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
298                                  session_index);
299       } while (snat_is_session_static (s));
300
301       /* Remove in2out, out2in keys */
302       kv0.key = s->in2out.as_u64;
303       if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
304           clib_warning ("in2out key delete failed");
305       kv0.key = s->out2in.as_u64;
306       if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
307           clib_warning ("out2in key delete failed");
308
309       /* log NAT event */
310       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
311                                           s->out2in.addr.as_u32,
312                                           s->in2out.protocol,
313                                           s->in2out.port,
314                                           s->out2in.port,
315                                           s->in2out.fib_index);
316
317       snat_free_outside_address_and_port 
318         (sm, &s->out2in, s->outside_address_index);
319       s->outside_address_index = ~0;
320
321       if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
322                                                &address_index))
323         {
              /* NOTE(review): at this point the recycled session has already
               * lost its hash entries and outside address but is still on
               * the per-user list - confirm this failure path is safe when
               * ASSERT is compiled out. */
324           ASSERT(0);
325
326           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
327           return SNAT_IN2OUT_NEXT_DROP;
328         }
329       s->outside_address_index = address_index;
330     }
331   else
332     {
333       u8 static_mapping = 1;
334
335       /* First try to match static mapping by local address and port */
336       if (snat_static_mapping_match (sm, *key0, &key1, 0))
337         {
338           static_mapping = 0;
339           /* Try to create dynamic translation */
340           if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
341                                                    &address_index))
342             {
343               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
344               return SNAT_IN2OUT_NEXT_DROP;
345             }
346         }
347
348       /* Create a new session */
349       pool_get (sm->per_thread_data[thread_index].sessions, s);
350       memset (s, 0, sizeof (*s));
351       
          /* Stays ~0 for a static mapping: no address was allocated */
352       s->outside_address_index = address_index;
353
354       if (static_mapping)
355         {
356           u->nstaticsessions++;
357           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
358         }
359       else
360         {
361           u->nsessions++;
362         }
363
364       /* Create list elts */
365       pool_get (sm->per_thread_data[thread_index].list_pool,
366                 per_user_translation_list_elt);
367       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
368                        per_user_translation_list_elt -
369                        sm->per_thread_data[thread_index].list_pool);
370
371       per_user_translation_list_elt->value =
372         s - sm->per_thread_data[thread_index].sessions;
373       s->per_user_index = per_user_translation_list_elt -
374                           sm->per_thread_data[thread_index].list_pool;
375       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
376
377       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
378                           s->per_user_list_head_index,
379                           per_user_translation_list_elt -
380                           sm->per_thread_data[thread_index].list_pool);
381    }
382   
      /* key1 holds the outside address/port from the mapping or allocator */
383   s->in2out = *key0;
384   s->out2in = key1;
385   s->out2in.protocol = key0->protocol;
386   s->out2in.fib_index = outside_fib_index;
387   *sessionp = s;
388
389   /* Add to translation hashes */
390   kv0.key = s->in2out.as_u64;
391   kv0.value = s - sm->per_thread_data[thread_index].sessions;
392   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
393       clib_warning ("in2out key add failed");
394   
395   kv0.key = s->out2in.as_u64;
396   kv0.value = s - sm->per_thread_data[thread_index].sessions;
397   
398   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
399       clib_warning ("out2in key add failed");
400
401   /* Add to translated packets worker lookup */
402   worker_by_out_key.addr = s->out2in.addr;
403   worker_by_out_key.port = s->out2in.port;
404   worker_by_out_key.fib_index = s->out2in.fib_index;
405   kv0.key = worker_by_out_key.as_u64;
406   kv0.value = thread_index;
407   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
408
409   /* log NAT event */
410   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
411                                       s->out2in.addr.as_u32,
412                                       s->in2out.protocol,
413                                       s->in2out.port,
414                                       s->out2in.port,
415                                       s->in2out.fib_index);
416   return next0;
417 }
418
419 static_always_inline
420 snat_in2out_error_t icmp_get_key(icmp46_header_t *icmp0,
421                                  snat_session_key_t *p_key0)
422 {
423   snat_session_key_t key0;
424   icmp_echo_header_t *echo0, *inner_echo0 = 0;
425   ip4_header_t *inner_ip0 = 0;
426   void *l4_header = 0;
427   icmp46_header_t *inner_icmp0;
428
429   echo0 = (icmp_echo_header_t *)(icmp0+1);
430
431   if (!icmp_is_error_message (icmp0))
432     {
433       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request))
434         return SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE;
435       key0.protocol = SNAT_PROTOCOL_ICMP;
436       key0.port = echo0->identifier;
437     }
438   else
439     {
440       inner_ip0 = (ip4_header_t *)(echo0+1);
441       l4_header = ip4_next_header (inner_ip0);
442       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
443       switch (key0.protocol)
444         {
445         case SNAT_PROTOCOL_ICMP:
446           inner_icmp0 = (icmp46_header_t*)l4_header;
447           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
448           key0.port = inner_echo0->identifier;
449           break;
450         case SNAT_PROTOCOL_UDP:
451         case SNAT_PROTOCOL_TCP:
452           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
453           break;
454         default:
455           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
456         }
457     }
458   *p_key0 = key0;
459   return -1; /* success */
460 }
461
462 /**
463  * Get address and port values to be used for packet SNAT translation
464  * and create session if needed
465  *
466  * @param[in,out] sm             SNAT main
467  * @param[in,out] node           SNAT node runtime
468  * @param[in] thread_index       thread index
469  * @param[in,out] b0             buffer containing packet to be translated
470  * @param[out] p_key             address and port before NAT translation
471  * @param[out] p_value           address and port after NAT translation
472  * @param[out] p_dont_translate  if packet should not be translated
473  * @param d                      optional parameter
474  */
475 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
476                            u32 thread_index, vlib_buffer_t *b0,
477                            snat_session_key_t *p_key,
478                            snat_session_key_t *p_value,
479                            u8 *p_dont_translate, void *d)
480 {
481   snat_runtime_t *rt;
482   ip4_header_t *ip0;
483   icmp46_header_t *icmp0;
484   u32 sw_if_index0;
485   u32 rx_fib_index0;
486   snat_session_key_t key0;
487   snat_session_t *s0 = 0;
488   u8 dont_translate = 0;
489   clib_bihash_kv_8_8_t kv0, value0;
490   u32 next0 = ~0;
491   int err;
492
493   rt = (snat_runtime_t *) node->runtime_data;
494   ip0 = vlib_buffer_get_current (b0);
495   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
496   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
497   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
498
499   err = icmp_get_key (icmp0, &key0);
500   if (err != -1)
501     {
502       b0->error = node->errors[err];
503       next0 = SNAT_IN2OUT_NEXT_DROP;
504       goto out;
505     }
506   key0.addr = ip0->src_address;
507   key0.fib_index = rx_fib_index0;
508
509   kv0.key = key0.as_u64;
510
511   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
512     {
513       if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
514           IP_PROTOCOL_ICMP, rx_fib_index0)))
515         {
516           dont_translate = 1;
517           goto out;
518         }
519
520       if (icmp_is_error_message (icmp0))
521         {
522           next0 = SNAT_IN2OUT_NEXT_DROP;
523           goto out;
524         }
525
526       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
527                          &s0, node, next0, thread_index);
528
529       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
530         goto out;
531     }
532   else
533     s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
534                             value0.value);
535
536 out:
537   *p_key = key0;
538   if (s0)
539     *p_value = s0->out2in;
540   *p_dont_translate = dont_translate;
541   if (d)
542     *(snat_session_t**)d = s0;
543   return next0;
544 }
545
546 /**
547  * Get address and port values to be used for packet SNAT translation
548  *
549  * @param[in] sm                 SNAT main
550  * @param[in,out] node           SNAT node runtime
551  * @param[in] thread_index       thread index
552  * @param[in,out] b0             buffer containing packet to be translated
553  * @param[out] p_key             address and port before NAT translation
554  * @param[out] p_value           address and port after NAT translation
555  * @param[out] p_dont_translate  if packet should not be translated
556  * @param d                      optional parameter
557  */
558 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
559                            u32 thread_index, vlib_buffer_t *b0,
560                            snat_session_key_t *p_key,
561                            snat_session_key_t *p_value,
562                            u8 *p_dont_translate, void *d)
563 {
564   snat_runtime_t *rt;
565   ip4_header_t *ip0;
566   icmp46_header_t *icmp0;
567   u32 sw_if_index0;
568   u32 rx_fib_index0;
569   snat_session_key_t key0;
570   snat_session_key_t sm0;
571   u8 dont_translate = 0;
572   u32 next0 = ~0;
573   int err;
574
575   rt = (snat_runtime_t *) node->runtime_data;
576   ip0 = vlib_buffer_get_current (b0);
577   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
578   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
579   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
580
581   err = icmp_get_key (icmp0, &key0);
582   if (err != -1)
583     {
584       b0->error = node->errors[err];
585       next0 = SNAT_IN2OUT_NEXT_DROP;
586       goto out2;
587     }
588   key0.addr = ip0->src_address;
589   key0.fib_index = rx_fib_index0;
590
591   if (snat_static_mapping_match(sm, key0, &sm0, 0))
592     {
593       if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
594           IP_PROTOCOL_ICMP, rx_fib_index0)))
595         {
596           dont_translate = 1;
597           goto out;
598         }
599
600       if (icmp_is_error_message (icmp0))
601         {
602           next0 = SNAT_IN2OUT_NEXT_DROP;
603           goto out;
604         }
605
606       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
607       next0 = SNAT_IN2OUT_NEXT_DROP;
608       goto out;
609     }
610
611 out:
612   *p_value = sm0;
613 out2:
614   *p_key = key0;
615   *p_dont_translate = dont_translate;
616   return next0;
617 }
618
619 static inline u32 icmp_in2out (snat_main_t *sm,
620                                vlib_buffer_t * b0,
621                                ip4_header_t * ip0,
622                                icmp46_header_t * icmp0,
623                                u32 sw_if_index0,
624                                u32 rx_fib_index0,
625                                vlib_node_runtime_t * node,
626                                u32 next0,
627                                u32 thread_index,
628                                void *d)
629 {
630   snat_session_key_t key0, sm0;
631   icmp_echo_header_t *echo0, *inner_echo0 = 0;
632   ip4_header_t *inner_ip0;
633   void *l4_header = 0;
634   icmp46_header_t *inner_icmp0;
635   u8 dont_translate;
636   u32 new_addr0, old_addr0;
637   u16 old_id0, new_id0;
638   ip_csum_t sum0;
639   u16 checksum0;
640   u32 next0_tmp;
641
642   echo0 = (icmp_echo_header_t *)(icmp0+1);
643
644   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0,
645                                        &key0, &sm0, &dont_translate, d);
646   if (next0_tmp != ~0)
647     next0 = next0_tmp;
648   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
649     goto out;
650
651   sum0 = ip_incremental_checksum (0, icmp0,
652                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
653   checksum0 = ~ip_csum_fold (sum0);
654   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
655     {
656       next0 = SNAT_IN2OUT_NEXT_DROP;
657       goto out;
658     }
659
660   old_addr0 = ip0->src_address.as_u32;
661   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
662   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
663
664   sum0 = ip0->checksum;
665   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
666                          src_address /* changed member */);
667   ip0->checksum = ip_csum_fold (sum0);
668   
669   if (!icmp_is_error_message (icmp0))
670     {
671       new_id0 = sm0.port;
672       if (PREDICT_FALSE(new_id0 != echo0->identifier))
673         {
674           old_id0 = echo0->identifier;
675           new_id0 = sm0.port;
676           echo0->identifier = new_id0;
677
678           sum0 = icmp0->checksum;
679           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
680                                  identifier);
681           icmp0->checksum = ip_csum_fold (sum0);
682         }
683     }
684   else
685     {
686       inner_ip0 = (ip4_header_t *)(echo0+1);
687       l4_header = ip4_next_header (inner_ip0);
688
689       if (!ip4_header_checksum_is_valid (inner_ip0))
690         {
691           next0 = SNAT_IN2OUT_NEXT_DROP;
692           goto out;
693         }
694
695       old_addr0 = inner_ip0->dst_address.as_u32;
696       inner_ip0->dst_address = sm0.addr;
697       new_addr0 = inner_ip0->src_address.as_u32;
698
699       sum0 = icmp0->checksum;
700       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
701                              dst_address /* changed member */);
702       icmp0->checksum = ip_csum_fold (sum0);
703
704       switch (key0.protocol)
705         {
706           case SNAT_PROTOCOL_ICMP:
707             inner_icmp0 = (icmp46_header_t*)l4_header;
708             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
709
710             old_id0 = inner_echo0->identifier;
711             new_id0 = sm0.port;
712             inner_echo0->identifier = new_id0;
713
714             sum0 = icmp0->checksum;
715             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
716                                    identifier);
717             icmp0->checksum = ip_csum_fold (sum0);
718             break;
719           case SNAT_PROTOCOL_UDP:
720           case SNAT_PROTOCOL_TCP:
721             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
722             new_id0 = sm0.port;
723             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
724
725             sum0 = icmp0->checksum;
726             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
727                                    dst_port);
728             icmp0->checksum = ip_csum_fold (sum0);
729             break;
730           default:
731             ASSERT(0);
732         }
733     }
734
735 out:
736   return next0;
737 }
738
739 /**
740  * @brief Hairpinning
741  *
742  * Hairpinning allows two endpoints on the internal side of the NAT to
743  * communicate even if they only use each other's external IP addresses
744  * and ports.
745  *
746  * @param sm     SNAT main.
747  * @param b0     Vlib buffer.
748  * @param ip0    IP header.
749  * @param udp0   UDP header.
750  * @param tcp0   TCP header.
751  * @param proto0 SNAT protocol.
752  */
753 static inline void
754 snat_hairpinning (snat_main_t *sm,
755                   vlib_buffer_t * b0,
756                   ip4_header_t * ip0,
757                   udp_header_t * udp0,
758                   tcp_header_t * tcp0,
759                   u32 proto0)
760 {
761   snat_session_key_t key0, sm0;
762   snat_worker_key_t k0;
763   snat_session_t * s0;
764   clib_bihash_kv_8_8_t kv0, value0;
765   ip_csum_t sum0;
766   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
767   u16 new_dst_port0, old_dst_port0;
768
769   key0.addr = ip0->dst_address;
770   key0.port = udp0->dst_port;
771   key0.protocol = proto0;
772   key0.fib_index = sm->outside_fib_index;
773   kv0.key = key0.as_u64;
774
775   /* Check if destination is in active sessions */
776   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
777     {
778       /* or static mappings */
779       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
780         {
781           new_dst_addr0 = sm0.addr.as_u32;
782           new_dst_port0 = sm0.port;
783           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
784         }
785     }
786   else
787     {
788       si = value0.value;
789       if (sm->num_workers > 1)
790         {
791           k0.addr = ip0->dst_address;
792           k0.port = udp0->dst_port;
793           k0.fib_index = sm->outside_fib_index;
794           kv0.key = k0.as_u64;
795           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
796             ASSERT(0);
797           else
798             ti = value0.value;
799         }
800       else
801         ti = sm->num_workers;
802
803       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
804       new_dst_addr0 = s0->in2out.addr.as_u32;
805       new_dst_port0 = s0->in2out.port;
806       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
807     }
808
809   /* Destination is behind the same NAT, use internal address and port */
810   if (new_dst_addr0)
811     {
812       old_dst_addr0 = ip0->dst_address.as_u32;
813       ip0->dst_address.as_u32 = new_dst_addr0;
814       sum0 = ip0->checksum;
815       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
816                              ip4_header_t, dst_address);
817       ip0->checksum = ip_csum_fold (sum0);
818
819       old_dst_port0 = tcp0->dst;
820       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
821         {
822           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
823             {
824               tcp0->dst = new_dst_port0;
825               sum0 = tcp0->checksum;
826               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
827                                      ip4_header_t, dst_address);
828               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
829                                      ip4_header_t /* cheat */, length);
830               tcp0->checksum = ip_csum_fold(sum0);
831             }
832           else
833             {
834               udp0->dst_port = new_dst_port0;
835               udp0->checksum = 0;
836             }
837         }
838     }
839 }
840
841 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
842                                          vlib_buffer_t * b0,
843                                          ip4_header_t * ip0,
844                                          icmp46_header_t * icmp0,
845                                          u32 sw_if_index0,
846                                          u32 rx_fib_index0,
847                                          vlib_node_runtime_t * node,
848                                          u32 next0,
849                                          f64 now,
850                                          u32 thread_index,
851                                          snat_session_t ** p_s0)
852 {
853   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
854                       next0, thread_index, p_s0);
855   snat_session_t * s0 = *p_s0;
856   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
857     {
858       /* Accounting */
859       s0->last_heard = now;
860       s0->total_pkts++;
861       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
862       /* Per-user LRU list maintenance for dynamic translations */
863       if (!snat_is_session_static (s0))
864         {
865           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
866                              s0->per_user_index);
867           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
868                               s0->per_user_list_head_index,
869                               s0->per_user_index);
870         }
871     }
872   return next0;
873 }
874
875 static inline uword
876 snat_in2out_node_fn_inline (vlib_main_t * vm,
877                             vlib_node_runtime_t * node,
878                             vlib_frame_t * frame, int is_slow_path)
879 {
880   u32 n_left_from, * from, * to_next;
881   snat_in2out_next_t next_index;
882   u32 pkts_processed = 0;
883   snat_main_t * sm = &snat_main;
884   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
885   f64 now = vlib_time_now (vm);
886   u32 stats_node_index;
887   u32 thread_index = vlib_get_thread_index ();
888
889   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
890     snat_in2out_node.index;
891
892   from = vlib_frame_vector_args (frame);
893   n_left_from = frame->n_vectors;
894   next_index = node->cached_next_index;
895
896   while (n_left_from > 0)
897     {
898       u32 n_left_to_next;
899
900       vlib_get_next_frame (vm, node, next_index,
901                            to_next, n_left_to_next);
902
903       while (n_left_from >= 4 && n_left_to_next >= 2)
904         {
905           u32 bi0, bi1;
906           vlib_buffer_t * b0, * b1;
907           u32 next0, next1;
908           u32 sw_if_index0, sw_if_index1;
909           ip4_header_t * ip0, * ip1;
910           ip_csum_t sum0, sum1;
911           u32 new_addr0, old_addr0, new_addr1, old_addr1;
912           u16 old_port0, new_port0, old_port1, new_port1;
913           udp_header_t * udp0, * udp1;
914           tcp_header_t * tcp0, * tcp1;
915           icmp46_header_t * icmp0, * icmp1;
916           snat_session_key_t key0, key1;
917           u32 rx_fib_index0, rx_fib_index1;
918           u32 proto0, proto1;
919           snat_session_t * s0 = 0, * s1 = 0;
920           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
921           
922           /* Prefetch next iteration. */
923           {
924             vlib_buffer_t * p2, * p3;
925             
926             p2 = vlib_get_buffer (vm, from[2]);
927             p3 = vlib_get_buffer (vm, from[3]);
928             
929             vlib_prefetch_buffer_header (p2, LOAD);
930             vlib_prefetch_buffer_header (p3, LOAD);
931
932             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
933             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
934           }
935
936           /* speculatively enqueue b0 and b1 to the current next frame */
937           to_next[0] = bi0 = from[0];
938           to_next[1] = bi1 = from[1];
939           from += 2;
940           to_next += 2;
941           n_left_from -= 2;
942           n_left_to_next -= 2;
943           
944           b0 = vlib_get_buffer (vm, bi0);
945           b1 = vlib_get_buffer (vm, bi1);
946
947           ip0 = vlib_buffer_get_current (b0);
948           udp0 = ip4_next_header (ip0);
949           tcp0 = (tcp_header_t *) udp0;
950           icmp0 = (icmp46_header_t *) udp0;
951
952           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
953           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
954                                    sw_if_index0);
955
956           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
957
958           proto0 = ip_proto_to_snat_proto (ip0->protocol);
959
960           if (PREDICT_FALSE(ip0->ttl == 1))
961             {
962               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
963               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
964                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
965                                            0);
966               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
967               goto trace00;
968             }
969
970           /* Next configured feature, probably ip4-lookup */
971           if (is_slow_path)
972             {
973               if (PREDICT_FALSE (proto0 == ~0))
974                 goto trace00;
975               
976               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
977                 {
978                   next0 = icmp_in2out_slow_path 
979                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
980                      node, next0, now, thread_index, &s0);
981                   goto trace00;
982                 }
983             }
984           else
985             {
986               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
987                 {
988                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
989                   goto trace00;
990                 }
991             }
992
993           key0.addr = ip0->src_address;
994           key0.port = udp0->src_port;
995           key0.protocol = proto0;
996           key0.fib_index = rx_fib_index0;
997           
998           kv0.key = key0.as_u64;
999
1000           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
1001             {
1002               if (is_slow_path)
1003                 {
1004                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
1005                       proto0, rx_fib_index0)))
1006                     goto trace00;
1007
1008                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1009                                      &s0, node, next0, thread_index);
1010                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1011                     goto trace00;
1012                 }
1013               else
1014                 {
1015                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1016                   goto trace00;
1017                 }
1018             }
1019           else
1020             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1021                                     value0.value);
1022
1023           old_addr0 = ip0->src_address.as_u32;
1024           ip0->src_address = s0->out2in.addr;
1025           new_addr0 = ip0->src_address.as_u32;
1026           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1027
1028           sum0 = ip0->checksum;
1029           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1030                                  ip4_header_t,
1031                                  src_address /* changed member */);
1032           ip0->checksum = ip_csum_fold (sum0);
1033
1034           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1035             {
1036               old_port0 = tcp0->src_port;
1037               tcp0->src_port = s0->out2in.port;
1038               new_port0 = tcp0->src_port;
1039
1040               sum0 = tcp0->checksum;
1041               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1042                                      ip4_header_t,
1043                                      dst_address /* changed member */);
1044               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1045                                      ip4_header_t /* cheat */,
1046                                      length /* changed member */);
1047               tcp0->checksum = ip_csum_fold(sum0);
1048             }
1049           else
1050             {
1051               old_port0 = udp0->src_port;
1052               udp0->src_port = s0->out2in.port;
1053               udp0->checksum = 0;
1054             }
1055
1056           /* Hairpinning */
1057           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1058
1059           /* Accounting */
1060           s0->last_heard = now;
1061           s0->total_pkts++;
1062           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1063           /* Per-user LRU list maintenance for dynamic translation */
1064           if (!snat_is_session_static (s0))
1065             {
1066               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1067                                  s0->per_user_index);
1068               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1069                                   s0->per_user_list_head_index,
1070                                   s0->per_user_index);
1071             }
1072         trace00:
1073
1074           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1075                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1076             {
1077               snat_in2out_trace_t *t = 
1078                  vlib_add_trace (vm, node, b0, sizeof (*t));
1079               t->is_slow_path = is_slow_path;
1080               t->sw_if_index = sw_if_index0;
1081               t->next_index = next0;
1082                   t->session_index = ~0;
1083               if (s0)
1084                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1085             }
1086
1087           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1088
1089           ip1 = vlib_buffer_get_current (b1);
1090           udp1 = ip4_next_header (ip1);
1091           tcp1 = (tcp_header_t *) udp1;
1092           icmp1 = (icmp46_header_t *) udp1;
1093
1094           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1095           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1096                                    sw_if_index1);
1097
1098           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1099
1100           if (PREDICT_FALSE(ip0->ttl == 1))
1101             {
1102               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1103               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1104                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1105                                            0);
1106               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1107               goto trace01;
1108             }
1109
1110           /* Next configured feature, probably ip4-lookup */
1111           if (is_slow_path)
1112             {
1113               if (PREDICT_FALSE (proto1 == ~0))
1114                 goto trace01;
1115               
1116               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1117                 {
1118                   next1 = icmp_in2out_slow_path 
1119                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1120                      next1, now, thread_index, &s1);
1121                   goto trace01;
1122                 }
1123             }
1124           else
1125             {
1126               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1127                 {
1128                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1129                   goto trace01;
1130                 }
1131             }
1132
1133           key1.addr = ip1->src_address;
1134           key1.port = udp1->src_port;
1135           key1.protocol = proto1;
1136           key1.fib_index = rx_fib_index1;
1137           
1138           kv1.key = key1.as_u64;
1139
1140             if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
1141             {
1142               if (is_slow_path)
1143                 {
1144                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index1, ip1,
1145                       proto1, rx_fib_index1)))
1146                     goto trace01;
1147
1148                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1149                                      &s1, node, next1, thread_index);
1150                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1151                     goto trace01;
1152                 }
1153               else
1154                 {
1155                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1156                   goto trace01;
1157                 }
1158             }
1159           else
1160             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1161                                     value1.value);
1162
1163           old_addr1 = ip1->src_address.as_u32;
1164           ip1->src_address = s1->out2in.addr;
1165           new_addr1 = ip1->src_address.as_u32;
1166           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1167
1168           sum1 = ip1->checksum;
1169           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1170                                  ip4_header_t,
1171                                  src_address /* changed member */);
1172           ip1->checksum = ip_csum_fold (sum1);
1173
1174           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1175             {
1176               old_port1 = tcp1->src_port;
1177               tcp1->src_port = s1->out2in.port;
1178               new_port1 = tcp1->src_port;
1179
1180               sum1 = tcp1->checksum;
1181               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1182                                      ip4_header_t,
1183                                      dst_address /* changed member */);
1184               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1185                                      ip4_header_t /* cheat */,
1186                                      length /* changed member */);
1187               tcp1->checksum = ip_csum_fold(sum1);
1188             }
1189           else
1190             {
1191               old_port1 = udp1->src_port;
1192               udp1->src_port = s1->out2in.port;
1193               udp1->checksum = 0;
1194             }
1195
1196           /* Hairpinning */
1197           snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
1198
1199           /* Accounting */
1200           s1->last_heard = now;
1201           s1->total_pkts++;
1202           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1203           /* Per-user LRU list maintenance for dynamic translation */
1204           if (!snat_is_session_static (s1))
1205             {
1206               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1207                                  s1->per_user_index);
1208               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1209                                   s1->per_user_list_head_index,
1210                                   s1->per_user_index);
1211             }
1212         trace01:
1213
1214           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1215                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
1216             {
1217               snat_in2out_trace_t *t = 
1218                  vlib_add_trace (vm, node, b1, sizeof (*t));
1219               t->sw_if_index = sw_if_index1;
1220               t->next_index = next1;
1221               t->session_index = ~0;
1222               if (s1)
1223                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1224             }
1225
1226           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1227
1228           /* verify speculative enqueues, maybe switch current next frame */
1229           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1230                                            to_next, n_left_to_next,
1231                                            bi0, bi1, next0, next1);
1232         }
1233
1234       while (n_left_from > 0 && n_left_to_next > 0)
1235         {
1236           u32 bi0;
1237           vlib_buffer_t * b0;
1238           u32 next0;
1239           u32 sw_if_index0;
1240           ip4_header_t * ip0;
1241           ip_csum_t sum0;
1242           u32 new_addr0, old_addr0;
1243           u16 old_port0, new_port0;
1244           udp_header_t * udp0;
1245           tcp_header_t * tcp0;
1246           icmp46_header_t * icmp0;
1247           snat_session_key_t key0;
1248           u32 rx_fib_index0;
1249           u32 proto0;
1250           snat_session_t * s0 = 0;
1251           clib_bihash_kv_8_8_t kv0, value0;
1252           
1253           /* speculatively enqueue b0 to the current next frame */
1254           bi0 = from[0];
1255           to_next[0] = bi0;
1256           from += 1;
1257           to_next += 1;
1258           n_left_from -= 1;
1259           n_left_to_next -= 1;
1260
1261           b0 = vlib_get_buffer (vm, bi0);
1262           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1263
1264           ip0 = vlib_buffer_get_current (b0);
1265           udp0 = ip4_next_header (ip0);
1266           tcp0 = (tcp_header_t *) udp0;
1267           icmp0 = (icmp46_header_t *) udp0;
1268
1269           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1270           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1271                                    sw_if_index0);
1272
1273           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1274
1275           if (PREDICT_FALSE(ip0->ttl == 1))
1276             {
1277               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1278               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1279                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1280                                            0);
1281               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1282               goto trace0;
1283             }
1284
1285           /* Next configured feature, probably ip4-lookup */
1286           if (is_slow_path)
1287             {
1288               if (PREDICT_FALSE (proto0 == ~0))
1289                 goto trace0;
1290               
1291               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1292                 {
1293                   next0 = icmp_in2out_slow_path 
1294                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1295                      next0, now, thread_index, &s0);
1296                   goto trace0;
1297                 }
1298             }
1299           else
1300             {
1301               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1302                 {
1303                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1304                   goto trace0;
1305                 }
1306             }
1307
1308           key0.addr = ip0->src_address;
1309           key0.port = udp0->src_port;
1310           key0.protocol = proto0;
1311           key0.fib_index = rx_fib_index0;
1312           
1313           kv0.key = key0.as_u64;
1314
1315           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
1316             {
1317               if (is_slow_path)
1318                 {
1319                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
1320                       proto0, rx_fib_index0)))
1321                     goto trace0;
1322
1323                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1324                                      &s0, node, next0, thread_index);
1325
1326                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1327                     goto trace0;
1328                 }
1329               else
1330                 {
1331                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1332                   goto trace0;
1333                 }
1334             }
1335           else
1336             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1337                                     value0.value);
1338
1339           old_addr0 = ip0->src_address.as_u32;
1340           ip0->src_address = s0->out2in.addr;
1341           new_addr0 = ip0->src_address.as_u32;
1342           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1343
1344           sum0 = ip0->checksum;
1345           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1346                                  ip4_header_t,
1347                                  src_address /* changed member */);
1348           ip0->checksum = ip_csum_fold (sum0);
1349
1350           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1351             {
1352               old_port0 = tcp0->src_port;
1353               tcp0->src_port = s0->out2in.port;
1354               new_port0 = tcp0->src_port;
1355
1356               sum0 = tcp0->checksum;
1357               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1358                                      ip4_header_t,
1359                                      dst_address /* changed member */);
1360               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1361                                      ip4_header_t /* cheat */,
1362                                      length /* changed member */);
1363               tcp0->checksum = ip_csum_fold(sum0);
1364             }
1365           else
1366             {
1367               old_port0 = udp0->src_port;
1368               udp0->src_port = s0->out2in.port;
1369               udp0->checksum = 0;
1370             }
1371
1372           /* Hairpinning */
1373           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1374
1375           /* Accounting */
1376           s0->last_heard = now;
1377           s0->total_pkts++;
1378           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1379           /* Per-user LRU list maintenance for dynamic translation */
1380           if (!snat_is_session_static (s0))
1381             {
1382               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1383                                  s0->per_user_index);
1384               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1385                                   s0->per_user_list_head_index,
1386                                   s0->per_user_index);
1387             }
1388
1389         trace0:
1390           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1391                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1392             {
1393               snat_in2out_trace_t *t = 
1394                  vlib_add_trace (vm, node, b0, sizeof (*t));
1395               t->is_slow_path = is_slow_path;
1396               t->sw_if_index = sw_if_index0;
1397               t->next_index = next0;
1398                   t->session_index = ~0;
1399               if (s0)
1400                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1401             }
1402
1403           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1404
1405           /* verify speculative enqueue, maybe switch current next frame */
1406           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1407                                            to_next, n_left_to_next,
1408                                            bi0, next0);
1409         }
1410
1411       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1412     }
1413
1414   vlib_node_increment_counter (vm, stats_node_index, 
1415                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, 
1416                                pkts_processed);
1417   return frame->n_vectors;
1418 }
1419
1420 static uword
1421 snat_in2out_fast_path_fn (vlib_main_t * vm,
1422                           vlib_node_runtime_t * node,
1423                           vlib_frame_t * frame)
1424 {
1425   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */);
1426 }
1427
1428 VLIB_REGISTER_NODE (snat_in2out_node) = {
1429   .function = snat_in2out_fast_path_fn,
1430   .name = "snat-in2out",
1431   .vector_size = sizeof (u32),
1432   .format_trace = format_snat_in2out_trace,
1433   .type = VLIB_NODE_TYPE_INTERNAL,
1434   
1435   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1436   .error_strings = snat_in2out_error_strings,
1437
1438   .runtime_data_bytes = sizeof (snat_runtime_t),
1439   
1440   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1441
1442   /* edit / add dispositions here */
1443   .next_nodes = {
1444     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1445     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1446     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1447     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1448   },
1449 };
1450
1451 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1452
1453 static uword
1454 snat_in2out_slow_path_fn (vlib_main_t * vm,
1455                           vlib_node_runtime_t * node,
1456                           vlib_frame_t * frame)
1457 {
1458   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */);
1459 }
1460
1461 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1462   .function = snat_in2out_slow_path_fn,
1463   .name = "snat-in2out-slowpath",
1464   .vector_size = sizeof (u32),
1465   .format_trace = format_snat_in2out_trace,
1466   .type = VLIB_NODE_TYPE_INTERNAL,
1467   
1468   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1469   .error_strings = snat_in2out_error_strings,
1470
1471   .runtime_data_bytes = sizeof (snat_runtime_t),
1472   
1473   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1474
1475   /* edit / add dispositions here */
1476   .next_nodes = {
1477     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1478     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1479     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1480     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1481   },
1482 };
1483
1484 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
1485
1486 /**************************/
1487 /*** deterministic mode ***/
1488 /**************************/
1489 static uword
1490 snat_det_in2out_node_fn (vlib_main_t * vm,
1491                          vlib_node_runtime_t * node,
1492                          vlib_frame_t * frame)
1493 {
1494   u32 n_left_from, * from, * to_next;
1495   snat_in2out_next_t next_index;
1496   u32 pkts_processed = 0;
1497   snat_main_t * sm = &snat_main;
1498   u32 now = (u32) vlib_time_now (vm);
1499
1500   from = vlib_frame_vector_args (frame);
1501   n_left_from = frame->n_vectors;
1502   next_index = node->cached_next_index;
1503
1504   while (n_left_from > 0)
1505     {
1506       u32 n_left_to_next;
1507
1508       vlib_get_next_frame (vm, node, next_index,
1509                            to_next, n_left_to_next);
1510
1511       while (n_left_from >= 4 && n_left_to_next >= 2)
1512         {
1513           u32 bi0, bi1;
1514           vlib_buffer_t * b0, * b1;
1515           u32 next0, next1;
1516           u32 sw_if_index0, sw_if_index1;
1517           ip4_header_t * ip0, * ip1;
1518           ip_csum_t sum0, sum1;
1519           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
1520           u16 old_port0, new_port0, lo_port0, i0;
1521           u16 old_port1, new_port1, lo_port1, i1;
1522           udp_header_t * udp0, * udp1;
1523           tcp_header_t * tcp0, * tcp1;
1524           u32 proto0, proto1;
1525           snat_det_out_key_t key0, key1;
1526           snat_det_map_t * dm0, * dm1;
1527           snat_det_session_t * ses0 = 0, * ses1 = 0;
1528
1529           /* Prefetch next iteration. */
1530           {
1531             vlib_buffer_t * p2, * p3;
1532
1533             p2 = vlib_get_buffer (vm, from[2]);
1534             p3 = vlib_get_buffer (vm, from[3]);
1535
1536             vlib_prefetch_buffer_header (p2, LOAD);
1537             vlib_prefetch_buffer_header (p3, LOAD);
1538
1539             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1540             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1541           }
1542
1543           /* speculatively enqueue b0 and b1 to the current next frame */
1544           to_next[0] = bi0 = from[0];
1545           to_next[1] = bi1 = from[1];
1546           from += 2;
1547           to_next += 2;
1548           n_left_from -= 2;
1549           n_left_to_next -= 2;
1550
1551           b0 = vlib_get_buffer (vm, bi0);
1552           b1 = vlib_get_buffer (vm, bi1);
1553
1554           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1555           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1556
1557           ip0 = vlib_buffer_get_current (b0);
1558           udp0 = ip4_next_header (ip0);
1559           tcp0 = (tcp_header_t *) udp0;
1560
1561           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1562
1563           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
1564           if (PREDICT_FALSE(!dm0))
1565             {
1566               clib_warning("no match for internal host %U",
1567                            format_ip4_address, &ip0->src_address);
1568               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1569               goto trace0;
1570             }
1571
1572           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
1573
1574           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src);
1575           if (PREDICT_FALSE(!ses0))
1576             {
1577               key0.ext_host_addr = ip0->dst_address;
1578               key0.ext_host_port = tcp0->dst;
1579               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
1580                 {
1581                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
1582                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
1583
1584                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
1585                     continue;
1586
1587                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
1588                   break;
1589                 }
1590                 if (PREDICT_FALSE(!ses0))
1591                   {
1592                     next0 = SNAT_IN2OUT_NEXT_DROP;
1593                     b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
1594                     goto trace0;
1595                   }
1596             }
1597
1598           new_port0 = ses0->out.out_port;
1599           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1600
1601           old_addr0.as_u32 = ip0->src_address.as_u32;
1602           ip0->src_address.as_u32 = new_addr0.as_u32;
1603           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1604
1605           sum0 = ip0->checksum;
1606           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1607                                  ip4_header_t,
1608                                  src_address /* changed member */);
1609           ip0->checksum = ip_csum_fold (sum0);
1610
1611           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1612             {
1613               if (tcp0->flags & TCP_FLAG_SYN)
1614                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
1615               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
1616                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1617               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1618                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
1619               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
1620                 snat_det_ses_close(dm0, ses0);
1621               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1622                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
1623               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
1624                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1625
1626               old_port0 = tcp0->src;
1627               tcp0->src = new_port0;
1628
1629               sum0 = tcp0->checksum;
1630               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1631                                      ip4_header_t,
1632                                      dst_address /* changed member */);
1633               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1634                                      ip4_header_t /* cheat */,
1635                                      length /* changed member */);
1636               tcp0->checksum = ip_csum_fold(sum0);
1637             }
1638           else
1639             {
1640               ses0->state = SNAT_SESSION_UDP_ACTIVE;
1641               old_port0 = udp0->src_port;
1642               udp0->src_port = new_port0;
1643               udp0->checksum = 0;
1644             }
1645
1646           switch(ses0->state)
1647             {
1648             case SNAT_SESSION_UDP_ACTIVE:
1649                 ses0->expire = now + SNAT_UDP_TIMEOUT;
1650                 break;
1651             case SNAT_SESSION_TCP_SYN_SENT:
1652             case SNAT_SESSION_TCP_FIN_WAIT:
1653             case SNAT_SESSION_TCP_CLOSE_WAIT:
1654             case SNAT_SESSION_TCP_LAST_ACK:
1655                 ses0->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT;
1656                 break;
1657             case SNAT_SESSION_TCP_ESTABLISHED:
1658                 ses0->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT;
1659                 break;
1660             }
1661
1662         trace0:
1663           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1664                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1665             {
1666               snat_in2out_trace_t *t =
1667                  vlib_add_trace (vm, node, b0, sizeof (*t));
1668               t->is_slow_path = 0;
1669               t->sw_if_index = sw_if_index0;
1670               t->next_index = next0;
1671               t->session_index = ~0;
1672               if (ses0)
1673                 t->session_index = ses0 - dm0->sessions;
1674             }
1675
1676           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1677
1678           ip1 = vlib_buffer_get_current (b1);
1679           udp1 = ip4_next_header (ip1);
1680           tcp1 = (tcp_header_t *) udp1;
1681
1682           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1683
1684           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
1685           if (PREDICT_FALSE(!dm1))
1686             {
1687               clib_warning("no match for internal host %U",
1688                            format_ip4_address, &ip0->src_address);
1689               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1690               goto trace1;
1691             }
1692
1693           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
1694
1695
1696           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src);
1697           if (PREDICT_FALSE(!ses1))
1698             {
1699               key1.ext_host_addr = ip1->dst_address;
1700               key1.ext_host_port = tcp1->dst;
1701               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
1702                 {
1703                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
1704                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
1705
1706                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
1707                     continue;
1708
1709                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
1710                   break;
1711                 }
1712                 if (PREDICT_FALSE(!ses1))
1713                   {
1714                     next1 = SNAT_IN2OUT_NEXT_DROP;
1715                     b1->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
1716                     goto trace1;
1717                   }
1718             }
1719
1720           new_port1 = ses1->out.out_port;
1721           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1722
1723           old_addr1.as_u32 = ip1->src_address.as_u32;
1724           ip1->src_address.as_u32 = new_addr1.as_u32;
1725           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1726
1727           sum1 = ip1->checksum;
1728           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1729                                  ip4_header_t,
1730                                  src_address /* changed member */);
1731           ip1->checksum = ip_csum_fold (sum1);
1732
1733           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1734             {
1735               if (tcp1->flags & TCP_FLAG_SYN)
1736                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
1737               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
1738                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
1739               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
1740                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
1741               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
1742                 snat_det_ses_close(dm1, ses1);
1743               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1744                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
1745               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
1746                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
1747
1748               old_port1 = tcp1->src;
1749               tcp1->src = new_port1;
1750
1751               sum1 = tcp1->checksum;
1752               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1753                                      ip4_header_t,
1754                                      dst_address /* changed member */);
1755               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1756                                      ip4_header_t /* cheat */,
1757                                      length /* changed member */);
1758               tcp1->checksum = ip_csum_fold(sum1);
1759             }
1760           else
1761             {
1762               ses1->state = SNAT_SESSION_UDP_ACTIVE;
1763               old_port1 = udp1->src_port;
1764               udp1->src_port = new_port1;
1765               udp1->checksum = 0;
1766             }
1767
1768           switch(ses1->state)
1769             {
1770             case SNAT_SESSION_UDP_ACTIVE:
1771                 ses1->expire = now + SNAT_UDP_TIMEOUT;
1772                 break;
1773             case SNAT_SESSION_TCP_SYN_SENT:
1774             case SNAT_SESSION_TCP_FIN_WAIT:
1775             case SNAT_SESSION_TCP_CLOSE_WAIT:
1776             case SNAT_SESSION_TCP_LAST_ACK:
1777                 ses1->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT;
1778                 break;
1779             case SNAT_SESSION_TCP_ESTABLISHED:
1780                 ses1->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT;
1781                 break;
1782             }
1783
1784         trace1:
1785           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1786                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1787             {
1788               snat_in2out_trace_t *t =
1789                  vlib_add_trace (vm, node, b1, sizeof (*t));
1790               t->is_slow_path = 0;
1791               t->sw_if_index = sw_if_index1;
1792               t->next_index = next1;
1793               t->session_index = ~0;
1794               if (ses1)
1795                 t->session_index = ses1 - dm1->sessions;
1796             }
1797
1798           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1799
1800           /* verify speculative enqueues, maybe switch current next frame */
1801           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1802                                            to_next, n_left_to_next,
1803                                            bi0, bi1, next0, next1);
1804          }
1805
1806       while (n_left_from > 0 && n_left_to_next > 0)
1807         {
1808           u32 bi0;
1809           vlib_buffer_t * b0;
1810           u32 next0;
1811           u32 sw_if_index0;
1812           ip4_header_t * ip0;
1813           ip_csum_t sum0;
1814           ip4_address_t new_addr0, old_addr0;
1815           u16 old_port0, new_port0, lo_port0, i0;
1816           udp_header_t * udp0;
1817           tcp_header_t * tcp0;
1818           u32 proto0;
1819           snat_det_out_key_t key0;
1820           snat_det_map_t * dm0;
1821           snat_det_session_t * ses0 = 0;
1822
1823           /* speculatively enqueue b0 to the current next frame */
1824           bi0 = from[0];
1825           to_next[0] = bi0;
1826           from += 1;
1827           to_next += 1;
1828           n_left_from -= 1;
1829           n_left_to_next -= 1;
1830
1831           b0 = vlib_get_buffer (vm, bi0);
1832           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1833
1834           ip0 = vlib_buffer_get_current (b0);
1835           udp0 = ip4_next_header (ip0);
1836           tcp0 = (tcp_header_t *) udp0;
1837
1838           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1839
1840           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
1841           if (PREDICT_FALSE(!dm0))
1842             {
1843               clib_warning("no match for internal host %U",
1844                            format_ip4_address, &ip0->src_address);
1845               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1846               goto trace00;
1847             }
1848
1849           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
1850
1851           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src);
1852           if (PREDICT_FALSE(!ses0))
1853             {
1854               key0.ext_host_addr = ip0->dst_address;
1855               key0.ext_host_port = tcp0->dst;
1856               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
1857                 {
1858                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
1859                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
1860
1861                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
1862                     continue;
1863
1864                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
1865                   break;
1866                 }
1867                 if (PREDICT_FALSE(!ses0))
1868                   {
1869                     next0 = SNAT_IN2OUT_NEXT_DROP;
1870                     b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
1871                     goto trace00;
1872                   }
1873             }
1874
1875           new_port0 = ses0->out.out_port;
1876           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1877
1878           old_addr0.as_u32 = ip0->src_address.as_u32;
1879           ip0->src_address.as_u32 = new_addr0.as_u32;
1880           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1881
1882           sum0 = ip0->checksum;
1883           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1884                                  ip4_header_t,
1885                                  src_address /* changed member */);
1886           ip0->checksum = ip_csum_fold (sum0);
1887
1888           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1889             {
1890               if (tcp0->flags & TCP_FLAG_SYN)
1891                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
1892               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
1893                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1894               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1895                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
1896               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
1897                 snat_det_ses_close(dm0, ses0);
1898               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1899                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
1900               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
1901                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1902
1903               old_port0 = tcp0->src;
1904               tcp0->src = new_port0;
1905
1906               sum0 = tcp0->checksum;
1907               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1908                                      ip4_header_t,
1909                                      dst_address /* changed member */);
1910               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1911                                      ip4_header_t /* cheat */,
1912                                      length /* changed member */);
1913               tcp0->checksum = ip_csum_fold(sum0);
1914             }
1915           else
1916             {
1917               ses0->state = SNAT_SESSION_UDP_ACTIVE;
1918               old_port0 = udp0->src_port;
1919               udp0->src_port = new_port0;
1920               udp0->checksum = 0;
1921             }
1922
1923           switch(ses0->state)
1924             {
1925             case SNAT_SESSION_UDP_ACTIVE:
1926                 ses0->expire = now + SNAT_UDP_TIMEOUT;
1927                 break;
1928             case SNAT_SESSION_TCP_SYN_SENT:
1929             case SNAT_SESSION_TCP_FIN_WAIT:
1930             case SNAT_SESSION_TCP_CLOSE_WAIT:
1931             case SNAT_SESSION_TCP_LAST_ACK:
1932                 ses0->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT;
1933                 break;
1934             case SNAT_SESSION_TCP_ESTABLISHED:
1935                 ses0->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT;
1936                 break;
1937             }
1938
1939         trace00:
1940           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1941                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1942             {
1943               snat_in2out_trace_t *t =
1944                  vlib_add_trace (vm, node, b0, sizeof (*t));
1945               t->is_slow_path = 0;
1946               t->sw_if_index = sw_if_index0;
1947               t->next_index = next0;
1948               t->session_index = ~0;
1949               if (ses0)
1950                 t->session_index = ses0 - dm0->sessions;
1951             }
1952
1953           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1954
1955           /* verify speculative enqueue, maybe switch current next frame */
1956           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1957                                            to_next, n_left_to_next,
1958                                            bi0, next0);
1959         }
1960
1961       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1962     }
1963
1964   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
1965                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1966                                pkts_processed);
1967   return frame->n_vectors;
1968 }
1969
1970 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
1971   .function = snat_det_in2out_node_fn,
1972   .name = "snat-det-in2out",
1973   .vector_size = sizeof (u32),
1974   .format_trace = format_snat_in2out_trace,
1975   .type = VLIB_NODE_TYPE_INTERNAL,
1976
1977   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1978   .error_strings = snat_in2out_error_strings,
1979
1980   .runtime_data_bytes = sizeof (snat_runtime_t),
1981
1982   .n_next_nodes = 2,
1983
1984   /* edit / add dispositions here */
1985   .next_nodes = {
1986     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1987     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1988   },
1989 };
1990
1991 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
1992
1993 /**********************/
1994 /*** worker handoff ***/
1995 /**********************/
1996 static uword
1997 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
1998                                vlib_node_runtime_t * node,
1999                                vlib_frame_t * frame)
2000 {
2001   snat_main_t *sm = &snat_main;
2002   vlib_thread_main_t *tm = vlib_get_thread_main ();
2003   u32 n_left_from, *from, *to_next = 0;
2004   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
2005   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
2006     = 0;
2007   vlib_frame_queue_elt_t *hf = 0;
2008   vlib_frame_t *f = 0;
2009   int i;
2010   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
2011   u32 next_worker_index = 0;
2012   u32 current_worker_index = ~0;
2013   u32 thread_index = vlib_get_thread_index ();
2014
2015   ASSERT (vec_len (sm->workers));
2016
2017   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
2018     {
2019       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
2020
2021       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
2022                                sm->first_worker_index + sm->num_workers - 1,
2023                                (vlib_frame_queue_t *) (~0));
2024     }
2025
2026   from = vlib_frame_vector_args (frame);
2027   n_left_from = frame->n_vectors;
2028
2029   while (n_left_from > 0)
2030     {
2031       u32 bi0;
2032       vlib_buffer_t *b0;
2033       u32 sw_if_index0;
2034       u32 rx_fib_index0;
2035       ip4_header_t * ip0;
2036       u8 do_handoff;
2037
2038       bi0 = from[0];
2039       from += 1;
2040       n_left_from -= 1;
2041
2042       b0 = vlib_get_buffer (vm, bi0);
2043
2044       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
2045       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2046
2047       ip0 = vlib_buffer_get_current (b0);
2048
2049       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
2050
2051       if (PREDICT_FALSE (next_worker_index != thread_index))
2052         {
2053           do_handoff = 1;
2054
2055           if (next_worker_index != current_worker_index)
2056             {
2057               if (hf)
2058                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2059
2060               hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
2061                                                       next_worker_index,
2062                                                       handoff_queue_elt_by_worker_index);
2063
2064               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2065               to_next_worker = &hf->buffer_index[hf->n_vectors];
2066               current_worker_index = next_worker_index;
2067             }
2068
2069           /* enqueue to correct worker thread */
2070           to_next_worker[0] = bi0;
2071           to_next_worker++;
2072           n_left_to_next_worker--;
2073
2074           if (n_left_to_next_worker == 0)
2075             {
2076               hf->n_vectors = VLIB_FRAME_SIZE;
2077               vlib_put_frame_queue_elt (hf);
2078               current_worker_index = ~0;
2079               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2080               hf = 0;
2081             }
2082         }
2083       else
2084         {
2085           do_handoff = 0;
2086           /* if this is 1st frame */
2087           if (!f)
2088             {
2089               f = vlib_get_frame_to_node (vm, sm->in2out_node_index);
2090               to_next = vlib_frame_vector_args (f);
2091             }
2092
2093           to_next[0] = bi0;
2094           to_next += 1;
2095           f->n_vectors++;
2096         }
2097
2098       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2099                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2100         {
2101           snat_in2out_worker_handoff_trace_t *t =
2102             vlib_add_trace (vm, node, b0, sizeof (*t));
2103           t->next_worker_index = next_worker_index;
2104           t->do_handoff = do_handoff;
2105         }
2106     }
2107
2108   if (f)
2109     vlib_put_frame_to_node (vm, sm->in2out_node_index, f);
2110
2111   if (hf)
2112     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2113
2114   /* Ship frames to the worker nodes */
2115   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2116     {
2117       if (handoff_queue_elt_by_worker_index[i])
2118         {
2119           hf = handoff_queue_elt_by_worker_index[i];
2120           /*
2121            * It works better to let the handoff node
2122            * rate-adapt, always ship the handoff queue element.
2123            */
2124           if (1 || hf->n_vectors == hf->last_n_vectors)
2125             {
2126               vlib_put_frame_queue_elt (hf);
2127               handoff_queue_elt_by_worker_index[i] = 0;
2128             }
2129           else
2130             hf->last_n_vectors = hf->n_vectors;
2131         }
2132       congested_handoff_queue_by_worker_index[i] =
2133         (vlib_frame_queue_t *) (~0);
2134     }
2135   hf = 0;
2136   current_worker_index = ~0;
2137   return frame->n_vectors;
2138 }
2139
2140 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
2141   .function = snat_in2out_worker_handoff_fn,
2142   .name = "snat-in2out-worker-handoff",
2143   .vector_size = sizeof (u32),
2144   .format_trace = format_snat_in2out_worker_handoff_trace,
2145   .type = VLIB_NODE_TYPE_INTERNAL,
2146   
2147   .n_next_nodes = 1,
2148
2149   .next_nodes = {
2150     [0] = "error-drop",
2151   },
2152 };
2153
2154 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
2155
2156 static uword
2157 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
2158                                 vlib_node_runtime_t * node,
2159                                 vlib_frame_t * frame)
2160 {
2161   u32 n_left_from, * from, * to_next;
2162   snat_in2out_next_t next_index;
2163   u32 pkts_processed = 0;
2164   snat_main_t * sm = &snat_main;
2165   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
2166   u32 stats_node_index;
2167
2168   stats_node_index = snat_in2out_fast_node.index;
2169
2170   from = vlib_frame_vector_args (frame);
2171   n_left_from = frame->n_vectors;
2172   next_index = node->cached_next_index;
2173
2174   while (n_left_from > 0)
2175     {
2176       u32 n_left_to_next;
2177
2178       vlib_get_next_frame (vm, node, next_index,
2179                            to_next, n_left_to_next);
2180
2181       while (n_left_from > 0 && n_left_to_next > 0)
2182         {
2183           u32 bi0;
2184           vlib_buffer_t * b0;
2185           u32 next0;
2186           u32 sw_if_index0;
2187           ip4_header_t * ip0;
2188           ip_csum_t sum0;
2189           u32 new_addr0, old_addr0;
2190           u16 old_port0, new_port0;
2191           udp_header_t * udp0;
2192           tcp_header_t * tcp0;
2193           icmp46_header_t * icmp0;
2194           snat_session_key_t key0, sm0;
2195           u32 proto0;
2196           u32 rx_fib_index0;
2197
2198           /* speculatively enqueue b0 to the current next frame */
2199           bi0 = from[0];
2200           to_next[0] = bi0;
2201           from += 1;
2202           to_next += 1;
2203           n_left_from -= 1;
2204           n_left_to_next -= 1;
2205
2206           b0 = vlib_get_buffer (vm, bi0);
2207           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2208
2209           ip0 = vlib_buffer_get_current (b0);
2210           udp0 = ip4_next_header (ip0);
2211           tcp0 = (tcp_header_t *) udp0;
2212           icmp0 = (icmp46_header_t *) udp0;
2213
2214           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2215           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2216
2217           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2218
2219           if (PREDICT_FALSE (proto0 == ~0))
2220               goto trace0;
2221
2222           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2223             {
2224               if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
2225                   proto0, rx_fib_index0)))
2226                 goto trace0;
2227
2228               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2229                                   rx_fib_index0, node, next0, ~0, 0);
2230               goto trace0;
2231             }
2232
2233           key0.addr = ip0->src_address;
2234           key0.port = udp0->src_port;
2235           key0.fib_index = rx_fib_index0;
2236
2237           if (snat_static_mapping_match(sm, key0, &sm0, 0))
2238             {
2239               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2240               next0= SNAT_IN2OUT_NEXT_DROP;
2241               goto trace0;
2242             }
2243
2244           new_addr0 = sm0.addr.as_u32;
2245           new_port0 = sm0.port;
2246           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2247           old_addr0 = ip0->src_address.as_u32;
2248           ip0->src_address.as_u32 = new_addr0;
2249
2250           sum0 = ip0->checksum;
2251           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2252                                  ip4_header_t,
2253                                  src_address /* changed member */);
2254           ip0->checksum = ip_csum_fold (sum0);
2255
2256           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
2257             {
2258               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2259                 {
2260                   old_port0 = tcp0->src_port;
2261                   tcp0->src_port = new_port0;
2262
2263                   sum0 = tcp0->checksum;
2264                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2265                                          ip4_header_t,
2266                                          dst_address /* changed member */);
2267                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2268                                          ip4_header_t /* cheat */,
2269                                          length /* changed member */);
2270                   tcp0->checksum = ip_csum_fold(sum0);
2271                 }
2272               else
2273                 {
2274                   old_port0 = udp0->src_port;
2275                   udp0->src_port = new_port0;
2276                   udp0->checksum = 0;
2277                 }
2278             }
2279           else
2280             {
2281               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2282                 {
2283                   sum0 = tcp0->checksum;
2284                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2285                                          ip4_header_t,
2286                                          dst_address /* changed member */);
2287                   tcp0->checksum = ip_csum_fold(sum0);
2288                 }
2289             }
2290
2291           /* Hairpinning */
2292           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
2293
2294         trace0:
2295           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2296                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2297             {
2298               snat_in2out_trace_t *t =
2299                  vlib_add_trace (vm, node, b0, sizeof (*t));
2300               t->sw_if_index = sw_if_index0;
2301               t->next_index = next0;
2302             }
2303
2304           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2305
2306           /* verify speculative enqueue, maybe switch current next frame */
2307           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2308                                            to_next, n_left_to_next,
2309                                            bi0, next0);
2310         }
2311
2312       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2313     }
2314
2315   vlib_node_increment_counter (vm, stats_node_index,
2316                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2317                                pkts_processed);
2318   return frame->n_vectors;
2319 }
2320
2321
2322 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
2323   .function = snat_in2out_fast_static_map_fn,
2324   .name = "snat-in2out-fast",
2325   .vector_size = sizeof (u32),
2326   .format_trace = format_snat_in2out_fast_trace,
2327   .type = VLIB_NODE_TYPE_INTERNAL,
2328   
2329   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2330   .error_strings = snat_in2out_error_strings,
2331
2332   .runtime_data_bytes = sizeof (snat_runtime_t),
2333   
2334   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2335
2336   /* edit / add dispositions here */
2337   .next_nodes = {
2338     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2339     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2340     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
2341     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2342   },
2343 };
2344
2345 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);