vpp.git: src/plugins/snat/in2out.c (commit: silence -Wmaybe-uninitialized warning)
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <snat/snat.h>
25 #include <snat/snat_ipfix_logging.h>
26
27 #include <vppinfra/hash.h>
28 #include <vppinfra/error.h>
29 #include <vppinfra/elog.h>
30
31 typedef struct {
32   u32 sw_if_index;
33   u32 next_index;
34   u32 session_index;
35   u32 is_slow_path;
36 } snat_in2out_trace_t;
37
38 typedef struct {
39   u32 next_worker_index;
40   u8 do_handoff;
41 } snat_in2out_worker_handoff_trace_t;
42
43 /* packet trace format function */
44 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
45 {
46   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
47   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
48   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
49   char * tag;
50
51   tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
52   
53   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
54               t->sw_if_index, t->next_index, t->session_index);
55
56   return s;
57 }
58
59 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
60 {
61   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
62   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
63   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
64
65   s = format (s, "SNAT_IN2OUT_FAST: sw_if_index %d, next index %d",
66               t->sw_if_index, t->next_index);
67
68   return s;
69 }
70
71 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
72 {
73   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
74   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
75   snat_in2out_worker_handoff_trace_t * t =
76     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
77   char * m;
78
79   m = t->do_handoff ? "next worker" : "same worker";
80   s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
81
82   return s;
83 }
84
85 vlib_node_registration_t snat_in2out_node;
86 vlib_node_registration_t snat_in2out_slowpath_node;
87 vlib_node_registration_t snat_in2out_fast_node;
88 vlib_node_registration_t snat_in2out_worker_handoff_node;
89
90 #define foreach_snat_in2out_error                       \
91 _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
92 _(IN2OUT_PACKETS, "Good in2out packets processed")      \
93 _(OUT_OF_PORTS, "Out of ports")                         \
94 _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
95 _(BAD_ICMP_TYPE, "icmp type not echo-request")          \
96 _(NO_TRANSLATION, "No translation")
97   
98 typedef enum {
99 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
100   foreach_snat_in2out_error
101 #undef _
102   SNAT_IN2OUT_N_ERROR,
103 } snat_in2out_error_t;
104
105 static char * snat_in2out_error_strings[] = {
106 #define _(sym,string) string,
107   foreach_snat_in2out_error
108 #undef _
109 };
110
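/* Next-node dispositions; these indices correspond to the next_nodes entries
   in the node registrations below (ip4-lookup, error-drop,
   snat-in2out-slowpath and ip4-icmp-error). */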
111 typedef enum {
112   SNAT_IN2OUT_NEXT_LOOKUP,
113   SNAT_IN2OUT_NEXT_DROP,
114   SNAT_IN2OUT_NEXT_SLOW_PATH,
115   SNAT_IN2OUT_NEXT_ICMP_ERROR,
116   SNAT_IN2OUT_N_NEXT,
117 } snat_in2out_next_t;
118
119 /**
120  * @brief Check if packet should be translated
121  *
122  * Packets aimed at the outside interface, at an external address with an
123  * active session, or at a static mapping should be translated.
124  *
125  * @param sm            SNAT main
126  * @param rt            SNAT runtime data
127  * @param sw_if_index0  index of the inside interface
128  * @param ip0           IPv4 header
129  * @param proto0        SNAT protocol
130  * @param rx_fib_index0 RX FIB index
131  *
132  * @returns 0 if the packet should be translated, 1 otherwise
133  */
134 static inline int
135 snat_not_translate (snat_main_t * sm, snat_runtime_t * rt, u32 sw_if_index0,
136                    ip4_header_t * ip0, u32 proto0, u32 rx_fib_index0)
137 {
138   ip4_address_t * first_int_addr;
139   udp_header_t * udp0 = ip4_next_header (ip0);
140   snat_session_key_t key0, sm0;
141   clib_bihash_kv_8_8_t kv0, value0;
142   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
143   fib_prefix_t pfx = {
144     .fp_proto = FIB_PROTOCOL_IP4,
145     .fp_len = 32,
146     .fp_addr = {
147         .ip4.as_u32 = ip0->dst_address.as_u32,
148     },
149   };
150
151   if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
152     {
153       first_int_addr =
154         ip4_interface_first_address (sm->ip4_main, sw_if_index0,
155                                      0 /* just want the address */);
156       rt->cached_sw_if_index = sw_if_index0;
157       if (first_int_addr)
158         rt->cached_ip4_address = first_int_addr->as_u32;
159       else
160         rt->cached_ip4_address = 0;
161     }
162
163   /* Don't NAT packet aimed at the intfc address */
164   if (PREDICT_FALSE(ip0->dst_address.as_u32 == rt->cached_ip4_address))
165     return 1;
166
167   key0.addr = ip0->dst_address;
168   key0.port = udp0->dst_port;
169   key0.protocol = proto0;
170   key0.fib_index = sm->outside_fib_index;
171   kv0.key = key0.as_u64;
172
173   /* NAT packet aimed at external address if it */
174   /* has an active session */
175   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
176     {
177       /* or matches a static mapping */
178       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
179         return 0;
180     }
181   else
182     return 0;
183
184   fei = fib_table_lookup (rx_fib_index0, &pfx);
185   if (FIB_NODE_INDEX_INVALID != fei)
186     {
187       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
188       if (sw_if_index == ~0)
189         {
190           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
191           if (FIB_NODE_INDEX_INVALID != fei)
192             sw_if_index = fib_entry_get_resolving_interface (fei);
193         }
194       snat_interface_t *i;
195       pool_foreach (i, sm->interfaces,
196       ({
197         /* NAT packet aimed at outside interface */
198         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
199           return 0;
200       }));
201     }
202
203   return 1;
204 }
205
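/**
 * @brief Session creation slow path
 *
 * Looks up (or creates) the "user" entry for the inside source address,
 * recycles the least recently used dynamic session when the per-user quota
 * is exceeded, otherwise matches a static mapping or allocates a new outside
 * address and port for a dynamic translation. The resulting session is added
 * to the in2out and out2in translation hashes and to the worker-by-out
 * lookup, and an IPFIX NAT44 session-create event is logged.
 *
 * @param sm            SNAT main
 * @param b0            vlib buffer
 * @param ip0           IPv4 header
 * @param rx_fib_index0 RX FIB index
 * @param key0          in2out session key
 * @param sessionp      [out] created or recycled session
 * @param node          node runtime
 * @param next0         next index to return on success
 * @param cpu_index     thread index used to select per-thread data
 *
 * @returns next0 on success, SNAT_IN2OUT_NEXT_DROP on error
 */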
206 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
207                       ip4_header_t * ip0,
208                       u32 rx_fib_index0,
209                       snat_session_key_t * key0,
210                       snat_session_t ** sessionp,
211                       vlib_node_runtime_t * node,
212                       u32 next0,
213                       u32 cpu_index)
214 {
215   snat_user_t *u;
216   snat_user_key_t user_key;
217   snat_session_t *s;
218   clib_bihash_kv_8_8_t kv0, value0;
219   u32 oldest_per_user_translation_list_index;
220   dlist_elt_t * oldest_per_user_translation_list_elt;
221   dlist_elt_t * per_user_translation_list_elt;
222   dlist_elt_t * per_user_list_head_elt;
223   u32 session_index;
224   snat_session_key_t key1;
225   u32 address_index = ~0;
226   u32 outside_fib_index;
227   uword * p;
228   snat_worker_key_t worker_by_out_key;
229
230   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
231   if (! p)
232     {
233       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
234       return SNAT_IN2OUT_NEXT_DROP;
235     }
236   outside_fib_index = p[0];
237
238   key1.protocol = key0->protocol;
239   user_key.addr = ip0->src_address;
240   user_key.fib_index = rx_fib_index0;
241   kv0.key = user_key.as_u64;
242   
243   /* Ever heard of the "user" = src ip4 address before? */
244   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
245     {
246       /* no, make a new one */
247       pool_get (sm->per_thread_data[cpu_index].users, u);
248       memset (u, 0, sizeof (*u));
249       u->addr = ip0->src_address;
250       u->fib_index = rx_fib_index0;
251
252       pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt);
253
254       u->sessions_per_user_list_head_index = per_user_list_head_elt -
255         sm->per_thread_data[cpu_index].list_pool;
256
257       clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
258                        u->sessions_per_user_list_head_index);
259
260       kv0.value = u - sm->per_thread_data[cpu_index].users;
261
262       /* add user */
263       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
264     }
265   else
266     {
267       u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
268                              value0.value);
269     }
270
271   /* Over quota? Recycle the least recently used dynamic translation */
272   if (u->nsessions >= sm->max_translations_per_user)
273     {
274       /* Remove the oldest dynamic translation */
275       do {
276           oldest_per_user_translation_list_index =
277             clib_dlist_remove_head (sm->per_thread_data[cpu_index].list_pool,
278                                     u->sessions_per_user_list_head_index);
279
280           ASSERT (oldest_per_user_translation_list_index != ~0);
281
282           /* add it back to the end of the LRU list */
283           clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
284                               u->sessions_per_user_list_head_index,
285                               oldest_per_user_translation_list_index);
286           /* Get the list element */
287           oldest_per_user_translation_list_elt =
288             pool_elt_at_index (sm->per_thread_data[cpu_index].list_pool,
289                                oldest_per_user_translation_list_index);
290
291           /* Get the session index from the list element */
292           session_index = oldest_per_user_translation_list_elt->value;
293
294           /* Get the session */
295           s = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
296                                  session_index);
297       } while (snat_is_session_static (s));
298
299       /* Remove in2out, out2in keys */
300       kv0.key = s->in2out.as_u64;
301       if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
302           clib_warning ("in2out key delete failed");
303       kv0.key = s->out2in.as_u64;
304       if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
305           clib_warning ("out2in key delete failed");
306
307       /* log NAT event */
308       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
309                                           s->out2in.addr.as_u32,
310                                           s->in2out.protocol,
311                                           s->in2out.port,
312                                           s->out2in.port,
313                                           s->in2out.fib_index);
314
315       snat_free_outside_address_and_port 
316         (sm, &s->out2in, s->outside_address_index);
317       s->outside_address_index = ~0;
318
319       if (snat_alloc_outside_address_and_port (sm, &key1, &address_index))
320         {
321           ASSERT(0);
322
323           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
324           return SNAT_IN2OUT_NEXT_DROP;
325         }
326       s->outside_address_index = address_index;
327     }
328   else
329     {
330       u8 static_mapping = 1;
331
332       /* First try to match static mapping by local address and port */
333       if (snat_static_mapping_match (sm, *key0, &key1, 0))
334         {
335           static_mapping = 0;
336           /* Try to create dynamic translation */
337           if (snat_alloc_outside_address_and_port (sm, &key1, &address_index))
338             {
339               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
340               return SNAT_IN2OUT_NEXT_DROP;
341             }
342         }
343
344       /* Create a new session */
345       pool_get (sm->per_thread_data[cpu_index].sessions, s);
346       memset (s, 0, sizeof (*s));
347       
348       s->outside_address_index = address_index;
349
350       if (static_mapping)
351         {
352           u->nstaticsessions++;
353           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
354         }
355       else
356         {
357           u->nsessions++;
358         }
359
360       /* Create list elts */
361       pool_get (sm->per_thread_data[cpu_index].list_pool,
362                 per_user_translation_list_elt);
363       clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
364                        per_user_translation_list_elt -
365                        sm->per_thread_data[cpu_index].list_pool);
366
367       per_user_translation_list_elt->value =
368         s - sm->per_thread_data[cpu_index].sessions;
369       s->per_user_index = per_user_translation_list_elt -
370                           sm->per_thread_data[cpu_index].list_pool;
371       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
372
373       clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
374                           s->per_user_list_head_index,
375                           per_user_translation_list_elt -
376                           sm->per_thread_data[cpu_index].list_pool);
377    }
378   
379   s->in2out = *key0;
380   s->out2in = key1;
381   s->out2in.protocol = key0->protocol;
382   s->out2in.fib_index = outside_fib_index;
383   *sessionp = s;
384
385   /* Add to translation hashes */
386   kv0.key = s->in2out.as_u64;
387   kv0.value = s - sm->per_thread_data[cpu_index].sessions;
388   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
389       clib_warning ("in2out key add failed");
390   
391   kv0.key = s->out2in.as_u64;
392   kv0.value = s - sm->per_thread_data[cpu_index].sessions;
393   
394   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
395       clib_warning ("out2in key add failed");
396
397   /* Add the out2in key to the worker lookup table for translated packets */
398   worker_by_out_key.addr = s->out2in.addr;
399   worker_by_out_key.port = s->out2in.port;
400   worker_by_out_key.fib_index = s->out2in.fib_index;
401   kv0.key = worker_by_out_key.as_u64;
402   kv0.value = cpu_index;
403   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
404
405   /* log NAT event */
406   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
407                                       s->out2in.addr.as_u32,
408                                       s->in2out.protocol,
409                                       s->in2out.port,
410                                       s->out2in.port,
411                                       s->in2out.fib_index);
412   return next0;
413 }
414                       
415 typedef struct {
416   u16 src_port, dst_port;
417 } tcp_udp_header_t;
418
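/**
 * @brief ICMP in2out slow path
 *
 * Translates ICMP echo requests (the echo identifier plays the role of the
 * port) and ICMP error messages (the embedded inner header is rewritten).
 * Creates a session via slow_path() if none exists, verifies the ICMP
 * checksum, rewrites the outer source address and fixes up the affected
 * checksums, then performs session accounting and per-user LRU maintenance.
 *
 * @returns next index; SNAT_IN2OUT_NEXT_DROP on unsupported ICMP types,
 *          checksum failures or session-creation errors
 */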
419 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
420                                          vlib_buffer_t * b0,
421                                          ip4_header_t * ip0,
422                                          icmp46_header_t * icmp0,
423                                          u32 sw_if_index0,
424                                          u32 rx_fib_index0,
425                                          vlib_node_runtime_t * node,
426                                          u32 next0,
427                                          f64 now,
428                                          u32 cpu_index,
429                                          snat_session_t ** p_s0)
430 {
431   snat_session_key_t key0;
432   icmp_echo_header_t *echo0, *inner_echo0 = 0;
433   ip4_header_t *inner_ip0 = 0;
434   void *l4_header = 0;
435   icmp46_header_t *inner_icmp0;
436   clib_bihash_kv_8_8_t kv0, value0;
437   snat_session_t * s0 = 0;
438   u32 new_addr0, old_addr0;
439   u16 old_id0, new_id0;
440   ip_csum_t sum0;
441   u16 checksum0;
442   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
443   u8 is_error_message = 0;
444
445   echo0 = (icmp_echo_header_t *)(icmp0+1);
446
447   key0.addr = ip0->src_address;
448   key0.fib_index = rx_fib_index0;
449   
450   switch(icmp0->type)
451     {
452     case ICMP4_destination_unreachable:
453     case ICMP4_time_exceeded:
454     case ICMP4_parameter_problem:
455     case ICMP4_source_quench:
456     case ICMP4_redirect:
457     case ICMP4_alternate_host_address:
458       is_error_message = 1;
459     }
460
461   if (!is_error_message)
462     {
463       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request))
464         {
465           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
466           next0 = SNAT_IN2OUT_NEXT_DROP;
467           goto out;
468         }
469       key0.protocol = SNAT_PROTOCOL_ICMP;
470       key0.port = echo0->identifier;
471     }
472   else
473     {
474       inner_ip0 = (ip4_header_t *)(echo0+1);
475       l4_header = ip4_next_header (inner_ip0);
476       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
477       switch (key0.protocol)
478         {
479         case SNAT_PROTOCOL_ICMP:
480           inner_icmp0 = (icmp46_header_t*)l4_header;
481           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
482           key0.port = inner_echo0->identifier;
483           break;
484         case SNAT_PROTOCOL_UDP:
485         case SNAT_PROTOCOL_TCP:
486           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
487           break;
488         default:
489           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
490           next0 = SNAT_IN2OUT_NEXT_DROP;
491           goto out;
492         }
493     }
494
495   kv0.key = key0.as_u64;
496   
497   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
498     {
499       if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
500           IP_PROTOCOL_ICMP, rx_fib_index0)))
501         goto out;
502
503       if (is_error_message)
504         {
505           next0 = SNAT_IN2OUT_NEXT_DROP;
506           goto out;
507         }
508
509       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
510                          &s0, node, next0, cpu_index);
511       
512       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
513         goto out;
514     }
515   else
516     s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
517                             value0.value);
518
519   sum0 = ip_incremental_checksum (0, icmp0,
520                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
521   checksum0 = ~ip_csum_fold (sum0);
522   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
523     {
524       next0 = SNAT_IN2OUT_NEXT_DROP;
525       goto out;
526     }
527
528   old_addr0 = ip0->src_address.as_u32;
529   ip0->src_address = s0->out2in.addr;
530   new_addr0 = ip0->src_address.as_u32;
531   vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
532
533   sum0 = ip0->checksum;
534   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
535                          src_address /* changed member */);
536   ip0->checksum = ip_csum_fold (sum0);
537   
538   if (!is_error_message)
539     {
540       old_id0 = echo0->identifier;
541       new_id0 = s0->out2in.port;
542       echo0->identifier = new_id0;
543
544       sum0 = icmp0->checksum;
545       sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
546                              identifier);
547       icmp0->checksum = ip_csum_fold (sum0);
548     }
549   else
550     {
551       if (!ip4_header_checksum_is_valid (inner_ip0))
552         {
553           next0 = SNAT_IN2OUT_NEXT_DROP;
554           goto out;
555         }
556
557       old_addr0 = inner_ip0->dst_address.as_u32;
558       inner_ip0->dst_address = s0->out2in.addr;
559       new_addr0 = inner_ip0->dst_address.as_u32;
560
561       sum0 = icmp0->checksum;
562       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
563                              dst_address /* changed member */);
564       icmp0->checksum = ip_csum_fold (sum0);
565
566       switch (key0.protocol)
567         {
568           case SNAT_PROTOCOL_ICMP:
569             old_id0 = inner_echo0->identifier;
570             new_id0 = s0->out2in.port;
571             inner_echo0->identifier = new_id0;
572
573             sum0 = icmp0->checksum;
574             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
575                                    identifier);
576             icmp0->checksum = ip_csum_fold (sum0);
577             break;
578           case SNAT_PROTOCOL_UDP:
579           case SNAT_PROTOCOL_TCP:
580             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
581             new_id0 = s0->out2in.port;
582             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
583
584             sum0 = icmp0->checksum;
585             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
586                                    dst_port);
587             icmp0->checksum = ip_csum_fold (sum0);
588             break;
589           default:
590             ASSERT(0);
591         }
592     }
593
594   /* Accounting */
595   s0->last_heard = now;
596   s0->total_pkts++;
597   s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
598   /* Per-user LRU list maintenance for dynamic translations */
599   if (!snat_is_session_static (s0))
600     {
601       clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
602                          s0->per_user_index);
603       clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
604                           s0->per_user_list_head_index,
605                           s0->per_user_index);
606     }
607
608 out:
609   *p_s0 = s0;
610   return next0;
611 }
612
613 /**
614  * @brief Hairpinning
615  *
616  * Hairpinning allows two endpoints on the internal side of the NAT to
617  * communicate even if they only use each other's external IP addresses
618  * and ports.
619  *
620  * @param sm     SNAT main.
621  * @param b0     Vlib buffer.
622  * @param ip0    IP header.
623  * @param udp0   UDP header.
624  * @param tcp0   TCP header.
625  * @param proto0 SNAT protocol.
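 *
 * Example (illustrative addresses): inside host 10.0.0.2 sends to the
 * external address and port that map to inside host 10.0.0.3; the
 * destination is rewritten to 10.0.0.3's internal address and port so the
 * packet is delivered locally instead of leaving the NAT.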
626  */
627 static inline void
628 snat_hairpinning (snat_main_t *sm,
629                   vlib_buffer_t * b0,
630                   ip4_header_t * ip0,
631                   udp_header_t * udp0,
632                   tcp_header_t * tcp0,
633                   u32 proto0)
634 {
635   snat_session_key_t key0, sm0;
636   snat_worker_key_t k0;
637   snat_session_t * s0;
638   clib_bihash_kv_8_8_t kv0, value0;
639   ip_csum_t sum0;
640   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
641   u16 new_dst_port0, old_dst_port0;
642
643   key0.addr = ip0->dst_address;
644   key0.port = udp0->dst_port;
645   key0.protocol = proto0;
646   key0.fib_index = sm->outside_fib_index;
647   kv0.key = key0.as_u64;
648
649   /* Check if destination is in active sessions */
650   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
651     {
652       /* or static mappings */
653       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
654         {
655           new_dst_addr0 = sm0.addr.as_u32;
656           new_dst_port0 = sm0.port;
657           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
658         }
659     }
660   else
661     {
662       si = value0.value;
663       if (sm->num_workers > 1)
664         {
665           k0.addr = ip0->dst_address;
666           k0.port = udp0->dst_port;
667           k0.fib_index = sm->outside_fib_index;
668           kv0.key = k0.as_u64;
669           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
670             ASSERT(0);
671           else
672             ti = value0.value;
673         }
674       else
675         ti = sm->num_workers;
676
677       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
678       new_dst_addr0 = s0->in2out.addr.as_u32;
679       new_dst_port0 = s0->in2out.port;
680       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
681     }
682
683   /* Destination is behind the same NAT, use internal address and port */
684   if (new_dst_addr0)
685     {
686       old_dst_addr0 = ip0->dst_address.as_u32;
687       ip0->dst_address.as_u32 = new_dst_addr0;
688       sum0 = ip0->checksum;
689       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
690                              ip4_header_t, dst_address);
691       ip0->checksum = ip_csum_fold (sum0);
692
693       old_dst_port0 = tcp0->dst;
694       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
695         {
696           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
697             {
698               tcp0->dst = new_dst_port0;
699               sum0 = tcp0->checksum;
700               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
701                                      ip4_header_t, dst_address);
702               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
703                                      ip4_header_t /* cheat */, length);
704               tcp0->checksum = ip_csum_fold(sum0);
705             }
706           else
707             {
708               udp0->dst_port = new_dst_port0;
709               udp0->checksum = 0;
710             }
711         }
712     }
713 }
714
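/**
 * @brief in2out node function shared by the fast and slow path nodes
 *
 * Processes buffers two at a time (with prefetch of the next pair) and then
 * one at a time. For each packet it classifies the protocol, punts ICMP and
 * unhandled protocols to the slow path node when running as the fast path
 * (on the slow path, ICMP is handled inline and unhandled protocols pass to
 * the next feature), looks up or creates the session, rewrites the source
 * address and port, fixes up the IP/TCP/UDP checksums, applies hairpinning
 * and updates session accounting and the per-user LRU list.
 */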
715 static inline uword
716 snat_in2out_node_fn_inline (vlib_main_t * vm,
717                             vlib_node_runtime_t * node,
718                             vlib_frame_t * frame, int is_slow_path)
719 {
720   u32 n_left_from, * from, * to_next;
721   snat_in2out_next_t next_index;
722   u32 pkts_processed = 0;
723   snat_main_t * sm = &snat_main;
724   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
725   f64 now = vlib_time_now (vm);
726   u32 stats_node_index;
727   u32 cpu_index = os_get_cpu_number ();
728
729   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
730     snat_in2out_node.index;
731
732   from = vlib_frame_vector_args (frame);
733   n_left_from = frame->n_vectors;
734   next_index = node->cached_next_index;
735
736   while (n_left_from > 0)
737     {
738       u32 n_left_to_next;
739
740       vlib_get_next_frame (vm, node, next_index,
741                            to_next, n_left_to_next);
742
743       while (n_left_from >= 4 && n_left_to_next >= 2)
744         {
745           u32 bi0, bi1;
746           vlib_buffer_t * b0, * b1;
747           u32 next0, next1;
748           u32 sw_if_index0, sw_if_index1;
749           ip4_header_t * ip0, * ip1;
750           ip_csum_t sum0, sum1;
751           u32 new_addr0, old_addr0, new_addr1, old_addr1;
752           u16 old_port0, new_port0, old_port1, new_port1;
753           udp_header_t * udp0, * udp1;
754           tcp_header_t * tcp0, * tcp1;
755           icmp46_header_t * icmp0, * icmp1;
756           snat_session_key_t key0, key1;
757           u32 rx_fib_index0, rx_fib_index1;
758           u32 proto0, proto1;
759           snat_session_t * s0 = 0, * s1 = 0;
760           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
761           
762           /* Prefetch next iteration. */
763           {
764             vlib_buffer_t * p2, * p3;
765             
766             p2 = vlib_get_buffer (vm, from[2]);
767             p3 = vlib_get_buffer (vm, from[3]);
768             
769             vlib_prefetch_buffer_header (p2, LOAD);
770             vlib_prefetch_buffer_header (p3, LOAD);
771
772             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
773             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
774           }
775
776           /* speculatively enqueue b0 and b1 to the current next frame */
777           to_next[0] = bi0 = from[0];
778           to_next[1] = bi1 = from[1];
779           from += 2;
780           to_next += 2;
781           n_left_from -= 2;
782           n_left_to_next -= 2;
783           
784           b0 = vlib_get_buffer (vm, bi0);
785           b1 = vlib_get_buffer (vm, bi1);
786
787           ip0 = vlib_buffer_get_current (b0);
788           udp0 = ip4_next_header (ip0);
789           tcp0 = (tcp_header_t *) udp0;
790           icmp0 = (icmp46_header_t *) udp0;
791
792           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
793           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
794                                    sw_if_index0);
795
796           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
797
798           proto0 = ip_proto_to_snat_proto (ip0->protocol);
799
800           if (PREDICT_FALSE(ip0->ttl == 1))
801             {
802               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
803               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
804                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
805                                            0);
806               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
807               goto trace00;
808             }
809
810           /* Next configured feature, probably ip4-lookup */
811           if (is_slow_path)
812             {
813               if (PREDICT_FALSE (proto0 == ~0))
814                 goto trace00;
815               
816               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
817                 {
818                   next0 = icmp_in2out_slow_path 
819                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
820                      node, next0, now, cpu_index, &s0);
821                   goto trace00;
822                 }
823             }
824           else
825             {
826               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
827                 {
828                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
829                   goto trace00;
830                 }
831             }
832
833           key0.addr = ip0->src_address;
834           key0.port = udp0->src_port;
835           key0.protocol = proto0;
836           key0.fib_index = rx_fib_index0;
837           
838           kv0.key = key0.as_u64;
839
840           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
841             {
842               if (is_slow_path)
843                 {
844                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
845                       proto0, rx_fib_index0)))
846                     goto trace00;
847
848                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
849                                      &s0, node, next0, cpu_index);
850                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
851                     goto trace00;
852                 }
853               else
854                 {
855                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
856                   goto trace00;
857                 }
858             }
859           else
860             s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
861                                     value0.value);
862
863           old_addr0 = ip0->src_address.as_u32;
864           ip0->src_address = s0->out2in.addr;
865           new_addr0 = ip0->src_address.as_u32;
866           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
867
868           sum0 = ip0->checksum;
869           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
870                                  ip4_header_t,
871                                  src_address /* changed member */);
872           ip0->checksum = ip_csum_fold (sum0);
873
874           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
875             {
876               old_port0 = tcp0->src_port;
877               tcp0->src_port = s0->out2in.port;
878               new_port0 = tcp0->src_port;
879
880               sum0 = tcp0->checksum;
881               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
882                                      ip4_header_t,
883                                      dst_address /* changed member */);
884               sum0 = ip_csum_update (sum0, old_port0, new_port0,
885                                      ip4_header_t /* cheat */,
886                                      length /* changed member */);
887               tcp0->checksum = ip_csum_fold(sum0);
888             }
889           else
890             {
891               old_port0 = udp0->src_port;
892               udp0->src_port = s0->out2in.port;
893               udp0->checksum = 0;
894             }
895
896           /* Hairpinning */
897           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
898
899           /* Accounting */
900           s0->last_heard = now;
901           s0->total_pkts++;
902           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
903           /* Per-user LRU list maintenance for dynamic translation */
904           if (!snat_is_session_static (s0))
905             {
906               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
907                                  s0->per_user_index);
908               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
909                                   s0->per_user_list_head_index,
910                                   s0->per_user_index);
911             }
912         trace00:
913
914           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
915                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
916             {
917               snat_in2out_trace_t *t = 
918                  vlib_add_trace (vm, node, b0, sizeof (*t));
919               t->is_slow_path = is_slow_path;
920               t->sw_if_index = sw_if_index0;
921               t->next_index = next0;
922               t->session_index = ~0;
923               if (s0)
924                 t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
925             }
926
927           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
928
929           ip1 = vlib_buffer_get_current (b1);
930           udp1 = ip4_next_header (ip1);
931           tcp1 = (tcp_header_t *) udp1;
932           icmp1 = (icmp46_header_t *) udp1;
933
934           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
935           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
936                                    sw_if_index1);
937
938           proto1 = ip_proto_to_snat_proto (ip1->protocol);
939
940           if (PREDICT_FALSE(ip1->ttl == 1))
941             {
942               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
943               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
944                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
945                                            0);
946               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
947               goto trace01;
948             }
949
950           /* Next configured feature, probably ip4-lookup */
951           if (is_slow_path)
952             {
953               if (PREDICT_FALSE (proto1 == ~0))
954                 goto trace01;
955               
956               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
957                 {
958                   next1 = icmp_in2out_slow_path 
959                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
960                      next1, now, cpu_index, &s1);
961                   goto trace01;
962                 }
963             }
964           else
965             {
966               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
967                 {
968                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
969                   goto trace01;
970                 }
971             }
972
973           key1.addr = ip1->src_address;
974           key1.port = udp1->src_port;
975           key1.protocol = proto1;
976           key1.fib_index = rx_fib_index1;
977           
978           kv1.key = key1.as_u64;
979
980           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
981             {
982               if (is_slow_path)
983                 {
984                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index1, ip1,
985                       proto1, rx_fib_index1)))
986                     goto trace01;
987
988                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
989                                      &s1, node, next1, cpu_index);
990                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
991                     goto trace01;
992                 }
993               else
994                 {
995                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
996                   goto trace01;
997                 }
998             }
999           else
1000             s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
1001                                     value1.value);
1002
1003           old_addr1 = ip1->src_address.as_u32;
1004           ip1->src_address = s1->out2in.addr;
1005           new_addr1 = ip1->src_address.as_u32;
1006           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1007
1008           sum1 = ip1->checksum;
1009           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1010                                  ip4_header_t,
1011                                  src_address /* changed member */);
1012           ip1->checksum = ip_csum_fold (sum1);
1013
1014           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1015             {
1016               old_port1 = tcp1->src_port;
1017               tcp1->src_port = s1->out2in.port;
1018               new_port1 = tcp1->src_port;
1019
1020               sum1 = tcp1->checksum;
1021               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1022                                      ip4_header_t,
1023                                      dst_address /* changed member */);
1024               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1025                                      ip4_header_t /* cheat */,
1026                                      length /* changed member */);
1027               tcp1->checksum = ip_csum_fold(sum1);
1028             }
1029           else
1030             {
1031               old_port1 = udp1->src_port;
1032               udp1->src_port = s1->out2in.port;
1033               udp1->checksum = 0;
1034             }
1035
1036           /* Hairpinning */
1037           snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
1038
1039           /* Accounting */
1040           s1->last_heard = now;
1041           s1->total_pkts++;
1042           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1043           /* Per-user LRU list maintenance for dynamic translation */
1044           if (!snat_is_session_static (s1))
1045             {
1046               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
1047                                  s1->per_user_index);
1048               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
1049                                   s1->per_user_list_head_index,
1050                                   s1->per_user_index);
1051             }
1052         trace01:
1053
1054           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1055                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
1056             {
1057               snat_in2out_trace_t *t = 
1058                  vlib_add_trace (vm, node, b1, sizeof (*t));
1059               t->sw_if_index = sw_if_index1;
               t->is_slow_path = is_slow_path;
1060               t->next_index = next1;
1061               t->session_index = ~0;
1062               if (s1)
1063                 t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
1064             }
1065
1066           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1067
1068           /* verify speculative enqueues, maybe switch current next frame */
1069           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1070                                            to_next, n_left_to_next,
1071                                            bi0, bi1, next0, next1);
1072         }
1073
1074       while (n_left_from > 0 && n_left_to_next > 0)
1075         {
1076           u32 bi0;
1077           vlib_buffer_t * b0;
1078           u32 next0;
1079           u32 sw_if_index0;
1080           ip4_header_t * ip0;
1081           ip_csum_t sum0;
1082           u32 new_addr0, old_addr0;
1083           u16 old_port0, new_port0;
1084           udp_header_t * udp0;
1085           tcp_header_t * tcp0;
1086           icmp46_header_t * icmp0;
1087           snat_session_key_t key0;
1088           u32 rx_fib_index0;
1089           u32 proto0;
1090           snat_session_t * s0 = 0;
1091           clib_bihash_kv_8_8_t kv0, value0;
1092           
1093           /* speculatively enqueue b0 to the current next frame */
1094           bi0 = from[0];
1095           to_next[0] = bi0;
1096           from += 1;
1097           to_next += 1;
1098           n_left_from -= 1;
1099           n_left_to_next -= 1;
1100
1101           b0 = vlib_get_buffer (vm, bi0);
1102           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1103
1104           ip0 = vlib_buffer_get_current (b0);
1105           udp0 = ip4_next_header (ip0);
1106           tcp0 = (tcp_header_t *) udp0;
1107           icmp0 = (icmp46_header_t *) udp0;
1108
1109           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1110           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1111                                    sw_if_index0);
1112
1113           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1114
1115           if (PREDICT_FALSE(ip0->ttl == 1))
1116             {
1117               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1118               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1119                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1120                                            0);
1121               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1122               goto trace0;
1123             }
1124
1125           /* Next configured feature, probably ip4-lookup */
1126           if (is_slow_path)
1127             {
1128               if (PREDICT_FALSE (proto0 == ~0))
1129                 goto trace0;
1130               
1131               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1132                 {
1133                   next0 = icmp_in2out_slow_path 
1134                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1135                      next0, now, cpu_index, &s0);
1136                   goto trace0;
1137                 }
1138             }
1139           else
1140             {
1141               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1142                 {
1143                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1144                   goto trace0;
1145                 }
1146             }
1147
1148           key0.addr = ip0->src_address;
1149           key0.port = udp0->src_port;
1150           key0.protocol = proto0;
1151           key0.fib_index = rx_fib_index0;
1152           
1153           kv0.key = key0.as_u64;
1154
1155           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
1156             {
1157               if (is_slow_path)
1158                 {
1159                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
1160                       proto0, rx_fib_index0)))
1161                     goto trace0;
1162
1163                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1164                                      &s0, node, next0, cpu_index);
1165
1166                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1167                     goto trace0;
1168                 }
1169               else
1170                 {
1171                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1172                   goto trace0;
1173                 }
1174             }
1175           else
1176             s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
1177                                     value0.value);
1178
1179           old_addr0 = ip0->src_address.as_u32;
1180           ip0->src_address = s0->out2in.addr;
1181           new_addr0 = ip0->src_address.as_u32;
1182           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1183
1184           sum0 = ip0->checksum;
1185           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1186                                  ip4_header_t,
1187                                  src_address /* changed member */);
1188           ip0->checksum = ip_csum_fold (sum0);
1189
1190           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1191             {
1192               old_port0 = tcp0->src_port;
1193               tcp0->src_port = s0->out2in.port;
1194               new_port0 = tcp0->src_port;
1195
1196               sum0 = tcp0->checksum;
1197               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1198                                      ip4_header_t,
1199                                      dst_address /* changed member */);
1200               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1201                                      ip4_header_t /* cheat */,
1202                                      length /* changed member */);
1203               tcp0->checksum = ip_csum_fold(sum0);
1204             }
1205           else
1206             {
1207               old_port0 = udp0->src_port;
1208               udp0->src_port = s0->out2in.port;
1209               udp0->checksum = 0;
1210             }
1211
1212           /* Hairpinning */
1213           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1214
1215           /* Accounting */
1216           s0->last_heard = now;
1217           s0->total_pkts++;
1218           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1219           /* Per-user LRU list maintenance for dynamic translation */
1220           if (!snat_is_session_static (s0))
1221             {
1222               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
1223                                  s0->per_user_index);
1224               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
1225                                   s0->per_user_list_head_index,
1226                                   s0->per_user_index);
1227             }
1228
1229         trace0:
1230           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1231                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1232             {
1233               snat_in2out_trace_t *t = 
1234                  vlib_add_trace (vm, node, b0, sizeof (*t));
1235               t->is_slow_path = is_slow_path;
1236               t->sw_if_index = sw_if_index0;
1237               t->next_index = next0;
1238               t->session_index = ~0;
1239               if (s0)
1240                 t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
1241             }
1242
1243           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1244
1245           /* verify speculative enqueue, maybe switch current next frame */
1246           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1247                                            to_next, n_left_to_next,
1248                                            bi0, next0);
1249         }
1250
1251       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1252     }
1253
1254   vlib_node_increment_counter (vm, stats_node_index, 
1255                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, 
1256                                pkts_processed);
1257   return frame->n_vectors;
1258 }
1259
1260 static uword
1261 snat_in2out_fast_path_fn (vlib_main_t * vm,
1262                           vlib_node_runtime_t * node,
1263                           vlib_frame_t * frame)
1264 {
1265   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */);
1266 }
1267
1268 VLIB_REGISTER_NODE (snat_in2out_node) = {
1269   .function = snat_in2out_fast_path_fn,
1270   .name = "snat-in2out",
1271   .vector_size = sizeof (u32),
1272   .format_trace = format_snat_in2out_trace,
1273   .type = VLIB_NODE_TYPE_INTERNAL,
1274   
1275   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1276   .error_strings = snat_in2out_error_strings,
1277
1278   .runtime_data_bytes = sizeof (snat_runtime_t),
1279   
1280   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1281
1282   /* edit / add dispositions here */
1283   .next_nodes = {
1284     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1285     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1286     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1287     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1288   },
1289 };
1290
1291 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1292
1293 static uword
1294 snat_in2out_slow_path_fn (vlib_main_t * vm,
1295                           vlib_node_runtime_t * node,
1296                           vlib_frame_t * frame)
1297 {
1298   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */);
1299 }
1300
1301 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1302   .function = snat_in2out_slow_path_fn,
1303   .name = "snat-in2out-slowpath",
1304   .vector_size = sizeof (u32),
1305   .format_trace = format_snat_in2out_trace,
1306   .type = VLIB_NODE_TYPE_INTERNAL,
1307   
1308   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1309   .error_strings = snat_in2out_error_strings,
1310
1311   .runtime_data_bytes = sizeof (snat_runtime_t),
1312   
1313   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1314
1315   /* edit / add dispositions here */
1316   .next_nodes = {
1317     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1318     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1319     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1320     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1321   },
1322 };
1323
1324 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
1325
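/**
 * @brief in2out worker handoff node function
 *
 * Assigns each inside "user" (source address + RX FIB index) to a worker
 * thread, round-robin for users not seen before, and remembers the choice in
 * the worker_by_in hash. Buffers belonging to another worker are queued to
 * that worker's frame queue; buffers for the local worker are sent straight
 * to snat-in2out.
 */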
1326 static uword
1327 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
1328                                vlib_node_runtime_t * node,
1329                                vlib_frame_t * frame)
1330 {
1331   snat_main_t *sm = &snat_main;
1332   vlib_thread_main_t *tm = vlib_get_thread_main ();
1333   u32 n_left_from, *from, *to_next = 0;
1334   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
1335   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
1336     = 0;
1337   vlib_frame_queue_elt_t *hf = 0;
1338   vlib_frame_t *f = 0;
1339   int i;
1340   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
1341   u32 next_worker_index = 0;
1342   u32 current_worker_index = ~0;
1343   u32 cpu_index = os_get_cpu_number ();
1344
1345   ASSERT (vec_len (sm->workers));
1346
1347   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
1348     {
1349       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
1350
1351       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
1352                                sm->first_worker_index + sm->num_workers - 1,
1353                                (vlib_frame_queue_t *) (~0));
1354     }
1355
1356   from = vlib_frame_vector_args (frame);
1357   n_left_from = frame->n_vectors;
1358
1359   while (n_left_from > 0)
1360     {
1361       u32 bi0;
1362       vlib_buffer_t *b0;
1363       u32 sw_if_index0;
1364       u32 rx_fib_index0;
1365       ip4_header_t * ip0;
1366       snat_user_key_t key0;
1367       clib_bihash_kv_8_8_t kv0, value0;
1368       u8 do_handoff;
1369
1370       bi0 = from[0];
1371       from += 1;
1372       n_left_from -= 1;
1373
1374       b0 = vlib_get_buffer (vm, bi0);
1375
1376       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1377       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1378
1379       ip0 = vlib_buffer_get_current (b0);
1380
1381       key0.addr = ip0->src_address;
1382       key0.fib_index = rx_fib_index0;
1383
1384       kv0.key = key0.as_u64;
1385
1386       /* Ever heard of the "user" before? */
1387       if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0))
1388         {
1389           /* No, assign next available worker (RR) */
1390           next_worker_index = sm->first_worker_index;
1391           if (vec_len (sm->workers))
1392             {
1393               next_worker_index += 
1394                 sm->workers[sm->next_worker++ % _vec_len (sm->workers)];
1395             }
1396
1397           /* add worker lookup entry for non-translated packets */
1398           kv0.value = next_worker_index;
1399           clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
1400         }
1401       else
1402         next_worker_index = value0.value;
1403
1404       if (PREDICT_FALSE (next_worker_index != cpu_index))
1405         {
1406           do_handoff = 1;
1407
1408           if (next_worker_index != current_worker_index)
1409             {
1410               if (hf)
1411                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1412
1413               hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
1414                                                       next_worker_index,
1415                                                       handoff_queue_elt_by_worker_index);
1416
1417               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
1418               to_next_worker = &hf->buffer_index[hf->n_vectors];
1419               current_worker_index = next_worker_index;
1420             }
1421
1422           /* enqueue to correct worker thread */
1423           to_next_worker[0] = bi0;
1424           to_next_worker++;
1425           n_left_to_next_worker--;
1426
1427           if (n_left_to_next_worker == 0)
1428             {
1429               hf->n_vectors = VLIB_FRAME_SIZE;
1430               vlib_put_frame_queue_elt (hf);
1431               current_worker_index = ~0;
1432               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
1433               hf = 0;
1434             }
1435         }
1436       else
1437         {
1438           do_handoff = 0;
1439           /* if this is 1st frame */
1440           if (!f)
1441             {
1442               f = vlib_get_frame_to_node (vm, snat_in2out_node.index);
1443               to_next = vlib_frame_vector_args (f);
1444             }
1445
1446           to_next[0] = bi0;
1447           to_next += 1;
1448           f->n_vectors++;
1449         }
1450
1451       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1452                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1453         {
1454           snat_in2out_worker_handoff_trace_t *t =
1455             vlib_add_trace (vm, node, b0, sizeof (*t));
1456           t->next_worker_index = next_worker_index;
1457           t->do_handoff = do_handoff;
1458         }
1459     }
1460
1461   if (f)
1462     vlib_put_frame_to_node (vm, snat_in2out_node.index, f);
1463
1464   if (hf)
1465     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1466
1467   /* Ship frames to the worker nodes */
1468   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
1469     {
1470       if (handoff_queue_elt_by_worker_index[i])
1471         {
1472           hf = handoff_queue_elt_by_worker_index[i];
1473           /*
1474            * It works better to let the handoff node
1475            * rate-adapt, always ship the handoff queue element.
1476            */
1477           if (1 || hf->n_vectors == hf->last_n_vectors)
1478             {
1479               vlib_put_frame_queue_elt (hf);
1480               handoff_queue_elt_by_worker_index[i] = 0;
1481             }
1482           else
1483             hf->last_n_vectors = hf->n_vectors;
1484         }
1485       congested_handoff_queue_by_worker_index[i] =
1486         (vlib_frame_queue_t *) (~0);
1487     }
1488   hf = 0;
1489   current_worker_index = ~0;
1490   return frame->n_vectors;
1491 }
1492
1493 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
1494   .function = snat_in2out_worker_handoff_fn,
1495   .name = "snat-in2out-worker-handoff",
1496   .vector_size = sizeof (u32),
1497   .format_trace = format_snat_in2out_worker_handoff_trace,
1498   .type = VLIB_NODE_TYPE_INTERNAL,
1499   
1500   .n_next_nodes = 1,
1501
1502   .next_nodes = {
1503     [0] = "error-drop",
1504   },
1505 };
1506
1507 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
1508
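/* Translate the source address (and, if needed, the ICMP echo identifier)
 * of an ICMP packet according to a configured static mapping, updating the
 * IP and ICMP checksums in place.  Used by the fast static-mapping node
 * below. */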
1509 static inline u32 icmp_in2out_static_map (snat_main_t *sm,
1510                                           vlib_buffer_t * b0,
1511                                           ip4_header_t * ip0,
1512                                           icmp46_header_t * icmp0,
1513                                           u32 sw_if_index0,
1514                                           vlib_node_runtime_t * node,
1515                                           u32 next0,
1516                                           u32 rx_fib_index0)
1517 {
1518   snat_session_key_t key0, sm0;
1519   icmp_echo_header_t *echo0;
1520   u32 new_addr0, old_addr0;
1521   u16 old_id0, new_id0;
1522   ip_csum_t sum0;
1523   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
1524
1525   echo0 = (icmp_echo_header_t *)(icmp0+1);
1526
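  /* For ICMP echo traffic the echo identifier stands in for the L4 port in
   * the session key. */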
1527   key0.addr = ip0->src_address;
1528   key0.port = echo0->identifier;
1529   key0.fib_index = rx_fib_index0;
1530   
1531   if (snat_static_mapping_match(sm, key0, &sm0, 0))
1532     {
1533       if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
1534           IP_PROTOCOL_ICMP, rx_fib_index0)))
1535         return next0;
1536
1537       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1538       return SNAT_IN2OUT_NEXT_DROP;
1539     }
1540
1541   new_addr0 = sm0.addr.as_u32;
1542   new_id0 = sm0.port;
1543   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1544   old_addr0 = ip0->src_address.as_u32;
1545   ip0->src_address.as_u32 = new_addr0;
1546   
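  /* Update the IP header checksum incrementally with the source address
   * delta rather than recomputing it from scratch. */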
1547   sum0 = ip0->checksum;
1548   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1549                          ip4_header_t,
1550                          src_address /* changed member */);
1551   ip0->checksum = ip_csum_fold (sum0);
1552   
1553   if (PREDICT_FALSE(new_id0 != echo0->identifier))
1554     {
1555       old_id0 = echo0->identifier;
1556       echo0->identifier = new_id0;
1557
1558       sum0 = icmp0->checksum;
1559       sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
1560                              identifier);
1561       icmp0->checksum = ip_csum_fold (sum0);
1562     }
1563
1564   return next0;
1565 }
1566
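/* Fast-path in2out node for static-mapping-only operation: packets are
 * translated purely from the static mapping table, so no per-session state
 * is created or consulted. */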
1567 static uword
1568 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
1569                                 vlib_node_runtime_t * node,
1570                                 vlib_frame_t * frame)
1571 {
1572   u32 n_left_from, * from, * to_next;
1573   snat_in2out_next_t next_index;
1574   u32 pkts_processed = 0;
1575   snat_main_t * sm = &snat_main;
1576   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
1577   u32 stats_node_index;
1578
1579   stats_node_index = snat_in2out_fast_node.index;
1580
1581   from = vlib_frame_vector_args (frame);
1582   n_left_from = frame->n_vectors;
1583   next_index = node->cached_next_index;
1584
1585   while (n_left_from > 0)
1586     {
1587       u32 n_left_to_next;
1588
1589       vlib_get_next_frame (vm, node, next_index,
1590                            to_next, n_left_to_next);
1591
1592       while (n_left_from > 0 && n_left_to_next > 0)
1593         {
1594           u32 bi0;
1595           vlib_buffer_t * b0;
1596           u32 next0;
1597           u32 sw_if_index0;
1598           ip4_header_t * ip0;
1599           ip_csum_t sum0;
1600           u32 new_addr0, old_addr0;
1601           u16 old_port0, new_port0;
1602           udp_header_t * udp0;
1603           tcp_header_t * tcp0;
1604           icmp46_header_t * icmp0;
1605           snat_session_key_t key0, sm0;
1606           u32 proto0;
1607           u32 rx_fib_index0;
1608
1609           /* speculatively enqueue b0 to the current next frame */
1610           bi0 = from[0];
1611           to_next[0] = bi0;
1612           from += 1;
1613           to_next += 1;
1614           n_left_from -= 1;
1615           n_left_to_next -= 1;
1616
1617           b0 = vlib_get_buffer (vm, bi0);
1618           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1619
1620           ip0 = vlib_buffer_get_current (b0);
1621           udp0 = ip4_next_header (ip0);
1622           tcp0 = (tcp_header_t *) udp0;
1623           icmp0 = (icmp46_header_t *) udp0;
1624
1625           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1626           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1627
1628           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1629
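          /* Protocols other than TCP, UDP and ICMP are passed through
           * untranslated. */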
1630           if (PREDICT_FALSE (proto0 == ~0))
1631               goto trace0;
1632
1633           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1634             {
1635               if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
1636                   proto0, rx_fib_index0)))
1637                 goto trace0;
1638
1639               next0 = icmp_in2out_static_map
1640                 (sm, b0, ip0, icmp0, sw_if_index0, node, next0, rx_fib_index0);
1641               goto trace0;
1642             }
1643
1644           key0.addr = ip0->src_address;
1645           key0.port = udp0->src_port;
1646           key0.fib_index = rx_fib_index0;
1647
1648           if (snat_static_mapping_match(sm, key0, &sm0, 0))
1649             {
1650               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1651               next0 = SNAT_IN2OUT_NEXT_DROP;
1652               goto trace0;
1653             }
1654
1655           new_addr0 = sm0.addr.as_u32;
1656           new_port0 = sm0.port;
1657           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1658           old_addr0 = ip0->src_address.as_u32;
1659           ip0->src_address.as_u32 = new_addr0;
1660
1661           sum0 = ip0->checksum;
1662           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1663                                  ip4_header_t,
1664                                  src_address /* changed member */);
1665           ip0->checksum = ip_csum_fold (sum0);
1666
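          /* If the statically mapped port differs from the original source
           * port, rewrite it.  The TCP checksum is adjusted incrementally
           * (the type/member arguments to ip_csum_update merely supply a
           * field offset, hence the "cheat" note below), while the UDP
           * checksum is simply zeroed, which is permitted for UDP over
           * IPv4. */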
1667           if (PREDICT_FALSE(new_port0 != udp0->src_port))
1668             {
1669               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1670                 {
1671                   old_port0 = tcp0->src_port;
1672                   tcp0->src_port = new_port0;
1673
1674                   sum0 = tcp0->checksum;
1675                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1676                                          ip4_header_t,
1677                                          dst_address /* changed member */);
1678                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
1679                                          ip4_header_t /* cheat */,
1680                                          length /* changed member */);
1681                   tcp0->checksum = ip_csum_fold(sum0);
1682                 }
1683               else
1684                 {
1685                   old_port0 = udp0->src_port;
1686                   udp0->src_port = new_port0;
1687                   udp0->checksum = 0;
1688                 }
1689             }
1690           else
1691             {
1692               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1693                 {
1694                   sum0 = tcp0->checksum;
1695                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1696                                          ip4_header_t,
1697                                          dst_address /* changed member */);
1698                   tcp0->checksum = ip_csum_fold(sum0);
1699                 }
1700             }
1701
1702           /* Hairpinning */
1703           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1704
1705         trace0:
1706           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1707                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1708             {
1709               snat_in2out_trace_t *t =
1710                  vlib_add_trace (vm, node, b0, sizeof (*t));
1711               t->sw_if_index = sw_if_index0;
1712               t->next_index = next0;
1713             }
1714
1715           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1716
1717           /* verify speculative enqueue, maybe switch current next frame */
1718           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1719                                            to_next, n_left_to_next,
1720                                            bi0, next0);
1721         }
1722
1723       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1724     }
1725
1726   vlib_node_increment_counter (vm, stats_node_index,
1727                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1728                                pkts_processed);
1729   return frame->n_vectors;
1730 }
1731
1732
1733 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
1734   .function = snat_in2out_fast_static_map_fn,
1735   .name = "snat-in2out-fast",
1736   .vector_size = sizeof (u32),
1737   .format_trace = format_snat_in2out_fast_trace,
1738   .type = VLIB_NODE_TYPE_INTERNAL,
1739   
1740   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1741   .error_strings = snat_in2out_error_strings,
1742
1743   .runtime_data_bytes = sizeof (snat_runtime_t),
1744   
1745   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1746
1747   /* edit / add dispositions here */
1748   .next_nodes = {
1749     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1750     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1751     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1752     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1753   },
1754 };
1755
1756 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);