VPP-598: tcp stack initial commit
[vpp.git] / src / plugins / snat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <snat/snat.h>
25 #include <snat/snat_ipfix_logging.h>
26
27 #include <vppinfra/hash.h>
28 #include <vppinfra/error.h>
29 #include <vppinfra/elog.h>
30
31 typedef struct {
32   u32 sw_if_index;
33   u32 next_index;
34   u32 session_index;
35   u32 is_slow_path;
36 } snat_in2out_trace_t;
37
38 typedef struct {
39   u32 next_worker_index;
40   u8 do_handoff;
41 } snat_in2out_worker_handoff_trace_t;
42
43 /* packet trace format function */
44 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
45 {
46   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
47   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
48   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
49   char * tag;
50
51   tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
52   
53   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
54               t->sw_if_index, t->next_index, t->session_index);
55
56   return s;
57 }
58
59 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
60 {
61   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
62   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
63   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
64
65   s = format (s, "SNAT_IN2OUT_FAST: sw_if_index %d, next index %d", 
66               t->sw_if_index, t->next_index);
67
68   return s;
69 }
70
71 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
72 {
73   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
74   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
75   snat_in2out_worker_handoff_trace_t * t =
76     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
77   char * m;
78
79   m = t->do_handoff ? "next worker" : "same worker";
80   s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
81
82   return s;
83 }
84
85 vlib_node_registration_t snat_in2out_node;
86 vlib_node_registration_t snat_in2out_slowpath_node;
87 vlib_node_registration_t snat_in2out_fast_node;
88 vlib_node_registration_t snat_in2out_worker_handoff_node;
89
90 #define foreach_snat_in2out_error                       \
91 _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
92 _(IN2OUT_PACKETS, "Good in2out packets processed")      \
93 _(OUT_OF_PORTS, "Out of ports")                         \
94 _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
95 _(BAD_ICMP_TYPE, "ICMP type not echo-request")          \
96 _(NO_TRANSLATION, "No translation")
97   
98 typedef enum {
99 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
100   foreach_snat_in2out_error
101 #undef _
102   SNAT_IN2OUT_N_ERROR,
103 } snat_in2out_error_t;
104
105 static char * snat_in2out_error_strings[] = {
106 #define _(sym,string) string,
107   foreach_snat_in2out_error
108 #undef _
109 };
110
111 typedef enum {
112   SNAT_IN2OUT_NEXT_LOOKUP,
113   SNAT_IN2OUT_NEXT_DROP,
114   SNAT_IN2OUT_NEXT_SLOW_PATH,
115   SNAT_IN2OUT_NEXT_ICMP_ERROR,
116   SNAT_IN2OUT_N_NEXT,
117 } snat_in2out_next_t;
118
119 /**
120  * @brief Check if packet should be translated
121  *
122  * Packets aimed at the outside interface or at an external address with an
123  * active session should be translated.
124  *
125  * @param sm            SNAT main
126  * @param rt            SNAT runtime data
127  * @param sw_if_index0  index of the inside interface
128  * @param ip0           IPv4 header
129  * @param proto0        SNAT protocol
130  * @param rx_fib_index0 RX FIB index
131  *
132  * @returns 0 if the packet should be translated, otherwise 1
133  */
134 static inline int
135 snat_not_translate (snat_main_t * sm, snat_runtime_t * rt, u32 sw_if_index0,
136                    ip4_header_t * ip0, u32 proto0, u32 rx_fib_index0)
137 {
138   ip4_address_t * first_int_addr;
139   udp_header_t * udp0 = ip4_next_header (ip0);
140   snat_session_key_t key0, sm0;
141   clib_bihash_kv_8_8_t kv0, value0;
142   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
143   fib_prefix_t pfx = {
144     .fp_proto = FIB_PROTOCOL_IP4,
145     .fp_len = 32,
146     .fp_addr = {
147         .ip4.as_u32 = ip0->dst_address.as_u32,
148     },
149   };
150
151   if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
152     {
153       first_int_addr =
154         ip4_interface_first_address (sm->ip4_main, sw_if_index0,
155                                      0 /* just want the address */);
156       rt->cached_sw_if_index = sw_if_index0;
157       if (first_int_addr)
158         rt->cached_ip4_address = first_int_addr->as_u32;
159       else
160         rt->cached_ip4_address = 0;
161     }
162
163   /* Don't NAT packets aimed at the interface address */
164   if (PREDICT_FALSE(ip0->dst_address.as_u32 == rt->cached_ip4_address))
165     return 1;
166
167   key0.addr = ip0->dst_address;
168   key0.port = udp0->dst_port;
169   key0.protocol = proto0;
170   key0.fib_index = sm->outside_fib_index;
171   kv0.key = key0.as_u64;
172
173   /* NAT the packet if it is aimed at an external address that */
174   /* has an active session */
175   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
176     {
177       /* or a static mapping */
178       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
179         return 0;
180     }
181   else
182     return 0;
183
184   fei = fib_table_lookup (rx_fib_index0, &pfx);
185   if (FIB_NODE_INDEX_INVALID != fei)
186     {
187       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
188       if (sw_if_index == ~0)
189         {
190           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
191           if (FIB_NODE_INDEX_INVALID != fei)
192             sw_if_index = fib_entry_get_resolving_interface (fei);
193         }
194       snat_interface_t *i;
195       pool_foreach (i, sm->interfaces,
196       ({
197         /* NAT packet aimed at outside interface */
198         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
199           return 0;
200       }));
201     }
202
203   return 1;
204 }
205
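/**
 * @brief Create a new session (slow path)
 *
 * Finds or creates the per-source-address "user", recycles the least
 * recently used dynamic session when the user is over quota, otherwise
 * matches a static mapping or allocates a new outside address and port,
 * then installs the in2out/out2in hash entries, records the owning worker
 * and emits an IPFIX session-create event.
 */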
206 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
207                       ip4_header_t * ip0,
208                       u32 rx_fib_index0,
209                       snat_session_key_t * key0,
210                       snat_session_t ** sessionp,
211                       vlib_node_runtime_t * node,
212                       u32 next0,
213                       u32 cpu_index)
214 {
215   snat_user_t *u;
216   snat_user_key_t user_key;
217   snat_session_t *s;
218   clib_bihash_kv_8_8_t kv0, value0;
219   u32 oldest_per_user_translation_list_index;
220   dlist_elt_t * oldest_per_user_translation_list_elt;
221   dlist_elt_t * per_user_translation_list_elt;
222   dlist_elt_t * per_user_list_head_elt;
223   u32 session_index;
224   snat_session_key_t key1;
225   u32 address_index = ~0;
226   u32 outside_fib_index;
227   uword * p;
228   snat_worker_key_t worker_by_out_key;
229
230   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
231   if (! p)
232     {
233       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
234       return SNAT_IN2OUT_NEXT_DROP;
235     }
236   outside_fib_index = p[0];
237
238   key1.protocol = key0->protocol;
239   user_key.addr = ip0->src_address;
240   user_key.fib_index = rx_fib_index0;
241   kv0.key = user_key.as_u64;
242   
243   /* Ever heard of the "user" = src ip4 address before? */
244   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
245     {
246       /* no, make a new one */
247       pool_get (sm->per_thread_data[cpu_index].users, u);
248       memset (u, 0, sizeof (*u));
249       u->addr = ip0->src_address;
250
251       pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt);
252
253       u->sessions_per_user_list_head_index = per_user_list_head_elt -
254         sm->per_thread_data[cpu_index].list_pool;
255
256       clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
257                        u->sessions_per_user_list_head_index);
258
259       kv0.value = u - sm->per_thread_data[cpu_index].users;
260
261       /* add user */
262       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
263     }
264   else
265     {
266       u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
267                              value0.value);
268     }
269
270   /* Over quota? Recycle the least recently used dynamic translation */
271   if (u->nsessions >= sm->max_translations_per_user)
272     {
273       /* Remove the oldest dynamic translation */
274       do {
275           oldest_per_user_translation_list_index =
276             clib_dlist_remove_head (sm->per_thread_data[cpu_index].list_pool,
277                                     u->sessions_per_user_list_head_index);
278
279           ASSERT (oldest_per_user_translation_list_index != ~0);
280
281           /* add it back to the end of the LRU list */
282           clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
283                               u->sessions_per_user_list_head_index,
284                               oldest_per_user_translation_list_index);
285           /* Get the list element */
286           oldest_per_user_translation_list_elt =
287             pool_elt_at_index (sm->per_thread_data[cpu_index].list_pool,
288                                oldest_per_user_translation_list_index);
289
290           /* Get the session index from the list element */
291           session_index = oldest_per_user_translation_list_elt->value;
292
293           /* Get the session */
294           s = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
295                                  session_index);
296       } while (snat_is_session_static (s));
297
298       /* Remove in2out, out2in keys */
299       kv0.key = s->in2out.as_u64;
300       if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
301           clib_warning ("in2out key delete failed");
302       kv0.key = s->out2in.as_u64;
303       if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
304           clib_warning ("out2in key delete failed");
305
306       /* log NAT event */
307       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
308                                           s->out2in.addr.as_u32,
309                                           s->in2out.protocol,
310                                           s->in2out.port,
311                                           s->out2in.port,
312                                           s->in2out.fib_index);
313
314       snat_free_outside_address_and_port 
315         (sm, &s->out2in, s->outside_address_index);
316       s->outside_address_index = ~0;
317
318       if (snat_alloc_outside_address_and_port (sm, &key1, &address_index))
319         {
320           ASSERT(0);
321
322           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
323           return SNAT_IN2OUT_NEXT_DROP;
324         }
325       s->outside_address_index = address_index;
326     }
327   else
328     {
329       u8 static_mapping = 1;
330
331       /* First try to match static mapping by local address and port */
332       if (snat_static_mapping_match (sm, *key0, &key1, 0))
333         {
334           static_mapping = 0;
335           /* Try to create dynamic translation */
336           if (snat_alloc_outside_address_and_port (sm, &key1, &address_index))
337             {
338               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
339               return SNAT_IN2OUT_NEXT_DROP;
340             }
341         }
342
343       /* Create a new session */
344       pool_get (sm->per_thread_data[cpu_index].sessions, s);
345       memset (s, 0, sizeof (*s));
346       
347       s->outside_address_index = address_index;
348
349       if (static_mapping)
350         {
351           u->nstaticsessions++;
352           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
353         }
354       else
355         {
356           u->nsessions++;
357         }
358
359       /* Create list elts */
360       pool_get (sm->per_thread_data[cpu_index].list_pool,
361                 per_user_translation_list_elt);
362       clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
363                        per_user_translation_list_elt -
364                        sm->per_thread_data[cpu_index].list_pool);
365
366       per_user_translation_list_elt->value =
367         s - sm->per_thread_data[cpu_index].sessions;
368       s->per_user_index = per_user_translation_list_elt -
369                           sm->per_thread_data[cpu_index].list_pool;
370       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
371
372       clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
373                           s->per_user_list_head_index,
374                           per_user_translation_list_elt -
375                           sm->per_thread_data[cpu_index].list_pool);
376    }
377   
378   s->in2out = *key0;
379   s->out2in = key1;
380   s->out2in.protocol = key0->protocol;
381   s->out2in.fib_index = outside_fib_index;
382   *sessionp = s;
383
384   /* Add to translation hashes */
385   kv0.key = s->in2out.as_u64;
386   kv0.value = s - sm->per_thread_data[cpu_index].sessions;
387   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
388       clib_warning ("in2out key add failed");
389   
390   kv0.key = s->out2in.as_u64;
391   kv0.value = s - sm->per_thread_data[cpu_index].sessions;
392   
393   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
394       clib_warning ("out2in key add failed");
395
396   /* Add to translated packets worker lookup */
397   worker_by_out_key.addr = s->out2in.addr;
398   worker_by_out_key.port = s->out2in.port;
399   worker_by_out_key.fib_index = s->out2in.fib_index;
400   kv0.key = worker_by_out_key.as_u64;
401   kv0.value = cpu_index;
402   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
403
404   /* log NAT event */
405   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
406                                       s->out2in.addr.as_u32,
407                                       s->in2out.protocol,
408                                       s->in2out.port,
409                                       s->out2in.port,
410                                       s->in2out.fib_index);
411   return next0;
412 }
413                       
414 typedef struct {
415   u16 src_port, dst_port;
416 } tcp_udp_header_t;
417
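/**
 * @brief In2out translation of ICMP packets (slow path)
 *
 * Echo requests are translated by rewriting the source address and ICMP
 * identifier; ICMP error messages are translated by rewriting the embedded
 * destination address and port instead. Unsupported ICMP types and packets
 * with an invalid ICMP checksum are dropped.
 */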
418 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
419                                          vlib_buffer_t * b0,
420                                          ip4_header_t * ip0,
421                                          icmp46_header_t * icmp0,
422                                          u32 sw_if_index0,
423                                          u32 rx_fib_index0,
424                                          vlib_node_runtime_t * node,
425                                          u32 next0,
426                                          f64 now,
427                                          u32 cpu_index,
428                                          snat_session_t ** p_s0)
429 {
430   snat_session_key_t key0;
431   icmp_echo_header_t *echo0, *inner_echo0 = 0;
432   ip4_header_t *inner_ip0;
433   void *l4_header = 0;
434   icmp46_header_t *inner_icmp0;
435   clib_bihash_kv_8_8_t kv0, value0;
436   snat_session_t * s0 = 0;
437   u32 new_addr0, old_addr0;
438   u16 old_id0, new_id0;
439   ip_csum_t sum0;
440   u16 checksum0;
441   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
442   u8 is_error_message = 0;
443
444   echo0 = (icmp_echo_header_t *)(icmp0+1);
445
446   key0.addr = ip0->src_address;
447   key0.fib_index = rx_fib_index0;
448   
449   switch(icmp0->type)
450     {
451     case ICMP4_destination_unreachable:
452     case ICMP4_time_exceeded:
453     case ICMP4_parameter_problem:
454     case ICMP4_source_quench:
455     case ICMP4_redirect:
456     case ICMP4_alternate_host_address:
457       is_error_message = 1;
458     }
459
460   if (!is_error_message)
461     {
462       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request))
463         {
464           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
465           next0 = SNAT_IN2OUT_NEXT_DROP;
466           goto out;
467         }
468       key0.protocol = SNAT_PROTOCOL_ICMP;
469       key0.port = echo0->identifier;
470     }
471   else
472     {
473       inner_ip0 = (ip4_header_t *)(echo0+1);
474       l4_header = ip4_next_header (inner_ip0);
475       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
476       switch (key0.protocol)
477         {
478         case SNAT_PROTOCOL_ICMP:
479           inner_icmp0 = (icmp46_header_t*)l4_header;
480           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
481           key0.port = inner_echo0->identifier;
482           break;
483         case SNAT_PROTOCOL_UDP:
484         case SNAT_PROTOCOL_TCP:
485           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
486           break;
487         default:
488           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
489           next0 = SNAT_IN2OUT_NEXT_DROP;
490           goto out;
491         }
492     }
493
494   kv0.key = key0.as_u64;
495   
496   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
497     {
498       if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
499           IP_PROTOCOL_ICMP, rx_fib_index0)))
500         goto out;
501
502       if (is_error_message)
503         {
504           next0 = SNAT_IN2OUT_NEXT_DROP;
505           goto out;
506         }
507
508       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
509                          &s0, node, next0, cpu_index);
510       
511       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
512         goto out;
513     }
514   else
515     s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
516                             value0.value);
517
518   sum0 = ip_incremental_checksum (0, icmp0,
519                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
520   checksum0 = ~ip_csum_fold (sum0);
521   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
522     {
523       next0 = SNAT_IN2OUT_NEXT_DROP;
524       goto out;
525     }
526
527   old_addr0 = ip0->src_address.as_u32;
528   ip0->src_address = s0->out2in.addr;
529   new_addr0 = ip0->src_address.as_u32;
530   vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
531
532   sum0 = ip0->checksum;
533   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
534                          src_address /* changed member */);
535   ip0->checksum = ip_csum_fold (sum0);
536   
537   if (!is_error_message)
538     {
539       old_id0 = echo0->identifier;
540       new_id0 = s0->out2in.port;
541       echo0->identifier = new_id0;
542
543       sum0 = icmp0->checksum;
544       sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
545                              identifier);
546       icmp0->checksum = ip_csum_fold (sum0);
547     }
548   else
549     {
550       if (!ip4_header_checksum_is_valid (inner_ip0))
551         {
552           next0 = SNAT_IN2OUT_NEXT_DROP;
553           goto out;
554         }
555
556       old_addr0 = inner_ip0->dst_address.as_u32;
557       inner_ip0->dst_address = s0->out2in.addr;
558       new_addr0 = inner_ip0->dst_address.as_u32;
559
560       sum0 = icmp0->checksum;
561       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
562                              dst_address /* changed member */);
563       icmp0->checksum = ip_csum_fold (sum0);
564
565       switch (key0.protocol)
566         {
567           case SNAT_PROTOCOL_ICMP:
568             old_id0 = inner_echo0->identifier;
569             new_id0 = s0->out2in.port;
570             inner_echo0->identifier = new_id0;
571
572             sum0 = icmp0->checksum;
573             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
574                                    identifier);
575             icmp0->checksum = ip_csum_fold (sum0);
576             break;
577           case SNAT_PROTOCOL_UDP:
578           case SNAT_PROTOCOL_TCP:
579             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
580             new_id0 = s0->out2in.port;
581             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
582
583             sum0 = icmp0->checksum;
584             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
585                                    dst_port);
586             icmp0->checksum = ip_csum_fold (sum0);
587             break;
588           default:
589             ASSERT(0);
590         }
591     }
592
593   /* Accounting */
594   s0->last_heard = now;
595   s0->total_pkts++;
596   s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
597   /* Per-user LRU list maintenance for dynamic translations */
598   if (!snat_is_session_static (s0))
599     {
600       clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
601                          s0->per_user_index);
602       clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
603                           s0->per_user_list_head_index,
604                           s0->per_user_index);
605     }
606
607 out:
608   *p_s0 = s0;
609   return next0;
610 }
611
612 /**
613  * @brief Hairpinning
614  *
615  * Hairpinning allows two endpoints on the internal side of the NAT to
616  * communicate even if they only use each other's external IP addresses
617  * and ports.
618  *
619  * @param sm     SNAT main.
620  * @param b0     Vlib buffer.
621  * @param ip0    IP header.
622  * @param udp0   UDP header.
623  * @param tcp0   TCP header.
624  * @param proto0 SNAT protocol.
625  */
626 static inline void
627 snat_hairpinning (snat_main_t *sm,
628                   vlib_buffer_t * b0,
629                   ip4_header_t * ip0,
630                   udp_header_t * udp0,
631                   tcp_header_t * tcp0,
632                   u32 proto0)
633 {
634   snat_session_key_t key0, sm0;
635   snat_worker_key_t k0;
636   snat_session_t * s0;
637   clib_bihash_kv_8_8_t kv0, value0;
638   ip_csum_t sum0;
639   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
640   u16 new_dst_port0, old_dst_port0;
641
642   key0.addr = ip0->dst_address;
643   key0.port = udp0->dst_port;
644   key0.protocol = proto0;
645   key0.fib_index = sm->outside_fib_index;
646   kv0.key = key0.as_u64;
647
648   /* Check if destination is in active sessions */
649   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
650     {
651       /* or static mappings */
652       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
653         {
654           new_dst_addr0 = sm0.addr.as_u32;
655           new_dst_port0 = sm0.port;
656           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
657         }
658     }
659   else
660     {
661       si = value0.value;
662       if (sm->num_workers > 1)
663         {
664           k0.addr = ip0->dst_address;
665           k0.port = udp0->dst_port;
666           k0.fib_index = sm->outside_fib_index;
667           kv0.key = k0.as_u64;
668           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
669             ASSERT(0);
670           else
671             ti = value0.value;
672         }
673       else
674         ti = sm->num_workers;
675
676       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
677       new_dst_addr0 = s0->in2out.addr.as_u32;
678       new_dst_port0 = s0->in2out.port;
679       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
680     }
681
682   /* Destination is behind the same NAT, use internal address and port */
683   if (new_dst_addr0)
684     {
685       old_dst_addr0 = ip0->dst_address.as_u32;
686       ip0->dst_address.as_u32 = new_dst_addr0;
687       sum0 = ip0->checksum;
688       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
689                              ip4_header_t, dst_address);
690       ip0->checksum = ip_csum_fold (sum0);
691
692       old_dst_port0 = tcp0->dst;
693       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
694         {
695           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
696             {
697               tcp0->dst = new_dst_port0;
698               sum0 = tcp0->checksum;
699               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
700                                      ip4_header_t, dst_address);
701               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
702                                      ip4_header_t /* cheat */, length);
703               tcp0->checksum = ip_csum_fold(sum0);
704             }
705           else
706             {
707               udp0->dst_port = new_dst_port0;
708               udp0->checksum = 0;
709             }
710         }
711     }
712 }
713
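/**
 * @brief Shared in2out node function for the fast and slow path nodes
 *
 * The fast path (is_slow_path = 0) only rewrites packets with an existing
 * session and redirects everything else to the slow path, which creates
 * sessions and also handles ICMP.
 */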
714 static inline uword
715 snat_in2out_node_fn_inline (vlib_main_t * vm,
716                             vlib_node_runtime_t * node,
717                             vlib_frame_t * frame, int is_slow_path)
718 {
719   u32 n_left_from, * from, * to_next;
720   snat_in2out_next_t next_index;
721   u32 pkts_processed = 0;
722   snat_main_t * sm = &snat_main;
723   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
724   f64 now = vlib_time_now (vm);
725   u32 stats_node_index;
726   u32 cpu_index = os_get_cpu_number ();
727
728   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
729     snat_in2out_node.index;
730
731   from = vlib_frame_vector_args (frame);
732   n_left_from = frame->n_vectors;
733   next_index = node->cached_next_index;
734
735   while (n_left_from > 0)
736     {
737       u32 n_left_to_next;
738
739       vlib_get_next_frame (vm, node, next_index,
740                            to_next, n_left_to_next);
741
742       while (n_left_from >= 4 && n_left_to_next >= 2)
743         {
744           u32 bi0, bi1;
745           vlib_buffer_t * b0, * b1;
746           u32 next0, next1;
747           u32 sw_if_index0, sw_if_index1;
748           ip4_header_t * ip0, * ip1;
749           ip_csum_t sum0, sum1;
750           u32 new_addr0, old_addr0, new_addr1, old_addr1;
751           u16 old_port0, new_port0, old_port1, new_port1;
752           udp_header_t * udp0, * udp1;
753           tcp_header_t * tcp0, * tcp1;
754           icmp46_header_t * icmp0, * icmp1;
755           snat_session_key_t key0, key1;
756           u32 rx_fib_index0, rx_fib_index1;
757           u32 proto0, proto1;
758           snat_session_t * s0 = 0, * s1 = 0;
759           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
760           
761           /* Prefetch next iteration. */
762           {
763             vlib_buffer_t * p2, * p3;
764             
765             p2 = vlib_get_buffer (vm, from[2]);
766             p3 = vlib_get_buffer (vm, from[3]);
767             
768             vlib_prefetch_buffer_header (p2, LOAD);
769             vlib_prefetch_buffer_header (p3, LOAD);
770
771             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
772             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
773           }
774
775           /* speculatively enqueue b0 and b1 to the current next frame */
776           to_next[0] = bi0 = from[0];
777           to_next[1] = bi1 = from[1];
778           from += 2;
779           to_next += 2;
780           n_left_from -= 2;
781           n_left_to_next -= 2;
782           
783           b0 = vlib_get_buffer (vm, bi0);
784           b1 = vlib_get_buffer (vm, bi1);
785
786           ip0 = vlib_buffer_get_current (b0);
787           udp0 = ip4_next_header (ip0);
788           tcp0 = (tcp_header_t *) udp0;
789           icmp0 = (icmp46_header_t *) udp0;
790
791           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
792           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
793                                    sw_if_index0);
794
795           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
796
797           proto0 = ip_proto_to_snat_proto (ip0->protocol);
798
799           if (PREDICT_FALSE(ip0->ttl == 1))
800             {
801               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
802               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
803                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
804                                            0);
805               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
806               goto trace00;
807             }
808
809           /* Next configured feature, probably ip4-lookup */
810           if (is_slow_path)
811             {
812               if (PREDICT_FALSE (proto0 == ~0))
813                 goto trace00;
814               
815               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
816                 {
817                   next0 = icmp_in2out_slow_path 
818                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
819                      node, next0, now, cpu_index, &s0);
820                   goto trace00;
821                 }
822             }
823           else
824             {
825               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
826                 {
827                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
828                   goto trace00;
829                 }
830             }
831
832           key0.addr = ip0->src_address;
833           key0.port = udp0->src_port;
834           key0.protocol = proto0;
835           key0.fib_index = rx_fib_index0;
836           
837           kv0.key = key0.as_u64;
838
839           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
840             {
841               if (is_slow_path)
842                 {
843                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
844                       proto0, rx_fib_index0)))
845                     goto trace00;
846
847                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
848                                      &s0, node, next0, cpu_index);
849                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
850                     goto trace00;
851                 }
852               else
853                 {
854                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
855                   goto trace00;
856                 }
857             }
858           else
859             s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
860                                     value0.value);
861
862           old_addr0 = ip0->src_address.as_u32;
863           ip0->src_address = s0->out2in.addr;
864           new_addr0 = ip0->src_address.as_u32;
865           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
866
867           sum0 = ip0->checksum;
868           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
869                                  ip4_header_t,
870                                  src_address /* changed member */);
871           ip0->checksum = ip_csum_fold (sum0);
872
873           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
874             {
875               old_port0 = tcp0->src_port;
876               tcp0->src_port = s0->out2in.port;
877               new_port0 = tcp0->src_port;
878
879               sum0 = tcp0->checksum;
880               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
881                                      ip4_header_t,
882                                      dst_address /* changed member */);
883               sum0 = ip_csum_update (sum0, old_port0, new_port0,
884                                      ip4_header_t /* cheat */,
885                                      length /* changed member */);
886               tcp0->checksum = ip_csum_fold(sum0);
887             }
888           else
889             {
890               old_port0 = udp0->src_port;
891               udp0->src_port = s0->out2in.port;
892               udp0->checksum = 0;
893             }
894
895           /* Hairpinning */
896           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
897
898           /* Accounting */
899           s0->last_heard = now;
900           s0->total_pkts++;
901           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
902           /* Per-user LRU list maintenance for dynamic translation */
903           if (!snat_is_session_static (s0))
904             {
905               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
906                                  s0->per_user_index);
907               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
908                                   s0->per_user_list_head_index,
909                                   s0->per_user_index);
910             }
911         trace00:
912
913           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
914                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
915             {
916               snat_in2out_trace_t *t = 
917                  vlib_add_trace (vm, node, b0, sizeof (*t));
918               t->is_slow_path = is_slow_path;
919               t->sw_if_index = sw_if_index0;
920               t->next_index = next0;
921               t->session_index = ~0;
922               if (s0)
923                 t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
924             }
925
926           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
927
928           ip1 = vlib_buffer_get_current (b1);
929           udp1 = ip4_next_header (ip1);
930           tcp1 = (tcp_header_t *) udp1;
931           icmp1 = (icmp46_header_t *) udp1;
932
933           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
934           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
935                                    sw_if_index1);
936
937           proto1 = ip_proto_to_snat_proto (ip1->protocol);
938
939           if (PREDICT_FALSE(ip1->ttl == 1))
940             {
941               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
942               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
943                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
944                                            0);
945               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
946               goto trace01;
947             }
948
949           /* Next configured feature, probably ip4-lookup */
950           if (is_slow_path)
951             {
952               if (PREDICT_FALSE (proto1 == ~0))
953                 goto trace01;
954               
955               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
956                 {
957                   next1 = icmp_in2out_slow_path 
958                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
959                      next1, now, cpu_index, &s1);
960                   goto trace01;
961                 }
962             }
963           else
964             {
965               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
966                 {
967                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
968                   goto trace01;
969                 }
970             }
971
972           key1.addr = ip1->src_address;
973           key1.port = udp1->src_port;
974           key1.protocol = proto1;
975           key1.fib_index = rx_fib_index1;
976           
977           kv1.key = key1.as_u64;
978
979           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
980             {
981               if (is_slow_path)
982                 {
983                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index1, ip1,
984                       proto1, rx_fib_index1)))
985                     goto trace01;
986
987                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
988                                      &s1, node, next1, cpu_index);
989                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
990                     goto trace01;
991                 }
992               else
993                 {
994                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
995                   goto trace01;
996                 }
997             }
998           else
999             s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
1000                                     value1.value);
1001
1002           old_addr1 = ip1->src_address.as_u32;
1003           ip1->src_address = s1->out2in.addr;
1004           new_addr1 = ip1->src_address.as_u32;
1005           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1006
1007           sum1 = ip1->checksum;
1008           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1009                                  ip4_header_t,
1010                                  src_address /* changed member */);
1011           ip1->checksum = ip_csum_fold (sum1);
1012
1013           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1014             {
1015               old_port1 = tcp1->src_port;
1016               tcp1->src_port = s1->out2in.port;
1017               new_port1 = tcp1->src_port;
1018
1019               sum1 = tcp1->checksum;
1020               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1021                                      ip4_header_t,
1022                                      dst_address /* changed member */);
1023               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1024                                      ip4_header_t /* cheat */,
1025                                      length /* changed member */);
1026               tcp1->checksum = ip_csum_fold(sum1);
1027             }
1028           else
1029             {
1030               old_port1 = udp1->src_port;
1031               udp1->src_port = s1->out2in.port;
1032               udp1->checksum = 0;
1033             }
1034
1035           /* Hairpinning */
1036           snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
1037
1038           /* Accounting */
1039           s1->last_heard = now;
1040           s1->total_pkts++;
1041           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1042           /* Per-user LRU list maintenance for dynamic translation */
1043           if (!snat_is_session_static (s1))
1044             {
1045               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
1046                                  s1->per_user_index);
1047               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
1048                                   s1->per_user_list_head_index,
1049                                   s1->per_user_index);
1050             }
1051         trace01:
1052
1053           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1054                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
1055             {
1056               snat_in2out_trace_t *t = 
1057                  vlib_add_trace (vm, node, b1, sizeof (*t));
1058               t->sw_if_index = sw_if_index1;
                   t->is_slow_path = is_slow_path;
1059               t->next_index = next1;
1060               t->session_index = ~0;
1061               if (s1)
1062                 t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
1063             }
1064
1065           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1066
1067           /* verify speculative enqueues, maybe switch current next frame */
1068           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1069                                            to_next, n_left_to_next,
1070                                            bi0, bi1, next0, next1);
1071         }
1072
1073       while (n_left_from > 0 && n_left_to_next > 0)
1074         {
1075           u32 bi0;
1076           vlib_buffer_t * b0;
1077           u32 next0;
1078           u32 sw_if_index0;
1079           ip4_header_t * ip0;
1080           ip_csum_t sum0;
1081           u32 new_addr0, old_addr0;
1082           u16 old_port0, new_port0;
1083           udp_header_t * udp0;
1084           tcp_header_t * tcp0;
1085           icmp46_header_t * icmp0;
1086           snat_session_key_t key0;
1087           u32 rx_fib_index0;
1088           u32 proto0;
1089           snat_session_t * s0 = 0;
1090           clib_bihash_kv_8_8_t kv0, value0;
1091           
1092           /* speculatively enqueue b0 to the current next frame */
1093           bi0 = from[0];
1094           to_next[0] = bi0;
1095           from += 1;
1096           to_next += 1;
1097           n_left_from -= 1;
1098           n_left_to_next -= 1;
1099
1100           b0 = vlib_get_buffer (vm, bi0);
1101           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1102
1103           ip0 = vlib_buffer_get_current (b0);
1104           udp0 = ip4_next_header (ip0);
1105           tcp0 = (tcp_header_t *) udp0;
1106           icmp0 = (icmp46_header_t *) udp0;
1107
1108           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1109           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1110                                    sw_if_index0);
1111
1112           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1113
1114           if (PREDICT_FALSE(ip0->ttl == 1))
1115             {
1116               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1117               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1118                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1119                                            0);
1120               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1121               goto trace0;
1122             }
1123
1124           /* Next configured feature, probably ip4-lookup */
1125           if (is_slow_path)
1126             {
1127               if (PREDICT_FALSE (proto0 == ~0))
1128                 goto trace0;
1129               
1130               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1131                 {
1132                   next0 = icmp_in2out_slow_path 
1133                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1134                      next0, now, cpu_index, &s0);
1135                   goto trace0;
1136                 }
1137             }
1138           else
1139             {
1140               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1141                 {
1142                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1143                   goto trace0;
1144                 }
1145             }
1146
1147           key0.addr = ip0->src_address;
1148           key0.port = udp0->src_port;
1149           key0.protocol = proto0;
1150           key0.fib_index = rx_fib_index0;
1151           
1152           kv0.key = key0.as_u64;
1153
1154           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
1155             {
1156               if (is_slow_path)
1157                 {
1158                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
1159                       proto0, rx_fib_index0)))
1160                     goto trace0;
1161
1162                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1163                                      &s0, node, next0, cpu_index);
1164
1165                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1166                     goto trace0;
1167                 }
1168               else
1169                 {
1170                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1171                   goto trace0;
1172                 }
1173             }
1174           else
1175             s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
1176                                     value0.value);
1177
1178           old_addr0 = ip0->src_address.as_u32;
1179           ip0->src_address = s0->out2in.addr;
1180           new_addr0 = ip0->src_address.as_u32;
1181           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1182
1183           sum0 = ip0->checksum;
1184           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1185                                  ip4_header_t,
1186                                  src_address /* changed member */);
1187           ip0->checksum = ip_csum_fold (sum0);
1188
1189           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1190             {
1191               old_port0 = tcp0->src_port;
1192               tcp0->src_port = s0->out2in.port;
1193               new_port0 = tcp0->src_port;
1194
1195               sum0 = tcp0->checksum;
1196               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1197                                      ip4_header_t,
1198                                      dst_address /* changed member */);
1199               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1200                                      ip4_header_t /* cheat */,
1201                                      length /* changed member */);
1202               tcp0->checksum = ip_csum_fold(sum0);
1203             }
1204           else
1205             {
1206               old_port0 = udp0->src_port;
1207               udp0->src_port = s0->out2in.port;
1208               udp0->checksum = 0;
1209             }
1210
1211           /* Hairpinning */
1212           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1213
1214           /* Accounting */
1215           s0->last_heard = now;
1216           s0->total_pkts++;
1217           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1218           /* Per-user LRU list maintenance for dynamic translation */
1219           if (!snat_is_session_static (s0))
1220             {
1221               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
1222                                  s0->per_user_index);
1223               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
1224                                   s0->per_user_list_head_index,
1225                                   s0->per_user_index);
1226             }
1227
1228         trace0:
1229           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1230                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1231             {
1232               snat_in2out_trace_t *t = 
1233                  vlib_add_trace (vm, node, b0, sizeof (*t));
1234               t->is_slow_path = is_slow_path;
1235               t->sw_if_index = sw_if_index0;
1236               t->next_index = next0;
1237               t->session_index = ~0;
1238               if (s0)
1239                 t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
1240             }
1241
1242           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1243
1244           /* verify speculative enqueue, maybe switch current next frame */
1245           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1246                                            to_next, n_left_to_next,
1247                                            bi0, next0);
1248         }
1249
1250       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1251     }
1252
1253   vlib_node_increment_counter (vm, stats_node_index, 
1254                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, 
1255                                pkts_processed);
1256   return frame->n_vectors;
1257 }
1258
1259 static uword
1260 snat_in2out_fast_path_fn (vlib_main_t * vm,
1261                           vlib_node_runtime_t * node,
1262                           vlib_frame_t * frame)
1263 {
1264   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */);
1265 }
1266
1267 VLIB_REGISTER_NODE (snat_in2out_node) = {
1268   .function = snat_in2out_fast_path_fn,
1269   .name = "snat-in2out",
1270   .vector_size = sizeof (u32),
1271   .format_trace = format_snat_in2out_trace,
1272   .type = VLIB_NODE_TYPE_INTERNAL,
1273   
1274   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1275   .error_strings = snat_in2out_error_strings,
1276
1277   .runtime_data_bytes = sizeof (snat_runtime_t),
1278   
1279   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1280
1281   /* edit / add dispositions here */
1282   .next_nodes = {
1283     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1284     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1285     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1286     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1287   },
1288 };
1289
1290 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1291
1292 static uword
1293 snat_in2out_slow_path_fn (vlib_main_t * vm,
1294                           vlib_node_runtime_t * node,
1295                           vlib_frame_t * frame)
1296 {
1297   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */);
1298 }
1299
1300 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1301   .function = snat_in2out_slow_path_fn,
1302   .name = "snat-in2out-slowpath",
1303   .vector_size = sizeof (u32),
1304   .format_trace = format_snat_in2out_trace,
1305   .type = VLIB_NODE_TYPE_INTERNAL,
1306   
1307   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1308   .error_strings = snat_in2out_error_strings,
1309
1310   .runtime_data_bytes = sizeof (snat_runtime_t),
1311   
1312   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1313
1314   /* edit / add dispositions here */
1315   .next_nodes = {
1316     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1317     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1318     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1319     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1320   },
1321 };
1322
1323 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
1324
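/**
 * @brief In2out worker handoff
 *
 * Assigns each inside "user" (source address and FIB) to a worker thread,
 * round-robin on first sight, then hands packets off to that worker's
 * snat-in2out frame queue; packets already on the right worker go straight
 * to snat-in2out.
 */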
1325 static uword
1326 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
1327                                vlib_node_runtime_t * node,
1328                                vlib_frame_t * frame)
1329 {
1330   snat_main_t *sm = &snat_main;
1331   vlib_thread_main_t *tm = vlib_get_thread_main ();
1332   u32 n_left_from, *from, *to_next = 0;
1333   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
1334   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
1335     = 0;
1336   vlib_frame_queue_elt_t *hf = 0;
1337   vlib_frame_t *f = 0;
1338   int i;
1339   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
1340   u32 next_worker_index = 0;
1341   u32 current_worker_index = ~0;
1342   u32 cpu_index = os_get_cpu_number ();
1343
1344   ASSERT (vec_len (sm->workers));
1345
1346   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
1347     {
1348       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
1349
1350       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
1351                                sm->first_worker_index + sm->num_workers - 1,
1352                                (vlib_frame_queue_t *) (~0));
1353     }
1354
1355   from = vlib_frame_vector_args (frame);
1356   n_left_from = frame->n_vectors;
1357
1358   while (n_left_from > 0)
1359     {
1360       u32 bi0;
1361       vlib_buffer_t *b0;
1362       u32 sw_if_index0;
1363       u32 rx_fib_index0;
1364       ip4_header_t * ip0;
1365       snat_user_key_t key0;
1366       clib_bihash_kv_8_8_t kv0, value0;
1367       u8 do_handoff;
1368
1369       bi0 = from[0];
1370       from += 1;
1371       n_left_from -= 1;
1372
1373       b0 = vlib_get_buffer (vm, bi0);
1374
1375       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1376       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1377
1378       ip0 = vlib_buffer_get_current (b0);
1379
1380       key0.addr = ip0->src_address;
1381       key0.fib_index = rx_fib_index0;
1382
1383       kv0.key = key0.as_u64;
1384
1385       /* Ever heard of the "user" before? */
1386       if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0))
1387         {
1388           /* No, assign next available worker (RR) */
1389           next_worker_index = sm->first_worker_index;
1390           if (vec_len (sm->workers))
1391             {
1392               next_worker_index += 
1393                 sm->workers[sm->next_worker++ % _vec_len (sm->workers)];
1394             }
1395
1396           /* add non-translated packets worker lookup */
1397           kv0.value = next_worker_index;
1398           clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
1399         }
1400       else
1401         next_worker_index = value0.value;
1402
1403       if (PREDICT_FALSE (next_worker_index != cpu_index))
1404         {
1405           do_handoff = 1;
1406
1407           if (next_worker_index != current_worker_index)
1408             {
1409               if (hf)
1410                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1411
1412               hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
1413                                                       next_worker_index,
1414                                                       handoff_queue_elt_by_worker_index);
1415
1416               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
1417               to_next_worker = &hf->buffer_index[hf->n_vectors];
1418               current_worker_index = next_worker_index;
1419             }
1420
1421           /* enqueue to correct worker thread */
1422           to_next_worker[0] = bi0;
1423           to_next_worker++;
1424           n_left_to_next_worker--;
1425
1426           if (n_left_to_next_worker == 0)
1427             {
1428               hf->n_vectors = VLIB_FRAME_SIZE;
1429               vlib_put_frame_queue_elt (hf);
1430               current_worker_index = ~0;
1431               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
1432               hf = 0;
1433             }
1434         }
1435       else
1436         {
1437           do_handoff = 0;
1438           /* if this is 1st frame */
1439           if (!f)
1440             {
1441               f = vlib_get_frame_to_node (vm, snat_in2out_node.index);
1442               to_next = vlib_frame_vector_args (f);
1443             }
1444
1445           to_next[0] = bi0;
1446           to_next += 1;
1447           f->n_vectors++;
1448         }
1449
1450       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1451                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1452         {
1453           snat_in2out_worker_handoff_trace_t *t =
1454             vlib_add_trace (vm, node, b0, sizeof (*t));
1455           t->next_worker_index = next_worker_index;
1456           t->do_handoff = do_handoff;
1457         }
1458     }
1459
1460   if (f)
1461     vlib_put_frame_to_node (vm, snat_in2out_node.index, f);
1462
1463   if (hf)
1464     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1465
1466   /* Ship frames to the worker nodes */
1467   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
1468     {
1469       if (handoff_queue_elt_by_worker_index[i])
1470         {
1471           hf = handoff_queue_elt_by_worker_index[i];
1472           /*
1473            * It works better to let the handoff node
1474            * rate-adapt, always ship the handoff queue element.
1475            */
1476           if (1 || hf->n_vectors == hf->last_n_vectors)
1477             {
1478               vlib_put_frame_queue_elt (hf);
1479               handoff_queue_elt_by_worker_index[i] = 0;
1480             }
1481           else
1482             hf->last_n_vectors = hf->n_vectors;
1483         }
1484       congested_handoff_queue_by_worker_index[i] =
1485         (vlib_frame_queue_t *) (~0);
1486     }
1487   hf = 0;
1488   current_worker_index = ~0;
1489   return frame->n_vectors;
1490 }
1491
1492 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
1493   .function = snat_in2out_worker_handoff_fn,
1494   .name = "snat-in2out-worker-handoff",
1495   .vector_size = sizeof (u32),
1496   .format_trace = format_snat_in2out_worker_handoff_trace,
1497   .type = VLIB_NODE_TYPE_INTERNAL,
1498   
1499   .n_next_nodes = 1,
1500
1501   .next_nodes = {
1502     [0] = "error-drop",
1503   },
1504 };
1505
1506 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
1507
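/*
 * in2out ICMP echo translation using static mappings only: rewrite the
 * source address (and the echo identifier when the mapping assigns a
 * different one) and incrementally patch the IP and ICMP checksums.
 * Packets with no matching static mapping are dropped, unless
 * snat_not_translate() decides they should bypass NAT, in which case
 * they continue untranslated.
 */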
1508 static inline u32 icmp_in2out_static_map (snat_main_t *sm,
1509                                           vlib_buffer_t * b0,
1510                                           ip4_header_t * ip0,
1511                                           icmp46_header_t * icmp0,
1512                                           u32 sw_if_index0,
1513                                           vlib_node_runtime_t * node,
1514                                           u32 next0,
1515                                           u32 rx_fib_index0)
1516 {
1517   snat_session_key_t key0, sm0;
1518   icmp_echo_header_t *echo0;
1519   u32 new_addr0, old_addr0;
1520   u16 old_id0, new_id0;
1521   ip_csum_t sum0;
1522   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
1523
1524   echo0 = (icmp_echo_header_t *)(icmp0+1);
1525
1526   key0.addr = ip0->src_address;
1527   key0.port = echo0->identifier;
1528   key0.fib_index = rx_fib_index0;
1529   
1530   if (snat_static_mapping_match(sm, key0, &sm0, 0))
1531     {
1532       if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
1533           IP_PROTOCOL_ICMP, rx_fib_index0)))
1534         return next0;
1535
1536       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1537       return SNAT_IN2OUT_NEXT_DROP;
1538     }
1539
1540   new_addr0 = sm0.addr.as_u32;
1541   new_id0 = sm0.port;
1542   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1543   old_addr0 = ip0->src_address.as_u32;
1544   ip0->src_address.as_u32 = new_addr0;
1545   
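  /* Incrementally update the IP header checksum for the rewritten
     source address instead of recomputing it from scratch. */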
1546   sum0 = ip0->checksum;
1547   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1548                          ip4_header_t,
1549                          src_address /* changed member */);
1550   ip0->checksum = ip_csum_fold (sum0);
1551   
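  /* Rewrite the echo identifier only when the mapping assigns a new one,
     and patch the ICMP checksum accordingly. */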
1552   if (PREDICT_FALSE(new_id0 != echo0->identifier))
1553     {
1554       old_id0 = echo0->identifier;
1555       echo0->identifier = new_id0;
1556
1557       sum0 = icmp0->checksum;
1558       sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
1559                              identifier);
1560       icmp0->checksum = ip_csum_fold (sum0);
1561     }
1562
1563   return next0;
1564 }
1565
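/*
 * in2out fast path: stateless translation driven purely by static
 * mappings; no dynamic NAT sessions are created or looked up.
 */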
1566 static uword
1567 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
1568                                 vlib_node_runtime_t * node,
1569                                 vlib_frame_t * frame)
1570 {
1571   u32 n_left_from, * from, * to_next;
1572   snat_in2out_next_t next_index;
1573   u32 pkts_processed = 0;
1574   snat_main_t * sm = &snat_main;
1575   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
1576   u32 stats_node_index;
1577
1578   stats_node_index = snat_in2out_fast_node.index;
1579
1580   from = vlib_frame_vector_args (frame);
1581   n_left_from = frame->n_vectors;
1582   next_index = node->cached_next_index;
1583
1584   while (n_left_from > 0)
1585     {
1586       u32 n_left_to_next;
1587
1588       vlib_get_next_frame (vm, node, next_index,
1589                            to_next, n_left_to_next);
1590
1591       while (n_left_from > 0 && n_left_to_next > 0)
1592         {
1593           u32 bi0;
1594           vlib_buffer_t * b0;
1595           u32 next0;
1596           u32 sw_if_index0;
1597           ip4_header_t * ip0;
1598           ip_csum_t sum0;
1599           u32 new_addr0, old_addr0;
1600           u16 old_port0, new_port0;
1601           udp_header_t * udp0;
1602           tcp_header_t * tcp0;
1603           icmp46_header_t * icmp0;
1604           snat_session_key_t key0, sm0;
1605           u32 proto0;
1606           u32 rx_fib_index0;
1607
1608           /* speculatively enqueue b0 to the current next frame */
1609           bi0 = from[0];
1610           to_next[0] = bi0;
1611           from += 1;
1612           to_next += 1;
1613           n_left_from -= 1;
1614           n_left_to_next -= 1;
1615
1616           b0 = vlib_get_buffer (vm, bi0);
1617           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1618
1619           ip0 = vlib_buffer_get_current (b0);
1620           udp0 = ip4_next_header (ip0);
1621           tcp0 = (tcp_header_t *) udp0;
1622           icmp0 = (icmp46_header_t *) udp0;
1623
1624           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1625           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1626
1627           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1628
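          /* Unsupported protocols are left untranslated and continue to
             ip4-lookup. */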
1629           if (PREDICT_FALSE (proto0 == ~0))
1630               goto trace0;
1631
1632           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1633             {
1634               if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
1635                   proto0, rx_fib_index0)))
1636                 goto trace0;
1637
1638               next0 = icmp_in2out_static_map
1639                 (sm, b0, ip0, icmp0, sw_if_index0, node, next0, rx_fib_index0);
1640               goto trace0;
1641             }
1642
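          /* TCP/UDP: key the static mapping lookup on the inside source
             address, source port and rx FIB index. */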
1643           key0.addr = ip0->src_address;
1644           key0.port = udp0->src_port;
1645           key0.fib_index = rx_fib_index0;
1646
1647           if (snat_static_mapping_match(sm, key0, &sm0, 0))
1648             {
1649               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1650               next0 = SNAT_IN2OUT_NEXT_DROP;
1651               goto trace0;
1652             }
1653
1654           new_addr0 = sm0.addr.as_u32;
1655           new_port0 = sm0.port;
1656           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1657           old_addr0 = ip0->src_address.as_u32;
1658           ip0->src_address.as_u32 = new_addr0;
1659
1660           sum0 = ip0->checksum;
1661           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1662                                  ip4_header_t,
1663                                  src_address /* changed member */);
1664           ip0->checksum = ip_csum_fold (sum0);
1665
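          /* If the static mapping assigns a different external port,
             rewrite the L4 source port too; otherwise only the address
             changed and the TCP checksum just needs the address delta. */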
1666           if (PREDICT_FALSE(new_port0 != udp0->src_port))
1667             {
1668               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1669                 {
1670                   old_port0 = tcp0->src_port;
1671                   tcp0->src_port = new_port0;
1672
1673                   sum0 = tcp0->checksum;
1674                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1675                                          ip4_header_t,
1676                                          src_address /* changed member */);
1677                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
1678                                          ip4_header_t /* cheat */,
1679                                          length /* changed member */);
1680                   tcp0->checksum = ip_csum_fold(sum0);
1681                 }
1682               else
1683                 {
1684                   old_port0 = udp0->src_port;
1685                   udp0->src_port = new_port0;
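                  /* a zero UDP checksum is valid over IPv4: checksum not
                     computed */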
1686                   udp0->checksum = 0;
1687                 }
1688             }
1689           else
1690             {
1691               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1692                 {
1693                   sum0 = tcp0->checksum;
1694                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1695                                          ip4_header_t,
1696                                          src_address /* changed member */);
1697                   tcp0->checksum = ip_csum_fold(sum0);
1698                 }
1699             }
1700
1701           /* Hairpinning: fix up packets sent to another inside host via its external (translated) address */
1702           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1703
1704         trace0:
1705           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1706                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1707             {
1708               snat_in2out_trace_t *t =
1709                  vlib_add_trace (vm, node, b0, sizeof (*t));
1710               t->sw_if_index = sw_if_index0;
1711               t->next_index = next0;
1712             }
1713
1714           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1715
1716           /* verify speculative enqueue, maybe switch current next frame */
1717           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1718                                            to_next, n_left_to_next,
1719                                            bi0, next0);
1720         }
1721
1722       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1723     }
1724
1725   vlib_node_increment_counter (vm, stats_node_index,
1726                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1727                                pkts_processed);
1728   return frame->n_vectors;
1729 }
1730
1731
1732 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
1733   .function = snat_in2out_fast_static_map_fn,
1734   .name = "snat-in2out-fast",
1735   .vector_size = sizeof (u32),
1736   .format_trace = format_snat_in2out_fast_trace,
1737   .type = VLIB_NODE_TYPE_INTERNAL,
1738   
1739   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1740   .error_strings = snat_in2out_error_strings,
1741
1742   .runtime_data_bytes = sizeof (snat_runtime_t),
1743   
1744   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1745
1746   /* edit / add dispositions here */
1747   .next_nodes = {
1748     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1749     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1750     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1751     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1752   },
1753 };
1754
1755 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);