CGN: fix outside port calculation and set buffer error (VPP-623)
[vpp.git] / src / plugins / snat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <snat/snat.h>
25 #include <snat/snat_ipfix_logging.h>
26 #include <snat/snat_det.h>
27
28 #include <vppinfra/hash.h>
29 #include <vppinfra/error.h>
30 #include <vppinfra/elog.h>
31
32 typedef struct {
33   u32 sw_if_index;
34   u32 next_index;
35   u32 session_index;
36   u32 is_slow_path;
37 } snat_in2out_trace_t;
38
39 typedef struct {
40   u32 next_worker_index;
41   u8 do_handoff;
42 } snat_in2out_worker_handoff_trace_t;
43
44 /* packet trace format function */
45 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
46 {
47   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
50   char * tag;
51
52   tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
53   
54   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
55               t->sw_if_index, t->next_index, t->session_index);
56
57   return s;
58 }
59
60 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
61 {
62   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
63   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
64   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
65
66   s = format (s, "SANT_IN2OUT_FAST: sw_if_index %d, next index %d", 
67               t->sw_if_index, t->next_index);
68
69   return s;
70 }
71
72 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
73 {
74   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
75   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
76   snat_in2out_worker_handoff_trace_t * t =
77     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
78   char * m;
79
80   m = t->do_handoff ? "next worker" : "same worker";
81   s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
82
83   return s;
84 }
85
/*
 * Declarations of the graph node registrations used by this file, so that
 * code above their definitions can refer to them (snat_in2out_node.index and
 * snat_in2out_slowpath_node.index are read when the in2out node function
 * picks its stats node index).
 * NOTE(review): the initialised definitions are presumably further down the
 * file, outside the part reviewed here - confirm.
 */
86 vlib_node_registration_t snat_in2out_node;
87 vlib_node_registration_t snat_in2out_slowpath_node;
88 vlib_node_registration_t snat_in2out_fast_node;
89 vlib_node_registration_t snat_in2out_worker_handoff_node;
90 vlib_node_registration_t snat_det_in2out_node;
91
/*
 * in2out error counters as an X-macro list of (symbol suffix, description).
 * The list is expanded twice below: once into the snat_in2out_error_t
 * enumerators and once into the matching description strings, so both stay
 * in the same order.
 */
#define foreach_snat_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(IN2OUT_PACKETS, "Good in2out packets processed")      \
_(OUT_OF_PORTS, "Out of ports")                         \
_(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
_(BAD_ICMP_TYPE, "icmp type not echo-request")          \
_(NO_TRANSLATION, "No translation")

/* SNAT_IN2OUT_ERROR_<suffix> enumerators; SNAT_IN2OUT_N_ERROR is the count */
typedef enum
{
#define _(name, text) SNAT_IN2OUT_ERROR_##name,
  foreach_snat_in2out_error
#undef _
  SNAT_IN2OUT_N_ERROR,
} snat_in2out_error_t;

/* Description strings, indexed by snat_in2out_error_t */
static char *snat_in2out_error_strings[] = {
#define _(name, text) text,
  foreach_snat_in2out_error
#undef _
};
112
/* Next-node indices used by the in2out nodes */
typedef enum
{
  SNAT_IN2OUT_NEXT_LOOKUP = 0,  /* default next for processed packets */
  SNAT_IN2OUT_NEXT_DROP,        /* packet is dropped */
  SNAT_IN2OUT_NEXT_SLOW_PATH,   /* fast path hands the packet to the slow path */
  SNAT_IN2OUT_NEXT_ICMP_ERROR,  /* used when the TTL is 1 (time exceeded) */
  SNAT_IN2OUT_N_NEXT,           /* number of next indices */
} snat_in2out_next_t;
120
121 /**
122  * @brief Check if packet should be translated
123  *
124  * Packets aimed at outside interface and external addresss with active session
125  * should be translated.
126  *
127  * @param sm            SNAT main
128  * @param rt            SNAT runtime data
129  * @param sw_if_index0  index of the inside interface
130  * @param ip0           IPv4 header
131  * @param proto0        SNAT protocol
132  * @param rx_fib_index0 RX FIB index
133  *
134  * @returns 0 if packet should be translated otherwise 1
135  */
136 static inline int
137 snat_not_translate (snat_main_t * sm, snat_runtime_t * rt, u32 sw_if_index0,
138                    ip4_header_t * ip0, u32 proto0, u32 rx_fib_index0)
139 {
140   ip4_address_t * first_int_addr;
141   udp_header_t * udp0 = ip4_next_header (ip0);
142   snat_session_key_t key0, sm0;
143   clib_bihash_kv_8_8_t kv0, value0;
144   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
145   fib_prefix_t pfx = {
146     .fp_proto = FIB_PROTOCOL_IP4,
147     .fp_len = 32,
148     .fp_addr = {
149         .ip4.as_u32 = ip0->dst_address.as_u32,
150     },
151   };
152
153   if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
154     {
155       first_int_addr =
156         ip4_interface_first_address (sm->ip4_main, sw_if_index0,
157                                      0 /* just want the address */);
158       rt->cached_sw_if_index = sw_if_index0;
159       if (first_int_addr)
160         rt->cached_ip4_address = first_int_addr->as_u32;
161       else
162         rt->cached_ip4_address = 0;
163     }
164
165   /* Don't NAT packet aimed at the intfc address */
166   if (PREDICT_FALSE(ip0->dst_address.as_u32 == rt->cached_ip4_address))
167     return 1;
168
169   key0.addr = ip0->dst_address;
170   key0.port = udp0->dst_port;
171   key0.protocol = proto0;
172   key0.fib_index = sm->outside_fib_index;
173   kv0.key = key0.as_u64;
174
175   /* NAT packet aimed at external address if */
176   /* has active sessions */
177   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
178     {
179       /* or is static mappings */
180       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
181         return 0;
182     }
183   else
184     return 0;
185
186   fei = fib_table_lookup (rx_fib_index0, &pfx);
187   if (FIB_NODE_INDEX_INVALID != fei)
188     {
189       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
190       if (sw_if_index == ~0)
191         {
192           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
193           if (FIB_NODE_INDEX_INVALID != fei)
194             sw_if_index = fib_entry_get_resolving_interface (fei);
195         }
196       snat_interface_t *i;
197       pool_foreach (i, sm->interfaces,
198       ({
199         /* NAT packet aimed at outside interface */
200         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
201           return 0;
202       }));
203     }
204
205   return 1;
206 }
207
/**
 * @brief Create (or recycle) the translation session for a new in2out flow
 *
 * Resolves the outside FIB index from sm->outside_vrf_id, finds or creates
 * the user record keyed by (source address, RX FIB index), and then either
 *  - recycles that user's least recently used dynamic session, when the user
 *    already has sm->max_translations_per_user dynamic sessions, or
 *  - creates a new session, using a matching static mapping if there is one
 *    and a dynamically allocated outside address/port otherwise.
 * The session is finally installed in the in2out, out2in and worker_by_out
 * hash tables and a NAT44 session-create event is logged.
 *
 * @param sm            SNAT main
 * @param b0            buffer; only b0->error is written, on failure
 * @param ip0           IPv4 header; only the source address is read
 * @param rx_fib_index0 RX FIB index
 * @param key0          in2out key of the new flow
 * @param sessionp      [out] created/recycled session; untouched on failure
 * @param node          node runtime, source of the error counters
 * @param next0         next index returned on success
 * @param cpu_index     index into sm->per_thread_data
 *
 * @returns next0 on success, SNAT_IN2OUT_NEXT_DROP (with b0->error set) when
 *          the outside VRF is unknown or no outside address/port is available
 */
208 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
209                       ip4_header_t * ip0,
210                       u32 rx_fib_index0,
211                       snat_session_key_t * key0,
212                       snat_session_t ** sessionp,
213                       vlib_node_runtime_t * node,
214                       u32 next0,
215                       u32 cpu_index)
216 {
217   snat_user_t *u;
218   snat_user_key_t user_key;
219   snat_session_t *s;
220   clib_bihash_kv_8_8_t kv0, value0;
221   u32 oldest_per_user_translation_list_index;
222   dlist_elt_t * oldest_per_user_translation_list_elt;
223   dlist_elt_t * per_user_translation_list_elt;
224   dlist_elt_t * per_user_list_head_elt;
225   u32 session_index;
226   snat_session_key_t key1;
227   u32 address_index = ~0;
228   u32 outside_fib_index;
229   uword * p;
230   snat_worker_key_t worker_by_out_key;
231
      /* Map the configured outside VRF id to its FIB index */
232   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
233   if (! p)
234     {
235       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
236       return SNAT_IN2OUT_NEXT_DROP;
237     }
238   outside_fib_index = p[0];
239
240   key1.protocol = key0->protocol;
241   user_key.addr = ip0->src_address;
242   user_key.fib_index = rx_fib_index0;
243   kv0.key = user_key.as_u64;
244   
245   /* Ever heard of the "user" = src ip4 address before? */
246   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
247     {
248       /* no, make a new one */
249       pool_get (sm->per_thread_data[cpu_index].users, u);
250       memset (u, 0, sizeof (*u));
251       u->addr = ip0->src_address;
252       u->fib_index = rx_fib_index0;
253
          /* Allocate and initialise the head of this user's session list */
254       pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt);
255
256       u->sessions_per_user_list_head_index = per_user_list_head_elt -
257         sm->per_thread_data[cpu_index].list_pool;
258
259       clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
260                        u->sessions_per_user_list_head_index);
261
          /* The hash value is the user's index in this thread's user pool */
262       kv0.value = u - sm->per_thread_data[cpu_index].users;
263
264       /* add user */
265       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
266     }
267   else
268     {
269       u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
270                              value0.value);
271     }
272
273   /* Over quota? Recycle the least recently used dynamic translation */
274   if (u->nsessions >= sm->max_translations_per_user)
275     {
276       /* Remove the oldest dynamic translation */
          /* Each candidate is moved from the head to the tail of the list
           * before it is inspected, so static sessions are rotated past and
           * the chosen session already sits at the most-recently-used end. */
277       do {
278           oldest_per_user_translation_list_index =
279             clib_dlist_remove_head (sm->per_thread_data[cpu_index].list_pool,
280                                     u->sessions_per_user_list_head_index);
281
282           ASSERT (oldest_per_user_translation_list_index != ~0);
283
284           /* add it back to the end of the LRU list */
285           clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
286                               u->sessions_per_user_list_head_index,
287                               oldest_per_user_translation_list_index);
288           /* Get the list element */
289           oldest_per_user_translation_list_elt =
290             pool_elt_at_index (sm->per_thread_data[cpu_index].list_pool,
291                                oldest_per_user_translation_list_index);
292
293           /* Get the session index from the list element */
294           session_index = oldest_per_user_translation_list_elt->value;
295
296           /* Get the session */
297           s = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
298                                  session_index);
299       } while (snat_is_session_static (s));
300
301       /* Remove in2out, out2in keys */
302       kv0.key = s->in2out.as_u64;
303       if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
304           clib_warning ("in2out key delete failed");
305       kv0.key = s->out2in.as_u64;
306       if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
307           clib_warning ("out2in key delete failed");
          /* NOTE(review): no worker_by_out entry is removed here for the old
           * outside address/port - confirm whether that is intended. */
308
309       /* log NAT event */
310       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
311                                           s->out2in.addr.as_u32,
312                                           s->in2out.protocol,
313                                           s->in2out.port,
314                                           s->out2in.port,
315                                           s->in2out.fib_index);
316
          /* Release the old outside address/port, then allocate one for the
           * new flow (key1.protocol was set from key0 above) */
317       snat_free_outside_address_and_port 
318         (sm, &s->out2in, s->outside_address_index);
319       s->outside_address_index = ~0;
320
321       if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
322                                                &address_index))
323         {
              /* NOTE(review): on this path the recycled session has lost its
               * hash keys and outside address but is still linked in the
               * user's list with outside_address_index == ~0 - confirm this
               * state cannot be reached or is handled by later recycling. */
324           ASSERT(0);
325
326           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
327           return SNAT_IN2OUT_NEXT_DROP;
328         }
329       s->outside_address_index = address_index;
330     }
331   else
332     {
333       u8 static_mapping = 1;
334
335       /* First try to match static mapping by local address and port */
          /* A non-zero return means no static mapping matched; on a match
           * key1 is used below as the outside key and address_index stays ~0 */
336       if (snat_static_mapping_match (sm, *key0, &key1, 0))
337         {
338           static_mapping = 0;
339           /* Try to create dynamic translation */
340           if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
341                                                    &address_index))
342             {
343               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
344               return SNAT_IN2OUT_NEXT_DROP;
345             }
346         }
347
348       /* Create a new session */
349       pool_get (sm->per_thread_data[cpu_index].sessions, s);
350       memset (s, 0, sizeof (*s));
351       
352       s->outside_address_index = address_index;
353
          /* Static and dynamic sessions are counted separately; only
           * nsessions is compared against the per-user quota above */
354       if (static_mapping)
355         {
356           u->nstaticsessions++;
357           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
358         }
359       else
360         {
361           u->nsessions++;
362         }
363
364       /* Create list elts */
365       pool_get (sm->per_thread_data[cpu_index].list_pool,
366                 per_user_translation_list_elt);
367       clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
368                        per_user_translation_list_elt -
369                        sm->per_thread_data[cpu_index].list_pool);
370
          /* Cross-link: the list element stores the session index, the
           * session stores its list element index and its list head */
371       per_user_translation_list_elt->value =
372         s - sm->per_thread_data[cpu_index].sessions;
373       s->per_user_index = per_user_translation_list_elt -
374                           sm->per_thread_data[cpu_index].list_pool;
375       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
376
377       clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
378                           s->per_user_list_head_index,
379                           per_user_translation_list_elt -
380                           sm->per_thread_data[cpu_index].list_pool);
381    }
382   
      /* Fill in both keys; protocol and FIB index of the outside key are
       * overwritten after copying key1 */
383   s->in2out = *key0;
384   s->out2in = key1;
385   s->out2in.protocol = key0->protocol;
386   s->out2in.fib_index = outside_fib_index;
387   *sessionp = s;
388
389   /* Add to translation hashes */
      /* Both hashes map the key to the session's index in this thread's pool */
390   kv0.key = s->in2out.as_u64;
391   kv0.value = s - sm->per_thread_data[cpu_index].sessions;
392   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
393       clib_warning ("in2out key add failed");
394   
395   kv0.key = s->out2in.as_u64;
396   kv0.value = s - sm->per_thread_data[cpu_index].sessions;
397   
398   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
399       clib_warning ("out2in key add failed");
400
401   /* Add to translated packets worker lookup */
      /* Maps (outside address, port, FIB index) to cpu_index; unlike the two
       * adds above, the return value of this add is not checked */
402   worker_by_out_key.addr = s->out2in.addr;
403   worker_by_out_key.port = s->out2in.port;
404   worker_by_out_key.fib_index = s->out2in.fib_index;
405   kv0.key = worker_by_out_key.as_u64;
406   kv0.value = cpu_index;
407   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
408
409   /* log NAT event */
410   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
411                                       s->out2in.addr.as_u32,
412                                       s->in2out.protocol,
413                                       s->in2out.port,
414                                       s->out2in.port,
415                                       s->in2out.fib_index);
416   return next0;
417 }
418                       
419 typedef struct {
420   u16 src_port, dst_port;
421 } tcp_udp_header_t;
422
423 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
424                                          vlib_buffer_t * b0,
425                                          ip4_header_t * ip0,
426                                          icmp46_header_t * icmp0,
427                                          u32 sw_if_index0,
428                                          u32 rx_fib_index0,
429                                          vlib_node_runtime_t * node,
430                                          u32 next0,
431                                          f64 now,
432                                          u32 cpu_index,
433                                          snat_session_t ** p_s0)
434 {
435   snat_session_key_t key0;
436   icmp_echo_header_t *echo0, *inner_echo0 = 0;
437   ip4_header_t *inner_ip0 = 0;
438   void *l4_header = 0;
439   icmp46_header_t *inner_icmp0;
440   clib_bihash_kv_8_8_t kv0, value0;
441   snat_session_t * s0 = 0;
442   u32 new_addr0, old_addr0;
443   u16 old_id0, new_id0;
444   ip_csum_t sum0;
445   u16 checksum0;
446   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
447   u8 is_error_message = 0;
448
449   echo0 = (icmp_echo_header_t *)(icmp0+1);
450
451   key0.addr = ip0->src_address;
452   key0.fib_index = rx_fib_index0;
453   
454   switch(icmp0->type)
455     {
456     case ICMP4_destination_unreachable:
457     case ICMP4_time_exceeded:
458     case ICMP4_parameter_problem:
459     case ICMP4_source_quench:
460     case ICMP4_redirect:
461     case ICMP4_alternate_host_address:
462       is_error_message = 1;
463     }
464
465   if (!is_error_message)
466     {
467       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request))
468         {
469           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
470           next0 = SNAT_IN2OUT_NEXT_DROP;
471           goto out;
472         }
473       key0.protocol = SNAT_PROTOCOL_ICMP;
474       key0.port = echo0->identifier;
475     }
476   else
477     {
478       inner_ip0 = (ip4_header_t *)(echo0+1);
479       l4_header = ip4_next_header (inner_ip0);
480       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
481       switch (key0.protocol)
482         {
483         case SNAT_PROTOCOL_ICMP:
484           inner_icmp0 = (icmp46_header_t*)l4_header;
485           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
486           key0.port = inner_echo0->identifier;
487           break;
488         case SNAT_PROTOCOL_UDP:
489         case SNAT_PROTOCOL_TCP:
490           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
491           break;
492         default:
493           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
494           next0 = SNAT_IN2OUT_NEXT_DROP;
495           goto out;
496         }
497     }
498
499   kv0.key = key0.as_u64;
500   
501   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
502     {
503       if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
504           IP_PROTOCOL_ICMP, rx_fib_index0)))
505         goto out;
506
507       if (is_error_message)
508         {
509           next0 = SNAT_IN2OUT_NEXT_DROP;
510           goto out;
511         }
512
513       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
514                          &s0, node, next0, cpu_index);
515       
516       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
517         goto out;
518     }
519   else
520     s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
521                             value0.value);
522
523   sum0 = ip_incremental_checksum (0, icmp0,
524                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
525   checksum0 = ~ip_csum_fold (sum0);
526   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
527     {
528       next0 = SNAT_IN2OUT_NEXT_DROP;
529       goto out;
530     }
531
532   old_addr0 = ip0->src_address.as_u32;
533   ip0->src_address = s0->out2in.addr;
534   new_addr0 = ip0->src_address.as_u32;
535   vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
536
537   sum0 = ip0->checksum;
538   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
539                          src_address /* changed member */);
540   ip0->checksum = ip_csum_fold (sum0);
541   
542   if (!is_error_message)
543     {
544       old_id0 = echo0->identifier;
545       new_id0 = s0->out2in.port;
546       echo0->identifier = new_id0;
547
548       sum0 = icmp0->checksum;
549       sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
550                              identifier);
551       icmp0->checksum = ip_csum_fold (sum0);
552     }
553   else
554     {
555       if (!ip4_header_checksum_is_valid (inner_ip0))
556         {
557           next0 = SNAT_IN2OUT_NEXT_DROP;
558           goto out;
559         }
560
561       old_addr0 = inner_ip0->dst_address.as_u32;
562       inner_ip0->dst_address = s0->out2in.addr;
563       new_addr0 = inner_ip0->src_address.as_u32;
564
565       sum0 = icmp0->checksum;
566       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
567                              dst_address /* changed member */);
568       icmp0->checksum = ip_csum_fold (sum0);
569
570       switch (key0.protocol)
571         {
572           case SNAT_PROTOCOL_ICMP:
573             old_id0 = inner_echo0->identifier;
574             new_id0 = s0->out2in.port;
575             inner_echo0->identifier = new_id0;
576
577             sum0 = icmp0->checksum;
578             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
579                                    identifier);
580             icmp0->checksum = ip_csum_fold (sum0);
581             break;
582           case SNAT_PROTOCOL_UDP:
583           case SNAT_PROTOCOL_TCP:
584             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
585             new_id0 = s0->out2in.port;
586             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
587
588             sum0 = icmp0->checksum;
589             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
590                                    dst_port);
591             icmp0->checksum = ip_csum_fold (sum0);
592             break;
593           default:
594             ASSERT(0);
595         }
596     }
597
598   /* Accounting */
599   s0->last_heard = now;
600   s0->total_pkts++;
601   s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
602   /* Per-user LRU list maintenance for dynamic translations */
603   if (!snat_is_session_static (s0))
604     {
605       clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
606                          s0->per_user_index);
607       clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
608                           s0->per_user_list_head_index,
609                           s0->per_user_index);
610     }
611
612 out:
613   *p_s0 = s0;
614   return next0;
615 }
616
617 /**
618  * @brief Hairpinning
619  *
620  * Hairpinning allows two endpoints on the internal side of the NAT to
621  * communicate even if they only use each other's external IP addresses
622  * and ports.
623  *
624  * @param sm     SNAT main.
625  * @param b0     Vlib buffer.
626  * @param ip0    IP header.
627  * @param udp0   UDP header.
628  * @param tcp0   TCP header.
629  * @param proto0 SNAT protocol.
630  */
631 static inline void
632 snat_hairpinning (snat_main_t *sm,
633                   vlib_buffer_t * b0,
634                   ip4_header_t * ip0,
635                   udp_header_t * udp0,
636                   tcp_header_t * tcp0,
637                   u32 proto0)
638 {
639   snat_session_key_t key0, sm0;
640   snat_worker_key_t k0;
641   snat_session_t * s0;
642   clib_bihash_kv_8_8_t kv0, value0;
643   ip_csum_t sum0;
644   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
645   u16 new_dst_port0, old_dst_port0;
646
647   key0.addr = ip0->dst_address;
648   key0.port = udp0->dst_port;
649   key0.protocol = proto0;
650   key0.fib_index = sm->outside_fib_index;
651   kv0.key = key0.as_u64;
652
653   /* Check if destination is in active sessions */
654   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
655     {
656       /* or static mappings */
657       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
658         {
659           new_dst_addr0 = sm0.addr.as_u32;
660           new_dst_port0 = sm0.port;
661           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
662         }
663     }
664   else
665     {
666       si = value0.value;
667       if (sm->num_workers > 1)
668         {
669           k0.addr = ip0->dst_address;
670           k0.port = udp0->dst_port;
671           k0.fib_index = sm->outside_fib_index;
672           kv0.key = k0.as_u64;
673           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
674             ASSERT(0);
675           else
676             ti = value0.value;
677         }
678       else
679         ti = sm->num_workers;
680
681       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
682       new_dst_addr0 = s0->in2out.addr.as_u32;
683       new_dst_port0 = s0->in2out.port;
684       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
685     }
686
687   /* Destination is behind the same NAT, use internal address and port */
688   if (new_dst_addr0)
689     {
690       old_dst_addr0 = ip0->dst_address.as_u32;
691       ip0->dst_address.as_u32 = new_dst_addr0;
692       sum0 = ip0->checksum;
693       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
694                              ip4_header_t, dst_address);
695       ip0->checksum = ip_csum_fold (sum0);
696
697       old_dst_port0 = tcp0->dst;
698       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
699         {
700           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
701             {
702               tcp0->dst = new_dst_port0;
703               sum0 = tcp0->checksum;
704               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
705                                      ip4_header_t, dst_address);
706               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
707                                      ip4_header_t /* cheat */, length);
708               tcp0->checksum = ip_csum_fold(sum0);
709             }
710           else
711             {
712               udp0->dst_port = new_dst_port0;
713               udp0->checksum = 0;
714             }
715         }
716     }
717 }
718
719 static inline uword
720 snat_in2out_node_fn_inline (vlib_main_t * vm,
721                             vlib_node_runtime_t * node,
722                             vlib_frame_t * frame, int is_slow_path)
723 {
724   u32 n_left_from, * from, * to_next;
725   snat_in2out_next_t next_index;
726   u32 pkts_processed = 0;
727   snat_main_t * sm = &snat_main;
728   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
729   f64 now = vlib_time_now (vm);
730   u32 stats_node_index;
731   u32 cpu_index = os_get_cpu_number ();
732
733   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
734     snat_in2out_node.index;
735
736   from = vlib_frame_vector_args (frame);
737   n_left_from = frame->n_vectors;
738   next_index = node->cached_next_index;
739
740   while (n_left_from > 0)
741     {
742       u32 n_left_to_next;
743
744       vlib_get_next_frame (vm, node, next_index,
745                            to_next, n_left_to_next);
746
747       while (n_left_from >= 4 && n_left_to_next >= 2)
748         {
749           u32 bi0, bi1;
750           vlib_buffer_t * b0, * b1;
751           u32 next0, next1;
752           u32 sw_if_index0, sw_if_index1;
753           ip4_header_t * ip0, * ip1;
754           ip_csum_t sum0, sum1;
755           u32 new_addr0, old_addr0, new_addr1, old_addr1;
756           u16 old_port0, new_port0, old_port1, new_port1;
757           udp_header_t * udp0, * udp1;
758           tcp_header_t * tcp0, * tcp1;
759           icmp46_header_t * icmp0, * icmp1;
760           snat_session_key_t key0, key1;
761           u32 rx_fib_index0, rx_fib_index1;
762           u32 proto0, proto1;
763           snat_session_t * s0 = 0, * s1 = 0;
764           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
765           
766           /* Prefetch next iteration. */
767           {
768             vlib_buffer_t * p2, * p3;
769             
770             p2 = vlib_get_buffer (vm, from[2]);
771             p3 = vlib_get_buffer (vm, from[3]);
772             
773             vlib_prefetch_buffer_header (p2, LOAD);
774             vlib_prefetch_buffer_header (p3, LOAD);
775
776             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
777             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
778           }
779
780           /* speculatively enqueue b0 and b1 to the current next frame */
781           to_next[0] = bi0 = from[0];
782           to_next[1] = bi1 = from[1];
783           from += 2;
784           to_next += 2;
785           n_left_from -= 2;
786           n_left_to_next -= 2;
787           
788           b0 = vlib_get_buffer (vm, bi0);
789           b1 = vlib_get_buffer (vm, bi1);
790
791           ip0 = vlib_buffer_get_current (b0);
792           udp0 = ip4_next_header (ip0);
793           tcp0 = (tcp_header_t *) udp0;
794           icmp0 = (icmp46_header_t *) udp0;
795
796           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
797           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
798                                    sw_if_index0);
799
800           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
801
802           proto0 = ip_proto_to_snat_proto (ip0->protocol);
803
804           if (PREDICT_FALSE(ip0->ttl == 1))
805             {
806               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
807               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
808                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
809                                            0);
810               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
811               goto trace00;
812             }
813
814           /* Next configured feature, probably ip4-lookup */
815           if (is_slow_path)
816             {
817               if (PREDICT_FALSE (proto0 == ~0))
818                 goto trace00;
819               
820               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
821                 {
822                   next0 = icmp_in2out_slow_path 
823                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
824                      node, next0, now, cpu_index, &s0);
825                   goto trace00;
826                 }
827             }
828           else
829             {
830               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
831                 {
832                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
833                   goto trace00;
834                 }
835             }
836
837           key0.addr = ip0->src_address;
838           key0.port = udp0->src_port;
839           key0.protocol = proto0;
840           key0.fib_index = rx_fib_index0;
841           
842           kv0.key = key0.as_u64;
843
844           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
845             {
846               if (is_slow_path)
847                 {
848                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
849                       proto0, rx_fib_index0)))
850                     goto trace00;
851
852                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
853                                      &s0, node, next0, cpu_index);
854                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
855                     goto trace00;
856                 }
857               else
858                 {
859                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
860                   goto trace00;
861                 }
862             }
863           else
864             s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
865                                     value0.value);
866
867           old_addr0 = ip0->src_address.as_u32;
868           ip0->src_address = s0->out2in.addr;
869           new_addr0 = ip0->src_address.as_u32;
870           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
871
872           sum0 = ip0->checksum;
873           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
874                                  ip4_header_t,
875                                  src_address /* changed member */);
876           ip0->checksum = ip_csum_fold (sum0);
877
878           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
879             {
880               old_port0 = tcp0->src_port;
881               tcp0->src_port = s0->out2in.port;
882               new_port0 = tcp0->src_port;
883
884               sum0 = tcp0->checksum;
885               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
886                                      ip4_header_t,
887                                      dst_address /* changed member */);
888               sum0 = ip_csum_update (sum0, old_port0, new_port0,
889                                      ip4_header_t /* cheat */,
890                                      length /* changed member */);
891               tcp0->checksum = ip_csum_fold(sum0);
892             }
893           else
894             {
895               old_port0 = udp0->src_port;
896               udp0->src_port = s0->out2in.port;
897               udp0->checksum = 0;
898             }
899
900           /* Hairpinning */
901           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
902
903           /* Accounting */
904           s0->last_heard = now;
905           s0->total_pkts++;
906           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
907           /* Per-user LRU list maintenance for dynamic translation */
908           if (!snat_is_session_static (s0))
909             {
910               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
911                                  s0->per_user_index);
912               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
913                                   s0->per_user_list_head_index,
914                                   s0->per_user_index);
915             }
916         trace00:
917
918           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
919                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
920             {
921               snat_in2out_trace_t *t = 
922                  vlib_add_trace (vm, node, b0, sizeof (*t));
923               t->is_slow_path = is_slow_path;
924               t->sw_if_index = sw_if_index0;
925               t->next_index = next0;
926                   t->session_index = ~0;
927               if (s0)
928                 t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
929             }
930
931           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
932
933           ip1 = vlib_buffer_get_current (b1);
934           udp1 = ip4_next_header (ip1);
935           tcp1 = (tcp_header_t *) udp1;
936           icmp1 = (icmp46_header_t *) udp1;
937
938           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
939           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
940                                    sw_if_index1);
941
942           proto1 = ip_proto_to_snat_proto (ip1->protocol);
943
944           if (PREDICT_FALSE(ip0->ttl == 1))
945             {
946               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
947               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
948                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
949                                            0);
950               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
951               goto trace01;
952             }
953
954           /* Next configured feature, probably ip4-lookup */
955           if (is_slow_path)
956             {
957               if (PREDICT_FALSE (proto1 == ~0))
958                 goto trace01;
959               
960               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
961                 {
962                   next1 = icmp_in2out_slow_path 
963                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
964                      next1, now, cpu_index, &s1);
965                   goto trace01;
966                 }
967             }
968           else
969             {
970               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
971                 {
972                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
973                   goto trace01;
974                 }
975             }
976
977           key1.addr = ip1->src_address;
978           key1.port = udp1->src_port;
979           key1.protocol = proto1;
980           key1.fib_index = rx_fib_index1;
981           
982           kv1.key = key1.as_u64;
983
984             if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
985             {
986               if (is_slow_path)
987                 {
988                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index1, ip1,
989                       proto1, rx_fib_index1)))
990                     goto trace01;
991
992                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
993                                      &s1, node, next1, cpu_index);
994                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
995                     goto trace01;
996                 }
997               else
998                 {
999                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1000                   goto trace01;
1001                 }
1002             }
1003           else
1004             s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
1005                                     value1.value);
1006
1007           old_addr1 = ip1->src_address.as_u32;
1008           ip1->src_address = s1->out2in.addr;
1009           new_addr1 = ip1->src_address.as_u32;
1010           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1011
1012           sum1 = ip1->checksum;
1013           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1014                                  ip4_header_t,
1015                                  src_address /* changed member */);
1016           ip1->checksum = ip_csum_fold (sum1);
1017
1018           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1019             {
1020               old_port1 = tcp1->src_port;
1021               tcp1->src_port = s1->out2in.port;
1022               new_port1 = tcp1->src_port;
1023
1024               sum1 = tcp1->checksum;
1025               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1026                                      ip4_header_t,
1027                                      dst_address /* changed member */);
1028               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1029                                      ip4_header_t /* cheat */,
1030                                      length /* changed member */);
1031               tcp1->checksum = ip_csum_fold(sum1);
1032             }
1033           else
1034             {
1035               old_port1 = udp1->src_port;
1036               udp1->src_port = s1->out2in.port;
1037               udp1->checksum = 0;
1038             }
1039
1040           /* Hairpinning */
1041           snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
1042
1043           /* Accounting */
1044           s1->last_heard = now;
1045           s1->total_pkts++;
1046           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1047           /* Per-user LRU list maintenance for dynamic translation */
1048           if (!snat_is_session_static (s1))
1049             {
1050               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
1051                                  s1->per_user_index);
1052               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
1053                                   s1->per_user_list_head_index,
1054                                   s1->per_user_index);
1055             }
1056         trace01:
1057
1058           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1059                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
1060             {
1061               snat_in2out_trace_t *t = 
1062                  vlib_add_trace (vm, node, b1, sizeof (*t));
1063               t->sw_if_index = sw_if_index1;
1064               t->next_index = next1;
1065               t->session_index = ~0;
1066               if (s1)
1067                 t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
1068             }
1069
1070           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1071
1072           /* verify speculative enqueues, maybe switch current next frame */
1073           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1074                                            to_next, n_left_to_next,
1075                                            bi0, bi1, next0, next1);
1076         }
1077
1078       while (n_left_from > 0 && n_left_to_next > 0)
1079         {
1080           u32 bi0;
1081           vlib_buffer_t * b0;
1082           u32 next0;
1083           u32 sw_if_index0;
1084           ip4_header_t * ip0;
1085           ip_csum_t sum0;
1086           u32 new_addr0, old_addr0;
1087           u16 old_port0, new_port0;
1088           udp_header_t * udp0;
1089           tcp_header_t * tcp0;
1090           icmp46_header_t * icmp0;
1091           snat_session_key_t key0;
1092           u32 rx_fib_index0;
1093           u32 proto0;
1094           snat_session_t * s0 = 0;
1095           clib_bihash_kv_8_8_t kv0, value0;
1096           
1097           /* speculatively enqueue b0 to the current next frame */
1098           bi0 = from[0];
1099           to_next[0] = bi0;
1100           from += 1;
1101           to_next += 1;
1102           n_left_from -= 1;
1103           n_left_to_next -= 1;
1104
1105           b0 = vlib_get_buffer (vm, bi0);
1106           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1107
1108           ip0 = vlib_buffer_get_current (b0);
1109           udp0 = ip4_next_header (ip0);
1110           tcp0 = (tcp_header_t *) udp0;
1111           icmp0 = (icmp46_header_t *) udp0;
1112
1113           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1114           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1115                                    sw_if_index0);
1116
1117           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1118
1119           if (PREDICT_FALSE(ip0->ttl == 1))
1120             {
1121               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1122               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1123                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1124                                            0);
1125               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1126               goto trace0;
1127             }
1128
1129           /* Next configured feature, probably ip4-lookup */
1130           if (is_slow_path)
1131             {
1132               if (PREDICT_FALSE (proto0 == ~0))
1133                 goto trace0;
1134               
1135               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1136                 {
1137                   next0 = icmp_in2out_slow_path 
1138                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1139                      next0, now, cpu_index, &s0);
1140                   goto trace0;
1141                 }
1142             }
1143           else
1144             {
1145               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1146                 {
1147                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1148                   goto trace0;
1149                 }
1150             }
1151
1152           key0.addr = ip0->src_address;
1153           key0.port = udp0->src_port;
1154           key0.protocol = proto0;
1155           key0.fib_index = rx_fib_index0;
1156           
1157           kv0.key = key0.as_u64;
1158
1159           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
1160             {
1161               if (is_slow_path)
1162                 {
1163                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
1164                       proto0, rx_fib_index0)))
1165                     goto trace0;
1166
1167                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1168                                      &s0, node, next0, cpu_index);
1169
1170                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1171                     goto trace0;
1172                 }
1173               else
1174                 {
1175                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1176                   goto trace0;
1177                 }
1178             }
1179           else
1180             s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
1181                                     value0.value);
1182
1183           old_addr0 = ip0->src_address.as_u32;
1184           ip0->src_address = s0->out2in.addr;
1185           new_addr0 = ip0->src_address.as_u32;
1186           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1187
1188           sum0 = ip0->checksum;
1189           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1190                                  ip4_header_t,
1191                                  src_address /* changed member */);
1192           ip0->checksum = ip_csum_fold (sum0);
1193
1194           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1195             {
1196               old_port0 = tcp0->src_port;
1197               tcp0->src_port = s0->out2in.port;
1198               new_port0 = tcp0->src_port;
1199
1200               sum0 = tcp0->checksum;
1201               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1202                                      ip4_header_t,
1203                                      dst_address /* changed member */);
1204               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1205                                      ip4_header_t /* cheat */,
1206                                      length /* changed member */);
1207               tcp0->checksum = ip_csum_fold(sum0);
1208             }
1209           else
1210             {
1211               old_port0 = udp0->src_port;
1212               udp0->src_port = s0->out2in.port;
1213               udp0->checksum = 0;
1214             }
1215
1216           /* Hairpinning */
1217           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1218
1219           /* Accounting */
1220           s0->last_heard = now;
1221           s0->total_pkts++;
1222           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1223           /* Per-user LRU list maintenance for dynamic translation */
1224           if (!snat_is_session_static (s0))
1225             {
1226               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
1227                                  s0->per_user_index);
1228               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
1229                                   s0->per_user_list_head_index,
1230                                   s0->per_user_index);
1231             }
1232
1233         trace0:
1234           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1235                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1236             {
1237               snat_in2out_trace_t *t = 
1238                  vlib_add_trace (vm, node, b0, sizeof (*t));
1239               t->is_slow_path = is_slow_path;
1240               t->sw_if_index = sw_if_index0;
1241               t->next_index = next0;
1242                   t->session_index = ~0;
1243               if (s0)
1244                 t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
1245             }
1246
1247           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1248
1249           /* verify speculative enqueue, maybe switch current next frame */
1250           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1251                                            to_next, n_left_to_next,
1252                                            bi0, next0);
1253         }
1254
1255       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1256     }
1257
1258   vlib_node_increment_counter (vm, stats_node_index, 
1259                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, 
1260                                pkts_processed);
1261   return frame->n_vectors;
1262 }
1263
1264 static uword
1265 snat_in2out_fast_path_fn (vlib_main_t * vm,
1266                           vlib_node_runtime_t * node,
1267                           vlib_frame_t * frame)
1268 {
1269   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */);
1270 }
1271
1272 VLIB_REGISTER_NODE (snat_in2out_node) = {
1273   .function = snat_in2out_fast_path_fn,
1274   .name = "snat-in2out",
1275   .vector_size = sizeof (u32),
1276   .format_trace = format_snat_in2out_trace,
1277   .type = VLIB_NODE_TYPE_INTERNAL,
1278   
1279   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1280   .error_strings = snat_in2out_error_strings,
1281
1282   .runtime_data_bytes = sizeof (snat_runtime_t),
1283   
1284   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1285
1286   /* edit / add dispositions here */
1287   .next_nodes = {
1288     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1289     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1290     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1291     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1292   },
1293 };
1294
1295 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1296
1297 static uword
1298 snat_in2out_slow_path_fn (vlib_main_t * vm,
1299                           vlib_node_runtime_t * node,
1300                           vlib_frame_t * frame)
1301 {
1302   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */);
1303 }
1304
1305 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1306   .function = snat_in2out_slow_path_fn,
1307   .name = "snat-in2out-slowpath",
1308   .vector_size = sizeof (u32),
1309   .format_trace = format_snat_in2out_trace,
1310   .type = VLIB_NODE_TYPE_INTERNAL,
1311   
1312   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1313   .error_strings = snat_in2out_error_strings,
1314
1315   .runtime_data_bytes = sizeof (snat_runtime_t),
1316   
1317   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1318
1319   /* edit / add dispositions here */
1320   .next_nodes = {
1321     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1322     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1323     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1324     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1325   },
1326 };
1327
1328 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
1329
1330 /**************************/
1331 /*** deterministic mode ***/
1332 /**************************/
1333 static uword
1334 snat_det_in2out_node_fn (vlib_main_t * vm,
1335                          vlib_node_runtime_t * node,
1336                          vlib_frame_t * frame)
1337 {
1338   u32 n_left_from, * from, * to_next;
1339   snat_in2out_next_t next_index;
1340   u32 pkts_processed = 0;
1341   snat_main_t * sm = &snat_main;
1342   u32 now = (u32) vlib_time_now (vm);
1343
1344   from = vlib_frame_vector_args (frame);
1345   n_left_from = frame->n_vectors;
1346   next_index = node->cached_next_index;
1347
1348   while (n_left_from > 0)
1349     {
1350       u32 n_left_to_next;
1351
1352       vlib_get_next_frame (vm, node, next_index,
1353                            to_next, n_left_to_next);
1354
1355       while (n_left_from >= 4 && n_left_to_next >= 2)
1356         {
1357           u32 bi0, bi1;
1358           vlib_buffer_t * b0, * b1;
1359           u32 next0, next1;
1360           u32 sw_if_index0, sw_if_index1;
1361           ip4_header_t * ip0, * ip1;
1362           ip_csum_t sum0, sum1;
1363           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
1364           u16 old_port0, new_port0, lo_port0, i0;
1365           u16 old_port1, new_port1, lo_port1, i1;
1366           udp_header_t * udp0, * udp1;
1367           tcp_header_t * tcp0, * tcp1;
1368           u32 proto0, proto1;
1369           snat_det_out_key_t key0, key1;
1370           snat_det_map_t * dm0, * dm1;
1371           snat_det_session_t * ses0 = 0, * ses1 = 0;
1372
1373           /* Prefetch next iteration. */
1374           {
1375             vlib_buffer_t * p2, * p3;
1376
1377             p2 = vlib_get_buffer (vm, from[2]);
1378             p3 = vlib_get_buffer (vm, from[3]);
1379
1380             vlib_prefetch_buffer_header (p2, LOAD);
1381             vlib_prefetch_buffer_header (p3, LOAD);
1382
1383             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1384             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1385           }
1386
1387           /* speculatively enqueue b0 and b1 to the current next frame */
1388           to_next[0] = bi0 = from[0];
1389           to_next[1] = bi1 = from[1];
1390           from += 2;
1391           to_next += 2;
1392           n_left_from -= 2;
1393           n_left_to_next -= 2;
1394
1395           b0 = vlib_get_buffer (vm, bi0);
1396           b1 = vlib_get_buffer (vm, bi1);
1397
1398           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1399           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1400
1401           ip0 = vlib_buffer_get_current (b0);
1402           udp0 = ip4_next_header (ip0);
1403           tcp0 = (tcp_header_t *) udp0;
1404
1405           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1406
1407           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
1408           if (PREDICT_FALSE(!dm0))
1409             {
1410               clib_warning("no match for internal host %U",
1411                            format_ip4_address, &ip0->src_address);
1412               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1413               goto trace0;
1414             }
1415
1416           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
1417
1418           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src);
1419           if (PREDICT_FALSE(!ses0))
1420             {
1421               key0.ext_host_addr = ip0->dst_address;
1422               key0.ext_host_port = tcp0->dst;
1423               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
1424                 {
1425                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
1426                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
1427
1428                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
1429                     continue;
1430
1431                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
1432                   break;
1433                 }
1434                 if (PREDICT_FALSE(!ses0))
1435                   {
1436                     next0 = SNAT_IN2OUT_NEXT_DROP;
1437                     b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
1438                     goto trace0;
1439                   }
1440             }
1441
1442           new_port0 = ses0->out.out_port;
1443           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1444
1445           old_addr0.as_u32 = ip0->src_address.as_u32;
1446           ip0->src_address.as_u32 = new_addr0.as_u32;
1447           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1448
1449           sum0 = ip0->checksum;
1450           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1451                                  ip4_header_t,
1452                                  src_address /* changed member */);
1453           ip0->checksum = ip_csum_fold (sum0);
1454
1455           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1456             {
1457               if (tcp0->flags & TCP_FLAG_SYN)
1458                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
1459               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
1460                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1461               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1462                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
1463               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
1464                 snat_det_ses_close(dm0, ses0);
1465               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1466                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
1467               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
1468                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1469
1470               old_port0 = tcp0->src;
1471               tcp0->src = new_port0;
1472
1473               sum0 = tcp0->checksum;
1474               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1475                                      ip4_header_t,
1476                                      dst_address /* changed member */);
1477               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1478                                      ip4_header_t /* cheat */,
1479                                      length /* changed member */);
1480               tcp0->checksum = ip_csum_fold(sum0);
1481             }
1482           else
1483             {
1484               ses0->state = SNAT_SESSION_UDP_ACTIVE;
1485               old_port0 = udp0->src_port;
1486               udp0->src_port = new_port0;
1487               udp0->checksum = 0;
1488             }
1489
1490           switch(ses0->state)
1491             {
1492             case SNAT_SESSION_UDP_ACTIVE:
1493                 ses0->expire = now + SNAT_UDP_TIMEOUT;
1494                 break;
1495             case SNAT_SESSION_TCP_SYN_SENT:
1496             case SNAT_SESSION_TCP_FIN_WAIT:
1497             case SNAT_SESSION_TCP_CLOSE_WAIT:
1498             case SNAT_SESSION_TCP_LAST_ACK:
1499                 ses0->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT;
1500                 break;
1501             case SNAT_SESSION_TCP_ESTABLISHED:
1502                 ses0->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT;
1503                 break;
1504             }
1505
1506         trace0:
1507           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1508                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1509             {
1510               snat_in2out_trace_t *t =
1511                  vlib_add_trace (vm, node, b0, sizeof (*t));
1512               t->is_slow_path = 0;
1513               t->sw_if_index = sw_if_index0;
1514               t->next_index = next0;
1515               t->session_index = ~0;
1516               if (ses0)
1517                 t->session_index = ses0 - dm0->sessions;
1518             }
1519
1520           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1521
1522           ip1 = vlib_buffer_get_current (b1);
1523           udp1 = ip4_next_header (ip1);
1524           tcp1 = (tcp_header_t *) udp1;
1525
1526           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1527
1528           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
1529           if (PREDICT_FALSE(!dm1))
1530             {
1531               clib_warning("no match for internal host %U",
1532                            format_ip4_address, &ip0->src_address);
1533               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1534               goto trace1;
1535             }
1536
1537           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
1538
1539
1540           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src);
1541           if (PREDICT_FALSE(!ses1))
1542             {
1543               key1.ext_host_addr = ip1->dst_address;
1544               key1.ext_host_port = tcp1->dst;
1545               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
1546                 {
1547                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
1548                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
1549
1550                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
1551                     continue;
1552
1553                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
1554                   break;
1555                 }
1556                 if (PREDICT_FALSE(!ses1))
1557                   {
1558                     next1 = SNAT_IN2OUT_NEXT_DROP;
1559                     b1->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
1560                     goto trace1;
1561                   }
1562             }
1563
1564           new_port1 = ses1->out.out_port;
1565           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1566
1567           old_addr1.as_u32 = ip1->src_address.as_u32;
1568           ip1->src_address.as_u32 = new_addr1.as_u32;
1569           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1570
1571           sum1 = ip1->checksum;
1572           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1573                                  ip4_header_t,
1574                                  src_address /* changed member */);
1575           ip1->checksum = ip_csum_fold (sum1);
1576
1577           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1578             {
1579               if (tcp1->flags & TCP_FLAG_SYN)
1580                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
1581               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
1582                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
1583               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
1584                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
1585               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
1586                 snat_det_ses_close(dm1, ses1);
1587               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1588                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
1589               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
1590                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
1591
1592               old_port1 = tcp1->src;
1593               tcp1->src = new_port1;
1594
1595               sum1 = tcp1->checksum;
1596               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1597                                      ip4_header_t,
1598                                      dst_address /* changed member */);
1599               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1600                                      ip4_header_t /* cheat */,
1601                                      length /* changed member */);
1602               tcp1->checksum = ip_csum_fold(sum1);
1603             }
1604           else
1605             {
1606               ses1->state = SNAT_SESSION_UDP_ACTIVE;
1607               old_port1 = udp1->src_port;
1608               udp1->src_port = new_port1;
1609               udp1->checksum = 0;
1610             }
1611
1612           switch(ses1->state)
1613             {
1614             case SNAT_SESSION_UDP_ACTIVE:
1615                 ses1->expire = now + SNAT_UDP_TIMEOUT;
1616                 break;
1617             case SNAT_SESSION_TCP_SYN_SENT:
1618             case SNAT_SESSION_TCP_FIN_WAIT:
1619             case SNAT_SESSION_TCP_CLOSE_WAIT:
1620             case SNAT_SESSION_TCP_LAST_ACK:
1621                 ses1->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT;
1622                 break;
1623             case SNAT_SESSION_TCP_ESTABLISHED:
1624                 ses1->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT;
1625                 break;
1626             }
1627
1628         trace1:
1629           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1630                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1631             {
1632               snat_in2out_trace_t *t =
1633                  vlib_add_trace (vm, node, b1, sizeof (*t));
1634               t->is_slow_path = 0;
1635               t->sw_if_index = sw_if_index1;
1636               t->next_index = next1;
1637               t->session_index = ~0;
1638               if (ses1)
1639                 t->session_index = ses1 - dm1->sessions;
1640             }
1641
1642           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1643
1644           /* verify speculative enqueues, maybe switch current next frame */
1645           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1646                                            to_next, n_left_to_next,
1647                                            bi0, bi1, next0, next1);
1648          }
1649
1650       while (n_left_from > 0 && n_left_to_next > 0)
1651         {
1652           u32 bi0;
1653           vlib_buffer_t * b0;
1654           u32 next0;
1655           u32 sw_if_index0;
1656           ip4_header_t * ip0;
1657           ip_csum_t sum0;
1658           ip4_address_t new_addr0, old_addr0;
1659           u16 old_port0, new_port0, lo_port0, i0;
1660           udp_header_t * udp0;
1661           tcp_header_t * tcp0;
1662           u32 proto0;
1663           snat_det_out_key_t key0;
1664           snat_det_map_t * dm0;
1665           snat_det_session_t * ses0 = 0;
1666
1667           /* speculatively enqueue b0 to the current next frame */
1668           bi0 = from[0];
1669           to_next[0] = bi0;
1670           from += 1;
1671           to_next += 1;
1672           n_left_from -= 1;
1673           n_left_to_next -= 1;
1674
1675           b0 = vlib_get_buffer (vm, bi0);
1676           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1677
1678           ip0 = vlib_buffer_get_current (b0);
1679           udp0 = ip4_next_header (ip0);
1680           tcp0 = (tcp_header_t *) udp0;
1681
1682           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1683
1684           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
1685           if (PREDICT_FALSE(!dm0))
1686             {
1687               clib_warning("no match for internal host %U",
1688                            format_ip4_address, &ip0->src_address);
1689               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1690               goto trace00;
1691             }
1692
1693           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
1694
1695           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src);
1696           if (PREDICT_FALSE(!ses0))
1697             {
1698               key0.ext_host_addr = ip0->dst_address;
1699               key0.ext_host_port = tcp0->dst;
1700               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
1701                 {
1702                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
1703                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
1704
1705                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
1706                     continue;
1707
1708                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
1709                   break;
1710                 }
1711                 if (PREDICT_FALSE(!ses0))
1712                   {
1713                     next0 = SNAT_IN2OUT_NEXT_DROP;
1714                     b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
1715                     goto trace00;
1716                   }
1717             }
1718
1719           new_port0 = ses0->out.out_port;
1720           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1721
1722           old_addr0.as_u32 = ip0->src_address.as_u32;
1723           ip0->src_address.as_u32 = new_addr0.as_u32;
1724           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1725
1726           sum0 = ip0->checksum;
1727           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1728                                  ip4_header_t,
1729                                  src_address /* changed member */);
1730           ip0->checksum = ip_csum_fold (sum0);
1731
1732           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1733             {
1734               if (tcp0->flags & TCP_FLAG_SYN)
1735                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
1736               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
1737                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1738               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1739                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
1740               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
1741                 snat_det_ses_close(dm0, ses0);
1742               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1743                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
1744               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
1745                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1746
1747               old_port0 = tcp0->src;
1748               tcp0->src = new_port0;
1749
1750               sum0 = tcp0->checksum;
1751               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1752                                      ip4_header_t,
1753                                      dst_address /* changed member */);
1754               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1755                                      ip4_header_t /* cheat */,
1756                                      length /* changed member */);
1757               tcp0->checksum = ip_csum_fold(sum0);
1758             }
1759           else
1760             {
1761               ses0->state = SNAT_SESSION_UDP_ACTIVE;
1762               old_port0 = udp0->src_port;
1763               udp0->src_port = new_port0;
1764               udp0->checksum = 0;
1765             }
1766
1767           switch(ses0->state)
1768             {
1769             case SNAT_SESSION_UDP_ACTIVE:
1770                 ses0->expire = now + SNAT_UDP_TIMEOUT;
1771                 break;
1772             case SNAT_SESSION_TCP_SYN_SENT:
1773             case SNAT_SESSION_TCP_FIN_WAIT:
1774             case SNAT_SESSION_TCP_CLOSE_WAIT:
1775             case SNAT_SESSION_TCP_LAST_ACK:
1776                 ses0->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT;
1777                 break;
1778             case SNAT_SESSION_TCP_ESTABLISHED:
1779                 ses0->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT;
1780                 break;
1781             }
1782
1783         trace00:
1784           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1785                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1786             {
1787               snat_in2out_trace_t *t =
1788                  vlib_add_trace (vm, node, b0, sizeof (*t));
1789               t->is_slow_path = 0;
1790               t->sw_if_index = sw_if_index0;
1791               t->next_index = next0;
1792               t->session_index = ~0;
1793               if (ses0)
1794                 t->session_index = ses0 - dm0->sessions;
1795             }
1796
1797           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1798
1799           /* verify speculative enqueue, maybe switch current next frame */
1800           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1801                                            to_next, n_left_to_next,
1802                                            bi0, next0);
1803         }
1804
1805       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1806     }
1807
1808   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
1809                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1810                                pkts_processed);
1811   return frame->n_vectors;
1812 }
1813
1814 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
1815   .function = snat_det_in2out_node_fn,
1816   .name = "snat-det-in2out",
1817   .vector_size = sizeof (u32),
1818   .format_trace = format_snat_in2out_trace,
1819   .type = VLIB_NODE_TYPE_INTERNAL,
1820
1821   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1822   .error_strings = snat_in2out_error_strings,
1823
1824   .runtime_data_bytes = sizeof (snat_runtime_t),
1825
1826   .n_next_nodes = 2,
1827
1828   /* edit / add dispositions here */
1829   .next_nodes = {
1830     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1831     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1832   },
1833 };
1834
1835 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
1836
1837 /**********************/
1838 /*** worker handoff ***/
1839 /**********************/
1840 static uword
1841 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
1842                                vlib_node_runtime_t * node,
1843                                vlib_frame_t * frame)
1844 {
1845   snat_main_t *sm = &snat_main;
1846   vlib_thread_main_t *tm = vlib_get_thread_main ();
1847   u32 n_left_from, *from, *to_next = 0;
1848   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
1849   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
1850     = 0;
1851   vlib_frame_queue_elt_t *hf = 0;
1852   vlib_frame_t *f = 0;
1853   int i;
1854   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
1855   u32 next_worker_index = 0;
1856   u32 current_worker_index = ~0;
1857   u32 cpu_index = os_get_cpu_number ();
1858
1859   ASSERT (vec_len (sm->workers));
1860
1861   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
1862     {
1863       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
1864
1865       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
1866                                sm->first_worker_index + sm->num_workers - 1,
1867                                (vlib_frame_queue_t *) (~0));
1868     }
1869
1870   from = vlib_frame_vector_args (frame);
1871   n_left_from = frame->n_vectors;
1872
1873   while (n_left_from > 0)
1874     {
1875       u32 bi0;
1876       vlib_buffer_t *b0;
1877       u32 sw_if_index0;
1878       u32 rx_fib_index0;
1879       ip4_header_t * ip0;
1880       u8 do_handoff;
1881
1882       bi0 = from[0];
1883       from += 1;
1884       n_left_from -= 1;
1885
1886       b0 = vlib_get_buffer (vm, bi0);
1887
1888       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1889       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1890
1891       ip0 = vlib_buffer_get_current (b0);
1892
1893       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
1894
1895       if (PREDICT_FALSE (next_worker_index != cpu_index))
1896         {
1897           do_handoff = 1;
1898
1899           if (next_worker_index != current_worker_index)
1900             {
1901               if (hf)
1902                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1903
1904               hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
1905                                                       next_worker_index,
1906                                                       handoff_queue_elt_by_worker_index);
1907
1908               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
1909               to_next_worker = &hf->buffer_index[hf->n_vectors];
1910               current_worker_index = next_worker_index;
1911             }
1912
1913           /* enqueue to correct worker thread */
1914           to_next_worker[0] = bi0;
1915           to_next_worker++;
1916           n_left_to_next_worker--;
1917
1918           if (n_left_to_next_worker == 0)
1919             {
1920               hf->n_vectors = VLIB_FRAME_SIZE;
1921               vlib_put_frame_queue_elt (hf);
1922               current_worker_index = ~0;
1923               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
1924               hf = 0;
1925             }
1926         }
1927       else
1928         {
1929           do_handoff = 0;
1930           /* if this is 1st frame */
1931           if (!f)
1932             {
1933               f = vlib_get_frame_to_node (vm, sm->in2out_node_index);
1934               to_next = vlib_frame_vector_args (f);
1935             }
1936
1937           to_next[0] = bi0;
1938           to_next += 1;
1939           f->n_vectors++;
1940         }
1941
1942       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1943                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1944         {
1945           snat_in2out_worker_handoff_trace_t *t =
1946             vlib_add_trace (vm, node, b0, sizeof (*t));
1947           t->next_worker_index = next_worker_index;
1948           t->do_handoff = do_handoff;
1949         }
1950     }
1951
1952   if (f)
1953     vlib_put_frame_to_node (vm, sm->in2out_node_index, f);
1954
1955   if (hf)
1956     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1957
1958   /* Ship frames to the worker nodes */
1959   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
1960     {
1961       if (handoff_queue_elt_by_worker_index[i])
1962         {
1963           hf = handoff_queue_elt_by_worker_index[i];
1964           /*
1965            * It works better to let the handoff node
1966            * rate-adapt, always ship the handoff queue element.
1967            */
1968           if (1 || hf->n_vectors == hf->last_n_vectors)
1969             {
1970               vlib_put_frame_queue_elt (hf);
1971               handoff_queue_elt_by_worker_index[i] = 0;
1972             }
1973           else
1974             hf->last_n_vectors = hf->n_vectors;
1975         }
1976       congested_handoff_queue_by_worker_index[i] =
1977         (vlib_frame_queue_t *) (~0);
1978     }
1979   hf = 0;
1980   current_worker_index = ~0;
1981   return frame->n_vectors;
1982 }
1983
1984 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
1985   .function = snat_in2out_worker_handoff_fn,
1986   .name = "snat-in2out-worker-handoff",
1987   .vector_size = sizeof (u32),
1988   .format_trace = format_snat_in2out_worker_handoff_trace,
1989   .type = VLIB_NODE_TYPE_INTERNAL,
1990   
1991   .n_next_nodes = 1,
1992
1993   .next_nodes = {
1994     [0] = "error-drop",
1995   },
1996 };
1997
1998 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
1999
2000 /********************************/
2001 /*** static mapping only mode ***/
2002 /********************************/
2003 static inline u32 icmp_in2out_static_map (snat_main_t *sm,
2004                                           vlib_buffer_t * b0,
2005                                           ip4_header_t * ip0,
2006                                           icmp46_header_t * icmp0,
2007                                           u32 sw_if_index0,
2008                                           vlib_node_runtime_t * node,
2009                                           u32 next0,
2010                                           u32 rx_fib_index0)
2011 {
2012   snat_session_key_t key0, sm0;
2013   icmp_echo_header_t *echo0;
2014   u32 new_addr0, old_addr0;
2015   u16 old_id0, new_id0;
2016   ip_csum_t sum0;
2017   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
2018
2019   echo0 = (icmp_echo_header_t *)(icmp0+1);
2020
2021   key0.addr = ip0->src_address;
2022   key0.port = echo0->identifier;
2023   key0.fib_index = rx_fib_index0;
2024   
2025   if (snat_static_mapping_match(sm, key0, &sm0, 0))
2026     {
2027       if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
2028           IP_PROTOCOL_ICMP, rx_fib_index0)))
2029         return next0;
2030
2031       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2032       return SNAT_IN2OUT_NEXT_DROP;
2033     }
2034
2035   new_addr0 = sm0.addr.as_u32;
2036   new_id0 = sm0.port;
2037   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2038   old_addr0 = ip0->src_address.as_u32;
2039   ip0->src_address.as_u32 = new_addr0;
2040   
2041   sum0 = ip0->checksum;
2042   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2043                          ip4_header_t,
2044                          src_address /* changed member */);
2045   ip0->checksum = ip_csum_fold (sum0);
2046   
2047   if (PREDICT_FALSE(new_id0 != echo0->identifier))
2048     {
2049       old_id0 = echo0->identifier;
2050       echo0->identifier = new_id0;
2051
2052       sum0 = icmp0->checksum;
2053       sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
2054                              identifier);
2055       icmp0->checksum = ip_csum_fold (sum0);
2056     }
2057
2058   return next0;
2059 }
2060
2061 static uword
2062 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
2063                                 vlib_node_runtime_t * node,
2064                                 vlib_frame_t * frame)
2065 {
2066   u32 n_left_from, * from, * to_next;
2067   snat_in2out_next_t next_index;
2068   u32 pkts_processed = 0;
2069   snat_main_t * sm = &snat_main;
2070   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
2071   u32 stats_node_index;
2072
2073   stats_node_index = snat_in2out_fast_node.index;
2074
2075   from = vlib_frame_vector_args (frame);
2076   n_left_from = frame->n_vectors;
2077   next_index = node->cached_next_index;
2078
2079   while (n_left_from > 0)
2080     {
2081       u32 n_left_to_next;
2082
2083       vlib_get_next_frame (vm, node, next_index,
2084                            to_next, n_left_to_next);
2085
2086       while (n_left_from > 0 && n_left_to_next > 0)
2087         {
2088           u32 bi0;
2089           vlib_buffer_t * b0;
2090           u32 next0;
2091           u32 sw_if_index0;
2092           ip4_header_t * ip0;
2093           ip_csum_t sum0;
2094           u32 new_addr0, old_addr0;
2095           u16 old_port0, new_port0;
2096           udp_header_t * udp0;
2097           tcp_header_t * tcp0;
2098           icmp46_header_t * icmp0;
2099           snat_session_key_t key0, sm0;
2100           u32 proto0;
2101           u32 rx_fib_index0;
2102
2103           /* speculatively enqueue b0 to the current next frame */
2104           bi0 = from[0];
2105           to_next[0] = bi0;
2106           from += 1;
2107           to_next += 1;
2108           n_left_from -= 1;
2109           n_left_to_next -= 1;
2110
2111           b0 = vlib_get_buffer (vm, bi0);
2112           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2113
2114           ip0 = vlib_buffer_get_current (b0);
2115           udp0 = ip4_next_header (ip0);
2116           tcp0 = (tcp_header_t *) udp0;
2117           icmp0 = (icmp46_header_t *) udp0;
2118
2119           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2120           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2121
2122           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2123
2124           if (PREDICT_FALSE (proto0 == ~0))
2125               goto trace0;
2126
2127           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2128             {
2129               if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
2130                   proto0, rx_fib_index0)))
2131                 goto trace0;
2132
2133               next0 = icmp_in2out_static_map
2134                 (sm, b0, ip0, icmp0, sw_if_index0, node, next0, rx_fib_index0);
2135               goto trace0;
2136             }
2137
2138           key0.addr = ip0->src_address;
2139           key0.port = udp0->src_port;
2140           key0.fib_index = rx_fib_index0;
2141
2142           if (snat_static_mapping_match(sm, key0, &sm0, 0))
2143             {
2144               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2145               next0= SNAT_IN2OUT_NEXT_DROP;
2146               goto trace0;
2147             }
2148
2149           new_addr0 = sm0.addr.as_u32;
2150           new_port0 = sm0.port;
2151           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2152           old_addr0 = ip0->src_address.as_u32;
2153           ip0->src_address.as_u32 = new_addr0;
2154
2155           sum0 = ip0->checksum;
2156           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2157                                  ip4_header_t,
2158                                  src_address /* changed member */);
2159           ip0->checksum = ip_csum_fold (sum0);
2160
2161           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
2162             {
2163               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2164                 {
2165                   old_port0 = tcp0->src_port;
2166                   tcp0->src_port = new_port0;
2167
2168                   sum0 = tcp0->checksum;
2169                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2170                                          ip4_header_t,
2171                                          dst_address /* changed member */);
2172                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2173                                          ip4_header_t /* cheat */,
2174                                          length /* changed member */);
2175                   tcp0->checksum = ip_csum_fold(sum0);
2176                 }
2177               else
2178                 {
2179                   old_port0 = udp0->src_port;
2180                   udp0->src_port = new_port0;
2181                   udp0->checksum = 0;
2182                 }
2183             }
2184           else
2185             {
2186               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2187                 {
2188                   sum0 = tcp0->checksum;
2189                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2190                                          ip4_header_t,
2191                                          dst_address /* changed member */);
2192                   tcp0->checksum = ip_csum_fold(sum0);
2193                 }
2194             }
2195
2196           /* Hairpinning */
2197           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
2198
2199         trace0:
2200           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2201                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2202             {
2203               snat_in2out_trace_t *t =
2204                  vlib_add_trace (vm, node, b0, sizeof (*t));
2205               t->sw_if_index = sw_if_index0;
2206               t->next_index = next0;
2207             }
2208
2209           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2210
2211           /* verify speculative enqueue, maybe switch current next frame */
2212           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2213                                            to_next, n_left_to_next,
2214                                            bi0, next0);
2215         }
2216
2217       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2218     }
2219
2220   vlib_node_increment_counter (vm, stats_node_index,
2221                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2222                                pkts_processed);
2223   return frame->n_vectors;
2224 }
2225
2226
2227 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
2228   .function = snat_in2out_fast_static_map_fn,
2229   .name = "snat-in2out-fast",
2230   .vector_size = sizeof (u32),
2231   .format_trace = format_snat_in2out_fast_trace,
2232   .type = VLIB_NODE_TYPE_INTERNAL,
2233   
2234   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2235   .error_strings = snat_in2out_error_strings,
2236
2237   .runtime_data_bytes = sizeof (snat_runtime_t),
2238   
2239   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2240
2241   /* edit / add dispositions here */
2242   .next_nodes = {
2243     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2244     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2245     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
2246     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2247   },
2248 };
2249
2250 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);