SNAT: in2out translation as an output feature (VPP-903)
[vpp.git] / src / plugins / snat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <snat/snat.h>
25 #include <snat/snat_ipfix_logging.h>
26 #include <snat/snat_det.h>
27
28 #include <vppinfra/hash.h>
29 #include <vppinfra/error.h>
30 #include <vppinfra/elog.h>
31
32 typedef struct {
33   u32 sw_if_index;
34   u32 next_index;
35   u32 session_index;
36   u32 is_slow_path;
37 } snat_in2out_trace_t;
38
39 typedef struct {
40   u32 next_worker_index;
41   u8 do_handoff;
42 } snat_in2out_worker_handoff_trace_t;
43
44 /* packet trace format function */
45 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
46 {
47   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
50   char * tag;
51
52   tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
53   
54   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
55               t->sw_if_index, t->next_index, t->session_index);
56
57   return s;
58 }
59
60 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
61 {
62   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
63   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
64   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
65
66   s = format (s, "SANT_IN2OUT_FAST: sw_if_index %d, next index %d", 
67               t->sw_if_index, t->next_index);
68
69   return s;
70 }
71
72 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
73 {
74   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
75   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
76   snat_in2out_worker_handoff_trace_t * t =
77     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
78   char * m;
79
80   m = t->do_handoff ? "next worker" : "same worker";
81   s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
82
83   return s;
84 }
85
86 vlib_node_registration_t snat_in2out_node;
87 vlib_node_registration_t snat_in2out_slowpath_node;
88 vlib_node_registration_t snat_in2out_fast_node;
89 vlib_node_registration_t snat_in2out_worker_handoff_node;
90 vlib_node_registration_t snat_det_in2out_node;
91 vlib_node_registration_t snat_in2out_output_node;
92 vlib_node_registration_t snat_in2out_output_slowpath_node;
93 vlib_node_registration_t snat_in2out_output_worker_handoff_node;
94
/*
 * X-macro listing every in2out error as _(SYMBOL, "description").
 * The entry order determines both the enum values and the positions in the
 * string table that are generated from this list, so keep the two in step
 * by only ever editing this list.
 */
#define foreach_snat_in2out_error                               \
  _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")               \
  _(IN2OUT_PACKETS, "Good in2out packets processed")            \
  _(OUT_OF_PORTS, "Out of ports")                               \
  _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")                \
  _(BAD_ICMP_TYPE, "unsupported ICMP type")                     \
  _(NO_TRANSLATION, "No translation")
102   
103 typedef enum {
104 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
105   foreach_snat_in2out_error
106 #undef _
107   SNAT_IN2OUT_N_ERROR,
108 } snat_in2out_error_t;
109
110 static char * snat_in2out_error_strings[] = {
111 #define _(sym,string) string,
112   foreach_snat_in2out_error
113 #undef _
114 };
115
/* Next-node indices used by the in2out nodes. */
typedef enum
{
  SNAT_IN2OUT_NEXT_LOOKUP = 0,
  SNAT_IN2OUT_NEXT_DROP,
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_N_NEXT,           /* number of next indices */
} snat_in2out_next_t;
123
124 /**
125  * @brief Check if packet should be translated
126  *
127  * Packets aimed at outside interface and external addresss with active session
128  * should be translated.
129  *
130  * @param sm            SNAT main
131  * @param rt            SNAT runtime data
132  * @param sw_if_index0  index of the inside interface
133  * @param ip0           IPv4 header
134  * @param proto0        SNAT protocol
135  * @param rx_fib_index0 RX FIB index
136  *
137  * @returns 0 if packet should be translated otherwise 1
138  */
139 static inline int
140 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
141                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
142                          u32 rx_fib_index0)
143 {
144   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
145   fib_prefix_t pfx = {
146     .fp_proto = FIB_PROTOCOL_IP4,
147     .fp_len = 32,
148     .fp_addr = {
149         .ip4.as_u32 = ip0->dst_address.as_u32,
150     },
151   };
152
153   /* Don't NAT packet aimed at the intfc address */
154   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
155                                       ip0->dst_address.as_u32)))
156     return 1;
157
158   fei = fib_table_lookup (rx_fib_index0, &pfx);
159   if (FIB_NODE_INDEX_INVALID != fei)
160     {
161       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
162       if (sw_if_index == ~0)
163         {
164           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
165           if (FIB_NODE_INDEX_INVALID != fei)
166             sw_if_index = fib_entry_get_resolving_interface (fei);
167         }
168       snat_interface_t *i;
169       pool_foreach (i, sm->interfaces,
170       ({
171         /* NAT packet aimed at outside interface */
172         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
173           return 0;
174       }));
175     }
176
177   return 1;
178 }
179
180 static inline int
181 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
182                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
183                     u32 rx_fib_index0)
184 {
185   udp_header_t * udp0 = ip4_next_header (ip0);
186   snat_session_key_t key0, sm0;
187   clib_bihash_kv_8_8_t kv0, value0;
188
189   key0.addr = ip0->dst_address;
190   key0.port = udp0->dst_port;
191   key0.protocol = proto0;
192   key0.fib_index = sm->outside_fib_index;
193   kv0.key = key0.as_u64;
194
195   /* NAT packet aimed at external address if */
196   /* has active sessions */
197   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
198     {
199       /* or is static mappings */
200       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
201         return 0;
202     }
203   else
204     return 0;
205
206   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
207                                  rx_fib_index0);
208 }
209
210 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
211                       ip4_header_t * ip0,
212                       u32 rx_fib_index0,
213                       snat_session_key_t * key0,
214                       snat_session_t ** sessionp,
215                       vlib_node_runtime_t * node,
216                       u32 next0,
217                       u32 thread_index)
218 {
219   snat_user_t *u;
220   snat_user_key_t user_key;
221   snat_session_t *s;
222   clib_bihash_kv_8_8_t kv0, value0;
223   u32 oldest_per_user_translation_list_index;
224   dlist_elt_t * oldest_per_user_translation_list_elt;
225   dlist_elt_t * per_user_translation_list_elt;
226   dlist_elt_t * per_user_list_head_elt;
227   u32 session_index;
228   snat_session_key_t key1;
229   u32 address_index = ~0;
230   u32 outside_fib_index;
231   uword * p;
232   snat_worker_key_t worker_by_out_key;
233
234   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
235   if (! p)
236     {
237       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
238       return SNAT_IN2OUT_NEXT_DROP;
239     }
240   outside_fib_index = p[0];
241
242   key1.protocol = key0->protocol;
243   user_key.addr = ip0->src_address;
244   user_key.fib_index = rx_fib_index0;
245   kv0.key = user_key.as_u64;
246   
247   /* Ever heard of the "user" = src ip4 address before? */
248   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
249     {
250       /* no, make a new one */
251       pool_get (sm->per_thread_data[thread_index].users, u);
252       memset (u, 0, sizeof (*u));
253       u->addr = ip0->src_address;
254       u->fib_index = rx_fib_index0;
255
256       pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);
257
258       u->sessions_per_user_list_head_index = per_user_list_head_elt -
259         sm->per_thread_data[thread_index].list_pool;
260
261       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
262                        u->sessions_per_user_list_head_index);
263
264       kv0.value = u - sm->per_thread_data[thread_index].users;
265
266       /* add user */
267       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
268     }
269   else
270     {
271       u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
272                              value0.value);
273     }
274
275   /* Over quota? Recycle the least recently used dynamic translation */
276   if (u->nsessions >= sm->max_translations_per_user)
277     {
278       /* Remove the oldest dynamic translation */
279       do {
280           oldest_per_user_translation_list_index =
281             clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
282                                     u->sessions_per_user_list_head_index);
283
284           ASSERT (oldest_per_user_translation_list_index != ~0);
285
286           /* add it back to the end of the LRU list */
287           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
288                               u->sessions_per_user_list_head_index,
289                               oldest_per_user_translation_list_index);
290           /* Get the list element */
291           oldest_per_user_translation_list_elt =
292             pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
293                                oldest_per_user_translation_list_index);
294
295           /* Get the session index from the list element */
296           session_index = oldest_per_user_translation_list_elt->value;
297
298           /* Get the session */
299           s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
300                                  session_index);
301       } while (snat_is_session_static (s));
302
303       if (snat_is_unk_proto_session (s))
304         {
305           clib_bihash_kv_16_8_t up_kv;
306           snat_unk_proto_ses_key_t key;
307
308           /* Remove from lookup tables */
309           key.l_addr = s->in2out.addr;
310           key.r_addr = s->ext_host_addr;
311           key.fib_index = s->in2out.fib_index;
312           key.proto = s->in2out.port;
313           up_kv.key[0] = key.as_u64[0];
314           up_kv.key[1] = key.as_u64[1];
315           if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &up_kv, 0))
316             clib_warning ("in2out key del failed");
317
318           key.l_addr = s->out2in.addr;
319           key.fib_index = s->out2in.fib_index;
320           up_kv.key[0] = key.as_u64[0];
321           up_kv.key[1] = key.as_u64[1];
322           if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &up_kv, 0))
323             clib_warning ("out2in key del failed");
324         }
325       else
326         {
327           /* Remove in2out, out2in keys */
328           kv0.key = s->in2out.as_u64;
329           if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
330               clib_warning ("in2out key delete failed");
331           kv0.key = s->out2in.as_u64;
332           if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
333               clib_warning ("out2in key delete failed");
334
335           /* log NAT event */
336           snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
337                                               s->out2in.addr.as_u32,
338                                               s->in2out.protocol,
339                                               s->in2out.port,
340                                               s->out2in.port,
341                                               s->in2out.fib_index);
342
343           snat_free_outside_address_and_port
344             (sm, &s->out2in, s->outside_address_index);
345         }
346       s->outside_address_index = ~0;
347
348       if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
349                                                &address_index))
350         {
351           ASSERT(0);
352
353           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
354           return SNAT_IN2OUT_NEXT_DROP;
355         }
356       s->outside_address_index = address_index;
357     }
358   else
359     {
360       u8 static_mapping = 1;
361
362       /* First try to match static mapping by local address and port */
363       if (snat_static_mapping_match (sm, *key0, &key1, 0, 0))
364         {
365           static_mapping = 0;
366           /* Try to create dynamic translation */
367           if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
368                                                    &address_index))
369             {
370               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
371               return SNAT_IN2OUT_NEXT_DROP;
372             }
373         }
374
375       /* Create a new session */
376       pool_get (sm->per_thread_data[thread_index].sessions, s);
377       memset (s, 0, sizeof (*s));
378       
379       s->outside_address_index = address_index;
380
381       if (static_mapping)
382         {
383           u->nstaticsessions++;
384           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
385         }
386       else
387         {
388           u->nsessions++;
389         }
390
391       /* Create list elts */
392       pool_get (sm->per_thread_data[thread_index].list_pool,
393                 per_user_translation_list_elt);
394       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
395                        per_user_translation_list_elt -
396                        sm->per_thread_data[thread_index].list_pool);
397
398       per_user_translation_list_elt->value =
399         s - sm->per_thread_data[thread_index].sessions;
400       s->per_user_index = per_user_translation_list_elt -
401                           sm->per_thread_data[thread_index].list_pool;
402       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
403
404       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
405                           s->per_user_list_head_index,
406                           per_user_translation_list_elt -
407                           sm->per_thread_data[thread_index].list_pool);
408    }
409   
410   s->in2out = *key0;
411   s->out2in = key1;
412   s->out2in.protocol = key0->protocol;
413   s->out2in.fib_index = outside_fib_index;
414   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
415   *sessionp = s;
416
417   /* Add to translation hashes */
418   kv0.key = s->in2out.as_u64;
419   kv0.value = s - sm->per_thread_data[thread_index].sessions;
420   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
421       clib_warning ("in2out key add failed");
422   
423   kv0.key = s->out2in.as_u64;
424   kv0.value = s - sm->per_thread_data[thread_index].sessions;
425   
426   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
427       clib_warning ("out2in key add failed");
428
429   /* Add to translated packets worker lookup */
430   worker_by_out_key.addr = s->out2in.addr;
431   worker_by_out_key.port = s->out2in.port;
432   worker_by_out_key.fib_index = s->out2in.fib_index;
433   kv0.key = worker_by_out_key.as_u64;
434   kv0.value = thread_index;
435   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
436
437   /* log NAT event */
438   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
439                                       s->out2in.addr.as_u32,
440                                       s->in2out.protocol,
441                                       s->in2out.port,
442                                       s->out2in.port,
443                                       s->in2out.fib_index);
444   return next0;
445 }
446
447 static_always_inline
448 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
449                                  snat_session_key_t *p_key0)
450 {
451   icmp46_header_t *icmp0;
452   snat_session_key_t key0;
453   icmp_echo_header_t *echo0, *inner_echo0 = 0;
454   ip4_header_t *inner_ip0 = 0;
455   void *l4_header = 0;
456   icmp46_header_t *inner_icmp0;
457
458   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
459   echo0 = (icmp_echo_header_t *)(icmp0+1);
460
461   if (!icmp_is_error_message (icmp0))
462     {
463       key0.protocol = SNAT_PROTOCOL_ICMP;
464       key0.addr = ip0->src_address;
465       key0.port = echo0->identifier;
466     }
467   else
468     {
469       inner_ip0 = (ip4_header_t *)(echo0+1);
470       l4_header = ip4_next_header (inner_ip0);
471       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
472       key0.addr = inner_ip0->dst_address;
473       switch (key0.protocol)
474         {
475         case SNAT_PROTOCOL_ICMP:
476           inner_icmp0 = (icmp46_header_t*)l4_header;
477           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
478           key0.port = inner_echo0->identifier;
479           break;
480         case SNAT_PROTOCOL_UDP:
481         case SNAT_PROTOCOL_TCP:
482           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
483           break;
484         default:
485           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
486         }
487     }
488   *p_key0 = key0;
489   return -1; /* success */
490 }
491
492 /**
493  * Get address and port values to be used for packet SNAT translation
494  * and create session if needed
495  *
496  * @param[in,out] sm             SNAT main
497  * @param[in,out] node           SNAT node runtime
498  * @param[in] thread_index       thread index
499  * @param[in,out] b0             buffer containing packet to be translated
500  * @param[out] p_proto           protocol used for matching
501  * @param[out] p_value           address and port after NAT translation
502  * @param[out] p_dont_translate  if packet should not be translated
503  * @param d                      optional parameter
504  * @param e                      optional parameter
505  */
506 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
507                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
508                            snat_session_key_t *p_value,
509                            u8 *p_dont_translate, void *d, void *e)
510 {
511   ip4_header_t *ip0;
512   icmp46_header_t *icmp0;
513   u32 sw_if_index0;
514   u32 rx_fib_index0;
515   snat_session_key_t key0;
516   snat_session_t *s0 = 0;
517   u8 dont_translate = 0;
518   clib_bihash_kv_8_8_t kv0, value0;
519   u32 next0 = ~0;
520   int err;
521   u32 iph_offset0 = 0;
522
523   if (PREDICT_FALSE(vnet_buffer(b0)->sw_if_index[VLIB_TX] != ~0))
524     {
525       iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
526     }
527   ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
528   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
529   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
530   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
531
532   err = icmp_get_key (ip0, &key0);
533   if (err != -1)
534     {
535       b0->error = node->errors[err];
536       next0 = SNAT_IN2OUT_NEXT_DROP;
537       goto out;
538     }
539   key0.fib_index = rx_fib_index0;
540
541   kv0.key = key0.as_u64;
542
543   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
544     {
545       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
546           IP_PROTOCOL_ICMP, rx_fib_index0) &&
547           vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0))
548         {
549           dont_translate = 1;
550           goto out;
551         }
552
553       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
554         {
555           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
556           next0 = SNAT_IN2OUT_NEXT_DROP;
557           goto out;
558         }
559
560       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
561                          &s0, node, next0, thread_index);
562
563       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
564         goto out;
565     }
566   else
567     {
568       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
569                         icmp0->type != ICMP4_echo_reply &&
570                         !icmp_is_error_message (icmp0)))
571         {
572           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
573           next0 = SNAT_IN2OUT_NEXT_DROP;
574           goto out;
575         }
576
577       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
578                               value0.value);
579     }
580
581 out:
582   *p_proto = key0.protocol;
583   if (s0)
584     *p_value = s0->out2in;
585   *p_dont_translate = dont_translate;
586   if (d)
587     *(snat_session_t**)d = s0;
588   return next0;
589 }
590
591 /**
592  * Get address and port values to be used for packet SNAT translation
593  *
594  * @param[in] sm                 SNAT main
595  * @param[in,out] node           SNAT node runtime
596  * @param[in] thread_index       thread index
597  * @param[in,out] b0             buffer containing packet to be translated
598  * @param[out] p_proto           protocol used for matching
599  * @param[out] p_value           address and port after NAT translation
600  * @param[out] p_dont_translate  if packet should not be translated
601  * @param d                      optional parameter
602  * @param e                      optional parameter
603  */
604 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
605                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
606                            snat_session_key_t *p_value,
607                            u8 *p_dont_translate, void *d, void *e)
608 {
609   ip4_header_t *ip0;
610   icmp46_header_t *icmp0;
611   u32 sw_if_index0;
612   u32 rx_fib_index0;
613   snat_session_key_t key0;
614   snat_session_key_t sm0;
615   u8 dont_translate = 0;
616   u8 is_addr_only;
617   u32 next0 = ~0;
618   int err;
619
620   ip0 = vlib_buffer_get_current (b0);
621   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
622   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
623   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
624
625   err = icmp_get_key (ip0, &key0);
626   if (err != -1)
627     {
628       b0->error = node->errors[err];
629       next0 = SNAT_IN2OUT_NEXT_DROP;
630       goto out2;
631     }
632   key0.fib_index = rx_fib_index0;
633
634   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only))
635     {
636       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
637           IP_PROTOCOL_ICMP, rx_fib_index0)))
638         {
639           dont_translate = 1;
640           goto out;
641         }
642
643       if (icmp_is_error_message (icmp0))
644         {
645           next0 = SNAT_IN2OUT_NEXT_DROP;
646           goto out;
647         }
648
649       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
650       next0 = SNAT_IN2OUT_NEXT_DROP;
651       goto out;
652     }
653
654   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
655                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
656                     !icmp_is_error_message (icmp0)))
657     {
658       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
659       next0 = SNAT_IN2OUT_NEXT_DROP;
660       goto out;
661     }
662
663 out:
664   *p_value = sm0;
665 out2:
666   *p_proto = key0.protocol;
667   *p_dont_translate = dont_translate;
668   return next0;
669 }
670
671 static inline u32 icmp_in2out (snat_main_t *sm,
672                                vlib_buffer_t * b0,
673                                ip4_header_t * ip0,
674                                icmp46_header_t * icmp0,
675                                u32 sw_if_index0,
676                                u32 rx_fib_index0,
677                                vlib_node_runtime_t * node,
678                                u32 next0,
679                                u32 thread_index,
680                                void *d,
681                                void *e)
682 {
683   snat_session_key_t sm0;
684   u8 protocol;
685   icmp_echo_header_t *echo0, *inner_echo0 = 0;
686   ip4_header_t *inner_ip0;
687   void *l4_header = 0;
688   icmp46_header_t *inner_icmp0;
689   u8 dont_translate;
690   u32 new_addr0, old_addr0;
691   u16 old_id0, new_id0;
692   ip_csum_t sum0;
693   u16 checksum0;
694   u32 next0_tmp;
695
696   echo0 = (icmp_echo_header_t *)(icmp0+1);
697
698   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0,
699                                        &protocol, &sm0, &dont_translate, d, e);
700   if (next0_tmp != ~0)
701     next0 = next0_tmp;
702   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
703     goto out;
704
705   sum0 = ip_incremental_checksum (0, icmp0,
706                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
707   checksum0 = ~ip_csum_fold (sum0);
708   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
709     {
710       next0 = SNAT_IN2OUT_NEXT_DROP;
711       goto out;
712     }
713
714   old_addr0 = ip0->src_address.as_u32;
715   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
716   if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
717     vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
718
719   sum0 = ip0->checksum;
720   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
721                          src_address /* changed member */);
722   ip0->checksum = ip_csum_fold (sum0);
723   
724   if (!icmp_is_error_message (icmp0))
725     {
726       new_id0 = sm0.port;
727       if (PREDICT_FALSE(new_id0 != echo0->identifier))
728         {
729           old_id0 = echo0->identifier;
730           new_id0 = sm0.port;
731           echo0->identifier = new_id0;
732
733           sum0 = icmp0->checksum;
734           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
735                                  identifier);
736           icmp0->checksum = ip_csum_fold (sum0);
737         }
738     }
739   else
740     {
741       inner_ip0 = (ip4_header_t *)(echo0+1);
742       l4_header = ip4_next_header (inner_ip0);
743
744       if (!ip4_header_checksum_is_valid (inner_ip0))
745         {
746           next0 = SNAT_IN2OUT_NEXT_DROP;
747           goto out;
748         }
749
750       old_addr0 = inner_ip0->dst_address.as_u32;
751       inner_ip0->dst_address = sm0.addr;
752       new_addr0 = inner_ip0->dst_address.as_u32;
753
754       sum0 = icmp0->checksum;
755       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
756                              dst_address /* changed member */);
757       icmp0->checksum = ip_csum_fold (sum0);
758
759       switch (protocol)
760         {
761           case SNAT_PROTOCOL_ICMP:
762             inner_icmp0 = (icmp46_header_t*)l4_header;
763             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
764
765             old_id0 = inner_echo0->identifier;
766             new_id0 = sm0.port;
767             inner_echo0->identifier = new_id0;
768
769             sum0 = icmp0->checksum;
770             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
771                                    identifier);
772             icmp0->checksum = ip_csum_fold (sum0);
773             break;
774           case SNAT_PROTOCOL_UDP:
775           case SNAT_PROTOCOL_TCP:
776             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
777             new_id0 = sm0.port;
778             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
779
780             sum0 = icmp0->checksum;
781             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
782                                    dst_port);
783             icmp0->checksum = ip_csum_fold (sum0);
784             break;
785           default:
786             ASSERT(0);
787         }
788     }
789
790 out:
791   return next0;
792 }
793
794 /**
795  * @brief Hairpinning
796  *
797  * Hairpinning allows two endpoints on the internal side of the NAT to
798  * communicate even if they only use each other's external IP addresses
799  * and ports.
800  *
801  * @param sm     SNAT main.
802  * @param b0     Vlib buffer.
803  * @param ip0    IP header.
804  * @param udp0   UDP header.
805  * @param tcp0   TCP header.
806  * @param proto0 SNAT protocol.
807  */
808 static inline void
809 snat_hairpinning (snat_main_t *sm,
810                   vlib_buffer_t * b0,
811                   ip4_header_t * ip0,
812                   udp_header_t * udp0,
813                   tcp_header_t * tcp0,
814                   u32 proto0)
815 {
816   snat_session_key_t key0, sm0;
817   snat_worker_key_t k0;
818   snat_session_t * s0;
819   clib_bihash_kv_8_8_t kv0, value0;
820   ip_csum_t sum0;
821   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
822   u16 new_dst_port0, old_dst_port0;
823
824   key0.addr = ip0->dst_address;
825   key0.port = udp0->dst_port;
826   key0.protocol = proto0;
827   key0.fib_index = sm->outside_fib_index;
828   kv0.key = key0.as_u64;
829
830   /* Check if destination is in active sessions */
831   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
832     {
833       /* or static mappings */
834       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
835         {
836           new_dst_addr0 = sm0.addr.as_u32;
837           new_dst_port0 = sm0.port;
838           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
839         }
840     }
841   else
842     {
843       si = value0.value;
844       if (sm->num_workers > 1)
845         {
846           k0.addr = ip0->dst_address;
847           k0.port = udp0->dst_port;
848           k0.fib_index = sm->outside_fib_index;
849           kv0.key = k0.as_u64;
850           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
851             ASSERT(0);
852           else
853             ti = value0.value;
854         }
855       else
856         ti = sm->num_workers;
857
858       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
859       new_dst_addr0 = s0->in2out.addr.as_u32;
860       new_dst_port0 = s0->in2out.port;
861       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
862     }
863
864   /* Destination is behind the same NAT, use internal address and port */
865   if (new_dst_addr0)
866     {
867       old_dst_addr0 = ip0->dst_address.as_u32;
868       ip0->dst_address.as_u32 = new_dst_addr0;
869       sum0 = ip0->checksum;
870       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
871                              ip4_header_t, dst_address);
872       ip0->checksum = ip_csum_fold (sum0);
873
874       old_dst_port0 = tcp0->dst;
875       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
876         {
877           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
878             {
879               tcp0->dst = new_dst_port0;
880               sum0 = tcp0->checksum;
881               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
882                                      ip4_header_t, dst_address);
883               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
884                                      ip4_header_t /* cheat */, length);
885               tcp0->checksum = ip_csum_fold(sum0);
886             }
887           else
888             {
889               udp0->dst_port = new_dst_port0;
890               udp0->checksum = 0;
891             }
892         }
893       else
894         {
895           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
896             {
897               sum0 = tcp0->checksum;
898               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
899                                      ip4_header_t, dst_address);
900               tcp0->checksum = ip_csum_fold(sum0);
901             }
902         }
903     }
904 }
905
906 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
907                                          vlib_buffer_t * b0,
908                                          ip4_header_t * ip0,
909                                          icmp46_header_t * icmp0,
910                                          u32 sw_if_index0,
911                                          u32 rx_fib_index0,
912                                          vlib_node_runtime_t * node,
913                                          u32 next0,
914                                          f64 now,
915                                          u32 thread_index,
916                                          snat_session_t ** p_s0)
917 {
918   snat_session_key_t key0, sm0;
919   clib_bihash_kv_8_8_t kv0, value0;
920   snat_worker_key_t k0;
921   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
922   ip_csum_t sum0;
923
924   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
925                       next0, thread_index, p_s0, 0);
926   snat_session_t * s0 = *p_s0;
927   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
928     {
929       /* Hairpinning */
930       if (!icmp_is_error_message (icmp0))
931         {
932           icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
933           u16 icmp_id0 = echo0->identifier;
934           key0.addr = ip0->dst_address;
935           key0.port = icmp_id0;
936           key0.protocol = SNAT_PROTOCOL_ICMP;
937           key0.fib_index = sm->outside_fib_index;
938           kv0.key = key0.as_u64;
939
940           /* Check if destination is in active sessions */
941           if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
942             {
943               /* or static mappings */
944               if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
945                 {
946                   new_dst_addr0 = sm0.addr.as_u32;
947                   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
948                 }
949             }
950           else
951             {
952               si = value0.value;
953               if (sm->num_workers > 1)
954                 {
955                   k0.addr = ip0->dst_address;
956                   k0.port = icmp_id0;
957                   k0.fib_index = sm->outside_fib_index;
958                   kv0.key = k0.as_u64;
959                   if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
960                     ASSERT(0);
961                   else
962                     ti = value0.value;
963                 }
964               else
965                 ti = sm->num_workers;
966
967               s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
968               new_dst_addr0 = s0->in2out.addr.as_u32;
969               vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
970               echo0->identifier = s0->in2out.port;
971               sum0 = icmp0->checksum;
972               sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
973                                      icmp_echo_header_t, identifier);
974               icmp0->checksum = ip_csum_fold (sum0);
975             }
976
977           /* Destination is behind the same NAT, use internal address and port */
978           if (new_dst_addr0)
979             {
980               old_dst_addr0 = ip0->dst_address.as_u32;
981               ip0->dst_address.as_u32 = new_dst_addr0;
982               sum0 = ip0->checksum;
983               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
984                                      ip4_header_t, dst_address);
985               ip0->checksum = ip_csum_fold (sum0);
986             }
987         }
988
989       /* Accounting */
990       s0->last_heard = now;
991       s0->total_pkts++;
992       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
993       /* Per-user LRU list maintenance for dynamic translations */
994       if (!snat_is_session_static (s0))
995         {
996           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
997                              s0->per_user_index);
998           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
999                               s0->per_user_list_head_index,
1000                               s0->per_user_index);
1001         }
1002     }
1003   return next0;
1004 }
1005
1006 static void
1007 snat_in2out_unknown_proto (snat_main_t *sm,
1008                            vlib_buffer_t * b,
1009                            ip4_header_t * ip,
1010                            u32 rx_fib_index,
1011                            u32 thread_index,
1012                            f64 now,
1013                            vlib_main_t * vm)
1014 {
1015   clib_bihash_kv_8_8_t kv, value;
1016   clib_bihash_kv_16_8_t s_kv, s_value;
1017   snat_static_mapping_t *m;
1018   snat_session_key_t m_key;
1019   u32 old_addr, new_addr = 0;
1020   ip_csum_t sum;
1021   snat_user_key_t u_key;
1022   snat_user_t *u;
1023   dlist_elt_t *head, *elt, *oldest;
1024   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1025   u32 elt_index, head_index, ses_index, oldest_index;
1026   snat_session_t * s;
1027   snat_unk_proto_ses_key_t key;
1028   u32 address_index = ~0;
1029   int i;
1030   u8 is_sm = 0;
1031
1032   old_addr = ip->src_address.as_u32;
1033
1034   key.l_addr = ip->src_address;
1035   key.r_addr = ip->dst_address;
1036   key.fib_index = rx_fib_index;
1037   key.proto = ip->protocol;
1038   key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0;
1039   s_kv.key[0] = key.as_u64[0];
1040   s_kv.key[1] = key.as_u64[1];
1041
1042   if (!clib_bihash_search_16_8 (&sm->in2out_unk_proto, &s_kv, &s_value))
1043     {
1044       s = pool_elt_at_index (tsm->sessions, s_value.value);
1045       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1046     }
1047   else
1048     {
1049       u_key.addr = ip->src_address;
1050       u_key.fib_index = rx_fib_index;
1051       kv.key = u_key.as_u64;
1052
1053       /* Ever heard of the "user" = src ip4 address before? */
1054       if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
1055         {
1056           /* no, make a new one */
1057           pool_get (tsm->users, u);
1058           memset (u, 0, sizeof (*u));
1059           u->addr = ip->src_address;
1060           u->fib_index = rx_fib_index;
1061
1062           pool_get (tsm->list_pool, head);
1063           u->sessions_per_user_list_head_index = head - tsm->list_pool;
1064
1065           clib_dlist_init (tsm->list_pool,
1066                            u->sessions_per_user_list_head_index);
1067
1068           kv.value = u - tsm->users;
1069
1070           /* add user */
1071           clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1);
1072         }
1073       else
1074         {
1075           u = pool_elt_at_index (tsm->users, value.value);
1076         }
1077
1078       m_key.addr = ip->src_address;
1079       m_key.port = 0;
1080       m_key.protocol = 0;
1081       m_key.fib_index = rx_fib_index;
1082       kv.key = m_key.as_u64;
1083
1084       /* Try to find static mapping first */
1085       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
1086         {
1087           m = pool_elt_at_index (sm->static_mappings, value.value);
1088           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
1089           is_sm = 1;
1090           goto create_ses;
1091         }
1092       /* Fallback to 3-tuple key */
1093       else
1094         {
1095           /* Choose same out address as for TCP/UDP session to same destination */
1096           if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
1097             {
1098               head_index = u->sessions_per_user_list_head_index;
1099               head = pool_elt_at_index (tsm->list_pool, head_index);
1100               elt_index = head->next;
1101               elt = pool_elt_at_index (tsm->list_pool, elt_index);
1102               ses_index = elt->value;
1103               while (ses_index != ~0)
1104                 {
1105                   s =  pool_elt_at_index (tsm->sessions, ses_index);
1106                   elt_index = elt->next;
1107                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1108                   ses_index = elt->value;
1109
1110                   if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
1111                     {
1112                       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1113                       address_index = s->outside_address_index;
1114
1115                       key.fib_index = sm->outside_fib_index;
1116                       key.l_addr.as_u32 = new_addr;
1117                       s_kv.key[0] = key.as_u64[0];
1118                       s_kv.key[1] = key.as_u64[1];
1119                       if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value))
1120                         break;
1121
1122                       goto create_ses;
1123                     }
1124                 }
1125             }
1126           key.fib_index = sm->outside_fib_index;
1127           for (i = 0; i < vec_len (sm->addresses); i++)
1128             {
1129               key.l_addr.as_u32 = sm->addresses[i].addr.as_u32;
1130               s_kv.key[0] = key.as_u64[0];
1131               s_kv.key[1] = key.as_u64[1];
1132               if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value))
1133                 {
1134                   new_addr = ip->src_address.as_u32 = key.l_addr.as_u32;
1135                   address_index = i;
1136                   goto create_ses;
1137                 }
1138             }
1139           return;
1140         }
1141
1142 create_ses:
1143       /* Over quota? Recycle the least recently used dynamic translation */
1144       if (u->nsessions >= sm->max_translations_per_user && !is_sm)
1145         {
1146           /* Remove the oldest dynamic translation */
1147           do {
1148               oldest_index = clib_dlist_remove_head (
1149                 tsm->list_pool, u->sessions_per_user_list_head_index);
1150
1151               ASSERT (oldest_index != ~0);
1152
1153               /* add it back to the end of the LRU list */
1154               clib_dlist_addtail (tsm->list_pool,
1155                                   u->sessions_per_user_list_head_index,
1156                                   oldest_index);
1157               /* Get the list element */
1158               oldest = pool_elt_at_index (tsm->list_pool, oldest_index);
1159
1160               /* Get the session index from the list element */
1161               ses_index = oldest->value;
1162
1163               /* Get the session */
1164               s = pool_elt_at_index (tsm->sessions, ses_index);
1165           } while (snat_is_session_static (s));
1166
1167           if (snat_is_unk_proto_session (s))
1168             {
1169               /* Remove from lookup tables */
1170               key.l_addr = s->in2out.addr;
1171               key.r_addr = s->ext_host_addr;
1172               key.fib_index = s->in2out.fib_index;
1173               key.proto = s->in2out.port;
1174               s_kv.key[0] = key.as_u64[0];
1175               s_kv.key[1] = key.as_u64[1];
1176               if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 0))
1177                 clib_warning ("in2out key del failed");
1178
1179               key.l_addr = s->out2in.addr;
1180               key.fib_index = s->out2in.fib_index;
1181               s_kv.key[0] = key.as_u64[0];
1182               s_kv.key[1] = key.as_u64[1];
1183               if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 0))
1184                 clib_warning ("out2in key del failed");
1185             }
1186           else
1187             {
1188               /* log NAT event */
1189               snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
1190                                                   s->out2in.addr.as_u32,
1191                                                   s->in2out.protocol,
1192                                                   s->in2out.port,
1193                                                   s->out2in.port,
1194                                                   s->in2out.fib_index);
1195
1196               snat_free_outside_address_and_port (sm, &s->out2in,
1197                                                   s->outside_address_index);
1198
1199               /* Remove in2out, out2in keys */
1200               kv.key = s->in2out.as_u64;
1201               if (clib_bihash_add_del_8_8 (&sm->in2out, &kv, 0))
1202                 clib_warning ("in2out key del failed");
1203               kv.key = s->out2in.as_u64;
1204               if (clib_bihash_add_del_8_8 (&sm->out2in, &kv, 0))
1205                 clib_warning ("out2in key del failed");
1206             }
1207         }
1208       else
1209         {
1210           /* Create a new session */
1211           pool_get (tsm->sessions, s);
1212           memset (s, 0, sizeof (*s));
1213
1214           /* Create list elts */
1215           pool_get (tsm->list_pool, elt);
1216           clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
1217           elt->value = s - tsm->sessions;
1218           s->per_user_index = elt - tsm->list_pool;
1219           s->per_user_list_head_index = u->sessions_per_user_list_head_index;
1220           clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1221                               s->per_user_index);
1222         }
1223
1224       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1225       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
1226       s->outside_address_index = address_index;
1227       s->out2in.addr.as_u32 = new_addr;
1228       s->out2in.fib_index = sm->outside_fib_index;
1229       s->in2out.addr.as_u32 = old_addr;
1230       s->in2out.fib_index = rx_fib_index;
1231       s->in2out.port = s->out2in.port = ip->protocol;
1232       if (is_sm)
1233         {
1234           u->nstaticsessions++;
1235           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1236         }
1237       else
1238         {
1239           u->nsessions++;
1240         }
1241
1242       /* Add to lookup tables */
1243       key.l_addr.as_u32 = old_addr;
1244       key.r_addr = ip->dst_address;
1245       key.proto = ip->protocol;
1246       key.fib_index = rx_fib_index;
1247       s_kv.key[0] = key.as_u64[0];
1248       s_kv.key[1] = key.as_u64[1];
1249       s_kv.value = s - tsm->sessions;
1250       if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 1))
1251         clib_warning ("in2out key add failed");
1252
1253       key.l_addr.as_u32 = new_addr;
1254       key.fib_index = sm->outside_fib_index;
1255       s_kv.key[0] = key.as_u64[0];
1256       s_kv.key[1] = key.as_u64[1];
1257       if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 1))
1258         clib_warning ("out2in key add failed");
1259   }
1260
1261   /* Update IP checksum */
1262   sum = ip->checksum;
1263   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1264   ip->checksum = ip_csum_fold (sum);
1265
1266   /* Accounting */
1267   s->last_heard = now;
1268   s->total_pkts++;
1269   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1270   /* Per-user LRU list maintenance */
1271   clib_dlist_remove (tsm->list_pool, s->per_user_index);
1272   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1273                       s->per_user_index);
1274
1275   /* Hairpinning */
1276   old_addr = ip->dst_address.as_u32;
1277   key.l_addr.as_u32 = ip->dst_address.as_u32;
1278   key.r_addr.as_u32 = new_addr;
1279   key.fib_index = sm->outside_fib_index;
1280   s_kv.key[0] = key.as_u64[0];
1281   s_kv.key[1] = key.as_u64[1];
1282   if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value))
1283     {
1284       m_key.addr = ip->dst_address;
1285       m_key.fib_index = sm->outside_fib_index;
1286       kv.key = m_key.as_u64;
1287       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1288         {
1289           if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1290             vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1291           return;
1292         }
1293
1294       m = pool_elt_at_index (sm->static_mappings, value.value);
1295       vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
1296       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
1297     }
1298   else
1299     {
1300       s = pool_elt_at_index (tsm->sessions, s_value.value);
1301       vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
1302       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
1303     }
1304   sum = ip->checksum;
1305   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
1306   ip->checksum = ip_csum_fold (sum);
1307 }
1308
1309 static inline uword
1310 snat_in2out_node_fn_inline (vlib_main_t * vm,
1311                             vlib_node_runtime_t * node,
1312                             vlib_frame_t * frame, int is_slow_path,
1313                             int is_output_feature)
1314 {
1315   u32 n_left_from, * from, * to_next;
1316   snat_in2out_next_t next_index;
1317   u32 pkts_processed = 0;
1318   snat_main_t * sm = &snat_main;
1319   f64 now = vlib_time_now (vm);
1320   u32 stats_node_index;
1321   u32 thread_index = vlib_get_thread_index ();
1322
1323   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
1324     snat_in2out_node.index;
1325
1326   from = vlib_frame_vector_args (frame);
1327   n_left_from = frame->n_vectors;
1328   next_index = node->cached_next_index;
1329
1330   while (n_left_from > 0)
1331     {
1332       u32 n_left_to_next;
1333
1334       vlib_get_next_frame (vm, node, next_index,
1335                            to_next, n_left_to_next);
1336
1337       while (n_left_from >= 4 && n_left_to_next >= 2)
1338         {
1339           u32 bi0, bi1;
1340           vlib_buffer_t * b0, * b1;
1341           u32 next0, next1;
1342           u32 sw_if_index0, sw_if_index1;
1343           ip4_header_t * ip0, * ip1;
1344           ip_csum_t sum0, sum1;
1345           u32 new_addr0, old_addr0, new_addr1, old_addr1;
1346           u16 old_port0, new_port0, old_port1, new_port1;
1347           udp_header_t * udp0, * udp1;
1348           tcp_header_t * tcp0, * tcp1;
1349           icmp46_header_t * icmp0, * icmp1;
1350           snat_session_key_t key0, key1;
1351           u32 rx_fib_index0, rx_fib_index1;
1352           u32 proto0, proto1;
1353           snat_session_t * s0 = 0, * s1 = 0;
1354           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1355           u32 iph_offset0 = 0, iph_offset1 = 0;
1356           
1357           /* Prefetch next iteration. */
1358           {
1359             vlib_buffer_t * p2, * p3;
1360             
1361             p2 = vlib_get_buffer (vm, from[2]);
1362             p3 = vlib_get_buffer (vm, from[3]);
1363             
1364             vlib_prefetch_buffer_header (p2, LOAD);
1365             vlib_prefetch_buffer_header (p3, LOAD);
1366
1367             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1368             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1369           }
1370
1371           /* speculatively enqueue b0 and b1 to the current next frame */
1372           to_next[0] = bi0 = from[0];
1373           to_next[1] = bi1 = from[1];
1374           from += 2;
1375           to_next += 2;
1376           n_left_from -= 2;
1377           n_left_to_next -= 2;
1378           
1379           b0 = vlib_get_buffer (vm, bi0);
1380           b1 = vlib_get_buffer (vm, bi1);
1381
1382           if (is_output_feature)
1383             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1384
1385           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1386                  iph_offset0);
1387
1388           udp0 = ip4_next_header (ip0);
1389           tcp0 = (tcp_header_t *) udp0;
1390           icmp0 = (icmp46_header_t *) udp0;
1391
1392           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1393           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1394                                    sw_if_index0);
1395
1396           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1397
1398           if (PREDICT_FALSE(ip0->ttl == 1))
1399             {
1400               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1401               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1402                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1403                                            0);
1404               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1405               goto trace00;
1406             }
1407
1408           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1409
1410           /* Next configured feature, probably ip4-lookup */
1411           if (is_slow_path)
1412             {
1413               if (PREDICT_FALSE (proto0 == ~0))
1414                 {
1415                   snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1416                                              thread_index, now, vm);
1417                   goto trace00;
1418                 }
1419
1420               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1421                 {
1422                   next0 = icmp_in2out_slow_path 
1423                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
1424                      node, next0, now, thread_index, &s0);
1425                   goto trace00;
1426                 }
1427             }
1428           else
1429             {
1430               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1431                 {
1432                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1433                   goto trace00;
1434                 }
1435             }
1436
1437           key0.addr = ip0->src_address;
1438           key0.port = udp0->src_port;
1439           key0.protocol = proto0;
1440           key0.fib_index = rx_fib_index0;
1441           
1442           kv0.key = key0.as_u64;
1443
1444           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
1445             {
1446               if (is_slow_path)
1447                 {
1448                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1449                       ip0, proto0, rx_fib_index0)) && !is_output_feature)
1450                     goto trace00;
1451
1452                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1453                                      &s0, node, next0, thread_index);
1454                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1455                     goto trace00;
1456                 }
1457               else
1458                 {
1459                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1460                   goto trace00;
1461                 }
1462             }
1463           else
1464             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1465                                     value0.value);
1466
1467           old_addr0 = ip0->src_address.as_u32;
1468           ip0->src_address = s0->out2in.addr;
1469           new_addr0 = ip0->src_address.as_u32;
1470           if (!is_output_feature)
1471             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1472
1473           sum0 = ip0->checksum;
1474           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1475                                  ip4_header_t,
1476                                  src_address /* changed member */);
1477           ip0->checksum = ip_csum_fold (sum0);
1478
1479           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1480             {
1481               old_port0 = tcp0->src_port;
1482               tcp0->src_port = s0->out2in.port;
1483               new_port0 = tcp0->src_port;
1484
1485               sum0 = tcp0->checksum;
1486               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1487                                      ip4_header_t,
1488                                      dst_address /* changed member */);
1489               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1490                                      ip4_header_t /* cheat */,
1491                                      length /* changed member */);
1492               tcp0->checksum = ip_csum_fold(sum0);
1493             }
1494           else
1495             {
1496               old_port0 = udp0->src_port;
1497               udp0->src_port = s0->out2in.port;
1498               udp0->checksum = 0;
1499             }
1500
1501           /* Hairpinning */
1502           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1503
1504           /* Accounting */
1505           s0->last_heard = now;
1506           s0->total_pkts++;
1507           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1508           /* Per-user LRU list maintenance for dynamic translation */
1509           if (!snat_is_session_static (s0))
1510             {
1511               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1512                                  s0->per_user_index);
1513               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1514                                   s0->per_user_list_head_index,
1515                                   s0->per_user_index);
1516             }
1517         trace00:
1518
1519           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1520                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1521             {
1522               snat_in2out_trace_t *t = 
1523                  vlib_add_trace (vm, node, b0, sizeof (*t));
1524               t->is_slow_path = is_slow_path;
1525               t->sw_if_index = sw_if_index0;
1526               t->next_index = next0;
1527                   t->session_index = ~0;
1528               if (s0)
1529                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1530             }
1531
1532           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1533
1534           if (is_output_feature)
1535             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1536
1537           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1538                  iph_offset1);
1539
1540           udp1 = ip4_next_header (ip1);
1541           tcp1 = (tcp_header_t *) udp1;
1542           icmp1 = (icmp46_header_t *) udp1;
1543
1544           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1545           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1546                                    sw_if_index1);
1547
1548           if (PREDICT_FALSE(ip1->ttl == 1))
1549             {
1550               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1551               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1552                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1553                                            0);
1554               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1555               goto trace01;
1556             }
1557
1558           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1559
1560           /* Next configured feature, probably ip4-lookup */
1561           if (is_slow_path)
1562             {
1563               if (PREDICT_FALSE (proto1 == ~0))
1564                 {
1565                   snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1,
1566                                              thread_index, now, vm);
1567                   goto trace01;
1568                 }
1569
1570               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1571                 {
1572                   next1 = icmp_in2out_slow_path 
1573                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1574                      next1, now, thread_index, &s1);
1575                   goto trace01;
1576                 }
1577             }
1578           else
1579             {
1580               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1581                 {
1582                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1583                   goto trace01;
1584                 }
1585             }
1586
1587           key1.addr = ip1->src_address;
1588           key1.port = udp1->src_port;
1589           key1.protocol = proto1;
1590           key1.fib_index = rx_fib_index1;
1591           
1592           kv1.key = key1.as_u64;
1593
1594             if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
1595             {
1596               if (is_slow_path)
1597                 {
1598                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
1599                       ip1, proto1, rx_fib_index1)) && !is_output_feature)
1600                     goto trace01;
1601
1602                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1603                                      &s1, node, next1, thread_index);
1604                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1605                     goto trace01;
1606                 }
1607               else
1608                 {
1609                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1610                   goto trace01;
1611                 }
1612             }
1613           else
1614             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1615                                     value1.value);
1616
1617           old_addr1 = ip1->src_address.as_u32;
1618           ip1->src_address = s1->out2in.addr;
1619           new_addr1 = ip1->src_address.as_u32;
1620           if (!is_output_feature)
1621             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1622
1623           sum1 = ip1->checksum;
1624           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1625                                  ip4_header_t,
1626                                  src_address /* changed member */);
1627           ip1->checksum = ip_csum_fold (sum1);
1628
1629           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1630             {
1631               old_port1 = tcp1->src_port;
1632               tcp1->src_port = s1->out2in.port;
1633               new_port1 = tcp1->src_port;
1634
1635               sum1 = tcp1->checksum;
1636               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1637                                      ip4_header_t,
1638                                      dst_address /* changed member */);
1639               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1640                                      ip4_header_t /* cheat */,
1641                                      length /* changed member */);
1642               tcp1->checksum = ip_csum_fold(sum1);
1643             }
1644           else
1645             {
1646               old_port1 = udp1->src_port;
1647               udp1->src_port = s1->out2in.port;
1648               udp1->checksum = 0;
1649             }
1650
1651           /* Hairpinning */
1652           snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
1653
1654           /* Accounting */
1655           s1->last_heard = now;
1656           s1->total_pkts++;
1657           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1658           /* Per-user LRU list maintenance for dynamic translation */
1659           if (!snat_is_session_static (s1))
1660             {
1661               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1662                                  s1->per_user_index);
1663               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1664                                   s1->per_user_list_head_index,
1665                                   s1->per_user_index);
1666             }
1667         trace01:
1668
1669           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1670                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
1671             {
1672               snat_in2out_trace_t *t = 
1673                  vlib_add_trace (vm, node, b1, sizeof (*t));
1674               t->sw_if_index = sw_if_index1;
1675               t->next_index = next1;
1676               t->session_index = ~0;
1677               if (s1)
1678                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1679             }
1680
1681           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1682
1683           /* verify speculative enqueues, maybe switch current next frame */
1684           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1685                                            to_next, n_left_to_next,
1686                                            bi0, bi1, next0, next1);
1687         }
1688
1689       while (n_left_from > 0 && n_left_to_next > 0)
1690         {
1691           u32 bi0;
1692           vlib_buffer_t * b0;
1693           u32 next0;
1694           u32 sw_if_index0;
1695           ip4_header_t * ip0;
1696           ip_csum_t sum0;
1697           u32 new_addr0, old_addr0;
1698           u16 old_port0, new_port0;
1699           udp_header_t * udp0;
1700           tcp_header_t * tcp0;
1701           icmp46_header_t * icmp0;
1702           snat_session_key_t key0;
1703           u32 rx_fib_index0;
1704           u32 proto0;
1705           snat_session_t * s0 = 0;
1706           clib_bihash_kv_8_8_t kv0, value0;
1707           u32 iph_offset0 = 0;
1708
1709           /* speculatively enqueue b0 to the current next frame */
1710           bi0 = from[0];
1711           to_next[0] = bi0;
1712           from += 1;
1713           to_next += 1;
1714           n_left_from -= 1;
1715           n_left_to_next -= 1;
1716
1717           b0 = vlib_get_buffer (vm, bi0);
1718           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1719
1720           if (is_output_feature)
1721             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1722
1723           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1724                  iph_offset0);
1725
1726           udp0 = ip4_next_header (ip0);
1727           tcp0 = (tcp_header_t *) udp0;
1728           icmp0 = (icmp46_header_t *) udp0;
1729
1730           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1731           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1732                                    sw_if_index0);
1733
1734           if (PREDICT_FALSE(ip0->ttl == 1))
1735             {
1736               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1737               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1738                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1739                                            0);
1740               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1741               goto trace0;
1742             }
1743
1744           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1745
1746           /* Next configured feature, probably ip4-lookup */
1747           if (is_slow_path)
1748             {
1749               if (PREDICT_FALSE (proto0 == ~0))
1750                 {
1751                   snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1752                                              thread_index, now, vm);
1753                   goto trace0;
1754                 }
1755
1756               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1757                 {
1758                   next0 = icmp_in2out_slow_path 
1759                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1760                      next0, now, thread_index, &s0);
1761                   goto trace0;
1762                 }
1763             }
1764           else
1765             {
1766               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1767                 {
1768                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1769                   goto trace0;
1770                 }
1771             }
1772
1773           key0.addr = ip0->src_address;
1774           key0.port = udp0->src_port;
1775           key0.protocol = proto0;
1776           key0.fib_index = rx_fib_index0;
1777           
1778           kv0.key = key0.as_u64;
1779
1780           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
1781             {
1782               if (is_slow_path)
1783                 {
1784                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1785                       ip0, proto0, rx_fib_index0)) && !is_output_feature)
1786                     goto trace0;
1787
1788                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1789                                      &s0, node, next0, thread_index);
1790
1791                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1792                     goto trace0;
1793                 }
1794               else
1795                 {
1796                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1797                   goto trace0;
1798                 }
1799             }
1800           else
1801             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1802                                     value0.value);
1803
1804           old_addr0 = ip0->src_address.as_u32;
1805           ip0->src_address = s0->out2in.addr;
1806           new_addr0 = ip0->src_address.as_u32;
1807           if (!is_output_feature)
1808             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1809
1810           sum0 = ip0->checksum;
1811           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1812                                  ip4_header_t,
1813                                  src_address /* changed member */);
1814           ip0->checksum = ip_csum_fold (sum0);
1815
1816           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1817             {
1818               old_port0 = tcp0->src_port;
1819               tcp0->src_port = s0->out2in.port;
1820               new_port0 = tcp0->src_port;
1821
1822               sum0 = tcp0->checksum;
1823               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1824                                      ip4_header_t,
1825                                      dst_address /* changed member */);
1826               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1827                                      ip4_header_t /* cheat */,
1828                                      length /* changed member */);
1829               tcp0->checksum = ip_csum_fold(sum0);
1830             }
1831           else
1832             {
1833               old_port0 = udp0->src_port;
1834               udp0->src_port = s0->out2in.port;
1835               udp0->checksum = 0;
1836             }
1837
1838           /* Hairpinning */
1839           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1840
1841           /* Accounting */
1842           s0->last_heard = now;
1843           s0->total_pkts++;
1844           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1845           /* Per-user LRU list maintenance for dynamic translation */
1846           if (!snat_is_session_static (s0))
1847             {
1848               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1849                                  s0->per_user_index);
1850               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1851                                   s0->per_user_list_head_index,
1852                                   s0->per_user_index);
1853             }
1854
1855         trace0:
1856           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1857                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1858             {
1859               snat_in2out_trace_t *t = 
1860                  vlib_add_trace (vm, node, b0, sizeof (*t));
1861               t->is_slow_path = is_slow_path;
1862               t->sw_if_index = sw_if_index0;
1863               t->next_index = next0;
1864                   t->session_index = ~0;
1865               if (s0)
1866                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1867             }
1868
1869           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1870
1871           /* verify speculative enqueue, maybe switch current next frame */
1872           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1873                                            to_next, n_left_to_next,
1874                                            bi0, next0);
1875         }
1876
1877       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1878     }
1879
1880   vlib_node_increment_counter (vm, stats_node_index, 
1881                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, 
1882                                pkts_processed);
1883   return frame->n_vectors;
1884 }
1885
1886 static uword
1887 snat_in2out_fast_path_fn (vlib_main_t * vm,
1888                           vlib_node_runtime_t * node,
1889                           vlib_frame_t * frame)
1890 {
1891   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0);
1892 }
1893
1894 VLIB_REGISTER_NODE (snat_in2out_node) = {
1895   .function = snat_in2out_fast_path_fn,
1896   .name = "snat-in2out",
1897   .vector_size = sizeof (u32),
1898   .format_trace = format_snat_in2out_trace,
1899   .type = VLIB_NODE_TYPE_INTERNAL,
1900
1901   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1902   .error_strings = snat_in2out_error_strings,
1903
1904   .runtime_data_bytes = sizeof (snat_runtime_t),
1905
1906   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1907
1908   /* edit / add dispositions here */
1909   .next_nodes = {
1910     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1911     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1912     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1913     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1914   },
1915 };
1916
1917 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1918
1919 static uword
1920 snat_in2out_output_fast_path_fn (vlib_main_t * vm,
1921                                  vlib_node_runtime_t * node,
1922                                  vlib_frame_t * frame)
1923 {
1924   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1);
1925 }
1926
1927 VLIB_REGISTER_NODE (snat_in2out_output_node) = {
1928   .function = snat_in2out_output_fast_path_fn,
1929   .name = "snat-in2out-output",
1930   .vector_size = sizeof (u32),
1931   .format_trace = format_snat_in2out_trace,
1932   .type = VLIB_NODE_TYPE_INTERNAL,
1933
1934   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1935   .error_strings = snat_in2out_error_strings,
1936
1937   .runtime_data_bytes = sizeof (snat_runtime_t),
1938
1939   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1940
1941   /* edit / add dispositions here */
1942   .next_nodes = {
1943     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1944     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
1945     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-output-slowpath",
1946     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1947   },
1948 };
1949
1950 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
1951                               snat_in2out_output_fast_path_fn);
1952
1953 static uword
1954 snat_in2out_slow_path_fn (vlib_main_t * vm,
1955                           vlib_node_runtime_t * node,
1956                           vlib_frame_t * frame)
1957 {
1958   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0);
1959 }
1960
1961 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1962   .function = snat_in2out_slow_path_fn,
1963   .name = "snat-in2out-slowpath",
1964   .vector_size = sizeof (u32),
1965   .format_trace = format_snat_in2out_trace,
1966   .type = VLIB_NODE_TYPE_INTERNAL,
1967
1968   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1969   .error_strings = snat_in2out_error_strings,
1970
1971   .runtime_data_bytes = sizeof (snat_runtime_t),
1972
1973   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1974
1975   /* edit / add dispositions here */
1976   .next_nodes = {
1977     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1978     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1979     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1980     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1981   },
1982 };
1983
1984 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
1985                               snat_in2out_slow_path_fn);
1986
1987 static uword
1988 snat_in2out_output_slow_path_fn (vlib_main_t * vm,
1989                                  vlib_node_runtime_t * node,
1990                                  vlib_frame_t * frame)
1991 {
1992   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1);
1993 }
1994
1995 VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
1996   .function = snat_in2out_output_slow_path_fn,
1997   .name = "snat-in2out-output-slowpath",
1998   .vector_size = sizeof (u32),
1999   .format_trace = format_snat_in2out_trace,
2000   .type = VLIB_NODE_TYPE_INTERNAL,
2001
2002   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2003   .error_strings = snat_in2out_error_strings,
2004
2005   .runtime_data_bytes = sizeof (snat_runtime_t),
2006
2007   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2008
2009   /* edit / add dispositions here */
2010   .next_nodes = {
2011     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2012     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2013     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-output-slowpath",
2014     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2015   },
2016 };
2017
2018 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
2019                               snat_in2out_output_slow_path_fn);
2020
2021 /**************************/
2022 /*** deterministic mode ***/
2023 /**************************/
2024 static uword
2025 snat_det_in2out_node_fn (vlib_main_t * vm,
2026                          vlib_node_runtime_t * node,
2027                          vlib_frame_t * frame)
2028 {
2029   u32 n_left_from, * from, * to_next;
2030   snat_in2out_next_t next_index;
2031   u32 pkts_processed = 0;
2032   snat_main_t * sm = &snat_main;
2033   u32 now = (u32) vlib_time_now (vm);
2034   u32 thread_index = vlib_get_thread_index ();
2035
2036   from = vlib_frame_vector_args (frame);
2037   n_left_from = frame->n_vectors;
2038   next_index = node->cached_next_index;
2039
2040   while (n_left_from > 0)
2041     {
2042       u32 n_left_to_next;
2043
2044       vlib_get_next_frame (vm, node, next_index,
2045                            to_next, n_left_to_next);
2046
2047       while (n_left_from >= 4 && n_left_to_next >= 2)
2048         {
2049           u32 bi0, bi1;
2050           vlib_buffer_t * b0, * b1;
2051           u32 next0, next1;
2052           u32 sw_if_index0, sw_if_index1;
2053           ip4_header_t * ip0, * ip1;
2054           ip_csum_t sum0, sum1;
2055           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
2056           u16 old_port0, new_port0, lo_port0, i0;
2057           u16 old_port1, new_port1, lo_port1, i1;
2058           udp_header_t * udp0, * udp1;
2059           tcp_header_t * tcp0, * tcp1;
2060           u32 proto0, proto1;
2061           snat_det_out_key_t key0, key1;
2062           snat_det_map_t * dm0, * dm1;
2063           snat_det_session_t * ses0 = 0, * ses1 = 0;
2064           u32 rx_fib_index0, rx_fib_index1;
2065           icmp46_header_t * icmp0, * icmp1;
2066
2067           /* Prefetch next iteration. */
2068           {
2069             vlib_buffer_t * p2, * p3;
2070
2071             p2 = vlib_get_buffer (vm, from[2]);
2072             p3 = vlib_get_buffer (vm, from[3]);
2073
2074             vlib_prefetch_buffer_header (p2, LOAD);
2075             vlib_prefetch_buffer_header (p3, LOAD);
2076
2077             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2078             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2079           }
2080
2081           /* speculatively enqueue b0 and b1 to the current next frame */
2082           to_next[0] = bi0 = from[0];
2083           to_next[1] = bi1 = from[1];
2084           from += 2;
2085           to_next += 2;
2086           n_left_from -= 2;
2087           n_left_to_next -= 2;
2088
2089           b0 = vlib_get_buffer (vm, bi0);
2090           b1 = vlib_get_buffer (vm, bi1);
2091
2092           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2093           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
2094
2095           ip0 = vlib_buffer_get_current (b0);
2096           udp0 = ip4_next_header (ip0);
2097           tcp0 = (tcp_header_t *) udp0;
2098
2099           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2100
2101           if (PREDICT_FALSE(ip0->ttl == 1))
2102             {
2103               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2104               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2105                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2106                                            0);
2107               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2108               goto trace0;
2109             }
2110
2111           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2112
2113           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2114             {
2115               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2116               icmp0 = (icmp46_header_t *) udp0;
2117
2118               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2119                                   rx_fib_index0, node, next0, thread_index,
2120                                   &ses0, &dm0);
2121               goto trace0;
2122             }
2123
2124           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2125           if (PREDICT_FALSE(!dm0))
2126             {
2127               clib_warning("no match for internal host %U",
2128                            format_ip4_address, &ip0->src_address);
2129               next0 = SNAT_IN2OUT_NEXT_DROP;
2130               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2131               goto trace0;
2132             }
2133
2134           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2135
2136           key0.ext_host_addr = ip0->dst_address;
2137           key0.ext_host_port = tcp0->dst;
2138
2139           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2140           if (PREDICT_FALSE(!ses0))
2141             {
2142               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2143                 {
2144                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2145                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2146
2147                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2148                     continue;
2149
2150                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2151                   break;
2152                 }
2153               if (PREDICT_FALSE(!ses0))
2154                 {
2155                   /* too many sessions for user, send ICMP error packet */
2156
2157                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2158                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2159                                                ICMP4_destination_unreachable_destination_unreachable_host,
2160                                                0);
2161                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2162                   goto trace0;
2163                 }
2164             }
2165
2166           new_port0 = ses0->out.out_port;
2167
2168           old_addr0.as_u32 = ip0->src_address.as_u32;
2169           ip0->src_address.as_u32 = new_addr0.as_u32;
2170           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2171
2172           sum0 = ip0->checksum;
2173           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2174                                  ip4_header_t,
2175                                  src_address /* changed member */);
2176           ip0->checksum = ip_csum_fold (sum0);
2177
2178           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2179             {
2180               if (tcp0->flags & TCP_FLAG_SYN)
2181                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2182               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2183                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2184               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2185                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2186               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2187                 snat_det_ses_close(dm0, ses0);
2188               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2189                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2190               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2191                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2192
2193               old_port0 = tcp0->src;
2194               tcp0->src = new_port0;
2195
2196               sum0 = tcp0->checksum;
2197               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2198                                      ip4_header_t,
2199                                      dst_address /* changed member */);
2200               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2201                                      ip4_header_t /* cheat */,
2202                                      length /* changed member */);
2203               tcp0->checksum = ip_csum_fold(sum0);
2204             }
2205           else
2206             {
2207               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2208               old_port0 = udp0->src_port;
2209               udp0->src_port = new_port0;
2210               udp0->checksum = 0;
2211             }
2212
2213           switch(ses0->state)
2214             {
2215             case SNAT_SESSION_UDP_ACTIVE:
2216                 ses0->expire = now + sm->udp_timeout;
2217                 break;
2218             case SNAT_SESSION_TCP_SYN_SENT:
2219             case SNAT_SESSION_TCP_FIN_WAIT:
2220             case SNAT_SESSION_TCP_CLOSE_WAIT:
2221             case SNAT_SESSION_TCP_LAST_ACK:
2222                 ses0->expire = now + sm->tcp_transitory_timeout;
2223                 break;
2224             case SNAT_SESSION_TCP_ESTABLISHED:
2225                 ses0->expire = now + sm->tcp_established_timeout;
2226                 break;
2227             }
2228
2229         trace0:
2230           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2231                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2232             {
2233               snat_in2out_trace_t *t =
2234                  vlib_add_trace (vm, node, b0, sizeof (*t));
2235               t->is_slow_path = 0;
2236               t->sw_if_index = sw_if_index0;
2237               t->next_index = next0;
2238               t->session_index = ~0;
2239               if (ses0)
2240                 t->session_index = ses0 - dm0->sessions;
2241             }
2242
2243           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2244
2245           ip1 = vlib_buffer_get_current (b1);
2246           udp1 = ip4_next_header (ip1);
2247           tcp1 = (tcp_header_t *) udp1;
2248
2249           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2250
2251           if (PREDICT_FALSE(ip1->ttl == 1))
2252             {
2253               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2254               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2255                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2256                                            0);
2257               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2258               goto trace1;
2259             }
2260
2261           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2262
2263           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
2264             {
2265               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
2266               icmp1 = (icmp46_header_t *) udp1;
2267
2268               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
2269                                   rx_fib_index1, node, next1, thread_index,
2270                                   &ses1, &dm1);
2271               goto trace1;
2272             }
2273
2274           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
2275           if (PREDICT_FALSE(!dm1))
2276             {
2277               clib_warning("no match for internal host %U",
2278                            format_ip4_address, &ip0->src_address);
2279               next1 = SNAT_IN2OUT_NEXT_DROP;
2280               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2281               goto trace1;
2282             }
2283
2284           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
2285
2286           key1.ext_host_addr = ip1->dst_address;
2287           key1.ext_host_port = tcp1->dst;
2288
2289           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
2290           if (PREDICT_FALSE(!ses1))
2291             {
2292               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
2293                 {
2294                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
2295                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
2296
2297                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
2298                     continue;
2299
2300                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
2301                   break;
2302                 }
2303               if (PREDICT_FALSE(!ses1))
2304                 {
2305                   /* too many sessions for user, send ICMP error packet */
2306
2307                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2308                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
2309                                                ICMP4_destination_unreachable_destination_unreachable_host,
2310                                                0);
2311                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2312                   goto trace1;
2313                 }
2314             }
2315
2316           new_port1 = ses1->out.out_port;
2317
2318           old_addr1.as_u32 = ip1->src_address.as_u32;
2319           ip1->src_address.as_u32 = new_addr1.as_u32;
2320           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2321
2322           sum1 = ip1->checksum;
2323           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2324                                  ip4_header_t,
2325                                  src_address /* changed member */);
2326           ip1->checksum = ip_csum_fold (sum1);
2327
2328           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
2329             {
2330               if (tcp1->flags & TCP_FLAG_SYN)
2331                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
2332               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
2333                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2334               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
2335                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
2336               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
2337                 snat_det_ses_close(dm1, ses1);
2338               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2339                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
2340               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
2341                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2342
2343               old_port1 = tcp1->src;
2344               tcp1->src = new_port1;
2345
2346               sum1 = tcp1->checksum;
2347               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2348                                      ip4_header_t,
2349                                      dst_address /* changed member */);
2350               sum1 = ip_csum_update (sum1, old_port1, new_port1,
2351                                      ip4_header_t /* cheat */,
2352                                      length /* changed member */);
2353               tcp1->checksum = ip_csum_fold(sum1);
2354             }
2355           else
2356             {
2357               ses1->state = SNAT_SESSION_UDP_ACTIVE;
2358               old_port1 = udp1->src_port;
2359               udp1->src_port = new_port1;
2360               udp1->checksum = 0;
2361             }
2362
2363           switch(ses1->state)
2364             {
2365             case SNAT_SESSION_UDP_ACTIVE:
2366                 ses1->expire = now + sm->udp_timeout;
2367                 break;
2368             case SNAT_SESSION_TCP_SYN_SENT:
2369             case SNAT_SESSION_TCP_FIN_WAIT:
2370             case SNAT_SESSION_TCP_CLOSE_WAIT:
2371             case SNAT_SESSION_TCP_LAST_ACK:
2372                 ses1->expire = now + sm->tcp_transitory_timeout;
2373                 break;
2374             case SNAT_SESSION_TCP_ESTABLISHED:
2375                 ses1->expire = now + sm->tcp_established_timeout;
2376                 break;
2377             }
2378
2379         trace1:
2380           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2381                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2382             {
2383               snat_in2out_trace_t *t =
2384                  vlib_add_trace (vm, node, b1, sizeof (*t));
2385               t->is_slow_path = 0;
2386               t->sw_if_index = sw_if_index1;
2387               t->next_index = next1;
2388               t->session_index = ~0;
2389               if (ses1)
2390                 t->session_index = ses1 - dm1->sessions;
2391             }
2392
2393           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
2394
2395           /* verify speculative enqueues, maybe switch current next frame */
2396           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2397                                            to_next, n_left_to_next,
2398                                            bi0, bi1, next0, next1);
2399          }
2400
2401       while (n_left_from > 0 && n_left_to_next > 0)
2402         {
2403           u32 bi0;
2404           vlib_buffer_t * b0;
2405           u32 next0;
2406           u32 sw_if_index0;
2407           ip4_header_t * ip0;
2408           ip_csum_t sum0;
2409           ip4_address_t new_addr0, old_addr0;
2410           u16 old_port0, new_port0, lo_port0, i0;
2411           udp_header_t * udp0;
2412           tcp_header_t * tcp0;
2413           u32 proto0;
2414           snat_det_out_key_t key0;
2415           snat_det_map_t * dm0;
2416           snat_det_session_t * ses0 = 0;
2417           u32 rx_fib_index0;
2418           icmp46_header_t * icmp0;
2419
2420           /* speculatively enqueue b0 to the current next frame */
2421           bi0 = from[0];
2422           to_next[0] = bi0;
2423           from += 1;
2424           to_next += 1;
2425           n_left_from -= 1;
2426           n_left_to_next -= 1;
2427
2428           b0 = vlib_get_buffer (vm, bi0);
2429           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2430
2431           ip0 = vlib_buffer_get_current (b0);
2432           udp0 = ip4_next_header (ip0);
2433           tcp0 = (tcp_header_t *) udp0;
2434
2435           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2436
2437           if (PREDICT_FALSE(ip0->ttl == 1))
2438             {
2439               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2440               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2441                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2442                                            0);
2443               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2444               goto trace00;
2445             }
2446
2447           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2448
2449           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2450             {
2451               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2452               icmp0 = (icmp46_header_t *) udp0;
2453
2454               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2455                                   rx_fib_index0, node, next0, thread_index,
2456                                   &ses0, &dm0);
2457               goto trace00;
2458             }
2459
2460           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2461           if (PREDICT_FALSE(!dm0))
2462             {
2463               clib_warning("no match for internal host %U",
2464                            format_ip4_address, &ip0->src_address);
2465               next0 = SNAT_IN2OUT_NEXT_DROP;
2466               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2467               goto trace00;
2468             }
2469
2470           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2471
2472           key0.ext_host_addr = ip0->dst_address;
2473           key0.ext_host_port = tcp0->dst;
2474
2475           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2476           if (PREDICT_FALSE(!ses0))
2477             {
2478               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2479                 {
2480                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2481                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2482
2483                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2484                     continue;
2485
2486                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2487                   break;
2488                 }
2489               if (PREDICT_FALSE(!ses0))
2490                 {
2491                   /* too many sessions for user, send ICMP error packet */
2492
2493                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2494                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2495                                                ICMP4_destination_unreachable_destination_unreachable_host,
2496                                                0);
2497                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2498                   goto trace00;
2499                 }
2500             }
2501
2502           new_port0 = ses0->out.out_port;
2503
2504           old_addr0.as_u32 = ip0->src_address.as_u32;
2505           ip0->src_address.as_u32 = new_addr0.as_u32;
2506           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2507
2508           sum0 = ip0->checksum;
2509           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2510                                  ip4_header_t,
2511                                  src_address /* changed member */);
2512           ip0->checksum = ip_csum_fold (sum0);
2513
2514           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2515             {
2516               if (tcp0->flags & TCP_FLAG_SYN)
2517                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2518               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2519                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2520               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2521                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2522               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2523                 snat_det_ses_close(dm0, ses0);
2524               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2525                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2526               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2527                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2528
2529               old_port0 = tcp0->src;
2530               tcp0->src = new_port0;
2531
2532               sum0 = tcp0->checksum;
2533               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2534                                      ip4_header_t,
2535                                      dst_address /* changed member */);
2536               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2537                                      ip4_header_t /* cheat */,
2538                                      length /* changed member */);
2539               tcp0->checksum = ip_csum_fold(sum0);
2540             }
2541           else
2542             {
2543               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2544               old_port0 = udp0->src_port;
2545               udp0->src_port = new_port0;
2546               udp0->checksum = 0;
2547             }
2548
2549           switch(ses0->state)
2550             {
2551             case SNAT_SESSION_UDP_ACTIVE:
2552                 ses0->expire = now + sm->udp_timeout;
2553                 break;
2554             case SNAT_SESSION_TCP_SYN_SENT:
2555             case SNAT_SESSION_TCP_FIN_WAIT:
2556             case SNAT_SESSION_TCP_CLOSE_WAIT:
2557             case SNAT_SESSION_TCP_LAST_ACK:
2558                 ses0->expire = now + sm->tcp_transitory_timeout;
2559                 break;
2560             case SNAT_SESSION_TCP_ESTABLISHED:
2561                 ses0->expire = now + sm->tcp_established_timeout;
2562                 break;
2563             }
2564
2565         trace00:
2566           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2567                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2568             {
2569               snat_in2out_trace_t *t =
2570                  vlib_add_trace (vm, node, b0, sizeof (*t));
2571               t->is_slow_path = 0;
2572               t->sw_if_index = sw_if_index0;
2573               t->next_index = next0;
2574               t->session_index = ~0;
2575               if (ses0)
2576                 t->session_index = ses0 - dm0->sessions;
2577             }
2578
2579           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2580
2581           /* verify speculative enqueue, maybe switch current next frame */
2582           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2583                                            to_next, n_left_to_next,
2584                                            bi0, next0);
2585         }
2586
2587       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2588     }
2589
2590   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
2591                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2592                                pkts_processed);
2593   return frame->n_vectors;
2594 }
2595
2596 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
2597   .function = snat_det_in2out_node_fn,
2598   .name = "snat-det-in2out",
2599   .vector_size = sizeof (u32),
2600   .format_trace = format_snat_in2out_trace,
2601   .type = VLIB_NODE_TYPE_INTERNAL,
2602
2603   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2604   .error_strings = snat_in2out_error_strings,
2605
2606   .runtime_data_bytes = sizeof (snat_runtime_t),
2607
2608   .n_next_nodes = 3,
2609
2610   /* edit / add dispositions here */
2611   .next_nodes = {
2612     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2613     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2614     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2615   },
2616 };
2617
2618 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
2619
2620 /**
2621  * Get address and port values to be used for packet SNAT translation
2622  * and create session if needed
2623  *
2624  * @param[in,out] sm             SNAT main
2625  * @param[in,out] node           SNAT node runtime
2626  * @param[in] thread_index       thread index
2627  * @param[in,out] b0             buffer containing packet to be translated
2628  * @param[out] p_proto           protocol used for matching
2629  * @param[out] p_value           address and port after NAT translation
2630  * @param[out] p_dont_translate  if packet should not be translated
2631  * @param d                      optional parameter
2632  * @param e                      optional parameter
2633  */
2634 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
2635                           u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
2636                           snat_session_key_t *p_value,
2637                           u8 *p_dont_translate, void *d, void *e)
2638 {
2639   ip4_header_t *ip0;
2640   icmp46_header_t *icmp0;
2641   u32 sw_if_index0;
2642   u32 rx_fib_index0;
2643   u8 protocol;
2644   snat_det_out_key_t key0;
2645   u8 dont_translate = 0;
2646   u32 next0 = ~0;
2647   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2648   ip4_header_t *inner_ip0;
2649   void *l4_header = 0;
2650   icmp46_header_t *inner_icmp0;
2651   snat_det_map_t * dm0 = 0;
2652   ip4_address_t new_addr0;
2653   u16 lo_port0, i0;
2654   snat_det_session_t * ses0 = 0;
2655   ip4_address_t in_addr;
2656   u16 in_port;
2657
2658   ip0 = vlib_buffer_get_current (b0);
2659   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2660   echo0 = (icmp_echo_header_t *)(icmp0+1);
2661   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2662   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
2663
2664   if (!icmp_is_error_message (icmp0))
2665     {
2666       protocol = SNAT_PROTOCOL_ICMP;
2667       in_addr = ip0->src_address;
2668       in_port = echo0->identifier;
2669     }
2670   else
2671     {
2672       inner_ip0 = (ip4_header_t *)(echo0+1);
2673       l4_header = ip4_next_header (inner_ip0);
2674       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2675       in_addr = inner_ip0->dst_address;
2676       switch (protocol)
2677         {
2678         case SNAT_PROTOCOL_ICMP:
2679           inner_icmp0 = (icmp46_header_t*)l4_header;
2680           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2681           in_port = inner_echo0->identifier;
2682           break;
2683         case SNAT_PROTOCOL_UDP:
2684         case SNAT_PROTOCOL_TCP:
2685           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2686           break;
2687         default:
2688           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
2689           next0 = SNAT_IN2OUT_NEXT_DROP;
2690           goto out;
2691         }
2692     }
2693
2694   dm0 = snat_det_map_by_user(sm, &in_addr);
2695   if (PREDICT_FALSE(!dm0))
2696     {
2697       clib_warning("no match for internal host %U",
2698                    format_ip4_address, &in_addr);
2699       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2700           IP_PROTOCOL_ICMP, rx_fib_index0)))
2701         {
2702           dont_translate = 1;
2703           goto out;
2704         }
2705       next0 = SNAT_IN2OUT_NEXT_DROP;
2706       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2707       goto out;
2708     }
2709
2710   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
2711
2712   key0.ext_host_addr = ip0->dst_address;
2713   key0.ext_host_port = 0;
2714
2715   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
2716   if (PREDICT_FALSE(!ses0))
2717     {
2718       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2719           IP_PROTOCOL_ICMP, rx_fib_index0)))
2720         {
2721           dont_translate = 1;
2722           goto out;
2723         }
2724       if (icmp0->type != ICMP4_echo_request)
2725         {
2726           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2727           next0 = SNAT_IN2OUT_NEXT_DROP;
2728           goto out;
2729         }
2730       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2731         {
2732           key0.out_port = clib_host_to_net_u16 (lo_port0 +
2733             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
2734
2735           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
2736             continue;
2737
2738           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
2739           break;
2740         }
2741       if (PREDICT_FALSE(!ses0))
2742         {
2743           next0 = SNAT_IN2OUT_NEXT_DROP;
2744           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
2745           goto out;
2746         }
2747     }
2748
2749   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
2750                     !icmp_is_error_message (icmp0)))
2751     {
2752       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2753       next0 = SNAT_IN2OUT_NEXT_DROP;
2754       goto out;
2755     }
2756
2757   u32 now = (u32) vlib_time_now (sm->vlib_main);
2758
2759   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
2760   ses0->expire = now + sm->icmp_timeout;
2761
2762 out:
2763   *p_proto = protocol;
2764   if (ses0)
2765     {
2766       p_value->addr = new_addr0;
2767       p_value->fib_index = sm->outside_fib_index;
2768       p_value->port = ses0->out.out_port;
2769     }
2770   *p_dont_translate = dont_translate;
2771   if (d)
2772     *(snat_det_session_t**)d = ses0;
2773   if (e)
2774     *(snat_det_map_t**)e = dm0;
2775   return next0;
2776 }
2777
2778 /**********************/
2779 /*** worker handoff ***/
2780 /**********************/
2781 static inline uword
2782 snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
2783                                       vlib_node_runtime_t * node,
2784                                       vlib_frame_t * frame,
2785                                       u8 is_output)
2786 {
2787   snat_main_t *sm = &snat_main;
2788   vlib_thread_main_t *tm = vlib_get_thread_main ();
2789   u32 n_left_from, *from, *to_next = 0;
2790   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
2791   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
2792     = 0;
2793   vlib_frame_queue_elt_t *hf = 0;
2794   vlib_frame_t *f = 0;
2795   int i;
2796   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
2797   u32 next_worker_index = 0;
2798   u32 current_worker_index = ~0;
2799   u32 thread_index = vlib_get_thread_index ();
2800   u32 fq_index;
2801   u32 to_node_index;
2802
2803   ASSERT (vec_len (sm->workers));
2804
2805   if (is_output)
2806     {
2807       fq_index = sm->fq_in2out_output_index;
2808       to_node_index = sm->in2out_output_node_index;
2809     }
2810   else
2811     {
2812       fq_index = sm->fq_in2out_index;
2813       to_node_index = sm->in2out_node_index;
2814     }
2815
2816   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
2817     {
2818       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
2819
2820       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
2821                                sm->first_worker_index + sm->num_workers - 1,
2822                                (vlib_frame_queue_t *) (~0));
2823     }
2824
2825   from = vlib_frame_vector_args (frame);
2826   n_left_from = frame->n_vectors;
2827
2828   while (n_left_from > 0)
2829     {
2830       u32 bi0;
2831       vlib_buffer_t *b0;
2832       u32 sw_if_index0;
2833       u32 rx_fib_index0;
2834       ip4_header_t * ip0;
2835       u8 do_handoff;
2836
2837       bi0 = from[0];
2838       from += 1;
2839       n_left_from -= 1;
2840
2841       b0 = vlib_get_buffer (vm, bi0);
2842
2843       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
2844       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2845
2846       ip0 = vlib_buffer_get_current (b0);
2847
2848       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
2849
2850       if (PREDICT_FALSE (next_worker_index != thread_index))
2851         {
2852           do_handoff = 1;
2853
2854           if (next_worker_index != current_worker_index)
2855             {
2856               if (hf)
2857                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2858
2859               hf = vlib_get_worker_handoff_queue_elt (fq_index,
2860                                                       next_worker_index,
2861                                                       handoff_queue_elt_by_worker_index);
2862
2863               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2864               to_next_worker = &hf->buffer_index[hf->n_vectors];
2865               current_worker_index = next_worker_index;
2866             }
2867
2868           /* enqueue to correct worker thread */
2869           to_next_worker[0] = bi0;
2870           to_next_worker++;
2871           n_left_to_next_worker--;
2872
2873           if (n_left_to_next_worker == 0)
2874             {
2875               hf->n_vectors = VLIB_FRAME_SIZE;
2876               vlib_put_frame_queue_elt (hf);
2877               current_worker_index = ~0;
2878               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2879               hf = 0;
2880             }
2881         }
2882       else
2883         {
2884           do_handoff = 0;
2885           /* if this is 1st frame */
2886           if (!f)
2887             {
2888               f = vlib_get_frame_to_node (vm, to_node_index);
2889               to_next = vlib_frame_vector_args (f);
2890             }
2891
2892           to_next[0] = bi0;
2893           to_next += 1;
2894           f->n_vectors++;
2895         }
2896
2897       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2898                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2899         {
2900           snat_in2out_worker_handoff_trace_t *t =
2901             vlib_add_trace (vm, node, b0, sizeof (*t));
2902           t->next_worker_index = next_worker_index;
2903           t->do_handoff = do_handoff;
2904         }
2905     }
2906
2907   if (f)
2908     vlib_put_frame_to_node (vm, to_node_index, f);
2909
2910   if (hf)
2911     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2912
2913   /* Ship frames to the worker nodes */
2914   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2915     {
2916       if (handoff_queue_elt_by_worker_index[i])
2917         {
2918           hf = handoff_queue_elt_by_worker_index[i];
2919           /*
2920            * It works better to let the handoff node
2921            * rate-adapt, always ship the handoff queue element.
2922            */
2923           if (1 || hf->n_vectors == hf->last_n_vectors)
2924             {
2925               vlib_put_frame_queue_elt (hf);
2926               handoff_queue_elt_by_worker_index[i] = 0;
2927             }
2928           else
2929             hf->last_n_vectors = hf->n_vectors;
2930         }
2931       congested_handoff_queue_by_worker_index[i] =
2932         (vlib_frame_queue_t *) (~0);
2933     }
2934   hf = 0;
2935   current_worker_index = ~0;
2936   return frame->n_vectors;
2937 }
2938
2939 static uword
2940 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
2941                                vlib_node_runtime_t * node,
2942                                vlib_frame_t * frame)
2943 {
2944   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
2945 }
2946
2947 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
2948   .function = snat_in2out_worker_handoff_fn,
2949   .name = "snat-in2out-worker-handoff",
2950   .vector_size = sizeof (u32),
2951   .format_trace = format_snat_in2out_worker_handoff_trace,
2952   .type = VLIB_NODE_TYPE_INTERNAL,
2953
2954   .n_next_nodes = 1,
2955
2956   .next_nodes = {
2957     [0] = "error-drop",
2958   },
2959 };
2960
2961 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
2962                               snat_in2out_worker_handoff_fn);
2963
2964 static uword
2965 snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
2966                                       vlib_node_runtime_t * node,
2967                                       vlib_frame_t * frame)
2968 {
2969   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
2970 }
2971
2972 VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
2973   .function = snat_in2out_output_worker_handoff_fn,
2974   .name = "snat-in2out-output-worker-handoff",
2975   .vector_size = sizeof (u32),
2976   .format_trace = format_snat_in2out_worker_handoff_trace,
2977   .type = VLIB_NODE_TYPE_INTERNAL,
2978
2979   .n_next_nodes = 1,
2980
2981   .next_nodes = {
2982     [0] = "error-drop",
2983   },
2984 };
2985
2986 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
2987                               snat_in2out_output_worker_handoff_fn);
2988
2989 static uword
2990 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
2991                                 vlib_node_runtime_t * node,
2992                                 vlib_frame_t * frame)
2993 {
2994   u32 n_left_from, * from, * to_next;
2995   snat_in2out_next_t next_index;
2996   u32 pkts_processed = 0;
2997   snat_main_t * sm = &snat_main;
2998   u32 stats_node_index;
2999
3000   stats_node_index = snat_in2out_fast_node.index;
3001
3002   from = vlib_frame_vector_args (frame);
3003   n_left_from = frame->n_vectors;
3004   next_index = node->cached_next_index;
3005
3006   while (n_left_from > 0)
3007     {
3008       u32 n_left_to_next;
3009
3010       vlib_get_next_frame (vm, node, next_index,
3011                            to_next, n_left_to_next);
3012
3013       while (n_left_from > 0 && n_left_to_next > 0)
3014         {
3015           u32 bi0;
3016           vlib_buffer_t * b0;
3017           u32 next0;
3018           u32 sw_if_index0;
3019           ip4_header_t * ip0;
3020           ip_csum_t sum0;
3021           u32 new_addr0, old_addr0;
3022           u16 old_port0, new_port0;
3023           udp_header_t * udp0;
3024           tcp_header_t * tcp0;
3025           icmp46_header_t * icmp0;
3026           snat_session_key_t key0, sm0;
3027           u32 proto0;
3028           u32 rx_fib_index0;
3029
3030           /* speculatively enqueue b0 to the current next frame */
3031           bi0 = from[0];
3032           to_next[0] = bi0;
3033           from += 1;
3034           to_next += 1;
3035           n_left_from -= 1;
3036           n_left_to_next -= 1;
3037
3038           b0 = vlib_get_buffer (vm, bi0);
3039           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3040
3041           ip0 = vlib_buffer_get_current (b0);
3042           udp0 = ip4_next_header (ip0);
3043           tcp0 = (tcp_header_t *) udp0;
3044           icmp0 = (icmp46_header_t *) udp0;
3045
3046           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3047           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3048
3049           if (PREDICT_FALSE(ip0->ttl == 1))
3050             {
3051               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3052               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3053                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3054                                            0);
3055               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3056               goto trace0;
3057             }
3058
3059           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3060
3061           if (PREDICT_FALSE (proto0 == ~0))
3062               goto trace0;
3063
3064           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3065             {
3066               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
3067                                   rx_fib_index0, node, next0, ~0, 0, 0);
3068               goto trace0;
3069             }
3070
3071           key0.addr = ip0->src_address;
3072           key0.port = udp0->src_port;
3073           key0.fib_index = rx_fib_index0;
3074
3075           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0))
3076             {
3077               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3078               next0= SNAT_IN2OUT_NEXT_DROP;
3079               goto trace0;
3080             }
3081
3082           new_addr0 = sm0.addr.as_u32;
3083           new_port0 = sm0.port;
3084           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
3085           old_addr0 = ip0->src_address.as_u32;
3086           ip0->src_address.as_u32 = new_addr0;
3087
3088           sum0 = ip0->checksum;
3089           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3090                                  ip4_header_t,
3091                                  src_address /* changed member */);
3092           ip0->checksum = ip_csum_fold (sum0);
3093
3094           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
3095             {
3096               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3097                 {
3098                   old_port0 = tcp0->src_port;
3099                   tcp0->src_port = new_port0;
3100
3101                   sum0 = tcp0->checksum;
3102                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3103                                          ip4_header_t,
3104                                          dst_address /* changed member */);
3105                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
3106                                          ip4_header_t /* cheat */,
3107                                          length /* changed member */);
3108                   tcp0->checksum = ip_csum_fold(sum0);
3109                 }
3110               else
3111                 {
3112                   old_port0 = udp0->src_port;
3113                   udp0->src_port = new_port0;
3114                   udp0->checksum = 0;
3115                 }
3116             }
3117           else
3118             {
3119               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3120                 {
3121                   sum0 = tcp0->checksum;
3122                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3123                                          ip4_header_t,
3124                                          dst_address /* changed member */);
3125                   tcp0->checksum = ip_csum_fold(sum0);
3126                 }
3127             }
3128
3129           /* Hairpinning */
3130           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3131
3132         trace0:
3133           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3134                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3135             {
3136               snat_in2out_trace_t *t =
3137                  vlib_add_trace (vm, node, b0, sizeof (*t));
3138               t->sw_if_index = sw_if_index0;
3139               t->next_index = next0;
3140             }
3141
3142           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3143
3144           /* verify speculative enqueue, maybe switch current next frame */
3145           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3146                                            to_next, n_left_to_next,
3147                                            bi0, next0);
3148         }
3149
3150       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3151     }
3152
3153   vlib_node_increment_counter (vm, stats_node_index,
3154                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3155                                pkts_processed);
3156   return frame->n_vectors;
3157 }
3158
3159
3160 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
3161   .function = snat_in2out_fast_static_map_fn,
3162   .name = "snat-in2out-fast",
3163   .vector_size = sizeof (u32),
3164   .format_trace = format_snat_in2out_fast_trace,
3165   .type = VLIB_NODE_TYPE_INTERNAL,
3166   
3167   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3168   .error_strings = snat_in2out_error_strings,
3169
3170   .runtime_data_bytes = sizeof (snat_runtime_t),
3171   
3172   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3173
3174   /* edit / add dispositions here */
3175   .next_nodes = {
3176     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3177     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3178     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
3179     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3180   },
3181 };
3182
3183 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);