SNAT: NAT packet with unknown L4 protocol if match 1:1 NAT
[vpp.git] / src / plugins / snat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <snat/snat.h>
25 #include <snat/snat_ipfix_logging.h>
26 #include <snat/snat_det.h>
27
28 #include <vppinfra/hash.h>
29 #include <vppinfra/error.h>
30 #include <vppinfra/elog.h>
31
32 typedef struct {
33   u32 sw_if_index;
34   u32 next_index;
35   u32 session_index;
36   u32 is_slow_path;
37 } snat_in2out_trace_t;
38
39 typedef struct {
40   u32 next_worker_index;
41   u8 do_handoff;
42 } snat_in2out_worker_handoff_trace_t;
43
44 /* packet trace format function */
45 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
46 {
47   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
50   char * tag;
51
52   tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
53   
54   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
55               t->sw_if_index, t->next_index, t->session_index);
56
57   return s;
58 }
59
60 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
61 {
62   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
63   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
64   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
65
66   s = format (s, "SANT_IN2OUT_FAST: sw_if_index %d, next index %d", 
67               t->sw_if_index, t->next_index);
68
69   return s;
70 }
71
72 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
73 {
74   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
75   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
76   snat_in2out_worker_handoff_trace_t * t =
77     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
78   char * m;
79
80   m = t->do_handoff ? "next worker" : "same worker";
81   s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
82
83   return s;
84 }
85
86 vlib_node_registration_t snat_in2out_node;
87 vlib_node_registration_t snat_in2out_slowpath_node;
88 vlib_node_registration_t snat_in2out_fast_node;
89 vlib_node_registration_t snat_in2out_worker_handoff_node;
90 vlib_node_registration_t snat_det_in2out_node;
91
/*
 * X-macro listing the in2out error counters as _(symbol, description).
 * The entry order defines the numeric values of snat_in2out_error_t and the
 * index of each string in snat_in2out_error_strings, so keep both in mind
 * when adding entries.
 */
#define foreach_snat_in2out_error                         \
  _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
  _(IN2OUT_PACKETS, "Good in2out packets processed")      \
  _(OUT_OF_PORTS, "Out of ports")                         \
  _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
  _(BAD_ICMP_TYPE, "unsupported ICMP type")               \
  _(NO_TRANSLATION, "No translation")
99   
100 typedef enum {
101 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
102   foreach_snat_in2out_error
103 #undef _
104   SNAT_IN2OUT_N_ERROR,
105 } snat_in2out_error_t;
106
107 static char * snat_in2out_error_strings[] = {
108 #define _(sym,string) string,
109   foreach_snat_in2out_error
110 #undef _
111 };
112
/** Next-node indices used as return values by the in2out processing code. */
typedef enum
{
  SNAT_IN2OUT_NEXT_LOOKUP,
  SNAT_IN2OUT_NEXT_DROP,
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_N_NEXT,           /* number of next nodes */
} snat_in2out_next_t;
120
121 /**
122  * @brief Check if packet should be translated
123  *
124  * Packets aimed at outside interface and external addresss with active session
125  * should be translated.
126  *
127  * @param sm            SNAT main
128  * @param rt            SNAT runtime data
129  * @param sw_if_index0  index of the inside interface
130  * @param ip0           IPv4 header
131  * @param proto0        SNAT protocol
132  * @param rx_fib_index0 RX FIB index
133  *
134  * @returns 0 if packet should be translated otherwise 1
135  */
136 static inline int
137 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
138                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
139                          u32 rx_fib_index0)
140 {
141   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
142   fib_prefix_t pfx = {
143     .fp_proto = FIB_PROTOCOL_IP4,
144     .fp_len = 32,
145     .fp_addr = {
146         .ip4.as_u32 = ip0->dst_address.as_u32,
147     },
148   };
149
150   /* Don't NAT packet aimed at the intfc address */
151   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
152                                       ip0->dst_address.as_u32)))
153     return 1;
154
155   fei = fib_table_lookup (rx_fib_index0, &pfx);
156   if (FIB_NODE_INDEX_INVALID != fei)
157     {
158       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
159       if (sw_if_index == ~0)
160         {
161           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
162           if (FIB_NODE_INDEX_INVALID != fei)
163             sw_if_index = fib_entry_get_resolving_interface (fei);
164         }
165       snat_interface_t *i;
166       pool_foreach (i, sm->interfaces,
167       ({
168         /* NAT packet aimed at outside interface */
169         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
170           return 0;
171       }));
172     }
173
174   return 1;
175 }
176
177 static inline int
178 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
179                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
180                     u32 rx_fib_index0)
181 {
182   udp_header_t * udp0 = ip4_next_header (ip0);
183   snat_session_key_t key0, sm0;
184   clib_bihash_kv_8_8_t kv0, value0;
185
186   key0.addr = ip0->dst_address;
187   key0.port = udp0->dst_port;
188   key0.protocol = proto0;
189   key0.fib_index = sm->outside_fib_index;
190   kv0.key = key0.as_u64;
191
192   /* NAT packet aimed at external address if */
193   /* has active sessions */
194   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
195     {
196       /* or is static mappings */
197       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
198         return 0;
199     }
200   else
201     return 0;
202
203   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
204                                  rx_fib_index0);
205 }
206
207 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
208                       ip4_header_t * ip0,
209                       u32 rx_fib_index0,
210                       snat_session_key_t * key0,
211                       snat_session_t ** sessionp,
212                       vlib_node_runtime_t * node,
213                       u32 next0,
214                       u32 thread_index)
215 {
216   snat_user_t *u;
217   snat_user_key_t user_key;
218   snat_session_t *s;
219   clib_bihash_kv_8_8_t kv0, value0;
220   u32 oldest_per_user_translation_list_index;
221   dlist_elt_t * oldest_per_user_translation_list_elt;
222   dlist_elt_t * per_user_translation_list_elt;
223   dlist_elt_t * per_user_list_head_elt;
224   u32 session_index;
225   snat_session_key_t key1;
226   u32 address_index = ~0;
227   u32 outside_fib_index;
228   uword * p;
229   snat_worker_key_t worker_by_out_key;
230
231   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
232   if (! p)
233     {
234       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
235       return SNAT_IN2OUT_NEXT_DROP;
236     }
237   outside_fib_index = p[0];
238
239   key1.protocol = key0->protocol;
240   user_key.addr = ip0->src_address;
241   user_key.fib_index = rx_fib_index0;
242   kv0.key = user_key.as_u64;
243   
244   /* Ever heard of the "user" = src ip4 address before? */
245   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
246     {
247       /* no, make a new one */
248       pool_get (sm->per_thread_data[thread_index].users, u);
249       memset (u, 0, sizeof (*u));
250       u->addr = ip0->src_address;
251       u->fib_index = rx_fib_index0;
252
253       pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);
254
255       u->sessions_per_user_list_head_index = per_user_list_head_elt -
256         sm->per_thread_data[thread_index].list_pool;
257
258       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
259                        u->sessions_per_user_list_head_index);
260
261       kv0.value = u - sm->per_thread_data[thread_index].users;
262
263       /* add user */
264       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
265     }
266   else
267     {
268       u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
269                              value0.value);
270     }
271
272   /* Over quota? Recycle the least recently used dynamic translation */
273   if (u->nsessions >= sm->max_translations_per_user)
274     {
275       /* Remove the oldest dynamic translation */
276       do {
277           oldest_per_user_translation_list_index =
278             clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
279                                     u->sessions_per_user_list_head_index);
280
281           ASSERT (oldest_per_user_translation_list_index != ~0);
282
283           /* add it back to the end of the LRU list */
284           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
285                               u->sessions_per_user_list_head_index,
286                               oldest_per_user_translation_list_index);
287           /* Get the list element */
288           oldest_per_user_translation_list_elt =
289             pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
290                                oldest_per_user_translation_list_index);
291
292           /* Get the session index from the list element */
293           session_index = oldest_per_user_translation_list_elt->value;
294
295           /* Get the session */
296           s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
297                                  session_index);
298       } while (snat_is_session_static (s));
299
300       /* Remove in2out, out2in keys */
301       kv0.key = s->in2out.as_u64;
302       if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
303           clib_warning ("in2out key delete failed");
304       kv0.key = s->out2in.as_u64;
305       if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
306           clib_warning ("out2in key delete failed");
307
308       /* log NAT event */
309       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
310                                           s->out2in.addr.as_u32,
311                                           s->in2out.protocol,
312                                           s->in2out.port,
313                                           s->out2in.port,
314                                           s->in2out.fib_index);
315
316       snat_free_outside_address_and_port 
317         (sm, &s->out2in, s->outside_address_index);
318       s->outside_address_index = ~0;
319
320       if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
321                                                &address_index))
322         {
323           ASSERT(0);
324
325           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
326           return SNAT_IN2OUT_NEXT_DROP;
327         }
328       s->outside_address_index = address_index;
329     }
330   else
331     {
332       u8 static_mapping = 1;
333
334       /* First try to match static mapping by local address and port */
335       if (snat_static_mapping_match (sm, *key0, &key1, 0, 0))
336         {
337           static_mapping = 0;
338           /* Try to create dynamic translation */
339           if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
340                                                    &address_index))
341             {
342               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
343               return SNAT_IN2OUT_NEXT_DROP;
344             }
345         }
346
347       /* Create a new session */
348       pool_get (sm->per_thread_data[thread_index].sessions, s);
349       memset (s, 0, sizeof (*s));
350       
351       s->outside_address_index = address_index;
352
353       if (static_mapping)
354         {
355           u->nstaticsessions++;
356           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
357         }
358       else
359         {
360           u->nsessions++;
361         }
362
363       /* Create list elts */
364       pool_get (sm->per_thread_data[thread_index].list_pool,
365                 per_user_translation_list_elt);
366       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
367                        per_user_translation_list_elt -
368                        sm->per_thread_data[thread_index].list_pool);
369
370       per_user_translation_list_elt->value =
371         s - sm->per_thread_data[thread_index].sessions;
372       s->per_user_index = per_user_translation_list_elt -
373                           sm->per_thread_data[thread_index].list_pool;
374       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
375
376       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
377                           s->per_user_list_head_index,
378                           per_user_translation_list_elt -
379                           sm->per_thread_data[thread_index].list_pool);
380    }
381   
382   s->in2out = *key0;
383   s->out2in = key1;
384   s->out2in.protocol = key0->protocol;
385   s->out2in.fib_index = outside_fib_index;
386   *sessionp = s;
387
388   /* Add to translation hashes */
389   kv0.key = s->in2out.as_u64;
390   kv0.value = s - sm->per_thread_data[thread_index].sessions;
391   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
392       clib_warning ("in2out key add failed");
393   
394   kv0.key = s->out2in.as_u64;
395   kv0.value = s - sm->per_thread_data[thread_index].sessions;
396   
397   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
398       clib_warning ("out2in key add failed");
399
400   /* Add to translated packets worker lookup */
401   worker_by_out_key.addr = s->out2in.addr;
402   worker_by_out_key.port = s->out2in.port;
403   worker_by_out_key.fib_index = s->out2in.fib_index;
404   kv0.key = worker_by_out_key.as_u64;
405   kv0.value = thread_index;
406   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
407
408   /* log NAT event */
409   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
410                                       s->out2in.addr.as_u32,
411                                       s->in2out.protocol,
412                                       s->in2out.port,
413                                       s->out2in.port,
414                                       s->in2out.fib_index);
415   return next0;
416 }
417
418 static_always_inline
419 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
420                                  snat_session_key_t *p_key0)
421 {
422   icmp46_header_t *icmp0;
423   snat_session_key_t key0;
424   icmp_echo_header_t *echo0, *inner_echo0 = 0;
425   ip4_header_t *inner_ip0 = 0;
426   void *l4_header = 0;
427   icmp46_header_t *inner_icmp0;
428
429   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
430   echo0 = (icmp_echo_header_t *)(icmp0+1);
431
432   if (!icmp_is_error_message (icmp0))
433     {
434       key0.protocol = SNAT_PROTOCOL_ICMP;
435       key0.addr = ip0->src_address;
436       key0.port = echo0->identifier;
437     }
438   else
439     {
440       inner_ip0 = (ip4_header_t *)(echo0+1);
441       l4_header = ip4_next_header (inner_ip0);
442       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
443       key0.addr = inner_ip0->dst_address;
444       switch (key0.protocol)
445         {
446         case SNAT_PROTOCOL_ICMP:
447           inner_icmp0 = (icmp46_header_t*)l4_header;
448           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
449           key0.port = inner_echo0->identifier;
450           break;
451         case SNAT_PROTOCOL_UDP:
452         case SNAT_PROTOCOL_TCP:
453           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
454           break;
455         default:
456           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
457         }
458     }
459   *p_key0 = key0;
460   return -1; /* success */
461 }
462
463 /**
464  * Get address and port values to be used for packet SNAT translation
465  * and create session if needed
466  *
467  * @param[in,out] sm             SNAT main
468  * @param[in,out] node           SNAT node runtime
469  * @param[in] thread_index       thread index
470  * @param[in,out] b0             buffer containing packet to be translated
471  * @param[out] p_proto           protocol used for matching
472  * @param[out] p_value           address and port after NAT translation
473  * @param[out] p_dont_translate  if packet should not be translated
474  * @param d                      optional parameter
475  * @param e                      optional parameter
476  */
477 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
478                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
479                            snat_session_key_t *p_value,
480                            u8 *p_dont_translate, void *d, void *e)
481 {
482   ip4_header_t *ip0;
483   icmp46_header_t *icmp0;
484   u32 sw_if_index0;
485   u32 rx_fib_index0;
486   snat_session_key_t key0;
487   snat_session_t *s0 = 0;
488   u8 dont_translate = 0;
489   clib_bihash_kv_8_8_t kv0, value0;
490   u32 next0 = ~0;
491   int err;
492
493   ip0 = vlib_buffer_get_current (b0);
494   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
495   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
496   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
497
498   err = icmp_get_key (ip0, &key0);
499   if (err != -1)
500     {
501       b0->error = node->errors[err];
502       next0 = SNAT_IN2OUT_NEXT_DROP;
503       goto out;
504     }
505   key0.fib_index = rx_fib_index0;
506
507   kv0.key = key0.as_u64;
508
509   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
510     {
511       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
512           IP_PROTOCOL_ICMP, rx_fib_index0)))
513         {
514           dont_translate = 1;
515           goto out;
516         }
517
518       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
519         {
520           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
521           next0 = SNAT_IN2OUT_NEXT_DROP;
522           goto out;
523         }
524
525       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
526                          &s0, node, next0, thread_index);
527
528       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
529         goto out;
530     }
531   else
532     {
533       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
534                         icmp0->type != ICMP4_echo_reply &&
535                         !icmp_is_error_message (icmp0)))
536         {
537           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
538           next0 = SNAT_IN2OUT_NEXT_DROP;
539           goto out;
540         }
541
542       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
543                               value0.value);
544     }
545
546 out:
547   *p_proto = key0.protocol;
548   if (s0)
549     *p_value = s0->out2in;
550   *p_dont_translate = dont_translate;
551   if (d)
552     *(snat_session_t**)d = s0;
553   return next0;
554 }
555
556 /**
557  * Get address and port values to be used for packet SNAT translation
558  *
559  * @param[in] sm                 SNAT main
560  * @param[in,out] node           SNAT node runtime
561  * @param[in] thread_index       thread index
562  * @param[in,out] b0             buffer containing packet to be translated
563  * @param[out] p_proto           protocol used for matching
564  * @param[out] p_value           address and port after NAT translation
565  * @param[out] p_dont_translate  if packet should not be translated
566  * @param d                      optional parameter
567  * @param e                      optional parameter
568  */
569 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
570                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
571                            snat_session_key_t *p_value,
572                            u8 *p_dont_translate, void *d, void *e)
573 {
574   ip4_header_t *ip0;
575   icmp46_header_t *icmp0;
576   u32 sw_if_index0;
577   u32 rx_fib_index0;
578   snat_session_key_t key0;
579   snat_session_key_t sm0;
580   u8 dont_translate = 0;
581   u8 is_addr_only;
582   u32 next0 = ~0;
583   int err;
584
585   ip0 = vlib_buffer_get_current (b0);
586   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
587   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
588   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
589
590   err = icmp_get_key (ip0, &key0);
591   if (err != -1)
592     {
593       b0->error = node->errors[err];
594       next0 = SNAT_IN2OUT_NEXT_DROP;
595       goto out2;
596     }
597   key0.fib_index = rx_fib_index0;
598
599   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only))
600     {
601       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
602           IP_PROTOCOL_ICMP, rx_fib_index0)))
603         {
604           dont_translate = 1;
605           goto out;
606         }
607
608       if (icmp_is_error_message (icmp0))
609         {
610           next0 = SNAT_IN2OUT_NEXT_DROP;
611           goto out;
612         }
613
614       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
615       next0 = SNAT_IN2OUT_NEXT_DROP;
616       goto out;
617     }
618
619   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
620                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
621                     !icmp_is_error_message (icmp0)))
622     {
623       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
624       next0 = SNAT_IN2OUT_NEXT_DROP;
625       goto out;
626     }
627
628 out:
629   *p_value = sm0;
630 out2:
631   *p_proto = key0.protocol;
632   *p_dont_translate = dont_translate;
633   return next0;
634 }
635
636 static inline u32 icmp_in2out (snat_main_t *sm,
637                                vlib_buffer_t * b0,
638                                ip4_header_t * ip0,
639                                icmp46_header_t * icmp0,
640                                u32 sw_if_index0,
641                                u32 rx_fib_index0,
642                                vlib_node_runtime_t * node,
643                                u32 next0,
644                                u32 thread_index,
645                                void *d,
646                                void *e)
647 {
648   snat_session_key_t sm0;
649   u8 protocol;
650   icmp_echo_header_t *echo0, *inner_echo0 = 0;
651   ip4_header_t *inner_ip0;
652   void *l4_header = 0;
653   icmp46_header_t *inner_icmp0;
654   u8 dont_translate;
655   u32 new_addr0, old_addr0;
656   u16 old_id0, new_id0;
657   ip_csum_t sum0;
658   u16 checksum0;
659   u32 next0_tmp;
660
661   echo0 = (icmp_echo_header_t *)(icmp0+1);
662
663   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0,
664                                        &protocol, &sm0, &dont_translate, d, e);
665   if (next0_tmp != ~0)
666     next0 = next0_tmp;
667   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
668     goto out;
669
670   sum0 = ip_incremental_checksum (0, icmp0,
671                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
672   checksum0 = ~ip_csum_fold (sum0);
673   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
674     {
675       next0 = SNAT_IN2OUT_NEXT_DROP;
676       goto out;
677     }
678
679   old_addr0 = ip0->src_address.as_u32;
680   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
681   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
682
683   sum0 = ip0->checksum;
684   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
685                          src_address /* changed member */);
686   ip0->checksum = ip_csum_fold (sum0);
687   
688   if (!icmp_is_error_message (icmp0))
689     {
690       new_id0 = sm0.port;
691       if (PREDICT_FALSE(new_id0 != echo0->identifier))
692         {
693           old_id0 = echo0->identifier;
694           new_id0 = sm0.port;
695           echo0->identifier = new_id0;
696
697           sum0 = icmp0->checksum;
698           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
699                                  identifier);
700           icmp0->checksum = ip_csum_fold (sum0);
701         }
702     }
703   else
704     {
705       inner_ip0 = (ip4_header_t *)(echo0+1);
706       l4_header = ip4_next_header (inner_ip0);
707
708       if (!ip4_header_checksum_is_valid (inner_ip0))
709         {
710           next0 = SNAT_IN2OUT_NEXT_DROP;
711           goto out;
712         }
713
714       old_addr0 = inner_ip0->dst_address.as_u32;
715       inner_ip0->dst_address = sm0.addr;
716       new_addr0 = inner_ip0->dst_address.as_u32;
717
718       sum0 = icmp0->checksum;
719       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
720                              dst_address /* changed member */);
721       icmp0->checksum = ip_csum_fold (sum0);
722
723       switch (protocol)
724         {
725           case SNAT_PROTOCOL_ICMP:
726             inner_icmp0 = (icmp46_header_t*)l4_header;
727             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
728
729             old_id0 = inner_echo0->identifier;
730             new_id0 = sm0.port;
731             inner_echo0->identifier = new_id0;
732
733             sum0 = icmp0->checksum;
734             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
735                                    identifier);
736             icmp0->checksum = ip_csum_fold (sum0);
737             break;
738           case SNAT_PROTOCOL_UDP:
739           case SNAT_PROTOCOL_TCP:
740             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
741             new_id0 = sm0.port;
742             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
743
744             sum0 = icmp0->checksum;
745             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
746                                    dst_port);
747             icmp0->checksum = ip_csum_fold (sum0);
748             break;
749           default:
750             ASSERT(0);
751         }
752     }
753
754 out:
755   return next0;
756 }
757
758 /**
759  * @brief Hairpinning
760  *
761  * Hairpinning allows two endpoints on the internal side of the NAT to
762  * communicate even if they only use each other's external IP addresses
763  * and ports.
764  *
765  * @param sm     SNAT main.
766  * @param b0     Vlib buffer.
767  * @param ip0    IP header.
768  * @param udp0   UDP header.
769  * @param tcp0   TCP header.
770  * @param proto0 SNAT protocol.
771  */
772 static inline void
773 snat_hairpinning (snat_main_t *sm,
774                   vlib_buffer_t * b0,
775                   ip4_header_t * ip0,
776                   udp_header_t * udp0,
777                   tcp_header_t * tcp0,
778                   u32 proto0)
779 {
780   snat_session_key_t key0, sm0;
781   snat_worker_key_t k0;
782   snat_session_t * s0;
783   clib_bihash_kv_8_8_t kv0, value0;
784   ip_csum_t sum0;
785   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
786   u16 new_dst_port0, old_dst_port0;
787
788   key0.addr = ip0->dst_address;
789   key0.port = udp0->dst_port;
790   key0.protocol = proto0;
791   key0.fib_index = sm->outside_fib_index;
792   kv0.key = key0.as_u64;
793
794   /* Check if destination is in active sessions */
795   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
796     {
797       /* or static mappings */
798       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
799         {
800           new_dst_addr0 = sm0.addr.as_u32;
801           new_dst_port0 = sm0.port;
802           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
803         }
804     }
805   else
806     {
807       si = value0.value;
808       if (sm->num_workers > 1)
809         {
810           k0.addr = ip0->dst_address;
811           k0.port = udp0->dst_port;
812           k0.fib_index = sm->outside_fib_index;
813           kv0.key = k0.as_u64;
814           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
815             ASSERT(0);
816           else
817             ti = value0.value;
818         }
819       else
820         ti = sm->num_workers;
821
822       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
823       new_dst_addr0 = s0->in2out.addr.as_u32;
824       new_dst_port0 = s0->in2out.port;
825       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
826     }
827
828   /* Destination is behind the same NAT, use internal address and port */
829   if (new_dst_addr0)
830     {
831       old_dst_addr0 = ip0->dst_address.as_u32;
832       ip0->dst_address.as_u32 = new_dst_addr0;
833       sum0 = ip0->checksum;
834       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
835                              ip4_header_t, dst_address);
836       ip0->checksum = ip_csum_fold (sum0);
837
838       old_dst_port0 = tcp0->dst;
839       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
840         {
841           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
842             {
843               tcp0->dst = new_dst_port0;
844               sum0 = tcp0->checksum;
845               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
846                                      ip4_header_t, dst_address);
847               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
848                                      ip4_header_t /* cheat */, length);
849               tcp0->checksum = ip_csum_fold(sum0);
850             }
851           else
852             {
853               udp0->dst_port = new_dst_port0;
854               udp0->checksum = 0;
855             }
856         }
857       else
858         {
859           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
860             {
861               sum0 = tcp0->checksum;
862               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
863                                      ip4_header_t, dst_address);
864               tcp0->checksum = ip_csum_fold(sum0);
865             }
866         }
867     }
868 }
869
870 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
871                                          vlib_buffer_t * b0,
872                                          ip4_header_t * ip0,
873                                          icmp46_header_t * icmp0,
874                                          u32 sw_if_index0,
875                                          u32 rx_fib_index0,
876                                          vlib_node_runtime_t * node,
877                                          u32 next0,
878                                          f64 now,
879                                          u32 thread_index,
880                                          snat_session_t ** p_s0)
881 {
882   snat_session_key_t key0, sm0;
883   clib_bihash_kv_8_8_t kv0, value0;
884   snat_worker_key_t k0;
885   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
886   ip_csum_t sum0;
887
888   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
889                       next0, thread_index, p_s0, 0);
890   snat_session_t * s0 = *p_s0;
891   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
892     {
893       /* Hairpinning */
894       if (!icmp_is_error_message (icmp0))
895         {
896           icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
897           u16 icmp_id0 = echo0->identifier;
898           key0.addr = ip0->dst_address;
899           key0.port = icmp_id0;
900           key0.protocol = SNAT_PROTOCOL_ICMP;
901           key0.fib_index = sm->outside_fib_index;
902           kv0.key = key0.as_u64;
903
904           /* Check if destination is in active sessions */
905           if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
906             {
907               /* or static mappings */
908               if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
909                 {
910                   new_dst_addr0 = sm0.addr.as_u32;
911                   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
912                 }
913             }
914           else
915             {
916               si = value0.value;
917               if (sm->num_workers > 1)
918                 {
919                   k0.addr = ip0->dst_address;
920                   k0.port = icmp_id0;
921                   k0.fib_index = sm->outside_fib_index;
922                   kv0.key = k0.as_u64;
923                   if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
924                     ASSERT(0);
925                   else
926                     ti = value0.value;
927                 }
928               else
929                 ti = sm->num_workers;
930
931               s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
932               new_dst_addr0 = s0->in2out.addr.as_u32;
933               vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
934               echo0->identifier = s0->in2out.port;
935               sum0 = icmp0->checksum;
936               sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
937                                      icmp_echo_header_t, identifier);
938               icmp0->checksum = ip_csum_fold (sum0);
939             }
940
941           /* Destination is behind the same NAT, use internal address and port */
942           if (new_dst_addr0)
943             {
944               old_dst_addr0 = ip0->dst_address.as_u32;
945               ip0->dst_address.as_u32 = new_dst_addr0;
946               sum0 = ip0->checksum;
947               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
948                                      ip4_header_t, dst_address);
949               ip0->checksum = ip_csum_fold (sum0);
950             }
951         }
952
953       /* Accounting */
954       s0->last_heard = now;
955       s0->total_pkts++;
956       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
957       /* Per-user LRU list maintenance for dynamic translations */
958       if (!snat_is_session_static (s0))
959         {
960           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
961                              s0->per_user_index);
962           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
963                               s0->per_user_list_head_index,
964                               s0->per_user_index);
965         }
966     }
967   return next0;
968 }
969
970 static void
971 snat_in2out_unknown_proto (snat_main_t *sm,
972                            vlib_buffer_t * b,
973                            ip4_header_t * ip,
974                            u32 rx_fib_index)
975 {
976   clib_bihash_kv_8_8_t kv, value;
977   snat_static_mapping_t *m;
978   snat_session_key_t m_key;
979   u32 old_addr, new_addr;
980   ip_csum_t sum;
981
982   m_key.addr = ip->src_address;
983   m_key.port = 0;
984   m_key.protocol = 0;
985   m_key.fib_index = rx_fib_index;
986   kv.key = m_key.as_u64;
987   if (clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
988     return;
989
990   m = pool_elt_at_index (sm->static_mappings, value.value);
991
992   old_addr = ip->src_address.as_u32;
993   new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
994   sum = ip->checksum;
995   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
996   ip->checksum = ip_csum_fold (sum);
997
998   /* Hairpinning */
999   m_key.addr = ip->dst_address;
1000   m_key.fib_index = sm->outside_fib_index;
1001   kv.key = m_key.as_u64;
1002   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1003     {
1004       vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1005       return;
1006     }
1007
1008   m = pool_elt_at_index (sm->static_mappings, value.value);
1009
1010   old_addr = ip->dst_address.as_u32;
1011   new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
1012   sum = ip->checksum;
1013   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
1014   ip->checksum = ip_csum_fold (sum);
1015
1016   vnet_buffer(b)->sw_if_index[VLIB_TX] = vnet_buffer(b)->sw_if_index[VLIB_RX];
1017 }
1018
1019 static inline uword
1020 snat_in2out_node_fn_inline (vlib_main_t * vm,
1021                             vlib_node_runtime_t * node,
1022                             vlib_frame_t * frame, int is_slow_path)
1023 {
1024   u32 n_left_from, * from, * to_next;
1025   snat_in2out_next_t next_index;
1026   u32 pkts_processed = 0;
1027   snat_main_t * sm = &snat_main;
1028   f64 now = vlib_time_now (vm);
1029   u32 stats_node_index;
1030   u32 thread_index = vlib_get_thread_index ();
1031
1032   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
1033     snat_in2out_node.index;
1034
1035   from = vlib_frame_vector_args (frame);
1036   n_left_from = frame->n_vectors;
1037   next_index = node->cached_next_index;
1038
1039   while (n_left_from > 0)
1040     {
1041       u32 n_left_to_next;
1042
1043       vlib_get_next_frame (vm, node, next_index,
1044                            to_next, n_left_to_next);
1045
1046       while (n_left_from >= 4 && n_left_to_next >= 2)
1047         {
1048           u32 bi0, bi1;
1049           vlib_buffer_t * b0, * b1;
1050           u32 next0, next1;
1051           u32 sw_if_index0, sw_if_index1;
1052           ip4_header_t * ip0, * ip1;
1053           ip_csum_t sum0, sum1;
1054           u32 new_addr0, old_addr0, new_addr1, old_addr1;
1055           u16 old_port0, new_port0, old_port1, new_port1;
1056           udp_header_t * udp0, * udp1;
1057           tcp_header_t * tcp0, * tcp1;
1058           icmp46_header_t * icmp0, * icmp1;
1059           snat_session_key_t key0, key1;
1060           u32 rx_fib_index0, rx_fib_index1;
1061           u32 proto0, proto1;
1062           snat_session_t * s0 = 0, * s1 = 0;
1063           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1064           
1065           /* Prefetch next iteration. */
1066           {
1067             vlib_buffer_t * p2, * p3;
1068             
1069             p2 = vlib_get_buffer (vm, from[2]);
1070             p3 = vlib_get_buffer (vm, from[3]);
1071             
1072             vlib_prefetch_buffer_header (p2, LOAD);
1073             vlib_prefetch_buffer_header (p3, LOAD);
1074
1075             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1076             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1077           }
1078
1079           /* speculatively enqueue b0 and b1 to the current next frame */
1080           to_next[0] = bi0 = from[0];
1081           to_next[1] = bi1 = from[1];
1082           from += 2;
1083           to_next += 2;
1084           n_left_from -= 2;
1085           n_left_to_next -= 2;
1086           
1087           b0 = vlib_get_buffer (vm, bi0);
1088           b1 = vlib_get_buffer (vm, bi1);
1089
1090           ip0 = vlib_buffer_get_current (b0);
1091           udp0 = ip4_next_header (ip0);
1092           tcp0 = (tcp_header_t *) udp0;
1093           icmp0 = (icmp46_header_t *) udp0;
1094
1095           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1096           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1097                                    sw_if_index0);
1098
1099           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1100
1101           if (PREDICT_FALSE(ip0->ttl == 1))
1102             {
1103               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1104               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1105                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1106                                            0);
1107               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1108               goto trace00;
1109             }
1110
1111           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1112
1113           /* Next configured feature, probably ip4-lookup */
1114           if (is_slow_path)
1115             {
1116               if (PREDICT_FALSE (proto0 == ~0))
1117                 {
1118                   snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0);
1119                   goto trace00;
1120                 }
1121
1122               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1123                 {
1124                   next0 = icmp_in2out_slow_path 
1125                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
1126                      node, next0, now, thread_index, &s0);
1127                   goto trace00;
1128                 }
1129             }
1130           else
1131             {
1132               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1133                 {
1134                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1135                   goto trace00;
1136                 }
1137             }
1138
1139           key0.addr = ip0->src_address;
1140           key0.port = udp0->src_port;
1141           key0.protocol = proto0;
1142           key0.fib_index = rx_fib_index0;
1143           
1144           kv0.key = key0.as_u64;
1145
1146           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
1147             {
1148               if (is_slow_path)
1149                 {
1150                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
1151                       proto0, rx_fib_index0)))
1152                     goto trace00;
1153
1154                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1155                                      &s0, node, next0, thread_index);
1156                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1157                     goto trace00;
1158                 }
1159               else
1160                 {
1161                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1162                   goto trace00;
1163                 }
1164             }
1165           else
1166             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1167                                     value0.value);
1168
1169           old_addr0 = ip0->src_address.as_u32;
1170           ip0->src_address = s0->out2in.addr;
1171           new_addr0 = ip0->src_address.as_u32;
1172           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1173
1174           sum0 = ip0->checksum;
1175           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1176                                  ip4_header_t,
1177                                  src_address /* changed member */);
1178           ip0->checksum = ip_csum_fold (sum0);
1179
1180           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1181             {
1182               old_port0 = tcp0->src_port;
1183               tcp0->src_port = s0->out2in.port;
1184               new_port0 = tcp0->src_port;
1185
1186               sum0 = tcp0->checksum;
1187               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1188                                      ip4_header_t,
1189                                      dst_address /* changed member */);
1190               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1191                                      ip4_header_t /* cheat */,
1192                                      length /* changed member */);
1193               tcp0->checksum = ip_csum_fold(sum0);
1194             }
1195           else
1196             {
1197               old_port0 = udp0->src_port;
1198               udp0->src_port = s0->out2in.port;
1199               udp0->checksum = 0;
1200             }
1201
1202           /* Hairpinning */
1203           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1204
1205           /* Accounting */
1206           s0->last_heard = now;
1207           s0->total_pkts++;
1208           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1209           /* Per-user LRU list maintenance for dynamic translation */
1210           if (!snat_is_session_static (s0))
1211             {
1212               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1213                                  s0->per_user_index);
1214               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1215                                   s0->per_user_list_head_index,
1216                                   s0->per_user_index);
1217             }
1218         trace00:
1219
1220           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1221                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1222             {
1223               snat_in2out_trace_t *t = 
1224                  vlib_add_trace (vm, node, b0, sizeof (*t));
1225               t->is_slow_path = is_slow_path;
1226               t->sw_if_index = sw_if_index0;
1227               t->next_index = next0;
1228                   t->session_index = ~0;
1229               if (s0)
1230                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1231             }
1232
1233           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1234
1235           ip1 = vlib_buffer_get_current (b1);
1236           udp1 = ip4_next_header (ip1);
1237           tcp1 = (tcp_header_t *) udp1;
1238           icmp1 = (icmp46_header_t *) udp1;
1239
1240           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1241           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1242                                    sw_if_index1);
1243
1244           if (PREDICT_FALSE(ip1->ttl == 1))
1245             {
1246               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1247               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1248                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1249                                            0);
1250               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1251               goto trace01;
1252             }
1253
1254           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1255
1256           /* Next configured feature, probably ip4-lookup */
1257           if (is_slow_path)
1258             {
1259               if (PREDICT_FALSE (proto1 == ~0))
1260                 {
1261                   snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1);
1262                   goto trace01;
1263                 }
1264
1265               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1266                 {
1267                   next1 = icmp_in2out_slow_path 
1268                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1269                      next1, now, thread_index, &s1);
1270                   goto trace01;
1271                 }
1272             }
1273           else
1274             {
1275               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1276                 {
1277                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1278                   goto trace01;
1279                 }
1280             }
1281
1282           key1.addr = ip1->src_address;
1283           key1.port = udp1->src_port;
1284           key1.protocol = proto1;
1285           key1.fib_index = rx_fib_index1;
1286           
1287           kv1.key = key1.as_u64;
1288
1289             if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
1290             {
1291               if (is_slow_path)
1292                 {
1293                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1, ip1,
1294                       proto1, rx_fib_index1)))
1295                     goto trace01;
1296
1297                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1298                                      &s1, node, next1, thread_index);
1299                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1300                     goto trace01;
1301                 }
1302               else
1303                 {
1304                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1305                   goto trace01;
1306                 }
1307             }
1308           else
1309             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1310                                     value1.value);
1311
1312           old_addr1 = ip1->src_address.as_u32;
1313           ip1->src_address = s1->out2in.addr;
1314           new_addr1 = ip1->src_address.as_u32;
1315           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1316
1317           sum1 = ip1->checksum;
1318           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1319                                  ip4_header_t,
1320                                  src_address /* changed member */);
1321           ip1->checksum = ip_csum_fold (sum1);
1322
1323           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1324             {
1325               old_port1 = tcp1->src_port;
1326               tcp1->src_port = s1->out2in.port;
1327               new_port1 = tcp1->src_port;
1328
1329               sum1 = tcp1->checksum;
1330               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1331                                      ip4_header_t,
1332                                      dst_address /* changed member */);
1333               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1334                                      ip4_header_t /* cheat */,
1335                                      length /* changed member */);
1336               tcp1->checksum = ip_csum_fold(sum1);
1337             }
1338           else
1339             {
1340               old_port1 = udp1->src_port;
1341               udp1->src_port = s1->out2in.port;
1342               udp1->checksum = 0;
1343             }
1344
1345           /* Hairpinning */
1346           snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
1347
1348           /* Accounting */
1349           s1->last_heard = now;
1350           s1->total_pkts++;
1351           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1352           /* Per-user LRU list maintenance for dynamic translation */
1353           if (!snat_is_session_static (s1))
1354             {
1355               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1356                                  s1->per_user_index);
1357               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1358                                   s1->per_user_list_head_index,
1359                                   s1->per_user_index);
1360             }
1361         trace01:
1362
1363           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1364                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
1365             {
1366               snat_in2out_trace_t *t = 
1367                  vlib_add_trace (vm, node, b1, sizeof (*t));
1368               t->sw_if_index = sw_if_index1;
1369               t->next_index = next1;
1370               t->session_index = ~0;
1371               if (s1)
1372                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1373             }
1374
1375           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1376
1377           /* verify speculative enqueues, maybe switch current next frame */
1378           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1379                                            to_next, n_left_to_next,
1380                                            bi0, bi1, next0, next1);
1381         }
1382
1383       while (n_left_from > 0 && n_left_to_next > 0)
1384         {
1385           u32 bi0;
1386           vlib_buffer_t * b0;
1387           u32 next0;
1388           u32 sw_if_index0;
1389           ip4_header_t * ip0;
1390           ip_csum_t sum0;
1391           u32 new_addr0, old_addr0;
1392           u16 old_port0, new_port0;
1393           udp_header_t * udp0;
1394           tcp_header_t * tcp0;
1395           icmp46_header_t * icmp0;
1396           snat_session_key_t key0;
1397           u32 rx_fib_index0;
1398           u32 proto0;
1399           snat_session_t * s0 = 0;
1400           clib_bihash_kv_8_8_t kv0, value0;
1401           
1402           /* speculatively enqueue b0 to the current next frame */
1403           bi0 = from[0];
1404           to_next[0] = bi0;
1405           from += 1;
1406           to_next += 1;
1407           n_left_from -= 1;
1408           n_left_to_next -= 1;
1409
1410           b0 = vlib_get_buffer (vm, bi0);
1411           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1412
1413           ip0 = vlib_buffer_get_current (b0);
1414           udp0 = ip4_next_header (ip0);
1415           tcp0 = (tcp_header_t *) udp0;
1416           icmp0 = (icmp46_header_t *) udp0;
1417
1418           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1419           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1420                                    sw_if_index0);
1421
1422           if (PREDICT_FALSE(ip0->ttl == 1))
1423             {
1424               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1425               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1426                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1427                                            0);
1428               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1429               goto trace0;
1430             }
1431
1432           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1433
1434           /* Next configured feature, probably ip4-lookup */
1435           if (is_slow_path)
1436             {
1437               if (PREDICT_FALSE (proto0 == ~0))
1438                 {
1439                   snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0);
1440                   goto trace0;
1441                 }
1442
1443               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1444                 {
1445                   next0 = icmp_in2out_slow_path 
1446                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1447                      next0, now, thread_index, &s0);
1448                   goto trace0;
1449                 }
1450             }
1451           else
1452             {
1453               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1454                 {
1455                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1456                   goto trace0;
1457                 }
1458             }
1459
1460           key0.addr = ip0->src_address;
1461           key0.port = udp0->src_port;
1462           key0.protocol = proto0;
1463           key0.fib_index = rx_fib_index0;
1464           
1465           kv0.key = key0.as_u64;
1466
1467           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
1468             {
1469               if (is_slow_path)
1470                 {
1471                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
1472                       proto0, rx_fib_index0)))
1473                     goto trace0;
1474
1475                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1476                                      &s0, node, next0, thread_index);
1477
1478                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1479                     goto trace0;
1480                 }
1481               else
1482                 {
1483                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1484                   goto trace0;
1485                 }
1486             }
1487           else
1488             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1489                                     value0.value);
1490
1491           old_addr0 = ip0->src_address.as_u32;
1492           ip0->src_address = s0->out2in.addr;
1493           new_addr0 = ip0->src_address.as_u32;
1494           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1495
1496           sum0 = ip0->checksum;
1497           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1498                                  ip4_header_t,
1499                                  src_address /* changed member */);
1500           ip0->checksum = ip_csum_fold (sum0);
1501
1502           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1503             {
1504               old_port0 = tcp0->src_port;
1505               tcp0->src_port = s0->out2in.port;
1506               new_port0 = tcp0->src_port;
1507
1508               sum0 = tcp0->checksum;
1509               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1510                                      ip4_header_t,
1511                                      dst_address /* changed member */);
1512               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1513                                      ip4_header_t /* cheat */,
1514                                      length /* changed member */);
1515               tcp0->checksum = ip_csum_fold(sum0);
1516             }
1517           else
1518             {
1519               old_port0 = udp0->src_port;
1520               udp0->src_port = s0->out2in.port;
1521               udp0->checksum = 0;
1522             }
1523
1524           /* Hairpinning */
1525           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1526
1527           /* Accounting */
1528           s0->last_heard = now;
1529           s0->total_pkts++;
1530           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1531           /* Per-user LRU list maintenance for dynamic translation */
1532           if (!snat_is_session_static (s0))
1533             {
1534               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1535                                  s0->per_user_index);
1536               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1537                                   s0->per_user_list_head_index,
1538                                   s0->per_user_index);
1539             }
1540
1541         trace0:
1542           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1543                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1544             {
1545               snat_in2out_trace_t *t = 
1546                  vlib_add_trace (vm, node, b0, sizeof (*t));
1547               t->is_slow_path = is_slow_path;
1548               t->sw_if_index = sw_if_index0;
1549               t->next_index = next0;
1550                   t->session_index = ~0;
1551               if (s0)
1552                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1553             }
1554
1555           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1556
1557           /* verify speculative enqueue, maybe switch current next frame */
1558           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1559                                            to_next, n_left_to_next,
1560                                            bi0, next0);
1561         }
1562
1563       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1564     }
1565
1566   vlib_node_increment_counter (vm, stats_node_index, 
1567                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, 
1568                                pkts_processed);
1569   return frame->n_vectors;
1570 }
1571
1572 static uword
1573 snat_in2out_fast_path_fn (vlib_main_t * vm,
1574                           vlib_node_runtime_t * node,
1575                           vlib_frame_t * frame)
1576 {
1577   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */);
1578 }
1579
1580 VLIB_REGISTER_NODE (snat_in2out_node) = {
1581   .function = snat_in2out_fast_path_fn,
1582   .name = "snat-in2out",
1583   .vector_size = sizeof (u32),
1584   .format_trace = format_snat_in2out_trace,
1585   .type = VLIB_NODE_TYPE_INTERNAL,
1586   
1587   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1588   .error_strings = snat_in2out_error_strings,
1589
1590   .runtime_data_bytes = sizeof (snat_runtime_t),
1591   
1592   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1593
1594   /* edit / add dispositions here */
1595   .next_nodes = {
1596     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1597     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1598     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1599     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1600   },
1601 };
1602
1603 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1604
1605 static uword
1606 snat_in2out_slow_path_fn (vlib_main_t * vm,
1607                           vlib_node_runtime_t * node,
1608                           vlib_frame_t * frame)
1609 {
1610   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */);
1611 }
1612
1613 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1614   .function = snat_in2out_slow_path_fn,
1615   .name = "snat-in2out-slowpath",
1616   .vector_size = sizeof (u32),
1617   .format_trace = format_snat_in2out_trace,
1618   .type = VLIB_NODE_TYPE_INTERNAL,
1619   
1620   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1621   .error_strings = snat_in2out_error_strings,
1622
1623   .runtime_data_bytes = sizeof (snat_runtime_t),
1624   
1625   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1626
1627   /* edit / add dispositions here */
1628   .next_nodes = {
1629     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1630     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1631     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1632     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1633   },
1634 };
1635
1636 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
1637
1638 /**************************/
1639 /*** deterministic mode ***/
1640 /**************************/
1641 static uword
1642 snat_det_in2out_node_fn (vlib_main_t * vm,
1643                          vlib_node_runtime_t * node,
1644                          vlib_frame_t * frame)
1645 {
1646   u32 n_left_from, * from, * to_next;
1647   snat_in2out_next_t next_index;
1648   u32 pkts_processed = 0;
1649   snat_main_t * sm = &snat_main;
1650   u32 now = (u32) vlib_time_now (vm);
1651   u32 thread_index = vlib_get_thread_index ();
1652
1653   from = vlib_frame_vector_args (frame);
1654   n_left_from = frame->n_vectors;
1655   next_index = node->cached_next_index;
1656
1657   while (n_left_from > 0)
1658     {
1659       u32 n_left_to_next;
1660
1661       vlib_get_next_frame (vm, node, next_index,
1662                            to_next, n_left_to_next);
1663
1664       while (n_left_from >= 4 && n_left_to_next >= 2)
1665         {
1666           u32 bi0, bi1;
1667           vlib_buffer_t * b0, * b1;
1668           u32 next0, next1;
1669           u32 sw_if_index0, sw_if_index1;
1670           ip4_header_t * ip0, * ip1;
1671           ip_csum_t sum0, sum1;
1672           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
1673           u16 old_port0, new_port0, lo_port0, i0;
1674           u16 old_port1, new_port1, lo_port1, i1;
1675           udp_header_t * udp0, * udp1;
1676           tcp_header_t * tcp0, * tcp1;
1677           u32 proto0, proto1;
1678           snat_det_out_key_t key0, key1;
1679           snat_det_map_t * dm0, * dm1;
1680           snat_det_session_t * ses0 = 0, * ses1 = 0;
1681           u32 rx_fib_index0, rx_fib_index1;
1682           icmp46_header_t * icmp0, * icmp1;
1683
1684           /* Prefetch next iteration. */
1685           {
1686             vlib_buffer_t * p2, * p3;
1687
1688             p2 = vlib_get_buffer (vm, from[2]);
1689             p3 = vlib_get_buffer (vm, from[3]);
1690
1691             vlib_prefetch_buffer_header (p2, LOAD);
1692             vlib_prefetch_buffer_header (p3, LOAD);
1693
1694             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1695             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1696           }
1697
1698           /* speculatively enqueue b0 and b1 to the current next frame */
1699           to_next[0] = bi0 = from[0];
1700           to_next[1] = bi1 = from[1];
1701           from += 2;
1702           to_next += 2;
1703           n_left_from -= 2;
1704           n_left_to_next -= 2;
1705
1706           b0 = vlib_get_buffer (vm, bi0);
1707           b1 = vlib_get_buffer (vm, bi1);
1708
1709           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1710           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1711
1712           ip0 = vlib_buffer_get_current (b0);
1713           udp0 = ip4_next_header (ip0);
1714           tcp0 = (tcp_header_t *) udp0;
1715
1716           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1717
1718           if (PREDICT_FALSE(ip0->ttl == 1))
1719             {
1720               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1721               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1722                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1723                                            0);
1724               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1725               goto trace0;
1726             }
1727
1728           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1729
1730           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1731             {
1732               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1733               icmp0 = (icmp46_header_t *) udp0;
1734
1735               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
1736                                   rx_fib_index0, node, next0, thread_index,
1737                                   &ses0, &dm0);
1738               goto trace0;
1739             }
1740
1741           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
1742           if (PREDICT_FALSE(!dm0))
1743             {
1744               clib_warning("no match for internal host %U",
1745                            format_ip4_address, &ip0->src_address);
1746               next0 = SNAT_IN2OUT_NEXT_DROP;
1747               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1748               goto trace0;
1749             }
1750
1751           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
1752
1753           key0.ext_host_addr = ip0->dst_address;
1754           key0.ext_host_port = tcp0->dst;
1755
1756           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
1757           if (PREDICT_FALSE(!ses0))
1758             {
1759               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
1760                 {
1761                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
1762                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
1763
1764                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
1765                     continue;
1766
1767                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
1768                   break;
1769                 }
1770               if (PREDICT_FALSE(!ses0))
1771                 {
1772                   /* too many sessions for user, send ICMP error packet */
1773
1774                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1775                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
1776                                                ICMP4_destination_unreachable_destination_unreachable_host,
1777                                                0);
1778                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1779                   goto trace0;
1780                 }
1781             }
1782
1783           new_port0 = ses0->out.out_port;
1784
1785           old_addr0.as_u32 = ip0->src_address.as_u32;
1786           ip0->src_address.as_u32 = new_addr0.as_u32;
1787           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1788
1789           sum0 = ip0->checksum;
1790           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1791                                  ip4_header_t,
1792                                  src_address /* changed member */);
1793           ip0->checksum = ip_csum_fold (sum0);
1794
1795           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1796             {
1797               if (tcp0->flags & TCP_FLAG_SYN)
1798                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
1799               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
1800                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1801               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1802                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
1803               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
1804                 snat_det_ses_close(dm0, ses0);
1805               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1806                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
1807               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
1808                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1809
1810               old_port0 = tcp0->src;
1811               tcp0->src = new_port0;
1812
1813               sum0 = tcp0->checksum;
1814               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1815                                      ip4_header_t,
1816                                      dst_address /* changed member */);
1817               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1818                                      ip4_header_t /* cheat */,
1819                                      length /* changed member */);
1820               tcp0->checksum = ip_csum_fold(sum0);
1821             }
1822           else
1823             {
1824               ses0->state = SNAT_SESSION_UDP_ACTIVE;
1825               old_port0 = udp0->src_port;
1826               udp0->src_port = new_port0;
1827               udp0->checksum = 0;
1828             }
1829
1830           switch(ses0->state)
1831             {
1832             case SNAT_SESSION_UDP_ACTIVE:
1833                 ses0->expire = now + sm->udp_timeout;
1834                 break;
1835             case SNAT_SESSION_TCP_SYN_SENT:
1836             case SNAT_SESSION_TCP_FIN_WAIT:
1837             case SNAT_SESSION_TCP_CLOSE_WAIT:
1838             case SNAT_SESSION_TCP_LAST_ACK:
1839                 ses0->expire = now + sm->tcp_transitory_timeout;
1840                 break;
1841             case SNAT_SESSION_TCP_ESTABLISHED:
1842                 ses0->expire = now + sm->tcp_established_timeout;
1843                 break;
1844             }
1845
1846         trace0:
1847           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1848                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1849             {
1850               snat_in2out_trace_t *t =
1851                  vlib_add_trace (vm, node, b0, sizeof (*t));
1852               t->is_slow_path = 0;
1853               t->sw_if_index = sw_if_index0;
1854               t->next_index = next0;
1855               t->session_index = ~0;
1856               if (ses0)
1857                 t->session_index = ses0 - dm0->sessions;
1858             }
1859
1860           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1861
1862           ip1 = vlib_buffer_get_current (b1);
1863           udp1 = ip4_next_header (ip1);
1864           tcp1 = (tcp_header_t *) udp1;
1865
1866           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1867
1868           if (PREDICT_FALSE(ip1->ttl == 1))
1869             {
1870               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1871               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1872                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1873                                            0);
1874               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1875               goto trace1;
1876             }
1877
1878           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1879
1880           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
1881             {
1882               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
1883               icmp1 = (icmp46_header_t *) udp1;
1884
1885               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
1886                                   rx_fib_index1, node, next1, thread_index,
1887                                   &ses1, &dm1);
1888               goto trace1;
1889             }
1890
1891           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
1892           if (PREDICT_FALSE(!dm1))
1893             {
1894               clib_warning("no match for internal host %U",
1895                            format_ip4_address, &ip0->src_address);
1896               next1 = SNAT_IN2OUT_NEXT_DROP;
1897               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1898               goto trace1;
1899             }
1900
1901           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
1902
1903           key1.ext_host_addr = ip1->dst_address;
1904           key1.ext_host_port = tcp1->dst;
1905
1906           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
1907           if (PREDICT_FALSE(!ses1))
1908             {
1909               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
1910                 {
1911                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
1912                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
1913
1914                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
1915                     continue;
1916
1917                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
1918                   break;
1919                 }
1920               if (PREDICT_FALSE(!ses1))
1921                 {
1922                   /* too many sessions for user, send ICMP error packet */
1923
1924                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1925                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
1926                                                ICMP4_destination_unreachable_destination_unreachable_host,
1927                                                0);
1928                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1929                   goto trace1;
1930                 }
1931             }
1932
1933           new_port1 = ses1->out.out_port;
1934
1935           old_addr1.as_u32 = ip1->src_address.as_u32;
1936           ip1->src_address.as_u32 = new_addr1.as_u32;
1937           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1938
1939           sum1 = ip1->checksum;
1940           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1941                                  ip4_header_t,
1942                                  src_address /* changed member */);
1943           ip1->checksum = ip_csum_fold (sum1);
1944
1945           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1946             {
1947               if (tcp1->flags & TCP_FLAG_SYN)
1948                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
1949               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
1950                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
1951               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
1952                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
1953               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
1954                 snat_det_ses_close(dm1, ses1);
1955               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1956                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
1957               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
1958                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
1959
1960               old_port1 = tcp1->src;
1961               tcp1->src = new_port1;
1962
1963               sum1 = tcp1->checksum;
1964               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1965                                      ip4_header_t,
1966                                      dst_address /* changed member */);
1967               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1968                                      ip4_header_t /* cheat */,
1969                                      length /* changed member */);
1970               tcp1->checksum = ip_csum_fold(sum1);
1971             }
1972           else
1973             {
1974               ses1->state = SNAT_SESSION_UDP_ACTIVE;
1975               old_port1 = udp1->src_port;
1976               udp1->src_port = new_port1;
1977               udp1->checksum = 0;
1978             }
1979
1980           switch(ses1->state)
1981             {
1982             case SNAT_SESSION_UDP_ACTIVE:
1983                 ses1->expire = now + sm->udp_timeout;
1984                 break;
1985             case SNAT_SESSION_TCP_SYN_SENT:
1986             case SNAT_SESSION_TCP_FIN_WAIT:
1987             case SNAT_SESSION_TCP_CLOSE_WAIT:
1988             case SNAT_SESSION_TCP_LAST_ACK:
1989                 ses1->expire = now + sm->tcp_transitory_timeout;
1990                 break;
1991             case SNAT_SESSION_TCP_ESTABLISHED:
1992                 ses1->expire = now + sm->tcp_established_timeout;
1993                 break;
1994             }
1995
1996         trace1:
1997           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1998                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1999             {
2000               snat_in2out_trace_t *t =
2001                  vlib_add_trace (vm, node, b1, sizeof (*t));
2002               t->is_slow_path = 0;
2003               t->sw_if_index = sw_if_index1;
2004               t->next_index = next1;
2005               t->session_index = ~0;
2006               if (ses1)
2007                 t->session_index = ses1 - dm1->sessions;
2008             }
2009
2010           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
2011
2012           /* verify speculative enqueues, maybe switch current next frame */
2013           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2014                                            to_next, n_left_to_next,
2015                                            bi0, bi1, next0, next1);
2016          }
2017
2018       while (n_left_from > 0 && n_left_to_next > 0)
2019         {
2020           u32 bi0;
2021           vlib_buffer_t * b0;
2022           u32 next0;
2023           u32 sw_if_index0;
2024           ip4_header_t * ip0;
2025           ip_csum_t sum0;
2026           ip4_address_t new_addr0, old_addr0;
2027           u16 old_port0, new_port0, lo_port0, i0;
2028           udp_header_t * udp0;
2029           tcp_header_t * tcp0;
2030           u32 proto0;
2031           snat_det_out_key_t key0;
2032           snat_det_map_t * dm0;
2033           snat_det_session_t * ses0 = 0;
2034           u32 rx_fib_index0;
2035           icmp46_header_t * icmp0;
2036
2037           /* speculatively enqueue b0 to the current next frame */
2038           bi0 = from[0];
2039           to_next[0] = bi0;
2040           from += 1;
2041           to_next += 1;
2042           n_left_from -= 1;
2043           n_left_to_next -= 1;
2044
2045           b0 = vlib_get_buffer (vm, bi0);
2046           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2047
2048           ip0 = vlib_buffer_get_current (b0);
2049           udp0 = ip4_next_header (ip0);
2050           tcp0 = (tcp_header_t *) udp0;
2051
2052           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2053
2054           if (PREDICT_FALSE(ip0->ttl == 1))
2055             {
2056               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2057               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2058                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2059                                            0);
2060               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2061               goto trace00;
2062             }
2063
2064           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2065
2066           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2067             {
2068               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2069               icmp0 = (icmp46_header_t *) udp0;
2070
2071               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2072                                   rx_fib_index0, node, next0, thread_index,
2073                                   &ses0, &dm0);
2074               goto trace00;
2075             }
2076
2077           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2078           if (PREDICT_FALSE(!dm0))
2079             {
2080               clib_warning("no match for internal host %U",
2081                            format_ip4_address, &ip0->src_address);
2082               next0 = SNAT_IN2OUT_NEXT_DROP;
2083               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2084               goto trace00;
2085             }
2086
2087           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2088
2089           key0.ext_host_addr = ip0->dst_address;
2090           key0.ext_host_port = tcp0->dst;
2091
2092           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2093           if (PREDICT_FALSE(!ses0))
2094             {
2095               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2096                 {
2097                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2098                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2099
2100                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2101                     continue;
2102
2103                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2104                   break;
2105                 }
2106               if (PREDICT_FALSE(!ses0))
2107                 {
2108                   /* too many sessions for user, send ICMP error packet */
2109
2110                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2111                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2112                                                ICMP4_destination_unreachable_destination_unreachable_host,
2113                                                0);
2114                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2115                   goto trace00;
2116                 }
2117             }
2118
2119           new_port0 = ses0->out.out_port;
2120
2121           old_addr0.as_u32 = ip0->src_address.as_u32;
2122           ip0->src_address.as_u32 = new_addr0.as_u32;
2123           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2124
2125           sum0 = ip0->checksum;
2126           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2127                                  ip4_header_t,
2128                                  src_address /* changed member */);
2129           ip0->checksum = ip_csum_fold (sum0);
2130
2131           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2132             {
2133               if (tcp0->flags & TCP_FLAG_SYN)
2134                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2135               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2136                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2137               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2138                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2139               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2140                 snat_det_ses_close(dm0, ses0);
2141               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2142                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2143               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2144                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2145
2146               old_port0 = tcp0->src;
2147               tcp0->src = new_port0;
2148
2149               sum0 = tcp0->checksum;
2150               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2151                                      ip4_header_t,
2152                                      dst_address /* changed member */);
2153               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2154                                      ip4_header_t /* cheat */,
2155                                      length /* changed member */);
2156               tcp0->checksum = ip_csum_fold(sum0);
2157             }
2158           else
2159             {
2160               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2161               old_port0 = udp0->src_port;
2162               udp0->src_port = new_port0;
2163               udp0->checksum = 0;
2164             }
2165
2166           switch(ses0->state)
2167             {
2168             case SNAT_SESSION_UDP_ACTIVE:
2169                 ses0->expire = now + sm->udp_timeout;
2170                 break;
2171             case SNAT_SESSION_TCP_SYN_SENT:
2172             case SNAT_SESSION_TCP_FIN_WAIT:
2173             case SNAT_SESSION_TCP_CLOSE_WAIT:
2174             case SNAT_SESSION_TCP_LAST_ACK:
2175                 ses0->expire = now + sm->tcp_transitory_timeout;
2176                 break;
2177             case SNAT_SESSION_TCP_ESTABLISHED:
2178                 ses0->expire = now + sm->tcp_established_timeout;
2179                 break;
2180             }
2181
2182         trace00:
2183           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2184                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2185             {
2186               snat_in2out_trace_t *t =
2187                  vlib_add_trace (vm, node, b0, sizeof (*t));
2188               t->is_slow_path = 0;
2189               t->sw_if_index = sw_if_index0;
2190               t->next_index = next0;
2191               t->session_index = ~0;
2192               if (ses0)
2193                 t->session_index = ses0 - dm0->sessions;
2194             }
2195
2196           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2197
2198           /* verify speculative enqueue, maybe switch current next frame */
2199           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2200                                            to_next, n_left_to_next,
2201                                            bi0, next0);
2202         }
2203
2204       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2205     }
2206
2207   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
2208                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2209                                pkts_processed);
2210   return frame->n_vectors;
2211 }
2212
2213 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
2214   .function = snat_det_in2out_node_fn,
2215   .name = "snat-det-in2out",
2216   .vector_size = sizeof (u32),
2217   .format_trace = format_snat_in2out_trace,
2218   .type = VLIB_NODE_TYPE_INTERNAL,
2219
2220   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2221   .error_strings = snat_in2out_error_strings,
2222
2223   .runtime_data_bytes = sizeof (snat_runtime_t),
2224
2225   .n_next_nodes = 3,
2226
2227   /* edit / add dispositions here */
2228   .next_nodes = {
2229     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2230     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2231     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2232   },
2233 };
2234
2235 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
2236
2237 /**
2238  * Get address and port values to be used for packet SNAT translation
2239  * and create session if needed
2240  *
2241  * @param[in,out] sm             SNAT main
2242  * @param[in,out] node           SNAT node runtime
2243  * @param[in] thread_index       thread index
2244  * @param[in,out] b0             buffer containing packet to be translated
2245  * @param[out] p_proto           protocol used for matching
2246  * @param[out] p_value           address and port after NAT translation
2247  * @param[out] p_dont_translate  if packet should not be translated
2248  * @param d                      optional parameter
2249  * @param e                      optional parameter
2250  */
2251 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
2252                           u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
2253                           snat_session_key_t *p_value,
2254                           u8 *p_dont_translate, void *d, void *e)
2255 {
2256   ip4_header_t *ip0;
2257   icmp46_header_t *icmp0;
2258   u32 sw_if_index0;
2259   u32 rx_fib_index0;
2260   u8 protocol;
2261   snat_det_out_key_t key0;
2262   u8 dont_translate = 0;
2263   u32 next0 = ~0;
2264   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2265   ip4_header_t *inner_ip0;
2266   void *l4_header = 0;
2267   icmp46_header_t *inner_icmp0;
2268   snat_det_map_t * dm0 = 0;
2269   ip4_address_t new_addr0;
2270   u16 lo_port0, i0;
2271   snat_det_session_t * ses0 = 0;
2272   ip4_address_t in_addr;
2273   u16 in_port;
2274
2275   ip0 = vlib_buffer_get_current (b0);
2276   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2277   echo0 = (icmp_echo_header_t *)(icmp0+1);
2278   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2279   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
2280
2281   if (!icmp_is_error_message (icmp0))
2282     {
2283       protocol = SNAT_PROTOCOL_ICMP;
2284       in_addr = ip0->src_address;
2285       in_port = echo0->identifier;
2286     }
2287   else
2288     {
2289       inner_ip0 = (ip4_header_t *)(echo0+1);
2290       l4_header = ip4_next_header (inner_ip0);
2291       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2292       in_addr = inner_ip0->dst_address;
2293       switch (protocol)
2294         {
2295         case SNAT_PROTOCOL_ICMP:
2296           inner_icmp0 = (icmp46_header_t*)l4_header;
2297           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2298           in_port = inner_echo0->identifier;
2299           break;
2300         case SNAT_PROTOCOL_UDP:
2301         case SNAT_PROTOCOL_TCP:
2302           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2303           break;
2304         default:
2305           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
2306           next0 = SNAT_IN2OUT_NEXT_DROP;
2307           goto out;
2308         }
2309     }
2310
2311   dm0 = snat_det_map_by_user(sm, &in_addr);
2312   if (PREDICT_FALSE(!dm0))
2313     {
2314       clib_warning("no match for internal host %U",
2315                    format_ip4_address, &in_addr);
2316       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2317           IP_PROTOCOL_ICMP, rx_fib_index0)))
2318         {
2319           dont_translate = 1;
2320           goto out;
2321         }
2322       next0 = SNAT_IN2OUT_NEXT_DROP;
2323       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2324       goto out;
2325     }
2326
2327   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
2328
2329   key0.ext_host_addr = ip0->dst_address;
2330   key0.ext_host_port = 0;
2331
2332   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
2333   if (PREDICT_FALSE(!ses0))
2334     {
2335       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2336           IP_PROTOCOL_ICMP, rx_fib_index0)))
2337         {
2338           dont_translate = 1;
2339           goto out;
2340         }
2341       if (icmp0->type != ICMP4_echo_request)
2342         {
2343           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2344           next0 = SNAT_IN2OUT_NEXT_DROP;
2345           goto out;
2346         }
2347       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2348         {
2349           key0.out_port = clib_host_to_net_u16 (lo_port0 +
2350             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
2351
2352           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
2353             continue;
2354
2355           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
2356           break;
2357         }
2358       if (PREDICT_FALSE(!ses0))
2359         {
2360           next0 = SNAT_IN2OUT_NEXT_DROP;
2361           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
2362           goto out;
2363         }
2364     }
2365
2366   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
2367                     !icmp_is_error_message (icmp0)))
2368     {
2369       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2370       next0 = SNAT_IN2OUT_NEXT_DROP;
2371       goto out;
2372     }
2373
2374   u32 now = (u32) vlib_time_now (sm->vlib_main);
2375
2376   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
2377   ses0->expire = now + sm->icmp_timeout;
2378
2379 out:
2380   *p_proto = protocol;
2381   if (ses0)
2382     {
2383       p_value->addr = new_addr0;
2384       p_value->fib_index = sm->outside_fib_index;
2385       p_value->port = ses0->out.out_port;
2386     }
2387   *p_dont_translate = dont_translate;
2388   if (d)
2389     *(snat_det_session_t**)d = ses0;
2390   if (e)
2391     *(snat_det_map_t**)e = dm0;
2392   return next0;
2393 }
2394
2395 /**********************/
2396 /*** worker handoff ***/
2397 /**********************/
2398 static uword
2399 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
2400                                vlib_node_runtime_t * node,
2401                                vlib_frame_t * frame)
2402 {
2403   snat_main_t *sm = &snat_main;
2404   vlib_thread_main_t *tm = vlib_get_thread_main ();
2405   u32 n_left_from, *from, *to_next = 0;
2406   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
2407   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
2408     = 0;
2409   vlib_frame_queue_elt_t *hf = 0;
2410   vlib_frame_t *f = 0;
2411   int i;
2412   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
2413   u32 next_worker_index = 0;
2414   u32 current_worker_index = ~0;
2415   u32 thread_index = vlib_get_thread_index ();
2416
2417   ASSERT (vec_len (sm->workers));
2418
2419   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
2420     {
2421       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
2422
2423       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
2424                                sm->first_worker_index + sm->num_workers - 1,
2425                                (vlib_frame_queue_t *) (~0));
2426     }
2427
2428   from = vlib_frame_vector_args (frame);
2429   n_left_from = frame->n_vectors;
2430
2431   while (n_left_from > 0)
2432     {
2433       u32 bi0;
2434       vlib_buffer_t *b0;
2435       u32 sw_if_index0;
2436       u32 rx_fib_index0;
2437       ip4_header_t * ip0;
2438       u8 do_handoff;
2439
2440       bi0 = from[0];
2441       from += 1;
2442       n_left_from -= 1;
2443
2444       b0 = vlib_get_buffer (vm, bi0);
2445
2446       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
2447       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2448
2449       ip0 = vlib_buffer_get_current (b0);
2450
2451       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
2452
2453       if (PREDICT_FALSE (next_worker_index != thread_index))
2454         {
2455           do_handoff = 1;
2456
2457           if (next_worker_index != current_worker_index)
2458             {
2459               if (hf)
2460                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2461
2462               hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
2463                                                       next_worker_index,
2464                                                       handoff_queue_elt_by_worker_index);
2465
2466               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2467               to_next_worker = &hf->buffer_index[hf->n_vectors];
2468               current_worker_index = next_worker_index;
2469             }
2470
2471           /* enqueue to correct worker thread */
2472           to_next_worker[0] = bi0;
2473           to_next_worker++;
2474           n_left_to_next_worker--;
2475
2476           if (n_left_to_next_worker == 0)
2477             {
2478               hf->n_vectors = VLIB_FRAME_SIZE;
2479               vlib_put_frame_queue_elt (hf);
2480               current_worker_index = ~0;
2481               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2482               hf = 0;
2483             }
2484         }
2485       else
2486         {
2487           do_handoff = 0;
2488           /* if this is 1st frame */
2489           if (!f)
2490             {
2491               f = vlib_get_frame_to_node (vm, sm->in2out_node_index);
2492               to_next = vlib_frame_vector_args (f);
2493             }
2494
2495           to_next[0] = bi0;
2496           to_next += 1;
2497           f->n_vectors++;
2498         }
2499
2500       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2501                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2502         {
2503           snat_in2out_worker_handoff_trace_t *t =
2504             vlib_add_trace (vm, node, b0, sizeof (*t));
2505           t->next_worker_index = next_worker_index;
2506           t->do_handoff = do_handoff;
2507         }
2508     }
2509
2510   if (f)
2511     vlib_put_frame_to_node (vm, sm->in2out_node_index, f);
2512
2513   if (hf)
2514     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2515
2516   /* Ship frames to the worker nodes */
2517   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2518     {
2519       if (handoff_queue_elt_by_worker_index[i])
2520         {
2521           hf = handoff_queue_elt_by_worker_index[i];
2522           /*
2523            * It works better to let the handoff node
2524            * rate-adapt, always ship the handoff queue element.
2525            */
2526           if (1 || hf->n_vectors == hf->last_n_vectors)
2527             {
2528               vlib_put_frame_queue_elt (hf);
2529               handoff_queue_elt_by_worker_index[i] = 0;
2530             }
2531           else
2532             hf->last_n_vectors = hf->n_vectors;
2533         }
2534       congested_handoff_queue_by_worker_index[i] =
2535         (vlib_frame_queue_t *) (~0);
2536     }
2537   hf = 0;
2538   current_worker_index = ~0;
2539   return frame->n_vectors;
2540 }
2541
2542 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
2543   .function = snat_in2out_worker_handoff_fn,
2544   .name = "snat-in2out-worker-handoff",
2545   .vector_size = sizeof (u32),
2546   .format_trace = format_snat_in2out_worker_handoff_trace,
2547   .type = VLIB_NODE_TYPE_INTERNAL,
2548   
2549   .n_next_nodes = 1,
2550
2551   .next_nodes = {
2552     [0] = "error-drop",
2553   },
2554 };
2555
2556 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
2557
2558 static uword
2559 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
2560                                 vlib_node_runtime_t * node,
2561                                 vlib_frame_t * frame)
2562 {
2563   u32 n_left_from, * from, * to_next;
2564   snat_in2out_next_t next_index;
2565   u32 pkts_processed = 0;
2566   snat_main_t * sm = &snat_main;
2567   u32 stats_node_index;
2568
2569   stats_node_index = snat_in2out_fast_node.index;
2570
2571   from = vlib_frame_vector_args (frame);
2572   n_left_from = frame->n_vectors;
2573   next_index = node->cached_next_index;
2574
2575   while (n_left_from > 0)
2576     {
2577       u32 n_left_to_next;
2578
2579       vlib_get_next_frame (vm, node, next_index,
2580                            to_next, n_left_to_next);
2581
2582       while (n_left_from > 0 && n_left_to_next > 0)
2583         {
2584           u32 bi0;
2585           vlib_buffer_t * b0;
2586           u32 next0;
2587           u32 sw_if_index0;
2588           ip4_header_t * ip0;
2589           ip_csum_t sum0;
2590           u32 new_addr0, old_addr0;
2591           u16 old_port0, new_port0;
2592           udp_header_t * udp0;
2593           tcp_header_t * tcp0;
2594           icmp46_header_t * icmp0;
2595           snat_session_key_t key0, sm0;
2596           u32 proto0;
2597           u32 rx_fib_index0;
2598
2599           /* speculatively enqueue b0 to the current next frame */
2600           bi0 = from[0];
2601           to_next[0] = bi0;
2602           from += 1;
2603           to_next += 1;
2604           n_left_from -= 1;
2605           n_left_to_next -= 1;
2606
2607           b0 = vlib_get_buffer (vm, bi0);
2608           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2609
2610           ip0 = vlib_buffer_get_current (b0);
2611           udp0 = ip4_next_header (ip0);
2612           tcp0 = (tcp_header_t *) udp0;
2613           icmp0 = (icmp46_header_t *) udp0;
2614
2615           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2616           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2617
2618           if (PREDICT_FALSE(ip0->ttl == 1))
2619             {
2620               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2621               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2622                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2623                                            0);
2624               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2625               goto trace0;
2626             }
2627
2628           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2629
2630           if (PREDICT_FALSE (proto0 == ~0))
2631               goto trace0;
2632
2633           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2634             {
2635               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2636                                   rx_fib_index0, node, next0, ~0, 0, 0);
2637               goto trace0;
2638             }
2639
2640           key0.addr = ip0->src_address;
2641           key0.port = udp0->src_port;
2642           key0.fib_index = rx_fib_index0;
2643
2644           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0))
2645             {
2646               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2647               next0= SNAT_IN2OUT_NEXT_DROP;
2648               goto trace0;
2649             }
2650
2651           new_addr0 = sm0.addr.as_u32;
2652           new_port0 = sm0.port;
2653           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2654           old_addr0 = ip0->src_address.as_u32;
2655           ip0->src_address.as_u32 = new_addr0;
2656
2657           sum0 = ip0->checksum;
2658           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2659                                  ip4_header_t,
2660                                  src_address /* changed member */);
2661           ip0->checksum = ip_csum_fold (sum0);
2662
2663           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
2664             {
2665               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2666                 {
2667                   old_port0 = tcp0->src_port;
2668                   tcp0->src_port = new_port0;
2669
2670                   sum0 = tcp0->checksum;
2671                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2672                                          ip4_header_t,
2673                                          dst_address /* changed member */);
2674                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2675                                          ip4_header_t /* cheat */,
2676                                          length /* changed member */);
2677                   tcp0->checksum = ip_csum_fold(sum0);
2678                 }
2679               else
2680                 {
2681                   old_port0 = udp0->src_port;
2682                   udp0->src_port = new_port0;
2683                   udp0->checksum = 0;
2684                 }
2685             }
2686           else
2687             {
2688               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2689                 {
2690                   sum0 = tcp0->checksum;
2691                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2692                                          ip4_header_t,
2693                                          dst_address /* changed member */);
2694                   tcp0->checksum = ip_csum_fold(sum0);
2695                 }
2696             }
2697
2698           /* Hairpinning */
2699           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
2700
2701         trace0:
2702           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2703                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2704             {
2705               snat_in2out_trace_t *t =
2706                  vlib_add_trace (vm, node, b0, sizeof (*t));
2707               t->sw_if_index = sw_if_index0;
2708               t->next_index = next0;
2709             }
2710
2711           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2712
2713           /* verify speculative enqueue, maybe switch current next frame */
2714           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2715                                            to_next, n_left_to_next,
2716                                            bi0, next0);
2717         }
2718
2719       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2720     }
2721
2722   vlib_node_increment_counter (vm, stats_node_index,
2723                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2724                                pkts_processed);
2725   return frame->n_vectors;
2726 }
2727
2728
2729 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
2730   .function = snat_in2out_fast_static_map_fn,
2731   .name = "snat-in2out-fast",
2732   .vector_size = sizeof (u32),
2733   .format_trace = format_snat_in2out_fast_trace,
2734   .type = VLIB_NODE_TYPE_INTERNAL,
2735   
2736   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2737   .error_strings = snat_in2out_error_strings,
2738
2739   .runtime_data_bytes = sizeof (snat_runtime_t),
2740   
2741   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2742
2743   /* edit / add dispositions here */
2744   .next_nodes = {
2745     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2746     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2747     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
2748     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2749   },
2750 };
2751
2752 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);