20d99396292a43107c613be2fd04137f46769a02
[vpp.git] / src / plugins / snat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <snat/snat.h>
25 #include <snat/snat_ipfix_logging.h>
26 #include <snat/snat_det.h>
27
28 #include <vppinfra/hash.h>
29 #include <vppinfra/error.h>
30 #include <vppinfra/elog.h>
31
32 typedef struct {
33   u32 sw_if_index;
34   u32 next_index;
35   u32 session_index;
36   u32 is_slow_path;
37 } snat_in2out_trace_t;
38
39 typedef struct {
40   u32 next_worker_index;
41   u8 do_handoff;
42 } snat_in2out_worker_handoff_trace_t;
43
44 /* packet trace format function */
45 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
46 {
47   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
50   char * tag;
51
52   tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
53   
54   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
55               t->sw_if_index, t->next_index, t->session_index);
56
57   return s;
58 }
59
60 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
61 {
62   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
63   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
64   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
65
66   s = format (s, "SANT_IN2OUT_FAST: sw_if_index %d, next index %d", 
67               t->sw_if_index, t->next_index);
68
69   return s;
70 }
71
72 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
73 {
74   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
75   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
76   snat_in2out_worker_handoff_trace_t * t =
77     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
78   char * m;
79
80   m = t->do_handoff ? "next worker" : "same worker";
81   s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
82
83   return s;
84 }
85
86 vlib_node_registration_t snat_in2out_node;
87 vlib_node_registration_t snat_in2out_slowpath_node;
88 vlib_node_registration_t snat_in2out_fast_node;
89 vlib_node_registration_t snat_in2out_worker_handoff_node;
90 vlib_node_registration_t snat_det_in2out_node;
91
/*
 * X-macro table of in2out error counters, one _(SYMBOL, "description")
 * entry per counter. It is expanded twice below, once into the enum and
 * once into the string table, so both always list the entries in the
 * same order.
 */
#define foreach_snat_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(IN2OUT_PACKETS, "Good in2out packets processed")      \
_(OUT_OF_PORTS, "Out of ports")                         \
_(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
_(BAD_ICMP_TYPE, "icmp type not echo-request")          \
_(NO_TRANSLATION, "No translation")

/* Error counter indices; SNAT_IN2OUT_N_ERROR is the number of counters. */
typedef enum
{
#define _(name, text) SNAT_IN2OUT_ERROR_##name,
  foreach_snat_in2out_error
#undef _
  SNAT_IN2OUT_N_ERROR,
} snat_in2out_error_t;

/* Descriptions indexed by snat_in2out_error_t. */
static char *snat_in2out_error_strings[] = {
#define _(name, text) text,
  foreach_snat_in2out_error
#undef _
};
112
/*
 * Next-node slots used by the in2out nodes. The values index the node's
 * next-node table, so the order matters; SNAT_IN2OUT_N_NEXT is the number
 * of slots.
 */
typedef enum
{
  SNAT_IN2OUT_NEXT_LOOKUP = 0,
  SNAT_IN2OUT_NEXT_DROP,        /* returned whenever a packet is rejected */
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_N_NEXT,
} snat_in2out_next_t;
120
121 /**
122  * @brief Check if packet should be translated
123  *
124  * Packets aimed at outside interface and external addresss with active session
125  * should be translated.
126  *
127  * @param sm            SNAT main
128  * @param rt            SNAT runtime data
129  * @param sw_if_index0  index of the inside interface
130  * @param ip0           IPv4 header
131  * @param proto0        SNAT protocol
132  * @param rx_fib_index0 RX FIB index
133  *
134  * @returns 0 if packet should be translated otherwise 1
135  */
136 static inline int
137 snat_not_translate (snat_main_t * sm, snat_runtime_t * rt, u32 sw_if_index0,
138                    ip4_header_t * ip0, u32 proto0, u32 rx_fib_index0)
139 {
140   ip4_address_t * first_int_addr;
141   udp_header_t * udp0 = ip4_next_header (ip0);
142   snat_session_key_t key0, sm0;
143   clib_bihash_kv_8_8_t kv0, value0;
144   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
145   fib_prefix_t pfx = {
146     .fp_proto = FIB_PROTOCOL_IP4,
147     .fp_len = 32,
148     .fp_addr = {
149         .ip4.as_u32 = ip0->dst_address.as_u32,
150     },
151   };
152
153   if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
154     {
155       first_int_addr =
156         ip4_interface_first_address (sm->ip4_main, sw_if_index0,
157                                      0 /* just want the address */);
158       rt->cached_sw_if_index = sw_if_index0;
159       if (first_int_addr)
160         rt->cached_ip4_address = first_int_addr->as_u32;
161       else
162         rt->cached_ip4_address = 0;
163     }
164
165   /* Don't NAT packet aimed at the intfc address */
166   if (PREDICT_FALSE(ip0->dst_address.as_u32 == rt->cached_ip4_address))
167     return 1;
168
169   key0.addr = ip0->dst_address;
170   key0.port = udp0->dst_port;
171   key0.protocol = proto0;
172   key0.fib_index = sm->outside_fib_index;
173   kv0.key = key0.as_u64;
174
175   /* NAT packet aimed at external address if */
176   /* has active sessions */
177   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
178     {
179       /* or is static mappings */
180       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
181         return 0;
182     }
183   else
184     return 0;
185
186   fei = fib_table_lookup (rx_fib_index0, &pfx);
187   if (FIB_NODE_INDEX_INVALID != fei)
188     {
189       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
190       if (sw_if_index == ~0)
191         {
192           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
193           if (FIB_NODE_INDEX_INVALID != fei)
194             sw_if_index = fib_entry_get_resolving_interface (fei);
195         }
196       snat_interface_t *i;
197       pool_foreach (i, sm->interfaces,
198       ({
199         /* NAT packet aimed at outside interface */
200         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
201           return 0;
202       }));
203     }
204
205   return 1;
206 }
207
208 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
209                       ip4_header_t * ip0,
210                       u32 rx_fib_index0,
211                       snat_session_key_t * key0,
212                       snat_session_t ** sessionp,
213                       vlib_node_runtime_t * node,
214                       u32 next0,
215                       u32 thread_index)
216 {
217   snat_user_t *u;
218   snat_user_key_t user_key;
219   snat_session_t *s;
220   clib_bihash_kv_8_8_t kv0, value0;
221   u32 oldest_per_user_translation_list_index;
222   dlist_elt_t * oldest_per_user_translation_list_elt;
223   dlist_elt_t * per_user_translation_list_elt;
224   dlist_elt_t * per_user_list_head_elt;
225   u32 session_index;
226   snat_session_key_t key1;
227   u32 address_index = ~0;
228   u32 outside_fib_index;
229   uword * p;
230   snat_worker_key_t worker_by_out_key;
231
232   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
233   if (! p)
234     {
235       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
236       return SNAT_IN2OUT_NEXT_DROP;
237     }
238   outside_fib_index = p[0];
239
240   key1.protocol = key0->protocol;
241   user_key.addr = ip0->src_address;
242   user_key.fib_index = rx_fib_index0;
243   kv0.key = user_key.as_u64;
244   
245   /* Ever heard of the "user" = src ip4 address before? */
246   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
247     {
248       /* no, make a new one */
249       pool_get (sm->per_thread_data[thread_index].users, u);
250       memset (u, 0, sizeof (*u));
251       u->addr = ip0->src_address;
252       u->fib_index = rx_fib_index0;
253
254       pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);
255
256       u->sessions_per_user_list_head_index = per_user_list_head_elt -
257         sm->per_thread_data[thread_index].list_pool;
258
259       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
260                        u->sessions_per_user_list_head_index);
261
262       kv0.value = u - sm->per_thread_data[thread_index].users;
263
264       /* add user */
265       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
266     }
267   else
268     {
269       u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
270                              value0.value);
271     }
272
273   /* Over quota? Recycle the least recently used dynamic translation */
274   if (u->nsessions >= sm->max_translations_per_user)
275     {
276       /* Remove the oldest dynamic translation */
277       do {
278           oldest_per_user_translation_list_index =
279             clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
280                                     u->sessions_per_user_list_head_index);
281
282           ASSERT (oldest_per_user_translation_list_index != ~0);
283
284           /* add it back to the end of the LRU list */
285           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
286                               u->sessions_per_user_list_head_index,
287                               oldest_per_user_translation_list_index);
288           /* Get the list element */
289           oldest_per_user_translation_list_elt =
290             pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
291                                oldest_per_user_translation_list_index);
292
293           /* Get the session index from the list element */
294           session_index = oldest_per_user_translation_list_elt->value;
295
296           /* Get the session */
297           s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
298                                  session_index);
299       } while (snat_is_session_static (s));
300
301       /* Remove in2out, out2in keys */
302       kv0.key = s->in2out.as_u64;
303       if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
304           clib_warning ("in2out key delete failed");
305       kv0.key = s->out2in.as_u64;
306       if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
307           clib_warning ("out2in key delete failed");
308
309       /* log NAT event */
310       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
311                                           s->out2in.addr.as_u32,
312                                           s->in2out.protocol,
313                                           s->in2out.port,
314                                           s->out2in.port,
315                                           s->in2out.fib_index);
316
317       snat_free_outside_address_and_port 
318         (sm, &s->out2in, s->outside_address_index);
319       s->outside_address_index = ~0;
320
321       if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
322                                                &address_index))
323         {
324           ASSERT(0);
325
326           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
327           return SNAT_IN2OUT_NEXT_DROP;
328         }
329       s->outside_address_index = address_index;
330     }
331   else
332     {
333       u8 static_mapping = 1;
334
335       /* First try to match static mapping by local address and port */
336       if (snat_static_mapping_match (sm, *key0, &key1, 0))
337         {
338           static_mapping = 0;
339           /* Try to create dynamic translation */
340           if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
341                                                    &address_index))
342             {
343               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
344               return SNAT_IN2OUT_NEXT_DROP;
345             }
346         }
347
348       /* Create a new session */
349       pool_get (sm->per_thread_data[thread_index].sessions, s);
350       memset (s, 0, sizeof (*s));
351       
352       s->outside_address_index = address_index;
353
354       if (static_mapping)
355         {
356           u->nstaticsessions++;
357           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
358         }
359       else
360         {
361           u->nsessions++;
362         }
363
364       /* Create list elts */
365       pool_get (sm->per_thread_data[thread_index].list_pool,
366                 per_user_translation_list_elt);
367       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
368                        per_user_translation_list_elt -
369                        sm->per_thread_data[thread_index].list_pool);
370
371       per_user_translation_list_elt->value =
372         s - sm->per_thread_data[thread_index].sessions;
373       s->per_user_index = per_user_translation_list_elt -
374                           sm->per_thread_data[thread_index].list_pool;
375       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
376
377       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
378                           s->per_user_list_head_index,
379                           per_user_translation_list_elt -
380                           sm->per_thread_data[thread_index].list_pool);
381    }
382   
383   s->in2out = *key0;
384   s->out2in = key1;
385   s->out2in.protocol = key0->protocol;
386   s->out2in.fib_index = outside_fib_index;
387   *sessionp = s;
388
389   /* Add to translation hashes */
390   kv0.key = s->in2out.as_u64;
391   kv0.value = s - sm->per_thread_data[thread_index].sessions;
392   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
393       clib_warning ("in2out key add failed");
394   
395   kv0.key = s->out2in.as_u64;
396   kv0.value = s - sm->per_thread_data[thread_index].sessions;
397   
398   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
399       clib_warning ("out2in key add failed");
400
401   /* Add to translated packets worker lookup */
402   worker_by_out_key.addr = s->out2in.addr;
403   worker_by_out_key.port = s->out2in.port;
404   worker_by_out_key.fib_index = s->out2in.fib_index;
405   kv0.key = worker_by_out_key.as_u64;
406   kv0.value = thread_index;
407   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
408
409   /* log NAT event */
410   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
411                                       s->out2in.addr.as_u32,
412                                       s->in2out.protocol,
413                                       s->in2out.port,
414                                       s->out2in.port,
415                                       s->in2out.fib_index);
416   return next0;
417 }
418
419 static_always_inline
420 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
421                                  snat_session_key_t *p_key0)
422 {
423   icmp46_header_t *icmp0;
424   snat_session_key_t key0;
425   icmp_echo_header_t *echo0, *inner_echo0 = 0;
426   ip4_header_t *inner_ip0 = 0;
427   void *l4_header = 0;
428   icmp46_header_t *inner_icmp0;
429
430   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
431   echo0 = (icmp_echo_header_t *)(icmp0+1);
432
433   if (!icmp_is_error_message (icmp0))
434     {
435       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request))
436         return SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE;
437       key0.protocol = SNAT_PROTOCOL_ICMP;
438       key0.addr = ip0->src_address;
439       key0.port = echo0->identifier;
440     }
441   else
442     {
443       inner_ip0 = (ip4_header_t *)(echo0+1);
444       l4_header = ip4_next_header (inner_ip0);
445       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
446       key0.addr = inner_ip0->dst_address;
447       switch (key0.protocol)
448         {
449         case SNAT_PROTOCOL_ICMP:
450           inner_icmp0 = (icmp46_header_t*)l4_header;
451           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
452           key0.port = inner_echo0->identifier;
453           break;
454         case SNAT_PROTOCOL_UDP:
455         case SNAT_PROTOCOL_TCP:
456           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
457           break;
458         default:
459           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
460         }
461     }
462   *p_key0 = key0;
463   return -1; /* success */
464 }
465
466 /**
467  * Get address and port values to be used for packet SNAT translation
468  * and create session if needed
469  *
470  * @param[in,out] sm             SNAT main
471  * @param[in,out] node           SNAT node runtime
472  * @param[in] thread_index       thread index
473  * @param[in,out] b0             buffer containing packet to be translated
474  * @param[out] p_key             address and port before NAT translation
475  * @param[out] p_value           address and port after NAT translation
476  * @param[out] p_dont_translate  if packet should not be translated
477  * @param d                      optional parameter
478  */
479 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
480                            u32 thread_index, vlib_buffer_t *b0,
481                            snat_session_key_t *p_key,
482                            snat_session_key_t *p_value,
483                            u8 *p_dont_translate, void *d)
484 {
485   snat_runtime_t *rt;
486   ip4_header_t *ip0;
487   icmp46_header_t *icmp0;
488   u32 sw_if_index0;
489   u32 rx_fib_index0;
490   snat_session_key_t key0;
491   snat_session_t *s0 = 0;
492   u8 dont_translate = 0;
493   clib_bihash_kv_8_8_t kv0, value0;
494   u32 next0 = ~0;
495   int err;
496
497   rt = (snat_runtime_t *) node->runtime_data;
498   ip0 = vlib_buffer_get_current (b0);
499   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
500   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
501   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
502
503   err = icmp_get_key (ip0, &key0);
504   if (err != -1)
505     {
506       b0->error = node->errors[err];
507       next0 = SNAT_IN2OUT_NEXT_DROP;
508       goto out;
509     }
510   key0.fib_index = rx_fib_index0;
511
512   kv0.key = key0.as_u64;
513
514   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
515     {
516       if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
517           IP_PROTOCOL_ICMP, rx_fib_index0)))
518         {
519           dont_translate = 1;
520           goto out;
521         }
522
523       if (icmp_is_error_message (icmp0))
524         {
525           next0 = SNAT_IN2OUT_NEXT_DROP;
526           goto out;
527         }
528
529       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
530                          &s0, node, next0, thread_index);
531
532       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
533         goto out;
534     }
535   else
536     s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
537                             value0.value);
538
539 out:
540   *p_key = key0;
541   if (s0)
542     *p_value = s0->out2in;
543   *p_dont_translate = dont_translate;
544   if (d)
545     *(snat_session_t**)d = s0;
546   return next0;
547 }
548
549 /**
550  * Get address and port values to be used for packet SNAT translation
551  *
552  * @param[in] sm                 SNAT main
553  * @param[in,out] node           SNAT node runtime
554  * @param[in] thread_index       thread index
555  * @param[in,out] b0             buffer containing packet to be translated
556  * @param[out] p_key             address and port before NAT translation
557  * @param[out] p_value           address and port after NAT translation
558  * @param[out] p_dont_translate  if packet should not be translated
559  * @param d                      optional parameter
560  */
561 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
562                            u32 thread_index, vlib_buffer_t *b0,
563                            snat_session_key_t *p_key,
564                            snat_session_key_t *p_value,
565                            u8 *p_dont_translate, void *d)
566 {
567   snat_runtime_t *rt;
568   ip4_header_t *ip0;
569   icmp46_header_t *icmp0;
570   u32 sw_if_index0;
571   u32 rx_fib_index0;
572   snat_session_key_t key0;
573   snat_session_key_t sm0;
574   u8 dont_translate = 0;
575   u32 next0 = ~0;
576   int err;
577
578   rt = (snat_runtime_t *) node->runtime_data;
579   ip0 = vlib_buffer_get_current (b0);
580   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
581   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
582   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
583
584   err = icmp_get_key (ip0, &key0);
585   if (err != -1)
586     {
587       b0->error = node->errors[err];
588       next0 = SNAT_IN2OUT_NEXT_DROP;
589       goto out2;
590     }
591   key0.fib_index = rx_fib_index0;
592
593   if (snat_static_mapping_match(sm, key0, &sm0, 0))
594     {
595       if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
596           IP_PROTOCOL_ICMP, rx_fib_index0)))
597         {
598           dont_translate = 1;
599           goto out;
600         }
601
602       if (icmp_is_error_message (icmp0))
603         {
604           next0 = SNAT_IN2OUT_NEXT_DROP;
605           goto out;
606         }
607
608       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
609       next0 = SNAT_IN2OUT_NEXT_DROP;
610       goto out;
611     }
612
613 out:
614   *p_value = sm0;
615 out2:
616   *p_key = key0;
617   *p_dont_translate = dont_translate;
618   return next0;
619 }
620
621 static inline u32 icmp_in2out (snat_main_t *sm,
622                                vlib_buffer_t * b0,
623                                ip4_header_t * ip0,
624                                icmp46_header_t * icmp0,
625                                u32 sw_if_index0,
626                                u32 rx_fib_index0,
627                                vlib_node_runtime_t * node,
628                                u32 next0,
629                                u32 thread_index,
630                                void *d)
631 {
632   snat_session_key_t key0, sm0;
633   icmp_echo_header_t *echo0, *inner_echo0 = 0;
634   ip4_header_t *inner_ip0;
635   void *l4_header = 0;
636   icmp46_header_t *inner_icmp0;
637   u8 dont_translate;
638   u32 new_addr0, old_addr0;
639   u16 old_id0, new_id0;
640   ip_csum_t sum0;
641   u16 checksum0;
642   u32 next0_tmp;
643
644   echo0 = (icmp_echo_header_t *)(icmp0+1);
645
646   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0,
647                                        &key0, &sm0, &dont_translate, d);
648   if (next0_tmp != ~0)
649     next0 = next0_tmp;
650   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
651     goto out;
652
653   sum0 = ip_incremental_checksum (0, icmp0,
654                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
655   checksum0 = ~ip_csum_fold (sum0);
656   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
657     {
658       next0 = SNAT_IN2OUT_NEXT_DROP;
659       goto out;
660     }
661
662   old_addr0 = ip0->src_address.as_u32;
663   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
664   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
665
666   sum0 = ip0->checksum;
667   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
668                          src_address /* changed member */);
669   ip0->checksum = ip_csum_fold (sum0);
670   
671   if (!icmp_is_error_message (icmp0))
672     {
673       new_id0 = sm0.port;
674       if (PREDICT_FALSE(new_id0 != echo0->identifier))
675         {
676           old_id0 = echo0->identifier;
677           new_id0 = sm0.port;
678           echo0->identifier = new_id0;
679
680           sum0 = icmp0->checksum;
681           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
682                                  identifier);
683           icmp0->checksum = ip_csum_fold (sum0);
684         }
685     }
686   else
687     {
688       inner_ip0 = (ip4_header_t *)(echo0+1);
689       l4_header = ip4_next_header (inner_ip0);
690
691       if (!ip4_header_checksum_is_valid (inner_ip0))
692         {
693           next0 = SNAT_IN2OUT_NEXT_DROP;
694           goto out;
695         }
696
697       old_addr0 = inner_ip0->dst_address.as_u32;
698       inner_ip0->dst_address = sm0.addr;
699       new_addr0 = inner_ip0->dst_address.as_u32;
700
701       sum0 = icmp0->checksum;
702       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
703                              dst_address /* changed member */);
704       icmp0->checksum = ip_csum_fold (sum0);
705
706       switch (key0.protocol)
707         {
708           case SNAT_PROTOCOL_ICMP:
709             inner_icmp0 = (icmp46_header_t*)l4_header;
710             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
711
712             old_id0 = inner_echo0->identifier;
713             new_id0 = sm0.port;
714             inner_echo0->identifier = new_id0;
715
716             sum0 = icmp0->checksum;
717             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
718                                    identifier);
719             icmp0->checksum = ip_csum_fold (sum0);
720             break;
721           case SNAT_PROTOCOL_UDP:
722           case SNAT_PROTOCOL_TCP:
723             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
724             new_id0 = sm0.port;
725             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
726
727             sum0 = icmp0->checksum;
728             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
729                                    dst_port);
730             icmp0->checksum = ip_csum_fold (sum0);
731             break;
732           default:
733             ASSERT(0);
734         }
735     }
736
737 out:
738   return next0;
739 }
740
741 /**
742  * @brief Hairpinning
743  *
744  * Hairpinning allows two endpoints on the internal side of the NAT to
745  * communicate even if they only use each other's external IP addresses
746  * and ports.
747  *
748  * @param sm     SNAT main.
749  * @param b0     Vlib buffer.
750  * @param ip0    IP header.
751  * @param udp0   UDP header.
752  * @param tcp0   TCP header.
753  * @param proto0 SNAT protocol.
754  */
755 static inline void
756 snat_hairpinning (snat_main_t *sm,
757                   vlib_buffer_t * b0,
758                   ip4_header_t * ip0,
759                   udp_header_t * udp0,
760                   tcp_header_t * tcp0,
761                   u32 proto0)
762 {
763   snat_session_key_t key0, sm0;
764   snat_worker_key_t k0;
765   snat_session_t * s0;
766   clib_bihash_kv_8_8_t kv0, value0;
767   ip_csum_t sum0;
768   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
769   u16 new_dst_port0, old_dst_port0;
770
771   key0.addr = ip0->dst_address;
772   key0.port = udp0->dst_port;
773   key0.protocol = proto0;
774   key0.fib_index = sm->outside_fib_index;
775   kv0.key = key0.as_u64;
776
777   /* Check if destination is in active sessions */
778   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
779     {
780       /* or static mappings */
781       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
782         {
783           new_dst_addr0 = sm0.addr.as_u32;
784           new_dst_port0 = sm0.port;
785           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
786         }
787     }
788   else
789     {
790       si = value0.value;
791       if (sm->num_workers > 1)
792         {
793           k0.addr = ip0->dst_address;
794           k0.port = udp0->dst_port;
795           k0.fib_index = sm->outside_fib_index;
796           kv0.key = k0.as_u64;
797           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
798             ASSERT(0);
799           else
800             ti = value0.value;
801         }
802       else
803         ti = sm->num_workers;
804
805       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
806       new_dst_addr0 = s0->in2out.addr.as_u32;
807       new_dst_port0 = s0->in2out.port;
808       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
809     }
810
811   /* Destination is behind the same NAT, use internal address and port */
812   if (new_dst_addr0)
813     {
814       old_dst_addr0 = ip0->dst_address.as_u32;
815       ip0->dst_address.as_u32 = new_dst_addr0;
816       sum0 = ip0->checksum;
817       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
818                              ip4_header_t, dst_address);
819       ip0->checksum = ip_csum_fold (sum0);
820
821       old_dst_port0 = tcp0->dst;
822       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
823         {
824           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
825             {
826               tcp0->dst = new_dst_port0;
827               sum0 = tcp0->checksum;
828               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
829                                      ip4_header_t, dst_address);
830               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
831                                      ip4_header_t /* cheat */, length);
832               tcp0->checksum = ip_csum_fold(sum0);
833             }
834           else
835             {
836               udp0->dst_port = new_dst_port0;
837               udp0->checksum = 0;
838             }
839         }
840     }
841 }
842
843 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
844                                          vlib_buffer_t * b0,
845                                          ip4_header_t * ip0,
846                                          icmp46_header_t * icmp0,
847                                          u32 sw_if_index0,
848                                          u32 rx_fib_index0,
849                                          vlib_node_runtime_t * node,
850                                          u32 next0,
851                                          f64 now,
852                                          u32 thread_index,
853                                          snat_session_t ** p_s0)
854 {
855   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
856                       next0, thread_index, p_s0);
857   snat_session_t * s0 = *p_s0;
858   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
859     {
860       /* Accounting */
861       s0->last_heard = now;
862       s0->total_pkts++;
863       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
864       /* Per-user LRU list maintenance for dynamic translations */
865       if (!snat_is_session_static (s0))
866         {
867           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
868                              s0->per_user_index);
869           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
870                               s0->per_user_list_head_index,
871                               s0->per_user_index);
872         }
873     }
874   return next0;
875 }
876
877 static inline uword
878 snat_in2out_node_fn_inline (vlib_main_t * vm,
879                             vlib_node_runtime_t * node,
880                             vlib_frame_t * frame, int is_slow_path)
881 {
882   u32 n_left_from, * from, * to_next;
883   snat_in2out_next_t next_index;
884   u32 pkts_processed = 0;
885   snat_main_t * sm = &snat_main;
886   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
887   f64 now = vlib_time_now (vm);
888   u32 stats_node_index;
889   u32 thread_index = vlib_get_thread_index ();
890
891   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
892     snat_in2out_node.index;
893
894   from = vlib_frame_vector_args (frame);
895   n_left_from = frame->n_vectors;
896   next_index = node->cached_next_index;
897
898   while (n_left_from > 0)
899     {
900       u32 n_left_to_next;
901
902       vlib_get_next_frame (vm, node, next_index,
903                            to_next, n_left_to_next);
904
905       while (n_left_from >= 4 && n_left_to_next >= 2)
906         {
907           u32 bi0, bi1;
908           vlib_buffer_t * b0, * b1;
909           u32 next0, next1;
910           u32 sw_if_index0, sw_if_index1;
911           ip4_header_t * ip0, * ip1;
912           ip_csum_t sum0, sum1;
913           u32 new_addr0, old_addr0, new_addr1, old_addr1;
914           u16 old_port0, new_port0, old_port1, new_port1;
915           udp_header_t * udp0, * udp1;
916           tcp_header_t * tcp0, * tcp1;
917           icmp46_header_t * icmp0, * icmp1;
918           snat_session_key_t key0, key1;
919           u32 rx_fib_index0, rx_fib_index1;
920           u32 proto0, proto1;
921           snat_session_t * s0 = 0, * s1 = 0;
922           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
923           
924           /* Prefetch next iteration. */
925           {
926             vlib_buffer_t * p2, * p3;
927             
928             p2 = vlib_get_buffer (vm, from[2]);
929             p3 = vlib_get_buffer (vm, from[3]);
930             
931             vlib_prefetch_buffer_header (p2, LOAD);
932             vlib_prefetch_buffer_header (p3, LOAD);
933
934             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
935             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
936           }
937
938           /* speculatively enqueue b0 and b1 to the current next frame */
939           to_next[0] = bi0 = from[0];
940           to_next[1] = bi1 = from[1];
941           from += 2;
942           to_next += 2;
943           n_left_from -= 2;
944           n_left_to_next -= 2;
945           
946           b0 = vlib_get_buffer (vm, bi0);
947           b1 = vlib_get_buffer (vm, bi1);
948
949           ip0 = vlib_buffer_get_current (b0);
950           udp0 = ip4_next_header (ip0);
951           tcp0 = (tcp_header_t *) udp0;
952           icmp0 = (icmp46_header_t *) udp0;
953
954           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
955           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
956                                    sw_if_index0);
957
958           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
959
960           if (PREDICT_FALSE(ip0->ttl == 1))
961             {
962               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
963               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
964                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
965                                            0);
966               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
967               goto trace00;
968             }
969
970           proto0 = ip_proto_to_snat_proto (ip0->protocol);
971
972           /* Next configured feature, probably ip4-lookup */
973           if (is_slow_path)
974             {
975               if (PREDICT_FALSE (proto0 == ~0))
976                 goto trace00;
977               
978               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
979                 {
980                   next0 = icmp_in2out_slow_path 
981                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
982                      node, next0, now, thread_index, &s0);
983                   goto trace00;
984                 }
985             }
986           else
987             {
988               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
989                 {
990                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
991                   goto trace00;
992                 }
993             }
994
995           key0.addr = ip0->src_address;
996           key0.port = udp0->src_port;
997           key0.protocol = proto0;
998           key0.fib_index = rx_fib_index0;
999           
1000           kv0.key = key0.as_u64;
1001
1002           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
1003             {
1004               if (is_slow_path)
1005                 {
1006                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
1007                       proto0, rx_fib_index0)))
1008                     goto trace00;
1009
1010                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1011                                      &s0, node, next0, thread_index);
1012                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1013                     goto trace00;
1014                 }
1015               else
1016                 {
1017                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1018                   goto trace00;
1019                 }
1020             }
1021           else
1022             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1023                                     value0.value);
1024
1025           old_addr0 = ip0->src_address.as_u32;
1026           ip0->src_address = s0->out2in.addr;
1027           new_addr0 = ip0->src_address.as_u32;
1028           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1029
1030           sum0 = ip0->checksum;
1031           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1032                                  ip4_header_t,
1033                                  src_address /* changed member */);
1034           ip0->checksum = ip_csum_fold (sum0);
1035
1036           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1037             {
1038               old_port0 = tcp0->src_port;
1039               tcp0->src_port = s0->out2in.port;
1040               new_port0 = tcp0->src_port;
1041
1042               sum0 = tcp0->checksum;
1043               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1044                                      ip4_header_t,
1045                                      dst_address /* changed member */);
1046               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1047                                      ip4_header_t /* cheat */,
1048                                      length /* changed member */);
1049               tcp0->checksum = ip_csum_fold(sum0);
1050             }
1051           else
1052             {
1053               old_port0 = udp0->src_port;
1054               udp0->src_port = s0->out2in.port;
1055               udp0->checksum = 0;
1056             }
1057
1058           /* Hairpinning */
1059           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1060
1061           /* Accounting */
1062           s0->last_heard = now;
1063           s0->total_pkts++;
1064           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1065           /* Per-user LRU list maintenance for dynamic translation */
1066           if (!snat_is_session_static (s0))
1067             {
1068               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1069                                  s0->per_user_index);
1070               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1071                                   s0->per_user_list_head_index,
1072                                   s0->per_user_index);
1073             }
1074         trace00:
1075
1076           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1077                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1078             {
1079               snat_in2out_trace_t *t = 
1080                  vlib_add_trace (vm, node, b0, sizeof (*t));
1081               t->is_slow_path = is_slow_path;
1082               t->sw_if_index = sw_if_index0;
1083               t->next_index = next0;
1084                   t->session_index = ~0;
1085               if (s0)
1086                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1087             }
1088
1089           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1090
1091           ip1 = vlib_buffer_get_current (b1);
1092           udp1 = ip4_next_header (ip1);
1093           tcp1 = (tcp_header_t *) udp1;
1094           icmp1 = (icmp46_header_t *) udp1;
1095
1096           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1097           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1098                                    sw_if_index1);
1099
1100           if (PREDICT_FALSE(ip1->ttl == 1))
1101             {
1102               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1103               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1104                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1105                                            0);
1106               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1107               goto trace01;
1108             }
1109
1110           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1111
1112           /* Next configured feature, probably ip4-lookup */
1113           if (is_slow_path)
1114             {
1115               if (PREDICT_FALSE (proto1 == ~0))
1116                 goto trace01;
1117               
1118               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1119                 {
1120                   next1 = icmp_in2out_slow_path 
1121                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1122                      next1, now, thread_index, &s1);
1123                   goto trace01;
1124                 }
1125             }
1126           else
1127             {
1128               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1129                 {
1130                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1131                   goto trace01;
1132                 }
1133             }
1134
1135           key1.addr = ip1->src_address;
1136           key1.port = udp1->src_port;
1137           key1.protocol = proto1;
1138           key1.fib_index = rx_fib_index1;
1139           
1140           kv1.key = key1.as_u64;
1141
1142             if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
1143             {
1144               if (is_slow_path)
1145                 {
1146                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index1, ip1,
1147                       proto1, rx_fib_index1)))
1148                     goto trace01;
1149
1150                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1151                                      &s1, node, next1, thread_index);
1152                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1153                     goto trace01;
1154                 }
1155               else
1156                 {
1157                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1158                   goto trace01;
1159                 }
1160             }
1161           else
1162             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1163                                     value1.value);
1164
1165           old_addr1 = ip1->src_address.as_u32;
1166           ip1->src_address = s1->out2in.addr;
1167           new_addr1 = ip1->src_address.as_u32;
1168           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1169
1170           sum1 = ip1->checksum;
1171           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1172                                  ip4_header_t,
1173                                  src_address /* changed member */);
1174           ip1->checksum = ip_csum_fold (sum1);
1175
1176           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1177             {
1178               old_port1 = tcp1->src_port;
1179               tcp1->src_port = s1->out2in.port;
1180               new_port1 = tcp1->src_port;
1181
1182               sum1 = tcp1->checksum;
1183               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1184                                      ip4_header_t,
1185                                      dst_address /* changed member */);
1186               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1187                                      ip4_header_t /* cheat */,
1188                                      length /* changed member */);
1189               tcp1->checksum = ip_csum_fold(sum1);
1190             }
1191           else
1192             {
1193               old_port1 = udp1->src_port;
1194               udp1->src_port = s1->out2in.port;
1195               udp1->checksum = 0;
1196             }
1197
1198           /* Hairpinning */
1199           snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
1200
1201           /* Accounting */
1202           s1->last_heard = now;
1203           s1->total_pkts++;
1204           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1205           /* Per-user LRU list maintenance for dynamic translation */
1206           if (!snat_is_session_static (s1))
1207             {
1208               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1209                                  s1->per_user_index);
1210               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1211                                   s1->per_user_list_head_index,
1212                                   s1->per_user_index);
1213             }
1214         trace01:
1215
1216           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1217                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
1218             {
1219               snat_in2out_trace_t *t = 
1220                  vlib_add_trace (vm, node, b1, sizeof (*t));
1221               t->sw_if_index = sw_if_index1;
1222               t->next_index = next1;
1223               t->session_index = ~0;
1224               if (s1)
1225                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1226             }
1227
1228           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1229
1230           /* verify speculative enqueues, maybe switch current next frame */
1231           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1232                                            to_next, n_left_to_next,
1233                                            bi0, bi1, next0, next1);
1234         }
1235
1236       while (n_left_from > 0 && n_left_to_next > 0)
1237         {
1238           u32 bi0;
1239           vlib_buffer_t * b0;
1240           u32 next0;
1241           u32 sw_if_index0;
1242           ip4_header_t * ip0;
1243           ip_csum_t sum0;
1244           u32 new_addr0, old_addr0;
1245           u16 old_port0, new_port0;
1246           udp_header_t * udp0;
1247           tcp_header_t * tcp0;
1248           icmp46_header_t * icmp0;
1249           snat_session_key_t key0;
1250           u32 rx_fib_index0;
1251           u32 proto0;
1252           snat_session_t * s0 = 0;
1253           clib_bihash_kv_8_8_t kv0, value0;
1254           
1255           /* speculatively enqueue b0 to the current next frame */
1256           bi0 = from[0];
1257           to_next[0] = bi0;
1258           from += 1;
1259           to_next += 1;
1260           n_left_from -= 1;
1261           n_left_to_next -= 1;
1262
1263           b0 = vlib_get_buffer (vm, bi0);
1264           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1265
1266           ip0 = vlib_buffer_get_current (b0);
1267           udp0 = ip4_next_header (ip0);
1268           tcp0 = (tcp_header_t *) udp0;
1269           icmp0 = (icmp46_header_t *) udp0;
1270
1271           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1272           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1273                                    sw_if_index0);
1274
1275           if (PREDICT_FALSE(ip0->ttl == 1))
1276             {
1277               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1278               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1279                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1280                                            0);
1281               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1282               goto trace0;
1283             }
1284
1285           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1286
1287           /* Next configured feature, probably ip4-lookup */
1288           if (is_slow_path)
1289             {
1290               if (PREDICT_FALSE (proto0 == ~0))
1291                 goto trace0;
1292               
1293               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1294                 {
1295                   next0 = icmp_in2out_slow_path 
1296                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1297                      next0, now, thread_index, &s0);
1298                   goto trace0;
1299                 }
1300             }
1301           else
1302             {
1303               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1304                 {
1305                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1306                   goto trace0;
1307                 }
1308             }
1309
1310           key0.addr = ip0->src_address;
1311           key0.port = udp0->src_port;
1312           key0.protocol = proto0;
1313           key0.fib_index = rx_fib_index0;
1314           
1315           kv0.key = key0.as_u64;
1316
1317           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
1318             {
1319               if (is_slow_path)
1320                 {
1321                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
1322                       proto0, rx_fib_index0)))
1323                     goto trace0;
1324
1325                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1326                                      &s0, node, next0, thread_index);
1327
1328                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1329                     goto trace0;
1330                 }
1331               else
1332                 {
1333                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1334                   goto trace0;
1335                 }
1336             }
1337           else
1338             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1339                                     value0.value);
1340
1341           old_addr0 = ip0->src_address.as_u32;
1342           ip0->src_address = s0->out2in.addr;
1343           new_addr0 = ip0->src_address.as_u32;
1344           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1345
1346           sum0 = ip0->checksum;
1347           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1348                                  ip4_header_t,
1349                                  src_address /* changed member */);
1350           ip0->checksum = ip_csum_fold (sum0);
1351
1352           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1353             {
1354               old_port0 = tcp0->src_port;
1355               tcp0->src_port = s0->out2in.port;
1356               new_port0 = tcp0->src_port;
1357
1358               sum0 = tcp0->checksum;
1359               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1360                                      ip4_header_t,
1361                                      dst_address /* changed member */);
1362               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1363                                      ip4_header_t /* cheat */,
1364                                      length /* changed member */);
1365               tcp0->checksum = ip_csum_fold(sum0);
1366             }
1367           else
1368             {
1369               old_port0 = udp0->src_port;
1370               udp0->src_port = s0->out2in.port;
1371               udp0->checksum = 0;
1372             }
1373
1374           /* Hairpinning */
1375           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1376
1377           /* Accounting */
1378           s0->last_heard = now;
1379           s0->total_pkts++;
1380           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1381           /* Per-user LRU list maintenance for dynamic translation */
1382           if (!snat_is_session_static (s0))
1383             {
1384               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1385                                  s0->per_user_index);
1386               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1387                                   s0->per_user_list_head_index,
1388                                   s0->per_user_index);
1389             }
1390
1391         trace0:
1392           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1393                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1394             {
1395               snat_in2out_trace_t *t = 
1396                  vlib_add_trace (vm, node, b0, sizeof (*t));
1397               t->is_slow_path = is_slow_path;
1398               t->sw_if_index = sw_if_index0;
1399               t->next_index = next0;
1400                   t->session_index = ~0;
1401               if (s0)
1402                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1403             }
1404
1405           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1406
1407           /* verify speculative enqueue, maybe switch current next frame */
1408           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1409                                            to_next, n_left_to_next,
1410                                            bi0, next0);
1411         }
1412
1413       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1414     }
1415
1416   vlib_node_increment_counter (vm, stats_node_index, 
1417                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, 
1418                                pkts_processed);
1419   return frame->n_vectors;
1420 }
1421
1422 static uword
1423 snat_in2out_fast_path_fn (vlib_main_t * vm,
1424                           vlib_node_runtime_t * node,
1425                           vlib_frame_t * frame)
1426 {
1427   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */);
1428 }
1429
1430 VLIB_REGISTER_NODE (snat_in2out_node) = {
1431   .function = snat_in2out_fast_path_fn,
1432   .name = "snat-in2out",
1433   .vector_size = sizeof (u32),
1434   .format_trace = format_snat_in2out_trace,
1435   .type = VLIB_NODE_TYPE_INTERNAL,
1436   
1437   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1438   .error_strings = snat_in2out_error_strings,
1439
1440   .runtime_data_bytes = sizeof (snat_runtime_t),
1441   
1442   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1443
1444   /* edit / add dispositions here */
1445   .next_nodes = {
1446     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1447     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1448     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1449     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1450   },
1451 };
1452
1453 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1454
1455 static uword
1456 snat_in2out_slow_path_fn (vlib_main_t * vm,
1457                           vlib_node_runtime_t * node,
1458                           vlib_frame_t * frame)
1459 {
1460   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */);
1461 }
1462
1463 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1464   .function = snat_in2out_slow_path_fn,
1465   .name = "snat-in2out-slowpath",
1466   .vector_size = sizeof (u32),
1467   .format_trace = format_snat_in2out_trace,
1468   .type = VLIB_NODE_TYPE_INTERNAL,
1469   
1470   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1471   .error_strings = snat_in2out_error_strings,
1472
1473   .runtime_data_bytes = sizeof (snat_runtime_t),
1474   
1475   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1476
1477   /* edit / add dispositions here */
1478   .next_nodes = {
1479     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1480     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1481     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1482     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1483   },
1484 };
1485
1486 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
1487
1488 /**************************/
1489 /*** deterministic mode ***/
1490 /**************************/
1491 static uword
1492 snat_det_in2out_node_fn (vlib_main_t * vm,
1493                          vlib_node_runtime_t * node,
1494                          vlib_frame_t * frame)
1495 {
1496   u32 n_left_from, * from, * to_next;
1497   snat_in2out_next_t next_index;
1498   u32 pkts_processed = 0;
1499   snat_main_t * sm = &snat_main;
1500   u32 now = (u32) vlib_time_now (vm);
1501
1502   from = vlib_frame_vector_args (frame);
1503   n_left_from = frame->n_vectors;
1504   next_index = node->cached_next_index;
1505
1506   while (n_left_from > 0)
1507     {
1508       u32 n_left_to_next;
1509
1510       vlib_get_next_frame (vm, node, next_index,
1511                            to_next, n_left_to_next);
1512
1513       while (n_left_from >= 4 && n_left_to_next >= 2)
1514         {
1515           u32 bi0, bi1;
1516           vlib_buffer_t * b0, * b1;
1517           u32 next0, next1;
1518           u32 sw_if_index0, sw_if_index1;
1519           ip4_header_t * ip0, * ip1;
1520           ip_csum_t sum0, sum1;
1521           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
1522           u16 old_port0, new_port0, lo_port0, i0;
1523           u16 old_port1, new_port1, lo_port1, i1;
1524           udp_header_t * udp0, * udp1;
1525           tcp_header_t * tcp0, * tcp1;
1526           u32 proto0, proto1;
1527           snat_det_out_key_t key0, key1;
1528           snat_det_map_t * dm0, * dm1;
1529           snat_det_session_t * ses0 = 0, * ses1 = 0;
1530
1531           /* Prefetch next iteration. */
1532           {
1533             vlib_buffer_t * p2, * p3;
1534
1535             p2 = vlib_get_buffer (vm, from[2]);
1536             p3 = vlib_get_buffer (vm, from[3]);
1537
1538             vlib_prefetch_buffer_header (p2, LOAD);
1539             vlib_prefetch_buffer_header (p3, LOAD);
1540
1541             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1542             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1543           }
1544
1545           /* speculatively enqueue b0 and b1 to the current next frame */
1546           to_next[0] = bi0 = from[0];
1547           to_next[1] = bi1 = from[1];
1548           from += 2;
1549           to_next += 2;
1550           n_left_from -= 2;
1551           n_left_to_next -= 2;
1552
1553           b0 = vlib_get_buffer (vm, bi0);
1554           b1 = vlib_get_buffer (vm, bi1);
1555
1556           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1557           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1558
1559           ip0 = vlib_buffer_get_current (b0);
1560           udp0 = ip4_next_header (ip0);
1561           tcp0 = (tcp_header_t *) udp0;
1562
1563           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1564
1565           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
1566           if (PREDICT_FALSE(!dm0))
1567             {
1568               clib_warning("no match for internal host %U",
1569                            format_ip4_address, &ip0->src_address);
1570               next0 = SNAT_IN2OUT_NEXT_DROP;
1571               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1572               goto trace0;
1573             }
1574
1575           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
1576
1577           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src);
1578           if (PREDICT_FALSE(!ses0))
1579             {
1580               key0.ext_host_addr = ip0->dst_address;
1581               key0.ext_host_port = tcp0->dst;
1582               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
1583                 {
1584                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
1585                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
1586
1587                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
1588                     continue;
1589
1590                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
1591                   break;
1592                 }
1593               if (PREDICT_FALSE(!ses0))
1594                 {
1595                   next0 = SNAT_IN2OUT_NEXT_DROP;
1596                   b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
1597                   goto trace0;
1598                 }
1599             }
1600
1601           new_port0 = ses0->out.out_port;
1602           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1603
1604           old_addr0.as_u32 = ip0->src_address.as_u32;
1605           ip0->src_address.as_u32 = new_addr0.as_u32;
1606           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1607
1608           sum0 = ip0->checksum;
1609           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1610                                  ip4_header_t,
1611                                  src_address /* changed member */);
1612           ip0->checksum = ip_csum_fold (sum0);
1613
1614           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1615             {
1616               if (tcp0->flags & TCP_FLAG_SYN)
1617                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
1618               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
1619                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1620               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1621                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
1622               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
1623                 snat_det_ses_close(dm0, ses0);
1624               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1625                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
1626               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
1627                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1628
1629               old_port0 = tcp0->src;
1630               tcp0->src = new_port0;
1631
1632               sum0 = tcp0->checksum;
1633               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1634                                      ip4_header_t,
1635                                      dst_address /* changed member */);
1636               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1637                                      ip4_header_t /* cheat */,
1638                                      length /* changed member */);
1639               tcp0->checksum = ip_csum_fold(sum0);
1640             }
1641           else
1642             {
1643               ses0->state = SNAT_SESSION_UDP_ACTIVE;
1644               old_port0 = udp0->src_port;
1645               udp0->src_port = new_port0;
1646               udp0->checksum = 0;
1647             }
1648
1649           switch(ses0->state)
1650             {
1651             case SNAT_SESSION_UDP_ACTIVE:
1652                 ses0->expire = now + SNAT_UDP_TIMEOUT;
1653                 break;
1654             case SNAT_SESSION_TCP_SYN_SENT:
1655             case SNAT_SESSION_TCP_FIN_WAIT:
1656             case SNAT_SESSION_TCP_CLOSE_WAIT:
1657             case SNAT_SESSION_TCP_LAST_ACK:
1658                 ses0->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT;
1659                 break;
1660             case SNAT_SESSION_TCP_ESTABLISHED:
1661                 ses0->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT;
1662                 break;
1663             }
1664
1665         trace0:
1666           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1667                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1668             {
1669               snat_in2out_trace_t *t =
1670                  vlib_add_trace (vm, node, b0, sizeof (*t));
1671               t->is_slow_path = 0;
1672               t->sw_if_index = sw_if_index0;
1673               t->next_index = next0;
1674               t->session_index = ~0;
1675               if (ses0)
1676                 t->session_index = ses0 - dm0->sessions;
1677             }
1678
1679           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1680
1681           ip1 = vlib_buffer_get_current (b1);
1682           udp1 = ip4_next_header (ip1);
1683           tcp1 = (tcp_header_t *) udp1;
1684
1685           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1686
1687           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
1688           if (PREDICT_FALSE(!dm1))
1689             {
1690               clib_warning("no match for internal host %U",
1691                            format_ip4_address, &ip0->src_address);
1692               next1 = SNAT_IN2OUT_NEXT_DROP;
1693               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1694               goto trace1;
1695             }
1696
1697           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
1698
1699           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src);
1700           if (PREDICT_FALSE(!ses1))
1701             {
1702               key1.ext_host_addr = ip1->dst_address;
1703               key1.ext_host_port = tcp1->dst;
1704               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
1705                 {
1706                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
1707                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
1708
1709                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
1710                     continue;
1711
1712                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
1713                   break;
1714                 }
1715               if (PREDICT_FALSE(!ses1))
1716                 {
1717                   next1 = SNAT_IN2OUT_NEXT_DROP;
1718                   b1->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
1719                   goto trace1;
1720                 }
1721             }
1722
1723           new_port1 = ses1->out.out_port;
1724           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1725
1726           old_addr1.as_u32 = ip1->src_address.as_u32;
1727           ip1->src_address.as_u32 = new_addr1.as_u32;
1728           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1729
1730           sum1 = ip1->checksum;
1731           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1732                                  ip4_header_t,
1733                                  src_address /* changed member */);
1734           ip1->checksum = ip_csum_fold (sum1);
1735
1736           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1737             {
1738               if (tcp1->flags & TCP_FLAG_SYN)
1739                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
1740               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
1741                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
1742               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
1743                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
1744               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
1745                 snat_det_ses_close(dm1, ses1);
1746               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1747                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
1748               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
1749                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
1750
1751               old_port1 = tcp1->src;
1752               tcp1->src = new_port1;
1753
1754               sum1 = tcp1->checksum;
1755               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1756                                      ip4_header_t,
1757                                      dst_address /* changed member */);
1758               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1759                                      ip4_header_t /* cheat */,
1760                                      length /* changed member */);
1761               tcp1->checksum = ip_csum_fold(sum1);
1762             }
1763           else
1764             {
1765               ses1->state = SNAT_SESSION_UDP_ACTIVE;
1766               old_port1 = udp1->src_port;
1767               udp1->src_port = new_port1;
1768               udp1->checksum = 0;
1769             }
1770
1771           switch(ses1->state)
1772             {
1773             case SNAT_SESSION_UDP_ACTIVE:
1774                 ses1->expire = now + SNAT_UDP_TIMEOUT;
1775                 break;
1776             case SNAT_SESSION_TCP_SYN_SENT:
1777             case SNAT_SESSION_TCP_FIN_WAIT:
1778             case SNAT_SESSION_TCP_CLOSE_WAIT:
1779             case SNAT_SESSION_TCP_LAST_ACK:
1780                 ses1->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT;
1781                 break;
1782             case SNAT_SESSION_TCP_ESTABLISHED:
1783                 ses1->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT;
1784                 break;
1785             }
1786
1787         trace1:
1788           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1789                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1790             {
1791               snat_in2out_trace_t *t =
1792                  vlib_add_trace (vm, node, b1, sizeof (*t));
1793               t->is_slow_path = 0;
1794               t->sw_if_index = sw_if_index1;
1795               t->next_index = next1;
1796               t->session_index = ~0;
1797               if (ses1)
1798                 t->session_index = ses1 - dm1->sessions;
1799             }
1800
1801           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1802
1803           /* verify speculative enqueues, maybe switch current next frame */
1804           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1805                                            to_next, n_left_to_next,
1806                                            bi0, bi1, next0, next1);
1807          }
1808
1809       while (n_left_from > 0 && n_left_to_next > 0)
1810         {
1811           u32 bi0;
1812           vlib_buffer_t * b0;
1813           u32 next0;
1814           u32 sw_if_index0;
1815           ip4_header_t * ip0;
1816           ip_csum_t sum0;
1817           ip4_address_t new_addr0, old_addr0;
1818           u16 old_port0, new_port0, lo_port0, i0;
1819           udp_header_t * udp0;
1820           tcp_header_t * tcp0;
1821           u32 proto0;
1822           snat_det_out_key_t key0;
1823           snat_det_map_t * dm0;
1824           snat_det_session_t * ses0 = 0;
1825
1826           /* speculatively enqueue b0 to the current next frame */
1827           bi0 = from[0];
1828           to_next[0] = bi0;
1829           from += 1;
1830           to_next += 1;
1831           n_left_from -= 1;
1832           n_left_to_next -= 1;
1833
1834           b0 = vlib_get_buffer (vm, bi0);
1835           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1836
1837           ip0 = vlib_buffer_get_current (b0);
1838           udp0 = ip4_next_header (ip0);
1839           tcp0 = (tcp_header_t *) udp0;
1840
1841           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1842
1843           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
1844           if (PREDICT_FALSE(!dm0))
1845             {
1846               clib_warning("no match for internal host %U",
1847                            format_ip4_address, &ip0->src_address);
1848               next0 = SNAT_IN2OUT_NEXT_DROP;
1849               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1850               goto trace00;
1851             }
1852
1853           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
1854
1855           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src);
1856           if (PREDICT_FALSE(!ses0))
1857             {
1858               key0.ext_host_addr = ip0->dst_address;
1859               key0.ext_host_port = tcp0->dst;
1860               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
1861                 {
1862                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
1863                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
1864
1865                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
1866                     continue;
1867
1868                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
1869                   break;
1870                 }
1871               if (PREDICT_FALSE(!ses0))
1872                 {
1873                   next0 = SNAT_IN2OUT_NEXT_DROP;
1874                   b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
1875                   goto trace00;
1876                 }
1877             }
1878
1879           new_port0 = ses0->out.out_port;
1880           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1881
1882           old_addr0.as_u32 = ip0->src_address.as_u32;
1883           ip0->src_address.as_u32 = new_addr0.as_u32;
1884           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1885
1886           sum0 = ip0->checksum;
1887           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1888                                  ip4_header_t,
1889                                  src_address /* changed member */);
1890           ip0->checksum = ip_csum_fold (sum0);
1891
1892           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1893             {
1894               if (tcp0->flags & TCP_FLAG_SYN)
1895                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
1896               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
1897                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1898               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1899                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
1900               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
1901                 snat_det_ses_close(dm0, ses0);
1902               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
1903                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
1904               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
1905                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
1906
1907               old_port0 = tcp0->src;
1908               tcp0->src = new_port0;
1909
1910               sum0 = tcp0->checksum;
1911               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1912                                      ip4_header_t,
1913                                      dst_address /* changed member */);
1914               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1915                                      ip4_header_t /* cheat */,
1916                                      length /* changed member */);
1917               tcp0->checksum = ip_csum_fold(sum0);
1918             }
1919           else
1920             {
1921               ses0->state = SNAT_SESSION_UDP_ACTIVE;
1922               old_port0 = udp0->src_port;
1923               udp0->src_port = new_port0;
1924               udp0->checksum = 0;
1925             }
1926
1927           switch(ses0->state)
1928             {
1929             case SNAT_SESSION_UDP_ACTIVE:
1930                 ses0->expire = now + SNAT_UDP_TIMEOUT;
1931                 break;
1932             case SNAT_SESSION_TCP_SYN_SENT:
1933             case SNAT_SESSION_TCP_FIN_WAIT:
1934             case SNAT_SESSION_TCP_CLOSE_WAIT:
1935             case SNAT_SESSION_TCP_LAST_ACK:
1936                 ses0->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT;
1937                 break;
1938             case SNAT_SESSION_TCP_ESTABLISHED:
1939                 ses0->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT;
1940                 break;
1941             }
1942
1943         trace00:
1944           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1945                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1946             {
1947               snat_in2out_trace_t *t =
1948                  vlib_add_trace (vm, node, b0, sizeof (*t));
1949               t->is_slow_path = 0;
1950               t->sw_if_index = sw_if_index0;
1951               t->next_index = next0;
1952               t->session_index = ~0;
1953               if (ses0)
1954                 t->session_index = ses0 - dm0->sessions;
1955             }
1956
1957           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1958
1959           /* verify speculative enqueue, maybe switch current next frame */
1960           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1961                                            to_next, n_left_to_next,
1962                                            bi0, next0);
1963         }
1964
1965       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1966     }
1967
1968   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
1969                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1970                                pkts_processed);
1971   return frame->n_vectors;
1972 }
1973
1974 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
1975   .function = snat_det_in2out_node_fn,
1976   .name = "snat-det-in2out",
1977   .vector_size = sizeof (u32),
1978   .format_trace = format_snat_in2out_trace,
1979   .type = VLIB_NODE_TYPE_INTERNAL,
1980
1981   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1982   .error_strings = snat_in2out_error_strings,
1983
1984   .runtime_data_bytes = sizeof (snat_runtime_t),
1985
1986   .n_next_nodes = 2,
1987
1988   /* edit / add dispositions here */
1989   .next_nodes = {
1990     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1991     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1992   },
1993 };
1994
1995 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
1996
1997 /**********************/
1998 /*** worker handoff ***/
1999 /**********************/
2000 static uword
2001 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
2002                                vlib_node_runtime_t * node,
2003                                vlib_frame_t * frame)
2004 {
2005   snat_main_t *sm = &snat_main;
2006   vlib_thread_main_t *tm = vlib_get_thread_main ();
2007   u32 n_left_from, *from, *to_next = 0;
2008   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
2009   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
2010     = 0;
2011   vlib_frame_queue_elt_t *hf = 0;
2012   vlib_frame_t *f = 0;
2013   int i;
2014   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
2015   u32 next_worker_index = 0;
2016   u32 current_worker_index = ~0;
2017   u32 thread_index = vlib_get_thread_index ();
2018
2019   ASSERT (vec_len (sm->workers));
2020
2021   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
2022     {
2023       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
2024
2025       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
2026                                sm->first_worker_index + sm->num_workers - 1,
2027                                (vlib_frame_queue_t *) (~0));
2028     }
2029
2030   from = vlib_frame_vector_args (frame);
2031   n_left_from = frame->n_vectors;
2032
2033   while (n_left_from > 0)
2034     {
2035       u32 bi0;
2036       vlib_buffer_t *b0;
2037       u32 sw_if_index0;
2038       u32 rx_fib_index0;
2039       ip4_header_t * ip0;
2040       u8 do_handoff;
2041
2042       bi0 = from[0];
2043       from += 1;
2044       n_left_from -= 1;
2045
2046       b0 = vlib_get_buffer (vm, bi0);
2047
2048       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
2049       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2050
2051       ip0 = vlib_buffer_get_current (b0);
2052
2053       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
2054
2055       if (PREDICT_FALSE (next_worker_index != thread_index))
2056         {
2057           do_handoff = 1;
2058
2059           if (next_worker_index != current_worker_index)
2060             {
2061               if (hf)
2062                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2063
2064               hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
2065                                                       next_worker_index,
2066                                                       handoff_queue_elt_by_worker_index);
2067
2068               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2069               to_next_worker = &hf->buffer_index[hf->n_vectors];
2070               current_worker_index = next_worker_index;
2071             }
2072
2073           /* enqueue to correct worker thread */
2074           to_next_worker[0] = bi0;
2075           to_next_worker++;
2076           n_left_to_next_worker--;
2077
2078           if (n_left_to_next_worker == 0)
2079             {
2080               hf->n_vectors = VLIB_FRAME_SIZE;
2081               vlib_put_frame_queue_elt (hf);
2082               current_worker_index = ~0;
2083               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2084               hf = 0;
2085             }
2086         }
2087       else
2088         {
2089           do_handoff = 0;
2090           /* if this is 1st frame */
2091           if (!f)
2092             {
2093               f = vlib_get_frame_to_node (vm, sm->in2out_node_index);
2094               to_next = vlib_frame_vector_args (f);
2095             }
2096
2097           to_next[0] = bi0;
2098           to_next += 1;
2099           f->n_vectors++;
2100         }
2101
2102       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2103                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2104         {
2105           snat_in2out_worker_handoff_trace_t *t =
2106             vlib_add_trace (vm, node, b0, sizeof (*t));
2107           t->next_worker_index = next_worker_index;
2108           t->do_handoff = do_handoff;
2109         }
2110     }
2111
2112   if (f)
2113     vlib_put_frame_to_node (vm, sm->in2out_node_index, f);
2114
2115   if (hf)
2116     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2117
2118   /* Ship frames to the worker nodes */
2119   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2120     {
2121       if (handoff_queue_elt_by_worker_index[i])
2122         {
2123           hf = handoff_queue_elt_by_worker_index[i];
2124           /*
2125            * It works better to let the handoff node
2126            * rate-adapt, always ship the handoff queue element.
2127            */
2128           if (1 || hf->n_vectors == hf->last_n_vectors)
2129             {
2130               vlib_put_frame_queue_elt (hf);
2131               handoff_queue_elt_by_worker_index[i] = 0;
2132             }
2133           else
2134             hf->last_n_vectors = hf->n_vectors;
2135         }
2136       congested_handoff_queue_by_worker_index[i] =
2137         (vlib_frame_queue_t *) (~0);
2138     }
2139   hf = 0;
2140   current_worker_index = ~0;
2141   return frame->n_vectors;
2142 }
2143
2144 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
2145   .function = snat_in2out_worker_handoff_fn,
2146   .name = "snat-in2out-worker-handoff",
2147   .vector_size = sizeof (u32),
2148   .format_trace = format_snat_in2out_worker_handoff_trace,
2149   .type = VLIB_NODE_TYPE_INTERNAL,
2150   
2151   .n_next_nodes = 1,
2152
2153   .next_nodes = {
2154     [0] = "error-drop",
2155   },
2156 };
2157
2158 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
2159
2160 static uword
2161 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
2162                                 vlib_node_runtime_t * node,
2163                                 vlib_frame_t * frame)
2164 {
2165   u32 n_left_from, * from, * to_next;
2166   snat_in2out_next_t next_index;
2167   u32 pkts_processed = 0;
2168   snat_main_t * sm = &snat_main;
2169   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
2170   u32 stats_node_index;
2171
2172   stats_node_index = snat_in2out_fast_node.index;
2173
2174   from = vlib_frame_vector_args (frame);
2175   n_left_from = frame->n_vectors;
2176   next_index = node->cached_next_index;
2177
2178   while (n_left_from > 0)
2179     {
2180       u32 n_left_to_next;
2181
2182       vlib_get_next_frame (vm, node, next_index,
2183                            to_next, n_left_to_next);
2184
2185       while (n_left_from > 0 && n_left_to_next > 0)
2186         {
2187           u32 bi0;
2188           vlib_buffer_t * b0;
2189           u32 next0;
2190           u32 sw_if_index0;
2191           ip4_header_t * ip0;
2192           ip_csum_t sum0;
2193           u32 new_addr0, old_addr0;
2194           u16 old_port0, new_port0;
2195           udp_header_t * udp0;
2196           tcp_header_t * tcp0;
2197           icmp46_header_t * icmp0;
2198           snat_session_key_t key0, sm0;
2199           u32 proto0;
2200           u32 rx_fib_index0;
2201
2202           /* speculatively enqueue b0 to the current next frame */
2203           bi0 = from[0];
2204           to_next[0] = bi0;
2205           from += 1;
2206           to_next += 1;
2207           n_left_from -= 1;
2208           n_left_to_next -= 1;
2209
2210           b0 = vlib_get_buffer (vm, bi0);
2211           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2212
2213           ip0 = vlib_buffer_get_current (b0);
2214           udp0 = ip4_next_header (ip0);
2215           tcp0 = (tcp_header_t *) udp0;
2216           icmp0 = (icmp46_header_t *) udp0;
2217
2218           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2219           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2220
2221           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2222
2223           if (PREDICT_FALSE (proto0 == ~0))
2224               goto trace0;
2225
2226           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2227             {
2228               if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
2229                   proto0, rx_fib_index0)))
2230                 goto trace0;
2231
2232               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2233                                   rx_fib_index0, node, next0, ~0, 0);
2234               goto trace0;
2235             }
2236
2237           key0.addr = ip0->src_address;
2238           key0.port = udp0->src_port;
2239           key0.fib_index = rx_fib_index0;
2240
2241           if (snat_static_mapping_match(sm, key0, &sm0, 0))
2242             {
2243               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2244               next0= SNAT_IN2OUT_NEXT_DROP;
2245               goto trace0;
2246             }
2247
2248           new_addr0 = sm0.addr.as_u32;
2249           new_port0 = sm0.port;
2250           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2251           old_addr0 = ip0->src_address.as_u32;
2252           ip0->src_address.as_u32 = new_addr0;
2253
2254           sum0 = ip0->checksum;
2255           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2256                                  ip4_header_t,
2257                                  src_address /* changed member */);
2258           ip0->checksum = ip_csum_fold (sum0);
2259
2260           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
2261             {
2262               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2263                 {
2264                   old_port0 = tcp0->src_port;
2265                   tcp0->src_port = new_port0;
2266
2267                   sum0 = tcp0->checksum;
2268                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2269                                          ip4_header_t,
2270                                          dst_address /* changed member */);
2271                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2272                                          ip4_header_t /* cheat */,
2273                                          length /* changed member */);
2274                   tcp0->checksum = ip_csum_fold(sum0);
2275                 }
2276               else
2277                 {
2278                   old_port0 = udp0->src_port;
2279                   udp0->src_port = new_port0;
2280                   udp0->checksum = 0;
2281                 }
2282             }
2283           else
2284             {
2285               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2286                 {
2287                   sum0 = tcp0->checksum;
2288                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2289                                          ip4_header_t,
2290                                          dst_address /* changed member */);
2291                   tcp0->checksum = ip_csum_fold(sum0);
2292                 }
2293             }
2294
2295           /* Hairpinning */
2296           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
2297
2298         trace0:
2299           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2300                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2301             {
2302               snat_in2out_trace_t *t =
2303                  vlib_add_trace (vm, node, b0, sizeof (*t));
2304               t->sw_if_index = sw_if_index0;
2305               t->next_index = next0;
2306             }
2307
2308           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2309
2310           /* verify speculative enqueue, maybe switch current next frame */
2311           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2312                                            to_next, n_left_to_next,
2313                                            bi0, next0);
2314         }
2315
2316       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2317     }
2318
2319   vlib_node_increment_counter (vm, stats_node_index,
2320                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2321                                pkts_processed);
2322   return frame->n_vectors;
2323 }
2324
2325
2326 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
2327   .function = snat_in2out_fast_static_map_fn,
2328   .name = "snat-in2out-fast",
2329   .vector_size = sizeof (u32),
2330   .format_trace = format_snat_in2out_fast_trace,
2331   .type = VLIB_NODE_TYPE_INTERNAL,
2332   
2333   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2334   .error_strings = snat_in2out_error_strings,
2335
2336   .runtime_data_bytes = sizeof (snat_runtime_t),
2337   
2338   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2339
2340   /* edit / add dispositions here */
2341   .next_nodes = {
2342     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2343     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2344     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
2345     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2346   },
2347 };
2348
2349 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);