SNAT: in2out translation as an output feature hairpinning (VPP-913)
[vpp.git] / src / plugins / snat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <snat/snat.h>
25 #include <snat/snat_ipfix_logging.h>
26 #include <snat/snat_det.h>
27
28 #include <vppinfra/hash.h>
29 #include <vppinfra/error.h>
30 #include <vppinfra/elog.h>
31
32 typedef struct {
33   u32 sw_if_index;
34   u32 next_index;
35   u32 session_index;
36   u32 is_slow_path;
37 } snat_in2out_trace_t;
38
39 typedef struct {
40   u32 next_worker_index;
41   u8 do_handoff;
42 } snat_in2out_worker_handoff_trace_t;
43
44 /* packet trace format function */
45 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
46 {
47   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
50   char * tag;
51
52   tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
53   
54   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
55               t->sw_if_index, t->next_index, t->session_index);
56
57   return s;
58 }
59
60 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
61 {
62   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
63   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
64   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
65
66   s = format (s, "SANT_IN2OUT_FAST: sw_if_index %d, next index %d", 
67               t->sw_if_index, t->next_index);
68
69   return s;
70 }
71
72 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
73 {
74   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
75   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
76   snat_in2out_worker_handoff_trace_t * t =
77     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
78   char * m;
79
80   m = t->do_handoff ? "next worker" : "same worker";
81   s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
82
83   return s;
84 }
85
/*
 * Declarations of the graph-node registrations used by this file, so they
 * can be referenced before their definitions.
 * NOTE(review): the registrations themselves are presumably populated by
 * VLIB_REGISTER_NODE further down the file -- confirm.
 */
vlib_node_registration_t snat_in2out_node;
vlib_node_registration_t snat_in2out_slowpath_node;
vlib_node_registration_t snat_in2out_fast_node;
vlib_node_registration_t snat_in2out_worker_handoff_node;
vlib_node_registration_t snat_det_in2out_node;
vlib_node_registration_t snat_in2out_output_node;
vlib_node_registration_t snat_in2out_output_slowpath_node;
vlib_node_registration_t snat_in2out_output_worker_handoff_node;
vlib_node_registration_t snat_hairpin_dst_node;
vlib_node_registration_t snat_hairpin_src_node;
96
97
/*
 * in2out error counters, kept as an X-macro list.  Each entry is
 * _(SYMBOL, "description"); the list is expanded once into the
 * snat_in2out_error_t enum and once into the parallel string table, so the
 * two always stay in the same order.  The enum values are used to index
 * node->errors[] in this file.
 */
#define foreach_snat_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(IN2OUT_PACKETS, "Good in2out packets processed")      \
_(OUT_OF_PORTS, "Out of ports")                         \
_(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(NO_TRANSLATION, "No translation")

/* Error codes: SNAT_IN2OUT_ERROR_<SYMBOL>, then the total count. */
typedef enum {
#define _(name,text) SNAT_IN2OUT_ERROR_##name,
  foreach_snat_in2out_error
#undef _
  SNAT_IN2OUT_N_ERROR,
} snat_in2out_error_t;

/* Descriptions, indexed by snat_in2out_error_t. */
static char * snat_in2out_error_strings[] = {
#define _(name,text) text,
  foreach_snat_in2out_error
#undef _
};
118
/*
 * Next-node dispositions returned by the in2out processing helpers.
 * SNAT_IN2OUT_N_NEXT is the number of dispositions, not a valid next.
 */
typedef enum
{
  SNAT_IN2OUT_NEXT_LOOKUP = 0,
  SNAT_IN2OUT_NEXT_DROP,        /* packet is discarded */
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_N_NEXT,
} snat_in2out_next_t;
126
/*
 * Next-node dispositions carrying the SNAT_HAIRPIN_SRC_ prefix.
 * NOTE(review): presumably the next indices of snat_hairpin_src_node --
 * confirm against its registration.
 * SNAT_HAIRPIN_SRC_N_NEXT is the number of dispositions.
 */
typedef enum
{
  SNAT_HAIRPIN_SRC_NEXT_DROP = 0,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
  SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
  SNAT_HAIRPIN_SRC_N_NEXT,
} snat_hairpin_next_t;
134
135 /**
136  * @brief Check if packet should be translated
137  *
138  * Packets aimed at outside interface and external addresss with active session
139  * should be translated.
140  *
141  * @param sm            SNAT main
142  * @param rt            SNAT runtime data
143  * @param sw_if_index0  index of the inside interface
144  * @param ip0           IPv4 header
145  * @param proto0        SNAT protocol
146  * @param rx_fib_index0 RX FIB index
147  *
148  * @returns 0 if packet should be translated otherwise 1
149  */
150 static inline int
151 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
152                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
153                          u32 rx_fib_index0)
154 {
155   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
156   fib_prefix_t pfx = {
157     .fp_proto = FIB_PROTOCOL_IP4,
158     .fp_len = 32,
159     .fp_addr = {
160         .ip4.as_u32 = ip0->dst_address.as_u32,
161     },
162   };
163
164   /* Don't NAT packet aimed at the intfc address */
165   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
166                                       ip0->dst_address.as_u32)))
167     return 1;
168
169   fei = fib_table_lookup (rx_fib_index0, &pfx);
170   if (FIB_NODE_INDEX_INVALID != fei)
171     {
172       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
173       if (sw_if_index == ~0)
174         {
175           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
176           if (FIB_NODE_INDEX_INVALID != fei)
177             sw_if_index = fib_entry_get_resolving_interface (fei);
178         }
179       snat_interface_t *i;
180       pool_foreach (i, sm->interfaces,
181       ({
182         /* NAT packet aimed at outside interface */
183         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
184           return 0;
185       }));
186     }
187
188   return 1;
189 }
190
191 static inline int
192 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
193                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
194                     u32 rx_fib_index0)
195 {
196   udp_header_t * udp0 = ip4_next_header (ip0);
197   snat_session_key_t key0, sm0;
198   clib_bihash_kv_8_8_t kv0, value0;
199
200   key0.addr = ip0->dst_address;
201   key0.port = udp0->dst_port;
202   key0.protocol = proto0;
203   key0.fib_index = sm->outside_fib_index;
204   kv0.key = key0.as_u64;
205
206   /* NAT packet aimed at external address if */
207   /* has active sessions */
208   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
209     {
210       /* or is static mappings */
211       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
212         return 0;
213     }
214   else
215     return 0;
216
217   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
218                                  rx_fib_index0);
219 }
220
/**
 * @brief Create, or recycle, a session for an in2out flow without a session
 *
 * Steps performed:
 *  - resolve sm->outside_vrf_id to a FIB index (drop if not found);
 *  - find or create the per-thread user record keyed by the packet's source
 *    address and RX FIB index;
 *  - if the user already has max_translations_per_user dynamic sessions,
 *    recycle the user's oldest non-static session: remove it from the lookup
 *    tables and allocate a fresh outside address/port for it;
 *  - otherwise create a new session, using a matching static mapping if there
 *    is one or a dynamically allocated outside address/port if not;
 *  - fill the session keys, install them in the in2out and out2in hashes,
 *    record the owning thread in worker_by_out and log the creation.
 *
 * @param sm             SNAT main
 * @param b0             buffer; only b0->error is written, on failure
 * @param ip0            IPv4 header of the packet
 * @param rx_fib_index0  RX FIB index
 * @param key0           in2out key of the flow (copied into the session)
 * @param sessionp       [out] set to the session on success
 * @param node           node runtime, used for the error counters
 * @param next0          value returned unchanged on success
 * @param thread_index   index of the per-thread data to use
 *
 * @returns next0 on success, SNAT_IN2OUT_NEXT_DROP on failure
 */
static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
                      ip4_header_t * ip0,
                      u32 rx_fib_index0,
                      snat_session_key_t * key0,
                      snat_session_t ** sessionp,
                      vlib_node_runtime_t * node,
                      u32 next0,
                      u32 thread_index)
{
  snat_user_t *u;
  snat_user_key_t user_key;
  snat_session_t *s;
  clib_bihash_kv_8_8_t kv0, value0;
  u32 oldest_per_user_translation_list_index;
  dlist_elt_t * oldest_per_user_translation_list_elt;
  dlist_elt_t * per_user_translation_list_elt;
  dlist_elt_t * per_user_list_head_elt;
  u32 session_index;
  snat_session_key_t key1;
  u32 address_index = ~0;
  u32 outside_fib_index;
  uword * p;
  snat_worker_key_t worker_by_out_key;

  /* Map the configured outside VRF id to its FIB index */
  p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
  if (! p)
    {
      b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
      return SNAT_IN2OUT_NEXT_DROP;
    }
  outside_fib_index = p[0];

  /* key1 receives the outside (translated) address and port below */
  key1.protocol = key0->protocol;
  user_key.addr = ip0->src_address;
  user_key.fib_index = rx_fib_index0;
  kv0.key = user_key.as_u64;
  
  /* Ever heard of the "user" = src ip4 address before? */
  if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
    {
      /* no, make a new one */
      pool_get (sm->per_thread_data[thread_index].users, u);
      memset (u, 0, sizeof (*u));
      u->addr = ip0->src_address;
      u->fib_index = rx_fib_index0;

      /* Allocate and initialise the head of this user's session list */
      pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);

      u->sessions_per_user_list_head_index = per_user_list_head_elt -
        sm->per_thread_data[thread_index].list_pool;

      clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
                       u->sessions_per_user_list_head_index);

      /* The hash value is the user's index in the per-thread user pool */
      kv0.value = u - sm->per_thread_data[thread_index].users;

      /* add user */
      clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
    }
  else
    {
      u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
                             value0.value);
    }

  /* Over quota? Recycle the least recently used dynamic translation */
  if (u->nsessions >= sm->max_translations_per_user)
    {
      /* Remove the oldest dynamic translation */
      /*
       * Rotate the list head to the tail until the rotated element is not a
       * static-mapping session; static sessions are skipped this way.
       */
      do {
          oldest_per_user_translation_list_index =
            clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
                                    u->sessions_per_user_list_head_index);

          ASSERT (oldest_per_user_translation_list_index != ~0);

          /* add it back to the end of the LRU list */
          clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                              u->sessions_per_user_list_head_index,
                              oldest_per_user_translation_list_index);
          /* Get the list element */
          oldest_per_user_translation_list_elt =
            pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
                               oldest_per_user_translation_list_index);

          /* Get the session index from the list element */
          session_index = oldest_per_user_translation_list_elt->value;

          /* Get the session */
          s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                 session_index);
      } while (snat_is_session_static (s));

      if (snat_is_unk_proto_session (s))
        {
          clib_bihash_kv_16_8_t up_kv;
          snat_unk_proto_ses_key_t key;

          /* Remove from lookup tables */
          /* For these sessions in2out.port carries the protocol value */
          key.l_addr = s->in2out.addr;
          key.r_addr = s->ext_host_addr;
          key.fib_index = s->in2out.fib_index;
          key.proto = s->in2out.port;
          up_kv.key[0] = key.as_u64[0];
          up_kv.key[1] = key.as_u64[1];
          if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &up_kv, 0))
            clib_warning ("in2out key del failed");

          /* Same key with the outside address and FIB for the out2in table */
          key.l_addr = s->out2in.addr;
          key.fib_index = s->out2in.fib_index;
          up_kv.key[0] = key.as_u64[0];
          up_kv.key[1] = key.as_u64[1];
          if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &up_kv, 0))
            clib_warning ("out2in key del failed");
        }
      else
        {
          /* Remove in2out, out2in keys */
          kv0.key = s->in2out.as_u64;
          if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
              clib_warning ("in2out key delete failed");
          kv0.key = s->out2in.as_u64;
          if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
              clib_warning ("out2in key delete failed");

          /* log NAT event */
          snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
                                              s->out2in.addr.as_u32,
                                              s->in2out.protocol,
                                              s->in2out.port,
                                              s->out2in.port,
                                              s->in2out.fib_index);

          /* Give the old outside address/port back to the pool */
          snat_free_outside_address_and_port
            (sm, &s->out2in, s->outside_address_index);
        }
      s->outside_address_index = ~0;

      /* Allocate the outside address/port for the recycled session */
      if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
                                               &address_index))
        {
          /*
           * NOTE(review): the ASSERT suggests allocation is expected to
           * always succeed here.  If it does fail and the ASSERT is compiled
           * out, the packet is dropped while the session has already been
           * removed from the lookup tables but is still on the user's list
           * -- confirm this state is handled.
           */
          ASSERT(0);

          b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
          return SNAT_IN2OUT_NEXT_DROP;
        }
      s->outside_address_index = address_index;
    }
  else
    {
      /* Stays 1 only if the static-mapping lookup below matches */
      u8 static_mapping = 1;

      /* First try to match static mapping by local address and port */
      if (snat_static_mapping_match (sm, *key0, &key1, 0, 0))
        {
          static_mapping = 0;
          /* Try to create dynamic translation */
          if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
                                                   &address_index))
            {
              b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
              return SNAT_IN2OUT_NEXT_DROP;
            }
        }

      /* Create a new session */
      pool_get (sm->per_thread_data[thread_index].sessions, s);
      memset (s, 0, sizeof (*s));
      
      /* Remains ~0 for static-mapping sessions (nothing was allocated) */
      s->outside_address_index = address_index;

      if (static_mapping)
        {
          u->nstaticsessions++;
          s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
        }
      else
        {
          u->nsessions++;
        }

      /* Create list elts */
      pool_get (sm->per_thread_data[thread_index].list_pool,
                per_user_translation_list_elt);
      clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
                       per_user_translation_list_elt -
                       sm->per_thread_data[thread_index].list_pool);

      /* Cross-link the list element and the session by pool index */
      per_user_translation_list_elt->value =
        s - sm->per_thread_data[thread_index].sessions;
      s->per_user_index = per_user_translation_list_elt -
                          sm->per_thread_data[thread_index].list_pool;
      s->per_user_list_head_index = u->sessions_per_user_list_head_index;

      /* New sessions go to the tail of the user's list */
      clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                          s->per_user_list_head_index,
                          per_user_translation_list_elt -
                          sm->per_thread_data[thread_index].list_pool);
   }
  
  /* Fill in both directions' keys; out2in always uses the outside FIB */
  s->in2out = *key0;
  s->out2in = key1;
  s->out2in.protocol = key0->protocol;
  s->out2in.fib_index = outside_fib_index;
  s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
  *sessionp = s;

  /* Add to translation hashes */
  kv0.key = s->in2out.as_u64;
  kv0.value = s - sm->per_thread_data[thread_index].sessions;
  if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
      clib_warning ("in2out key add failed");
  
  kv0.key = s->out2in.as_u64;
  kv0.value = s - sm->per_thread_data[thread_index].sessions;
  
  if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
      clib_warning ("out2in key add failed");

  /* Add to translated packets worker lookup */
  worker_by_out_key.addr = s->out2in.addr;
  worker_by_out_key.port = s->out2in.port;
  worker_by_out_key.fib_index = s->out2in.fib_index;
  kv0.key = worker_by_out_key.as_u64;
  kv0.value = thread_index;
  clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);

  /* log NAT event */
  snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
                                      s->out2in.addr.as_u32,
                                      s->in2out.protocol,
                                      s->in2out.port,
                                      s->out2in.port,
                                      s->in2out.fib_index);
  return next0;
}
457
458 static_always_inline
459 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
460                                  snat_session_key_t *p_key0)
461 {
462   icmp46_header_t *icmp0;
463   snat_session_key_t key0;
464   icmp_echo_header_t *echo0, *inner_echo0 = 0;
465   ip4_header_t *inner_ip0 = 0;
466   void *l4_header = 0;
467   icmp46_header_t *inner_icmp0;
468
469   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
470   echo0 = (icmp_echo_header_t *)(icmp0+1);
471
472   if (!icmp_is_error_message (icmp0))
473     {
474       key0.protocol = SNAT_PROTOCOL_ICMP;
475       key0.addr = ip0->src_address;
476       key0.port = echo0->identifier;
477     }
478   else
479     {
480       inner_ip0 = (ip4_header_t *)(echo0+1);
481       l4_header = ip4_next_header (inner_ip0);
482       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
483       key0.addr = inner_ip0->dst_address;
484       switch (key0.protocol)
485         {
486         case SNAT_PROTOCOL_ICMP:
487           inner_icmp0 = (icmp46_header_t*)l4_header;
488           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
489           key0.port = inner_echo0->identifier;
490           break;
491         case SNAT_PROTOCOL_UDP:
492         case SNAT_PROTOCOL_TCP:
493           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
494           break;
495         default:
496           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
497         }
498     }
499   *p_key0 = key0;
500   return -1; /* success */
501 }
502
503 /**
504  * Get address and port values to be used for packet SNAT translation
505  * and create session if needed
506  *
507  * @param[in,out] sm             SNAT main
508  * @param[in,out] node           SNAT node runtime
509  * @param[in] thread_index       thread index
510  * @param[in,out] b0             buffer containing packet to be translated
511  * @param[out] p_proto           protocol used for matching
512  * @param[out] p_value           address and port after NAT translation
513  * @param[out] p_dont_translate  if packet should not be translated
514  * @param d                      optional parameter
515  * @param e                      optional parameter
516  */
517 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
518                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
519                            snat_session_key_t *p_value,
520                            u8 *p_dont_translate, void *d, void *e)
521 {
522   ip4_header_t *ip0;
523   icmp46_header_t *icmp0;
524   u32 sw_if_index0;
525   u32 rx_fib_index0;
526   snat_session_key_t key0;
527   snat_session_t *s0 = 0;
528   u8 dont_translate = 0;
529   clib_bihash_kv_8_8_t kv0, value0;
530   u32 next0 = ~0;
531   int err;
532   u32 iph_offset0 = 0;
533
534   if (PREDICT_FALSE(vnet_buffer(b0)->sw_if_index[VLIB_TX] != ~0))
535     {
536       iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
537     }
538   ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
539   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
540   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
541   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
542
543   err = icmp_get_key (ip0, &key0);
544   if (err != -1)
545     {
546       b0->error = node->errors[err];
547       next0 = SNAT_IN2OUT_NEXT_DROP;
548       goto out;
549     }
550   key0.fib_index = rx_fib_index0;
551
552   kv0.key = key0.as_u64;
553
554   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
555     {
556       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
557           IP_PROTOCOL_ICMP, rx_fib_index0) &&
558           vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0))
559         {
560           dont_translate = 1;
561           goto out;
562         }
563
564       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
565         {
566           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
567           next0 = SNAT_IN2OUT_NEXT_DROP;
568           goto out;
569         }
570
571       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
572                          &s0, node, next0, thread_index);
573
574       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
575         goto out;
576     }
577   else
578     {
579       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
580                         icmp0->type != ICMP4_echo_reply &&
581                         !icmp_is_error_message (icmp0)))
582         {
583           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
584           next0 = SNAT_IN2OUT_NEXT_DROP;
585           goto out;
586         }
587
588       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
589                               value0.value);
590     }
591
592 out:
593   *p_proto = key0.protocol;
594   if (s0)
595     *p_value = s0->out2in;
596   *p_dont_translate = dont_translate;
597   if (d)
598     *(snat_session_t**)d = s0;
599   return next0;
600 }
601
602 /**
603  * Get address and port values to be used for packet SNAT translation
604  *
605  * @param[in] sm                 SNAT main
606  * @param[in,out] node           SNAT node runtime
607  * @param[in] thread_index       thread index
608  * @param[in,out] b0             buffer containing packet to be translated
609  * @param[out] p_proto           protocol used for matching
610  * @param[out] p_value           address and port after NAT translation
611  * @param[out] p_dont_translate  if packet should not be translated
612  * @param d                      optional parameter
613  * @param e                      optional parameter
614  */
615 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
616                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
617                            snat_session_key_t *p_value,
618                            u8 *p_dont_translate, void *d, void *e)
619 {
620   ip4_header_t *ip0;
621   icmp46_header_t *icmp0;
622   u32 sw_if_index0;
623   u32 rx_fib_index0;
624   snat_session_key_t key0;
625   snat_session_key_t sm0;
626   u8 dont_translate = 0;
627   u8 is_addr_only;
628   u32 next0 = ~0;
629   int err;
630
631   ip0 = vlib_buffer_get_current (b0);
632   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
633   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
634   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
635
636   err = icmp_get_key (ip0, &key0);
637   if (err != -1)
638     {
639       b0->error = node->errors[err];
640       next0 = SNAT_IN2OUT_NEXT_DROP;
641       goto out2;
642     }
643   key0.fib_index = rx_fib_index0;
644
645   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only))
646     {
647       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
648           IP_PROTOCOL_ICMP, rx_fib_index0)))
649         {
650           dont_translate = 1;
651           goto out;
652         }
653
654       if (icmp_is_error_message (icmp0))
655         {
656           next0 = SNAT_IN2OUT_NEXT_DROP;
657           goto out;
658         }
659
660       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
661       next0 = SNAT_IN2OUT_NEXT_DROP;
662       goto out;
663     }
664
665   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
666                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
667                     !icmp_is_error_message (icmp0)))
668     {
669       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
670       next0 = SNAT_IN2OUT_NEXT_DROP;
671       goto out;
672     }
673
674 out:
675   *p_value = sm0;
676 out2:
677   *p_proto = key0.protocol;
678   *p_dont_translate = dont_translate;
679   return next0;
680 }
681
682 static inline u32 icmp_in2out (snat_main_t *sm,
683                                vlib_buffer_t * b0,
684                                ip4_header_t * ip0,
685                                icmp46_header_t * icmp0,
686                                u32 sw_if_index0,
687                                u32 rx_fib_index0,
688                                vlib_node_runtime_t * node,
689                                u32 next0,
690                                u32 thread_index,
691                                void *d,
692                                void *e)
693 {
694   snat_session_key_t sm0;
695   u8 protocol;
696   icmp_echo_header_t *echo0, *inner_echo0 = 0;
697   ip4_header_t *inner_ip0;
698   void *l4_header = 0;
699   icmp46_header_t *inner_icmp0;
700   u8 dont_translate;
701   u32 new_addr0, old_addr0;
702   u16 old_id0, new_id0;
703   ip_csum_t sum0;
704   u16 checksum0;
705   u32 next0_tmp;
706
707   echo0 = (icmp_echo_header_t *)(icmp0+1);
708
709   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0,
710                                        &protocol, &sm0, &dont_translate, d, e);
711   if (next0_tmp != ~0)
712     next0 = next0_tmp;
713   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
714     goto out;
715
716   sum0 = ip_incremental_checksum (0, icmp0,
717                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
718   checksum0 = ~ip_csum_fold (sum0);
719   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
720     {
721       next0 = SNAT_IN2OUT_NEXT_DROP;
722       goto out;
723     }
724
725   old_addr0 = ip0->src_address.as_u32;
726   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
727   if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
728     vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
729
730   sum0 = ip0->checksum;
731   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
732                          src_address /* changed member */);
733   ip0->checksum = ip_csum_fold (sum0);
734   
735   if (!icmp_is_error_message (icmp0))
736     {
737       new_id0 = sm0.port;
738       if (PREDICT_FALSE(new_id0 != echo0->identifier))
739         {
740           old_id0 = echo0->identifier;
741           new_id0 = sm0.port;
742           echo0->identifier = new_id0;
743
744           sum0 = icmp0->checksum;
745           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
746                                  identifier);
747           icmp0->checksum = ip_csum_fold (sum0);
748         }
749     }
750   else
751     {
752       inner_ip0 = (ip4_header_t *)(echo0+1);
753       l4_header = ip4_next_header (inner_ip0);
754
755       if (!ip4_header_checksum_is_valid (inner_ip0))
756         {
757           next0 = SNAT_IN2OUT_NEXT_DROP;
758           goto out;
759         }
760
761       old_addr0 = inner_ip0->dst_address.as_u32;
762       inner_ip0->dst_address = sm0.addr;
763       new_addr0 = inner_ip0->dst_address.as_u32;
764
765       sum0 = icmp0->checksum;
766       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
767                              dst_address /* changed member */);
768       icmp0->checksum = ip_csum_fold (sum0);
769
770       switch (protocol)
771         {
772           case SNAT_PROTOCOL_ICMP:
773             inner_icmp0 = (icmp46_header_t*)l4_header;
774             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
775
776             old_id0 = inner_echo0->identifier;
777             new_id0 = sm0.port;
778             inner_echo0->identifier = new_id0;
779
780             sum0 = icmp0->checksum;
781             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
782                                    identifier);
783             icmp0->checksum = ip_csum_fold (sum0);
784             break;
785           case SNAT_PROTOCOL_UDP:
786           case SNAT_PROTOCOL_TCP:
787             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
788             new_id0 = sm0.port;
789             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
790
791             sum0 = icmp0->checksum;
792             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
793                                    dst_port);
794             icmp0->checksum = ip_csum_fold (sum0);
795             break;
796           default:
797             ASSERT(0);
798         }
799     }
800
801 out:
802   return next0;
803 }
804
805 /**
806  * @brief Hairpinning
807  *
808  * Hairpinning allows two endpoints on the internal side of the NAT to
809  * communicate even if they only use each other's external IP addresses
810  * and ports.
811  *
812  * @param sm     SNAT main.
813  * @param b0     Vlib buffer.
814  * @param ip0    IP header.
815  * @param udp0   UDP header.
816  * @param tcp0   TCP header.
817  * @param proto0 SNAT protocol.
818  */
819 static inline void
820 snat_hairpinning (snat_main_t *sm,
821                   vlib_buffer_t * b0,
822                   ip4_header_t * ip0,
823                   udp_header_t * udp0,
824                   tcp_header_t * tcp0,
825                   u32 proto0)
826 {
827   snat_session_key_t key0, sm0;
828   snat_worker_key_t k0;
829   snat_session_t * s0;
830   clib_bihash_kv_8_8_t kv0, value0;
831   ip_csum_t sum0;
832   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
833   u16 new_dst_port0, old_dst_port0;
834
835   key0.addr = ip0->dst_address;
836   key0.port = udp0->dst_port;
837   key0.protocol = proto0;
838   key0.fib_index = sm->outside_fib_index;
839   kv0.key = key0.as_u64;
840
841   /* Check if destination is in active sessions */
842   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
843     {
844       /* or static mappings */
845       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
846         {
847           new_dst_addr0 = sm0.addr.as_u32;
848           new_dst_port0 = sm0.port;
849           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
850         }
851     }
852   else
853     {
854       si = value0.value;
855       if (sm->num_workers > 1)
856         {
857           k0.addr = ip0->dst_address;
858           k0.port = udp0->dst_port;
859           k0.fib_index = sm->outside_fib_index;
860           kv0.key = k0.as_u64;
861           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
862             ASSERT(0);
863           else
864             ti = value0.value;
865         }
866       else
867         ti = sm->num_workers;
868
869       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
870       new_dst_addr0 = s0->in2out.addr.as_u32;
871       new_dst_port0 = s0->in2out.port;
872       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
873     }
874
875   /* Destination is behind the same NAT, use internal address and port */
876   if (new_dst_addr0)
877     {
878       old_dst_addr0 = ip0->dst_address.as_u32;
879       ip0->dst_address.as_u32 = new_dst_addr0;
880       sum0 = ip0->checksum;
881       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
882                              ip4_header_t, dst_address);
883       ip0->checksum = ip_csum_fold (sum0);
884
885       old_dst_port0 = tcp0->dst;
886       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
887         {
888           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
889             {
890               tcp0->dst = new_dst_port0;
891               sum0 = tcp0->checksum;
892               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
893                                      ip4_header_t, dst_address);
894               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
895                                      ip4_header_t /* cheat */, length);
896               tcp0->checksum = ip_csum_fold(sum0);
897             }
898           else
899             {
900               udp0->dst_port = new_dst_port0;
901               udp0->checksum = 0;
902             }
903         }
904       else
905         {
906           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
907             {
908               sum0 = tcp0->checksum;
909               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
910                                      ip4_header_t, dst_address);
911               tcp0->checksum = ip_csum_fold(sum0);
912             }
913         }
914     }
915 }
916
917 static inline void
918 snat_icmp_hairpinning (snat_main_t *sm,
919                        vlib_buffer_t * b0,
920                        ip4_header_t * ip0,
921                        icmp46_header_t * icmp0)
922 {
923   snat_session_key_t key0, sm0;
924   clib_bihash_kv_8_8_t kv0, value0;
925   snat_worker_key_t k0;
926   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
927   ip_csum_t sum0;
928   snat_session_t *s0;
929
930   if (!icmp_is_error_message (icmp0))
931     {
932       icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
933       u16 icmp_id0 = echo0->identifier;
934       key0.addr = ip0->dst_address;
935       key0.port = icmp_id0;
936       key0.protocol = SNAT_PROTOCOL_ICMP;
937       key0.fib_index = sm->outside_fib_index;
938       kv0.key = key0.as_u64;
939
940       /* Check if destination is in active sessions */
941       if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
942         {
943           /* or static mappings */
944           if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
945             {
946               new_dst_addr0 = sm0.addr.as_u32;
947               vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
948             }
949         }
950       else
951         {
952           si = value0.value;
953           if (sm->num_workers > 1)
954             {
955               k0.addr = ip0->dst_address;
956               k0.port = icmp_id0;
957               k0.fib_index = sm->outside_fib_index;
958               kv0.key = k0.as_u64;
959               if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
960                 ASSERT(0);
961               else
962                 ti = value0.value;
963             }
964           else
965             ti = sm->num_workers;
966
967           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
968           new_dst_addr0 = s0->in2out.addr.as_u32;
969           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
970           echo0->identifier = s0->in2out.port;
971           sum0 = icmp0->checksum;
972           sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
973                                  icmp_echo_header_t, identifier);
974           icmp0->checksum = ip_csum_fold (sum0);
975         }
976
977       /* Destination is behind the same NAT, use internal address and port */
978       if (new_dst_addr0)
979         {
980           old_dst_addr0 = ip0->dst_address.as_u32;
981           ip0->dst_address.as_u32 = new_dst_addr0;
982           sum0 = ip0->checksum;
983           sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
984                                  ip4_header_t, dst_address);
985           ip0->checksum = ip_csum_fold (sum0);
986         }
987     }
988
989 }
990
991 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
992                                          vlib_buffer_t * b0,
993                                          ip4_header_t * ip0,
994                                          icmp46_header_t * icmp0,
995                                          u32 sw_if_index0,
996                                          u32 rx_fib_index0,
997                                          vlib_node_runtime_t * node,
998                                          u32 next0,
999                                          f64 now,
1000                                          u32 thread_index,
1001                                          snat_session_t ** p_s0)
1002 {
1003   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1004                       next0, thread_index, p_s0, 0);
1005   snat_session_t * s0 = *p_s0;
1006   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
1007     {
1008       /* Hairpinning */
1009       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == 0)
1010         snat_icmp_hairpinning(sm, b0, ip0, icmp0);
1011       /* Accounting */
1012       s0->last_heard = now;
1013       s0->total_pkts++;
1014       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
1015       /* Per-user LRU list maintenance for dynamic translations */
1016       if (!snat_is_session_static (s0))
1017         {
1018           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1019                              s0->per_user_index);
1020           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1021                               s0->per_user_list_head_index,
1022                               s0->per_user_index);
1023         }
1024     }
1025   return next0;
1026 }
1027 static inline void
1028 snat_hairpinning_unknown_proto (snat_main_t *sm,
1029                                 vlib_buffer_t * b,
1030                                 ip4_header_t * ip)
1031 {
1032   u32 old_addr, new_addr = 0, ti = 0;
1033   clib_bihash_kv_8_8_t kv, value;
1034   clib_bihash_kv_16_8_t s_kv, s_value;
1035   snat_unk_proto_ses_key_t key;
1036   snat_session_key_t m_key;
1037   snat_worker_key_t w_key;
1038   snat_static_mapping_t *m;
1039   ip_csum_t sum;
1040   snat_session_t *s;
1041
1042   old_addr = ip->dst_address.as_u32;
1043   key.l_addr.as_u32 = ip->dst_address.as_u32;
1044   key.r_addr.as_u32 = ip->src_address.as_u32;
1045   key.fib_index = sm->outside_fib_index;
1046   key.proto = ip->protocol;
1047   key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0;
1048   s_kv.key[0] = key.as_u64[0];
1049   s_kv.key[1] = key.as_u64[1];
1050   if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value))
1051     {
1052       m_key.addr = ip->dst_address;
1053       m_key.fib_index = sm->outside_fib_index;
1054       m_key.port = 0;
1055       m_key.protocol = 0;
1056       kv.key = m_key.as_u64;
1057       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1058         return;
1059
1060       m = pool_elt_at_index (sm->static_mappings, value.value);
1061       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1062         vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
1063       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
1064     }
1065   else
1066     {
1067       if (sm->num_workers > 1)
1068         {
1069           w_key.addr = ip->dst_address;
1070           w_key.port = 0;
1071           w_key.fib_index = sm->outside_fib_index;
1072           kv.key = w_key.as_u64;
1073           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv, &value))
1074             return;
1075           else
1076             ti = value.value;
1077         }
1078       else
1079         ti = sm->num_workers;
1080
1081       s = pool_elt_at_index (sm->per_thread_data[ti].sessions, s_value.value);
1082       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1083         vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
1084       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
1085     }
1086   sum = ip->checksum;
1087   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
1088   ip->checksum = ip_csum_fold (sum);
1089 }
1090
1091 static void
1092 snat_in2out_unknown_proto (snat_main_t *sm,
1093                            vlib_buffer_t * b,
1094                            ip4_header_t * ip,
1095                            u32 rx_fib_index,
1096                            u32 thread_index,
1097                            f64 now,
1098                            vlib_main_t * vm)
1099 {
1100   clib_bihash_kv_8_8_t kv, value;
1101   clib_bihash_kv_16_8_t s_kv, s_value;
1102   snat_static_mapping_t *m;
1103   snat_session_key_t m_key;
1104   u32 old_addr, new_addr = 0;
1105   ip_csum_t sum;
1106   snat_user_key_t u_key;
1107   snat_user_t *u;
1108   dlist_elt_t *head, *elt, *oldest;
1109   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1110   u32 elt_index, head_index, ses_index, oldest_index;
1111   snat_session_t * s;
1112   snat_unk_proto_ses_key_t key;
1113   u32 address_index = ~0;
1114   int i;
1115   u8 is_sm = 0;
1116
1117   old_addr = ip->src_address.as_u32;
1118
1119   key.l_addr = ip->src_address;
1120   key.r_addr = ip->dst_address;
1121   key.fib_index = rx_fib_index;
1122   key.proto = ip->protocol;
1123   key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0;
1124   s_kv.key[0] = key.as_u64[0];
1125   s_kv.key[1] = key.as_u64[1];
1126
1127   if (!clib_bihash_search_16_8 (&sm->in2out_unk_proto, &s_kv, &s_value))
1128     {
1129       s = pool_elt_at_index (tsm->sessions, s_value.value);
1130       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1131     }
1132   else
1133     {
1134       u_key.addr = ip->src_address;
1135       u_key.fib_index = rx_fib_index;
1136       kv.key = u_key.as_u64;
1137
1138       /* Ever heard of the "user" = src ip4 address before? */
1139       if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
1140         {
1141           /* no, make a new one */
1142           pool_get (tsm->users, u);
1143           memset (u, 0, sizeof (*u));
1144           u->addr = ip->src_address;
1145           u->fib_index = rx_fib_index;
1146
1147           pool_get (tsm->list_pool, head);
1148           u->sessions_per_user_list_head_index = head - tsm->list_pool;
1149
1150           clib_dlist_init (tsm->list_pool,
1151                            u->sessions_per_user_list_head_index);
1152
1153           kv.value = u - tsm->users;
1154
1155           /* add user */
1156           clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1);
1157         }
1158       else
1159         {
1160           u = pool_elt_at_index (tsm->users, value.value);
1161         }
1162
1163       m_key.addr = ip->src_address;
1164       m_key.port = 0;
1165       m_key.protocol = 0;
1166       m_key.fib_index = rx_fib_index;
1167       kv.key = m_key.as_u64;
1168
1169       /* Try to find static mapping first */
1170       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
1171         {
1172           m = pool_elt_at_index (sm->static_mappings, value.value);
1173           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
1174           is_sm = 1;
1175           goto create_ses;
1176         }
1177       /* Fallback to 3-tuple key */
1178       else
1179         {
1180           /* Choose same out address as for TCP/UDP session to same destination */
1181           if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
1182             {
1183               head_index = u->sessions_per_user_list_head_index;
1184               head = pool_elt_at_index (tsm->list_pool, head_index);
1185               elt_index = head->next;
1186               elt = pool_elt_at_index (tsm->list_pool, elt_index);
1187               ses_index = elt->value;
1188               while (ses_index != ~0)
1189                 {
1190                   s =  pool_elt_at_index (tsm->sessions, ses_index);
1191                   elt_index = elt->next;
1192                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1193                   ses_index = elt->value;
1194
1195                   if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
1196                     {
1197                       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1198                       address_index = s->outside_address_index;
1199
1200                       key.fib_index = sm->outside_fib_index;
1201                       key.l_addr.as_u32 = new_addr;
1202                       s_kv.key[0] = key.as_u64[0];
1203                       s_kv.key[1] = key.as_u64[1];
1204                       if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value))
1205                         break;
1206
1207                       goto create_ses;
1208                     }
1209                 }
1210             }
1211           key.fib_index = sm->outside_fib_index;
1212           for (i = 0; i < vec_len (sm->addresses); i++)
1213             {
1214               key.l_addr.as_u32 = sm->addresses[i].addr.as_u32;
1215               s_kv.key[0] = key.as_u64[0];
1216               s_kv.key[1] = key.as_u64[1];
1217               if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value))
1218                 {
1219                   new_addr = ip->src_address.as_u32 = key.l_addr.as_u32;
1220                   address_index = i;
1221                   goto create_ses;
1222                 }
1223             }
1224           return;
1225         }
1226
1227 create_ses:
1228       /* Over quota? Recycle the least recently used dynamic translation */
1229       if (u->nsessions >= sm->max_translations_per_user && !is_sm)
1230         {
1231           /* Remove the oldest dynamic translation */
1232           do {
1233               oldest_index = clib_dlist_remove_head (
1234                 tsm->list_pool, u->sessions_per_user_list_head_index);
1235
1236               ASSERT (oldest_index != ~0);
1237
1238               /* add it back to the end of the LRU list */
1239               clib_dlist_addtail (tsm->list_pool,
1240                                   u->sessions_per_user_list_head_index,
1241                                   oldest_index);
1242               /* Get the list element */
1243               oldest = pool_elt_at_index (tsm->list_pool, oldest_index);
1244
1245               /* Get the session index from the list element */
1246               ses_index = oldest->value;
1247
1248               /* Get the session */
1249               s = pool_elt_at_index (tsm->sessions, ses_index);
1250           } while (snat_is_session_static (s));
1251
1252           if (snat_is_unk_proto_session (s))
1253             {
1254               /* Remove from lookup tables */
1255               key.l_addr = s->in2out.addr;
1256               key.r_addr = s->ext_host_addr;
1257               key.fib_index = s->in2out.fib_index;
1258               key.proto = s->in2out.port;
1259               s_kv.key[0] = key.as_u64[0];
1260               s_kv.key[1] = key.as_u64[1];
1261               if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 0))
1262                 clib_warning ("in2out key del failed");
1263
1264               key.l_addr = s->out2in.addr;
1265               key.fib_index = s->out2in.fib_index;
1266               s_kv.key[0] = key.as_u64[0];
1267               s_kv.key[1] = key.as_u64[1];
1268               if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 0))
1269                 clib_warning ("out2in key del failed");
1270             }
1271           else
1272             {
1273               /* log NAT event */
1274               snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
1275                                                   s->out2in.addr.as_u32,
1276                                                   s->in2out.protocol,
1277                                                   s->in2out.port,
1278                                                   s->out2in.port,
1279                                                   s->in2out.fib_index);
1280
1281               snat_free_outside_address_and_port (sm, &s->out2in,
1282                                                   s->outside_address_index);
1283
1284               /* Remove in2out, out2in keys */
1285               kv.key = s->in2out.as_u64;
1286               if (clib_bihash_add_del_8_8 (&sm->in2out, &kv, 0))
1287                 clib_warning ("in2out key del failed");
1288               kv.key = s->out2in.as_u64;
1289               if (clib_bihash_add_del_8_8 (&sm->out2in, &kv, 0))
1290                 clib_warning ("out2in key del failed");
1291             }
1292         }
1293       else
1294         {
1295           /* Create a new session */
1296           pool_get (tsm->sessions, s);
1297           memset (s, 0, sizeof (*s));
1298
1299           /* Create list elts */
1300           pool_get (tsm->list_pool, elt);
1301           clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
1302           elt->value = s - tsm->sessions;
1303           s->per_user_index = elt - tsm->list_pool;
1304           s->per_user_list_head_index = u->sessions_per_user_list_head_index;
1305           clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1306                               s->per_user_index);
1307         }
1308
1309       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1310       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
1311       s->outside_address_index = address_index;
1312       s->out2in.addr.as_u32 = new_addr;
1313       s->out2in.fib_index = sm->outside_fib_index;
1314       s->in2out.addr.as_u32 = old_addr;
1315       s->in2out.fib_index = rx_fib_index;
1316       s->in2out.port = s->out2in.port = ip->protocol;
1317       if (is_sm)
1318         {
1319           u->nstaticsessions++;
1320           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1321         }
1322       else
1323         {
1324           u->nsessions++;
1325         }
1326
1327       /* Add to lookup tables */
1328       key.l_addr.as_u32 = old_addr;
1329       key.r_addr = ip->dst_address;
1330       key.proto = ip->protocol;
1331       key.fib_index = rx_fib_index;
1332       s_kv.key[0] = key.as_u64[0];
1333       s_kv.key[1] = key.as_u64[1];
1334       s_kv.value = s - tsm->sessions;
1335       if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 1))
1336         clib_warning ("in2out key add failed");
1337
1338       key.l_addr.as_u32 = new_addr;
1339       key.fib_index = sm->outside_fib_index;
1340       s_kv.key[0] = key.as_u64[0];
1341       s_kv.key[1] = key.as_u64[1];
1342       if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 1))
1343         clib_warning ("out2in key add failed");
1344   }
1345
1346   /* Update IP checksum */
1347   sum = ip->checksum;
1348   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1349   ip->checksum = ip_csum_fold (sum);
1350
1351   /* Accounting */
1352   s->last_heard = now;
1353   s->total_pkts++;
1354   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1355   /* Per-user LRU list maintenance */
1356   clib_dlist_remove (tsm->list_pool, s->per_user_index);
1357   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1358                       s->per_user_index);
1359
1360   /* Hairpinning */
1361   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1362     snat_hairpinning_unknown_proto(sm, b, ip);
1363
1364   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1365     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1366 }
1367
1368 static inline uword
1369 snat_in2out_node_fn_inline (vlib_main_t * vm,
1370                             vlib_node_runtime_t * node,
1371                             vlib_frame_t * frame, int is_slow_path,
1372                             int is_output_feature)
1373 {
1374   u32 n_left_from, * from, * to_next;
1375   snat_in2out_next_t next_index;
1376   u32 pkts_processed = 0;
1377   snat_main_t * sm = &snat_main;
1378   f64 now = vlib_time_now (vm);
1379   u32 stats_node_index;
1380   u32 thread_index = vlib_get_thread_index ();
1381
1382   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
1383     snat_in2out_node.index;
1384
1385   from = vlib_frame_vector_args (frame);
1386   n_left_from = frame->n_vectors;
1387   next_index = node->cached_next_index;
1388
1389   while (n_left_from > 0)
1390     {
1391       u32 n_left_to_next;
1392
1393       vlib_get_next_frame (vm, node, next_index,
1394                            to_next, n_left_to_next);
1395
1396       while (n_left_from >= 4 && n_left_to_next >= 2)
1397         {
1398           u32 bi0, bi1;
1399           vlib_buffer_t * b0, * b1;
1400           u32 next0, next1;
1401           u32 sw_if_index0, sw_if_index1;
1402           ip4_header_t * ip0, * ip1;
1403           ip_csum_t sum0, sum1;
1404           u32 new_addr0, old_addr0, new_addr1, old_addr1;
1405           u16 old_port0, new_port0, old_port1, new_port1;
1406           udp_header_t * udp0, * udp1;
1407           tcp_header_t * tcp0, * tcp1;
1408           icmp46_header_t * icmp0, * icmp1;
1409           snat_session_key_t key0, key1;
1410           u32 rx_fib_index0, rx_fib_index1;
1411           u32 proto0, proto1;
1412           snat_session_t * s0 = 0, * s1 = 0;
1413           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1414           u32 iph_offset0 = 0, iph_offset1 = 0;
1415           
1416           /* Prefetch next iteration. */
1417           {
1418             vlib_buffer_t * p2, * p3;
1419             
1420             p2 = vlib_get_buffer (vm, from[2]);
1421             p3 = vlib_get_buffer (vm, from[3]);
1422             
1423             vlib_prefetch_buffer_header (p2, LOAD);
1424             vlib_prefetch_buffer_header (p3, LOAD);
1425
1426             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1427             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1428           }
1429
1430           /* speculatively enqueue b0 and b1 to the current next frame */
1431           to_next[0] = bi0 = from[0];
1432           to_next[1] = bi1 = from[1];
1433           from += 2;
1434           to_next += 2;
1435           n_left_from -= 2;
1436           n_left_to_next -= 2;
1437           
1438           b0 = vlib_get_buffer (vm, bi0);
1439           b1 = vlib_get_buffer (vm, bi1);
1440
1441           if (is_output_feature)
1442             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1443
1444           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1445                  iph_offset0);
1446
1447           udp0 = ip4_next_header (ip0);
1448           tcp0 = (tcp_header_t *) udp0;
1449           icmp0 = (icmp46_header_t *) udp0;
1450
1451           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1452           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1453                                    sw_if_index0);
1454
1455           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1456
1457           if (PREDICT_FALSE(ip0->ttl == 1))
1458             {
1459               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1460               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1461                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1462                                            0);
1463               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1464               goto trace00;
1465             }
1466
1467           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1468
1469           /* Next configured feature, probably ip4-lookup */
1470           if (is_slow_path)
1471             {
1472               if (PREDICT_FALSE (proto0 == ~0))
1473                 {
1474                   snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1475                                              thread_index, now, vm);
1476                   goto trace00;
1477                 }
1478
1479               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1480                 {
1481                   next0 = icmp_in2out_slow_path 
1482                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
1483                      node, next0, now, thread_index, &s0);
1484                   goto trace00;
1485                 }
1486             }
1487           else
1488             {
1489               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1490                 {
1491                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1492                   goto trace00;
1493                 }
1494             }
1495
1496           key0.addr = ip0->src_address;
1497           key0.port = udp0->src_port;
1498           key0.protocol = proto0;
1499           key0.fib_index = rx_fib_index0;
1500           
1501           kv0.key = key0.as_u64;
1502
1503           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
1504             {
1505               if (is_slow_path)
1506                 {
1507                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1508                       ip0, proto0, rx_fib_index0)) && !is_output_feature)
1509                     goto trace00;
1510
1511                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1512                                      &s0, node, next0, thread_index);
1513                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1514                     goto trace00;
1515                 }
1516               else
1517                 {
1518                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1519                   goto trace00;
1520                 }
1521             }
1522           else
1523             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1524                                     value0.value);
1525
1526           old_addr0 = ip0->src_address.as_u32;
1527           ip0->src_address = s0->out2in.addr;
1528           new_addr0 = ip0->src_address.as_u32;
1529           if (!is_output_feature)
1530             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1531
1532           sum0 = ip0->checksum;
1533           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1534                                  ip4_header_t,
1535                                  src_address /* changed member */);
1536           ip0->checksum = ip_csum_fold (sum0);
1537
1538           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1539             {
1540               old_port0 = tcp0->src_port;
1541               tcp0->src_port = s0->out2in.port;
1542               new_port0 = tcp0->src_port;
1543
1544               sum0 = tcp0->checksum;
1545               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1546                                      ip4_header_t,
1547                                      dst_address /* changed member */);
1548               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1549                                      ip4_header_t /* cheat */,
1550                                      length /* changed member */);
1551               tcp0->checksum = ip_csum_fold(sum0);
1552             }
1553           else
1554             {
1555               old_port0 = udp0->src_port;
1556               udp0->src_port = s0->out2in.port;
1557               udp0->checksum = 0;
1558             }
1559
1560           /* Hairpinning */
1561           if (!is_output_feature)
1562             snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1563
1564           /* Accounting */
1565           s0->last_heard = now;
1566           s0->total_pkts++;
1567           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1568           /* Per-user LRU list maintenance for dynamic translation */
1569           if (!snat_is_session_static (s0))
1570             {
1571               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1572                                  s0->per_user_index);
1573               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1574                                   s0->per_user_list_head_index,
1575                                   s0->per_user_index);
1576             }
1577         trace00:
1578
1579           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1580                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1581             {
1582               snat_in2out_trace_t *t = 
1583                  vlib_add_trace (vm, node, b0, sizeof (*t));
1584               t->is_slow_path = is_slow_path;
1585               t->sw_if_index = sw_if_index0;
1586               t->next_index = next0;
1587                   t->session_index = ~0;
1588               if (s0)
1589                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1590             }
1591
1592           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1593
1594           if (is_output_feature)
1595             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1596
1597           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1598                  iph_offset1);
1599
1600           udp1 = ip4_next_header (ip1);
1601           tcp1 = (tcp_header_t *) udp1;
1602           icmp1 = (icmp46_header_t *) udp1;
1603
1604           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1605           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1606                                    sw_if_index1);
1607
1608           if (PREDICT_FALSE(ip1->ttl == 1))
1609             {
1610               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1611               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1612                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1613                                            0);
1614               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1615               goto trace01;
1616             }
1617
1618           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1619
1620           /* Next configured feature, probably ip4-lookup */
1621           if (is_slow_path)
1622             {
1623               if (PREDICT_FALSE (proto1 == ~0))
1624                 {
1625                   snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1,
1626                                              thread_index, now, vm);
1627                   goto trace01;
1628                 }
1629
1630               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1631                 {
1632                   next1 = icmp_in2out_slow_path 
1633                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1634                      next1, now, thread_index, &s1);
1635                   goto trace01;
1636                 }
1637             }
1638           else
1639             {
1640               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1641                 {
1642                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1643                   goto trace01;
1644                 }
1645             }
1646
1647           key1.addr = ip1->src_address;
1648           key1.port = udp1->src_port;
1649           key1.protocol = proto1;
1650           key1.fib_index = rx_fib_index1;
1651           
1652           kv1.key = key1.as_u64;
1653
1654             if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
1655             {
1656               if (is_slow_path)
1657                 {
1658                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
1659                       ip1, proto1, rx_fib_index1)) && !is_output_feature)
1660                     goto trace01;
1661
1662                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1663                                      &s1, node, next1, thread_index);
1664                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1665                     goto trace01;
1666                 }
1667               else
1668                 {
1669                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1670                   goto trace01;
1671                 }
1672             }
1673           else
1674             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1675                                     value1.value);
1676
1677           old_addr1 = ip1->src_address.as_u32;
1678           ip1->src_address = s1->out2in.addr;
1679           new_addr1 = ip1->src_address.as_u32;
1680           if (!is_output_feature)
1681             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1682
1683           sum1 = ip1->checksum;
1684           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1685                                  ip4_header_t,
1686                                  src_address /* changed member */);
1687           ip1->checksum = ip_csum_fold (sum1);
1688
1689           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1690             {
1691               old_port1 = tcp1->src_port;
1692               tcp1->src_port = s1->out2in.port;
1693               new_port1 = tcp1->src_port;
1694
1695               sum1 = tcp1->checksum;
1696               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1697                                      ip4_header_t,
1698                                      dst_address /* changed member */);
1699               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1700                                      ip4_header_t /* cheat */,
1701                                      length /* changed member */);
1702               tcp1->checksum = ip_csum_fold(sum1);
1703             }
1704           else
1705             {
1706               old_port1 = udp1->src_port;
1707               udp1->src_port = s1->out2in.port;
1708               udp1->checksum = 0;
1709             }
1710
1711           /* Hairpinning */
1712           if (!is_output_feature)
1713             snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
1714
1715           /* Accounting */
1716           s1->last_heard = now;
1717           s1->total_pkts++;
1718           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1719           /* Per-user LRU list maintenance for dynamic translation */
1720           if (!snat_is_session_static (s1))
1721             {
1722               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1723                                  s1->per_user_index);
1724               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1725                                   s1->per_user_list_head_index,
1726                                   s1->per_user_index);
1727             }
1728         trace01:
1729
1730           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1731                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
1732             {
1733               snat_in2out_trace_t *t = 
1734                  vlib_add_trace (vm, node, b1, sizeof (*t));
1735               t->sw_if_index = sw_if_index1;
1736               t->next_index = next1;
1737               t->session_index = ~0;
1738               if (s1)
1739                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1740             }
1741
1742           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1743
1744           /* verify speculative enqueues, maybe switch current next frame */
1745           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1746                                            to_next, n_left_to_next,
1747                                            bi0, bi1, next0, next1);
1748         }
1749
1750       while (n_left_from > 0 && n_left_to_next > 0)
1751         {
1752           u32 bi0;
1753           vlib_buffer_t * b0;
1754           u32 next0;
1755           u32 sw_if_index0;
1756           ip4_header_t * ip0;
1757           ip_csum_t sum0;
1758           u32 new_addr0, old_addr0;
1759           u16 old_port0, new_port0;
1760           udp_header_t * udp0;
1761           tcp_header_t * tcp0;
1762           icmp46_header_t * icmp0;
1763           snat_session_key_t key0;
1764           u32 rx_fib_index0;
1765           u32 proto0;
1766           snat_session_t * s0 = 0;
1767           clib_bihash_kv_8_8_t kv0, value0;
1768           u32 iph_offset0 = 0;
1769
1770           /* speculatively enqueue b0 to the current next frame */
1771           bi0 = from[0];
1772           to_next[0] = bi0;
1773           from += 1;
1774           to_next += 1;
1775           n_left_from -= 1;
1776           n_left_to_next -= 1;
1777
1778           b0 = vlib_get_buffer (vm, bi0);
1779           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1780
1781           if (is_output_feature)
1782             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1783
1784           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1785                  iph_offset0);
1786
1787           udp0 = ip4_next_header (ip0);
1788           tcp0 = (tcp_header_t *) udp0;
1789           icmp0 = (icmp46_header_t *) udp0;
1790
1791           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1792           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1793                                    sw_if_index0);
1794
1795           if (PREDICT_FALSE(ip0->ttl == 1))
1796             {
1797               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1798               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1799                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1800                                            0);
1801               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1802               goto trace0;
1803             }
1804
1805           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1806
1807           /* Next configured feature, probably ip4-lookup */
1808           if (is_slow_path)
1809             {
1810               if (PREDICT_FALSE (proto0 == ~0))
1811                 {
1812                   snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1813                                              thread_index, now, vm);
1814                   goto trace0;
1815                 }
1816
1817               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1818                 {
1819                   next0 = icmp_in2out_slow_path 
1820                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1821                      next0, now, thread_index, &s0);
1822                   goto trace0;
1823                 }
1824             }
1825           else
1826             {
1827               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1828                 {
1829                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1830                   goto trace0;
1831                 }
1832             }
1833
1834           key0.addr = ip0->src_address;
1835           key0.port = udp0->src_port;
1836           key0.protocol = proto0;
1837           key0.fib_index = rx_fib_index0;
1838           
1839           kv0.key = key0.as_u64;
1840
1841           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
1842             {
1843               if (is_slow_path)
1844                 {
1845                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1846                       ip0, proto0, rx_fib_index0)) && !is_output_feature)
1847                     goto trace0;
1848
1849                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1850                                      &s0, node, next0, thread_index);
1851
1852                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1853                     goto trace0;
1854                 }
1855               else
1856                 {
1857                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1858                   goto trace0;
1859                 }
1860             }
1861           else
1862             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1863                                     value0.value);
1864
1865           old_addr0 = ip0->src_address.as_u32;
1866           ip0->src_address = s0->out2in.addr;
1867           new_addr0 = ip0->src_address.as_u32;
1868           if (!is_output_feature)
1869             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1870
1871           sum0 = ip0->checksum;
1872           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1873                                  ip4_header_t,
1874                                  src_address /* changed member */);
1875           ip0->checksum = ip_csum_fold (sum0);
1876
1877           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1878             {
1879               old_port0 = tcp0->src_port;
1880               tcp0->src_port = s0->out2in.port;
1881               new_port0 = tcp0->src_port;
1882
1883               sum0 = tcp0->checksum;
1884               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1885                                      ip4_header_t,
1886                                      dst_address /* changed member */);
1887               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1888                                      ip4_header_t /* cheat */,
1889                                      length /* changed member */);
1890               tcp0->checksum = ip_csum_fold(sum0);
1891             }
1892           else
1893             {
1894               old_port0 = udp0->src_port;
1895               udp0->src_port = s0->out2in.port;
1896               udp0->checksum = 0;
1897             }
1898
1899           /* Hairpinning */
1900           if (!is_output_feature)
1901             snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1902
1903           /* Accounting */
1904           s0->last_heard = now;
1905           s0->total_pkts++;
1906           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1907           /* Per-user LRU list maintenance for dynamic translation */
1908           if (!snat_is_session_static (s0))
1909             {
1910               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1911                                  s0->per_user_index);
1912               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1913                                   s0->per_user_list_head_index,
1914                                   s0->per_user_index);
1915             }
1916
1917         trace0:
1918           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1919                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1920             {
1921               snat_in2out_trace_t *t = 
1922                  vlib_add_trace (vm, node, b0, sizeof (*t));
1923               t->is_slow_path = is_slow_path;
1924               t->sw_if_index = sw_if_index0;
1925               t->next_index = next0;
1926                   t->session_index = ~0;
1927               if (s0)
1928                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1929             }
1930
1931           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1932
1933           /* verify speculative enqueue, maybe switch current next frame */
1934           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1935                                            to_next, n_left_to_next,
1936                                            bi0, next0);
1937         }
1938
1939       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1940     }
1941
1942   vlib_node_increment_counter (vm, stats_node_index, 
1943                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, 
1944                                pkts_processed);
1945   return frame->n_vectors;
1946 }
1947
1948 static uword
1949 snat_in2out_fast_path_fn (vlib_main_t * vm,
1950                           vlib_node_runtime_t * node,
1951                           vlib_frame_t * frame)
1952 {
1953   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0);
1954 }
1955
1956 VLIB_REGISTER_NODE (snat_in2out_node) = {
1957   .function = snat_in2out_fast_path_fn,
1958   .name = "snat-in2out",
1959   .vector_size = sizeof (u32),
1960   .format_trace = format_snat_in2out_trace,
1961   .type = VLIB_NODE_TYPE_INTERNAL,
1962
1963   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1964   .error_strings = snat_in2out_error_strings,
1965
1966   .runtime_data_bytes = sizeof (snat_runtime_t),
1967
1968   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1969
1970   /* edit / add dispositions here */
1971   .next_nodes = {
1972     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1973     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1974     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1975     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1976   },
1977 };
1978
1979 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1980
1981 static uword
1982 snat_in2out_output_fast_path_fn (vlib_main_t * vm,
1983                                  vlib_node_runtime_t * node,
1984                                  vlib_frame_t * frame)
1985 {
1986   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1);
1987 }
1988
1989 VLIB_REGISTER_NODE (snat_in2out_output_node) = {
1990   .function = snat_in2out_output_fast_path_fn,
1991   .name = "snat-in2out-output",
1992   .vector_size = sizeof (u32),
1993   .format_trace = format_snat_in2out_trace,
1994   .type = VLIB_NODE_TYPE_INTERNAL,
1995
1996   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1997   .error_strings = snat_in2out_error_strings,
1998
1999   .runtime_data_bytes = sizeof (snat_runtime_t),
2000
2001   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2002
2003   /* edit / add dispositions here */
2004   .next_nodes = {
2005     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2006     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2007     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-output-slowpath",
2008     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2009   },
2010 };
2011
2012 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
2013                               snat_in2out_output_fast_path_fn);
2014
2015 static uword
2016 snat_in2out_slow_path_fn (vlib_main_t * vm,
2017                           vlib_node_runtime_t * node,
2018                           vlib_frame_t * frame)
2019 {
2020   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0);
2021 }
2022
2023 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
2024   .function = snat_in2out_slow_path_fn,
2025   .name = "snat-in2out-slowpath",
2026   .vector_size = sizeof (u32),
2027   .format_trace = format_snat_in2out_trace,
2028   .type = VLIB_NODE_TYPE_INTERNAL,
2029
2030   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2031   .error_strings = snat_in2out_error_strings,
2032
2033   .runtime_data_bytes = sizeof (snat_runtime_t),
2034
2035   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2036
2037   /* edit / add dispositions here */
2038   .next_nodes = {
2039     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2040     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2041     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
2042     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2043   },
2044 };
2045
2046 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
2047                               snat_in2out_slow_path_fn);
2048
2049 static uword
2050 snat_in2out_output_slow_path_fn (vlib_main_t * vm,
2051                                  vlib_node_runtime_t * node,
2052                                  vlib_frame_t * frame)
2053 {
2054   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1);
2055 }
2056
2057 VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
2058   .function = snat_in2out_output_slow_path_fn,
2059   .name = "snat-in2out-output-slowpath",
2060   .vector_size = sizeof (u32),
2061   .format_trace = format_snat_in2out_trace,
2062   .type = VLIB_NODE_TYPE_INTERNAL,
2063
2064   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2065   .error_strings = snat_in2out_error_strings,
2066
2067   .runtime_data_bytes = sizeof (snat_runtime_t),
2068
2069   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2070
2071   /* edit / add dispositions here */
2072   .next_nodes = {
2073     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2074     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2075     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-output-slowpath",
2076     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2077   },
2078 };
2079
2080 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
2081                               snat_in2out_output_slow_path_fn);
2082
2083 /**************************/
2084 /*** deterministic mode ***/
2085 /**************************/
2086 static uword
2087 snat_det_in2out_node_fn (vlib_main_t * vm,
2088                          vlib_node_runtime_t * node,
2089                          vlib_frame_t * frame)
2090 {
2091   u32 n_left_from, * from, * to_next;
2092   snat_in2out_next_t next_index;
2093   u32 pkts_processed = 0;
2094   snat_main_t * sm = &snat_main;
2095   u32 now = (u32) vlib_time_now (vm);
2096   u32 thread_index = vlib_get_thread_index ();
2097
2098   from = vlib_frame_vector_args (frame);
2099   n_left_from = frame->n_vectors;
2100   next_index = node->cached_next_index;
2101
2102   while (n_left_from > 0)
2103     {
2104       u32 n_left_to_next;
2105
2106       vlib_get_next_frame (vm, node, next_index,
2107                            to_next, n_left_to_next);
2108
2109       while (n_left_from >= 4 && n_left_to_next >= 2)
2110         {
2111           u32 bi0, bi1;
2112           vlib_buffer_t * b0, * b1;
2113           u32 next0, next1;
2114           u32 sw_if_index0, sw_if_index1;
2115           ip4_header_t * ip0, * ip1;
2116           ip_csum_t sum0, sum1;
2117           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
2118           u16 old_port0, new_port0, lo_port0, i0;
2119           u16 old_port1, new_port1, lo_port1, i1;
2120           udp_header_t * udp0, * udp1;
2121           tcp_header_t * tcp0, * tcp1;
2122           u32 proto0, proto1;
2123           snat_det_out_key_t key0, key1;
2124           snat_det_map_t * dm0, * dm1;
2125           snat_det_session_t * ses0 = 0, * ses1 = 0;
2126           u32 rx_fib_index0, rx_fib_index1;
2127           icmp46_header_t * icmp0, * icmp1;
2128
2129           /* Prefetch next iteration. */
2130           {
2131             vlib_buffer_t * p2, * p3;
2132
2133             p2 = vlib_get_buffer (vm, from[2]);
2134             p3 = vlib_get_buffer (vm, from[3]);
2135
2136             vlib_prefetch_buffer_header (p2, LOAD);
2137             vlib_prefetch_buffer_header (p3, LOAD);
2138
2139             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2140             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2141           }
2142
2143           /* speculatively enqueue b0 and b1 to the current next frame */
2144           to_next[0] = bi0 = from[0];
2145           to_next[1] = bi1 = from[1];
2146           from += 2;
2147           to_next += 2;
2148           n_left_from -= 2;
2149           n_left_to_next -= 2;
2150
2151           b0 = vlib_get_buffer (vm, bi0);
2152           b1 = vlib_get_buffer (vm, bi1);
2153
2154           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2155           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
2156
2157           ip0 = vlib_buffer_get_current (b0);
2158           udp0 = ip4_next_header (ip0);
2159           tcp0 = (tcp_header_t *) udp0;
2160
2161           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2162
2163           if (PREDICT_FALSE(ip0->ttl == 1))
2164             {
2165               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2166               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2167                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2168                                            0);
2169               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2170               goto trace0;
2171             }
2172
2173           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2174
2175           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2176             {
2177               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2178               icmp0 = (icmp46_header_t *) udp0;
2179
2180               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2181                                   rx_fib_index0, node, next0, thread_index,
2182                                   &ses0, &dm0);
2183               goto trace0;
2184             }
2185
2186           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2187           if (PREDICT_FALSE(!dm0))
2188             {
2189               clib_warning("no match for internal host %U",
2190                            format_ip4_address, &ip0->src_address);
2191               next0 = SNAT_IN2OUT_NEXT_DROP;
2192               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2193               goto trace0;
2194             }
2195
2196           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2197
2198           key0.ext_host_addr = ip0->dst_address;
2199           key0.ext_host_port = tcp0->dst;
2200
2201           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2202           if (PREDICT_FALSE(!ses0))
2203             {
2204               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2205                 {
2206                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2207                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2208
2209                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2210                     continue;
2211
2212                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2213                   break;
2214                 }
2215               if (PREDICT_FALSE(!ses0))
2216                 {
2217                   /* too many sessions for user, send ICMP error packet */
2218
2219                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2220                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2221                                                ICMP4_destination_unreachable_destination_unreachable_host,
2222                                                0);
2223                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2224                   goto trace0;
2225                 }
2226             }
2227
2228           new_port0 = ses0->out.out_port;
2229
2230           old_addr0.as_u32 = ip0->src_address.as_u32;
2231           ip0->src_address.as_u32 = new_addr0.as_u32;
2232           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2233
2234           sum0 = ip0->checksum;
2235           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2236                                  ip4_header_t,
2237                                  src_address /* changed member */);
2238           ip0->checksum = ip_csum_fold (sum0);
2239
2240           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2241             {
2242               if (tcp0->flags & TCP_FLAG_SYN)
2243                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2244               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2245                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2246               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2247                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2248               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2249                 snat_det_ses_close(dm0, ses0);
2250               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2251                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2252               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2253                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2254
2255               old_port0 = tcp0->src;
2256               tcp0->src = new_port0;
2257
2258               sum0 = tcp0->checksum;
2259               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2260                                      ip4_header_t,
2261                                      dst_address /* changed member */);
2262               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2263                                      ip4_header_t /* cheat */,
2264                                      length /* changed member */);
2265               tcp0->checksum = ip_csum_fold(sum0);
2266             }
2267           else
2268             {
2269               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2270               old_port0 = udp0->src_port;
2271               udp0->src_port = new_port0;
2272               udp0->checksum = 0;
2273             }
2274
2275           switch(ses0->state)
2276             {
2277             case SNAT_SESSION_UDP_ACTIVE:
2278                 ses0->expire = now + sm->udp_timeout;
2279                 break;
2280             case SNAT_SESSION_TCP_SYN_SENT:
2281             case SNAT_SESSION_TCP_FIN_WAIT:
2282             case SNAT_SESSION_TCP_CLOSE_WAIT:
2283             case SNAT_SESSION_TCP_LAST_ACK:
2284                 ses0->expire = now + sm->tcp_transitory_timeout;
2285                 break;
2286             case SNAT_SESSION_TCP_ESTABLISHED:
2287                 ses0->expire = now + sm->tcp_established_timeout;
2288                 break;
2289             }
2290
2291         trace0:
2292           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2293                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2294             {
2295               snat_in2out_trace_t *t =
2296                  vlib_add_trace (vm, node, b0, sizeof (*t));
2297               t->is_slow_path = 0;
2298               t->sw_if_index = sw_if_index0;
2299               t->next_index = next0;
2300               t->session_index = ~0;
2301               if (ses0)
2302                 t->session_index = ses0 - dm0->sessions;
2303             }
2304
2305           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2306
2307           ip1 = vlib_buffer_get_current (b1);
2308           udp1 = ip4_next_header (ip1);
2309           tcp1 = (tcp_header_t *) udp1;
2310
2311           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2312
2313           if (PREDICT_FALSE(ip1->ttl == 1))
2314             {
2315               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2316               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2317                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2318                                            0);
2319               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2320               goto trace1;
2321             }
2322
2323           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2324
2325           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
2326             {
2327               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
2328               icmp1 = (icmp46_header_t *) udp1;
2329
2330               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
2331                                   rx_fib_index1, node, next1, thread_index,
2332                                   &ses1, &dm1);
2333               goto trace1;
2334             }
2335
2336           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
2337           if (PREDICT_FALSE(!dm1))
2338             {
2339               clib_warning("no match for internal host %U",
2340                            format_ip4_address, &ip0->src_address);
2341               next1 = SNAT_IN2OUT_NEXT_DROP;
2342               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2343               goto trace1;
2344             }
2345
2346           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
2347
2348           key1.ext_host_addr = ip1->dst_address;
2349           key1.ext_host_port = tcp1->dst;
2350
2351           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
2352           if (PREDICT_FALSE(!ses1))
2353             {
2354               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
2355                 {
2356                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
2357                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
2358
2359                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
2360                     continue;
2361
2362                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
2363                   break;
2364                 }
2365               if (PREDICT_FALSE(!ses1))
2366                 {
2367                   /* too many sessions for user, send ICMP error packet */
2368
2369                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2370                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
2371                                                ICMP4_destination_unreachable_destination_unreachable_host,
2372                                                0);
2373                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2374                   goto trace1;
2375                 }
2376             }
2377
2378           new_port1 = ses1->out.out_port;
2379
2380           old_addr1.as_u32 = ip1->src_address.as_u32;
2381           ip1->src_address.as_u32 = new_addr1.as_u32;
2382           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2383
2384           sum1 = ip1->checksum;
2385           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2386                                  ip4_header_t,
2387                                  src_address /* changed member */);
2388           ip1->checksum = ip_csum_fold (sum1);
2389
2390           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
2391             {
2392               if (tcp1->flags & TCP_FLAG_SYN)
2393                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
2394               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
2395                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2396               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
2397                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
2398               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
2399                 snat_det_ses_close(dm1, ses1);
2400               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2401                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
2402               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
2403                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2404
2405               old_port1 = tcp1->src;
2406               tcp1->src = new_port1;
2407
2408               sum1 = tcp1->checksum;
2409               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2410                                      ip4_header_t,
2411                                      dst_address /* changed member */);
2412               sum1 = ip_csum_update (sum1, old_port1, new_port1,
2413                                      ip4_header_t /* cheat */,
2414                                      length /* changed member */);
2415               tcp1->checksum = ip_csum_fold(sum1);
2416             }
2417           else
2418             {
2419               ses1->state = SNAT_SESSION_UDP_ACTIVE;
2420               old_port1 = udp1->src_port;
2421               udp1->src_port = new_port1;
2422               udp1->checksum = 0;
2423             }
2424
2425           switch(ses1->state)
2426             {
2427             case SNAT_SESSION_UDP_ACTIVE:
2428                 ses1->expire = now + sm->udp_timeout;
2429                 break;
2430             case SNAT_SESSION_TCP_SYN_SENT:
2431             case SNAT_SESSION_TCP_FIN_WAIT:
2432             case SNAT_SESSION_TCP_CLOSE_WAIT:
2433             case SNAT_SESSION_TCP_LAST_ACK:
2434                 ses1->expire = now + sm->tcp_transitory_timeout;
2435                 break;
2436             case SNAT_SESSION_TCP_ESTABLISHED:
2437                 ses1->expire = now + sm->tcp_established_timeout;
2438                 break;
2439             }
2440
2441         trace1:
2442           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2443                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2444             {
2445               snat_in2out_trace_t *t =
2446                  vlib_add_trace (vm, node, b1, sizeof (*t));
2447               t->is_slow_path = 0;
2448               t->sw_if_index = sw_if_index1;
2449               t->next_index = next1;
2450               t->session_index = ~0;
2451               if (ses1)
2452                 t->session_index = ses1 - dm1->sessions;
2453             }
2454
2455           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
2456
2457           /* verify speculative enqueues, maybe switch current next frame */
2458           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2459                                            to_next, n_left_to_next,
2460                                            bi0, bi1, next0, next1);
2461          }
2462
2463       while (n_left_from > 0 && n_left_to_next > 0)
2464         {
2465           u32 bi0;
2466           vlib_buffer_t * b0;
2467           u32 next0;
2468           u32 sw_if_index0;
2469           ip4_header_t * ip0;
2470           ip_csum_t sum0;
2471           ip4_address_t new_addr0, old_addr0;
2472           u16 old_port0, new_port0, lo_port0, i0;
2473           udp_header_t * udp0;
2474           tcp_header_t * tcp0;
2475           u32 proto0;
2476           snat_det_out_key_t key0;
2477           snat_det_map_t * dm0;
2478           snat_det_session_t * ses0 = 0;
2479           u32 rx_fib_index0;
2480           icmp46_header_t * icmp0;
2481
2482           /* speculatively enqueue b0 to the current next frame */
2483           bi0 = from[0];
2484           to_next[0] = bi0;
2485           from += 1;
2486           to_next += 1;
2487           n_left_from -= 1;
2488           n_left_to_next -= 1;
2489
2490           b0 = vlib_get_buffer (vm, bi0);
2491           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2492
2493           ip0 = vlib_buffer_get_current (b0);
2494           udp0 = ip4_next_header (ip0);
2495           tcp0 = (tcp_header_t *) udp0;
2496
2497           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2498
2499           if (PREDICT_FALSE(ip0->ttl == 1))
2500             {
2501               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2502               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2503                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2504                                            0);
2505               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2506               goto trace00;
2507             }
2508
2509           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2510
2511           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2512             {
2513               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2514               icmp0 = (icmp46_header_t *) udp0;
2515
2516               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2517                                   rx_fib_index0, node, next0, thread_index,
2518                                   &ses0, &dm0);
2519               goto trace00;
2520             }
2521
2522           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2523           if (PREDICT_FALSE(!dm0))
2524             {
2525               clib_warning("no match for internal host %U",
2526                            format_ip4_address, &ip0->src_address);
2527               next0 = SNAT_IN2OUT_NEXT_DROP;
2528               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2529               goto trace00;
2530             }
2531
2532           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2533
2534           key0.ext_host_addr = ip0->dst_address;
2535           key0.ext_host_port = tcp0->dst;
2536
2537           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2538           if (PREDICT_FALSE(!ses0))
2539             {
2540               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2541                 {
2542                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2543                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2544
2545                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2546                     continue;
2547
2548                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2549                   break;
2550                 }
2551               if (PREDICT_FALSE(!ses0))
2552                 {
2553                   /* too many sessions for user, send ICMP error packet */
2554
2555                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2556                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2557                                                ICMP4_destination_unreachable_destination_unreachable_host,
2558                                                0);
2559                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2560                   goto trace00;
2561                 }
2562             }
2563
2564           new_port0 = ses0->out.out_port;
2565
2566           old_addr0.as_u32 = ip0->src_address.as_u32;
2567           ip0->src_address.as_u32 = new_addr0.as_u32;
2568           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2569
2570           sum0 = ip0->checksum;
2571           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2572                                  ip4_header_t,
2573                                  src_address /* changed member */);
2574           ip0->checksum = ip_csum_fold (sum0);
2575
2576           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2577             {
2578               if (tcp0->flags & TCP_FLAG_SYN)
2579                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2580               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2581                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2582               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2583                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2584               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2585                 snat_det_ses_close(dm0, ses0);
2586               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2587                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2588               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2589                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2590
2591               old_port0 = tcp0->src;
2592               tcp0->src = new_port0;
2593
2594               sum0 = tcp0->checksum;
2595               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2596                                      ip4_header_t,
2597                                      dst_address /* changed member */);
2598               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2599                                      ip4_header_t /* cheat */,
2600                                      length /* changed member */);
2601               tcp0->checksum = ip_csum_fold(sum0);
2602             }
2603           else
2604             {
2605               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2606               old_port0 = udp0->src_port;
2607               udp0->src_port = new_port0;
2608               udp0->checksum = 0;
2609             }
2610
2611           switch(ses0->state)
2612             {
2613             case SNAT_SESSION_UDP_ACTIVE:
2614                 ses0->expire = now + sm->udp_timeout;
2615                 break;
2616             case SNAT_SESSION_TCP_SYN_SENT:
2617             case SNAT_SESSION_TCP_FIN_WAIT:
2618             case SNAT_SESSION_TCP_CLOSE_WAIT:
2619             case SNAT_SESSION_TCP_LAST_ACK:
2620                 ses0->expire = now + sm->tcp_transitory_timeout;
2621                 break;
2622             case SNAT_SESSION_TCP_ESTABLISHED:
2623                 ses0->expire = now + sm->tcp_established_timeout;
2624                 break;
2625             }
2626
2627         trace00:
2628           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2629                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2630             {
2631               snat_in2out_trace_t *t =
2632                  vlib_add_trace (vm, node, b0, sizeof (*t));
2633               t->is_slow_path = 0;
2634               t->sw_if_index = sw_if_index0;
2635               t->next_index = next0;
2636               t->session_index = ~0;
2637               if (ses0)
2638                 t->session_index = ses0 - dm0->sessions;
2639             }
2640
2641           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2642
2643           /* verify speculative enqueue, maybe switch current next frame */
2644           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2645                                            to_next, n_left_to_next,
2646                                            bi0, next0);
2647         }
2648
2649       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2650     }
2651
2652   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
2653                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2654                                pkts_processed);
2655   return frame->n_vectors;
2656 }
2657
2658 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
2659   .function = snat_det_in2out_node_fn,
2660   .name = "snat-det-in2out",
2661   .vector_size = sizeof (u32),
2662   .format_trace = format_snat_in2out_trace,
2663   .type = VLIB_NODE_TYPE_INTERNAL,
2664
2665   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2666   .error_strings = snat_in2out_error_strings,
2667
2668   .runtime_data_bytes = sizeof (snat_runtime_t),
2669
2670   .n_next_nodes = 3,
2671
2672   /* edit / add dispositions here */
2673   .next_nodes = {
2674     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2675     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2676     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2677   },
2678 };
2679
2680 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
2681
2682 /**
2683  * Get address and port values to be used for packet SNAT translation
2684  * and create session if needed
2685  *
2686  * @param[in,out] sm             SNAT main
2687  * @param[in,out] node           SNAT node runtime
2688  * @param[in] thread_index       thread index
2689  * @param[in,out] b0             buffer containing packet to be translated
2690  * @param[out] p_proto           protocol used for matching
2691  * @param[out] p_value           address and port after NAT translation
2692  * @param[out] p_dont_translate  if packet should not be translated
2693  * @param d                      optional parameter
2694  * @param e                      optional parameter
2695  */
2696 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
2697                           u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
2698                           snat_session_key_t *p_value,
2699                           u8 *p_dont_translate, void *d, void *e)
2700 {
2701   ip4_header_t *ip0;
2702   icmp46_header_t *icmp0;
2703   u32 sw_if_index0;
2704   u32 rx_fib_index0;
2705   u8 protocol;
2706   snat_det_out_key_t key0;
2707   u8 dont_translate = 0;
2708   u32 next0 = ~0;
2709   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2710   ip4_header_t *inner_ip0;
2711   void *l4_header = 0;
2712   icmp46_header_t *inner_icmp0;
2713   snat_det_map_t * dm0 = 0;
2714   ip4_address_t new_addr0;
2715   u16 lo_port0, i0;
2716   snat_det_session_t * ses0 = 0;
2717   ip4_address_t in_addr;
2718   u16 in_port;
2719
2720   ip0 = vlib_buffer_get_current (b0);
2721   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2722   echo0 = (icmp_echo_header_t *)(icmp0+1);
2723   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2724   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
2725
2726   if (!icmp_is_error_message (icmp0))
2727     {
2728       protocol = SNAT_PROTOCOL_ICMP;
2729       in_addr = ip0->src_address;
2730       in_port = echo0->identifier;
2731     }
2732   else
2733     {
2734       inner_ip0 = (ip4_header_t *)(echo0+1);
2735       l4_header = ip4_next_header (inner_ip0);
2736       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2737       in_addr = inner_ip0->dst_address;
2738       switch (protocol)
2739         {
2740         case SNAT_PROTOCOL_ICMP:
2741           inner_icmp0 = (icmp46_header_t*)l4_header;
2742           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2743           in_port = inner_echo0->identifier;
2744           break;
2745         case SNAT_PROTOCOL_UDP:
2746         case SNAT_PROTOCOL_TCP:
2747           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2748           break;
2749         default:
2750           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
2751           next0 = SNAT_IN2OUT_NEXT_DROP;
2752           goto out;
2753         }
2754     }
2755
2756   dm0 = snat_det_map_by_user(sm, &in_addr);
2757   if (PREDICT_FALSE(!dm0))
2758     {
2759       clib_warning("no match for internal host %U",
2760                    format_ip4_address, &in_addr);
2761       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2762           IP_PROTOCOL_ICMP, rx_fib_index0)))
2763         {
2764           dont_translate = 1;
2765           goto out;
2766         }
2767       next0 = SNAT_IN2OUT_NEXT_DROP;
2768       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2769       goto out;
2770     }
2771
2772   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
2773
2774   key0.ext_host_addr = ip0->dst_address;
2775   key0.ext_host_port = 0;
2776
2777   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
2778   if (PREDICT_FALSE(!ses0))
2779     {
2780       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2781           IP_PROTOCOL_ICMP, rx_fib_index0)))
2782         {
2783           dont_translate = 1;
2784           goto out;
2785         }
2786       if (icmp0->type != ICMP4_echo_request)
2787         {
2788           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2789           next0 = SNAT_IN2OUT_NEXT_DROP;
2790           goto out;
2791         }
2792       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2793         {
2794           key0.out_port = clib_host_to_net_u16 (lo_port0 +
2795             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
2796
2797           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
2798             continue;
2799
2800           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
2801           break;
2802         }
2803       if (PREDICT_FALSE(!ses0))
2804         {
2805           next0 = SNAT_IN2OUT_NEXT_DROP;
2806           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
2807           goto out;
2808         }
2809     }
2810
2811   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
2812                     !icmp_is_error_message (icmp0)))
2813     {
2814       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2815       next0 = SNAT_IN2OUT_NEXT_DROP;
2816       goto out;
2817     }
2818
2819   u32 now = (u32) vlib_time_now (sm->vlib_main);
2820
2821   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
2822   ses0->expire = now + sm->icmp_timeout;
2823
2824 out:
2825   *p_proto = protocol;
2826   if (ses0)
2827     {
2828       p_value->addr = new_addr0;
2829       p_value->fib_index = sm->outside_fib_index;
2830       p_value->port = ses0->out.out_port;
2831     }
2832   *p_dont_translate = dont_translate;
2833   if (d)
2834     *(snat_det_session_t**)d = ses0;
2835   if (e)
2836     *(snat_det_map_t**)e = dm0;
2837   return next0;
2838 }
2839
2840 /**********************/
2841 /*** worker handoff ***/
2842 /**********************/
2843 static inline uword
2844 snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
2845                                       vlib_node_runtime_t * node,
2846                                       vlib_frame_t * frame,
2847                                       u8 is_output)
2848 {
2849   snat_main_t *sm = &snat_main;
2850   vlib_thread_main_t *tm = vlib_get_thread_main ();
2851   u32 n_left_from, *from, *to_next = 0;
2852   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
2853   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
2854     = 0;
2855   vlib_frame_queue_elt_t *hf = 0;
2856   vlib_frame_t *f = 0;
2857   int i;
2858   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
2859   u32 next_worker_index = 0;
2860   u32 current_worker_index = ~0;
2861   u32 thread_index = vlib_get_thread_index ();
2862   u32 fq_index;
2863   u32 to_node_index;
2864
2865   ASSERT (vec_len (sm->workers));
2866
2867   if (is_output)
2868     {
2869       fq_index = sm->fq_in2out_output_index;
2870       to_node_index = sm->in2out_output_node_index;
2871     }
2872   else
2873     {
2874       fq_index = sm->fq_in2out_index;
2875       to_node_index = sm->in2out_node_index;
2876     }
2877
2878   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
2879     {
2880       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
2881
2882       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
2883                                sm->first_worker_index + sm->num_workers - 1,
2884                                (vlib_frame_queue_t *) (~0));
2885     }
2886
2887   from = vlib_frame_vector_args (frame);
2888   n_left_from = frame->n_vectors;
2889
2890   while (n_left_from > 0)
2891     {
2892       u32 bi0;
2893       vlib_buffer_t *b0;
2894       u32 sw_if_index0;
2895       u32 rx_fib_index0;
2896       ip4_header_t * ip0;
2897       u8 do_handoff;
2898
2899       bi0 = from[0];
2900       from += 1;
2901       n_left_from -= 1;
2902
2903       b0 = vlib_get_buffer (vm, bi0);
2904
2905       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
2906       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2907
2908       ip0 = vlib_buffer_get_current (b0);
2909
2910       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
2911
2912       if (PREDICT_FALSE (next_worker_index != thread_index))
2913         {
2914           do_handoff = 1;
2915
2916           if (next_worker_index != current_worker_index)
2917             {
2918               if (hf)
2919                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2920
2921               hf = vlib_get_worker_handoff_queue_elt (fq_index,
2922                                                       next_worker_index,
2923                                                       handoff_queue_elt_by_worker_index);
2924
2925               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2926               to_next_worker = &hf->buffer_index[hf->n_vectors];
2927               current_worker_index = next_worker_index;
2928             }
2929
2930           /* enqueue to correct worker thread */
2931           to_next_worker[0] = bi0;
2932           to_next_worker++;
2933           n_left_to_next_worker--;
2934
2935           if (n_left_to_next_worker == 0)
2936             {
2937               hf->n_vectors = VLIB_FRAME_SIZE;
2938               vlib_put_frame_queue_elt (hf);
2939               current_worker_index = ~0;
2940               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2941               hf = 0;
2942             }
2943         }
2944       else
2945         {
2946           do_handoff = 0;
2947           /* if this is 1st frame */
2948           if (!f)
2949             {
2950               f = vlib_get_frame_to_node (vm, to_node_index);
2951               to_next = vlib_frame_vector_args (f);
2952             }
2953
2954           to_next[0] = bi0;
2955           to_next += 1;
2956           f->n_vectors++;
2957         }
2958
2959       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2960                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2961         {
2962           snat_in2out_worker_handoff_trace_t *t =
2963             vlib_add_trace (vm, node, b0, sizeof (*t));
2964           t->next_worker_index = next_worker_index;
2965           t->do_handoff = do_handoff;
2966         }
2967     }
2968
2969   if (f)
2970     vlib_put_frame_to_node (vm, to_node_index, f);
2971
2972   if (hf)
2973     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2974
2975   /* Ship frames to the worker nodes */
2976   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2977     {
2978       if (handoff_queue_elt_by_worker_index[i])
2979         {
2980           hf = handoff_queue_elt_by_worker_index[i];
2981           /*
2982            * It works better to let the handoff node
2983            * rate-adapt, always ship the handoff queue element.
2984            */
2985           if (1 || hf->n_vectors == hf->last_n_vectors)
2986             {
2987               vlib_put_frame_queue_elt (hf);
2988               handoff_queue_elt_by_worker_index[i] = 0;
2989             }
2990           else
2991             hf->last_n_vectors = hf->n_vectors;
2992         }
2993       congested_handoff_queue_by_worker_index[i] =
2994         (vlib_frame_queue_t *) (~0);
2995     }
2996   hf = 0;
2997   current_worker_index = ~0;
2998   return frame->n_vectors;
2999 }
3000
3001 static uword
3002 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
3003                                vlib_node_runtime_t * node,
3004                                vlib_frame_t * frame)
3005 {
3006   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
3007 }
3008
3009 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
3010   .function = snat_in2out_worker_handoff_fn,
3011   .name = "snat-in2out-worker-handoff",
3012   .vector_size = sizeof (u32),
3013   .format_trace = format_snat_in2out_worker_handoff_trace,
3014   .type = VLIB_NODE_TYPE_INTERNAL,
3015
3016   .n_next_nodes = 1,
3017
3018   .next_nodes = {
3019     [0] = "error-drop",
3020   },
3021 };
3022
3023 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
3024                               snat_in2out_worker_handoff_fn);
3025
3026 static uword
3027 snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
3028                                       vlib_node_runtime_t * node,
3029                                       vlib_frame_t * frame)
3030 {
3031   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
3032 }
3033
3034 VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
3035   .function = snat_in2out_output_worker_handoff_fn,
3036   .name = "snat-in2out-output-worker-handoff",
3037   .vector_size = sizeof (u32),
3038   .format_trace = format_snat_in2out_worker_handoff_trace,
3039   .type = VLIB_NODE_TYPE_INTERNAL,
3040
3041   .n_next_nodes = 1,
3042
3043   .next_nodes = {
3044     [0] = "error-drop",
3045   },
3046 };
3047
3048 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
3049                               snat_in2out_output_worker_handoff_fn);
3050
3051 static_always_inline int
3052 is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
3053 {
3054   snat_address_t * ap;
3055   clib_bihash_kv_8_8_t kv, value;
3056   snat_session_key_t m_key;
3057
3058   vec_foreach (ap, sm->addresses)
3059     {
3060       if (ap->addr.as_u32 == dst_addr->as_u32)
3061         return 1;
3062     }
3063
3064   m_key.addr.as_u32 = dst_addr->as_u32;
3065   m_key.fib_index = sm->outside_fib_index;
3066   m_key.port = 0;
3067   m_key.protocol = 0;
3068   kv.key = m_key.as_u64;
3069   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
3070     return 1;
3071
3072   return 0;
3073 }
3074
3075 static uword
3076 snat_hairpin_dst_fn (vlib_main_t * vm,
3077                      vlib_node_runtime_t * node,
3078                      vlib_frame_t * frame)
3079 {
3080   u32 n_left_from, * from, * to_next;
3081   snat_in2out_next_t next_index;
3082   u32 pkts_processed = 0;
3083   snat_main_t * sm = &snat_main;
3084
3085   from = vlib_frame_vector_args (frame);
3086   n_left_from = frame->n_vectors;
3087   next_index = node->cached_next_index;
3088
3089   while (n_left_from > 0)
3090     {
3091       u32 n_left_to_next;
3092
3093       vlib_get_next_frame (vm, node, next_index,
3094                            to_next, n_left_to_next);
3095
3096       while (n_left_from > 0 && n_left_to_next > 0)
3097         {
3098           u32 bi0;
3099           vlib_buffer_t * b0;
3100           u32 next0;
3101           ip4_header_t * ip0;
3102           u32 proto0;
3103
3104           /* speculatively enqueue b0 to the current next frame */
3105           bi0 = from[0];
3106           to_next[0] = bi0;
3107           from += 1;
3108           to_next += 1;
3109           n_left_from -= 1;
3110           n_left_to_next -= 1;
3111
3112           b0 = vlib_get_buffer (vm, bi0);
3113           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3114           ip0 = vlib_buffer_get_current (b0);
3115
3116           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3117
3118           vnet_buffer (b0)->snat.flags = 0;
3119           if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
3120             {
3121               if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
3122                 {
3123                   udp_header_t * udp0 = ip4_next_header (ip0);
3124                   tcp_header_t * tcp0 = (tcp_header_t *) udp0;
3125
3126                   snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3127                 }
3128               else if (proto0 == SNAT_PROTOCOL_ICMP)
3129                 {
3130                   icmp46_header_t * icmp0 = ip4_next_header (ip0);
3131
3132                   snat_icmp_hairpinning (sm, b0, ip0, icmp0);
3133                 }
3134               else
3135                 {
3136                   snat_hairpinning_unknown_proto (sm, b0, ip0);
3137                 }
3138
3139               vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
3140               clib_warning("is hairpinning");
3141             }
3142
3143           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3144
3145           /* verify speculative enqueue, maybe switch current next frame */
3146           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3147                                            to_next, n_left_to_next,
3148                                            bi0, next0);
3149          }
3150
3151       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3152     }
3153
3154   vlib_node_increment_counter (vm, snat_hairpin_dst_node.index,
3155                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3156                                pkts_processed);
3157   return frame->n_vectors;
3158 }
3159
3160 VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
3161   .function = snat_hairpin_dst_fn,
3162   .name = "snat-hairpin-dst",
3163   .vector_size = sizeof (u32),
3164   .type = VLIB_NODE_TYPE_INTERNAL,
3165   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3166   .error_strings = snat_in2out_error_strings,
3167   .n_next_nodes = 2,
3168   .next_nodes = {
3169     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3170     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3171   },
3172 };
3173
3174 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node,
3175                               snat_hairpin_dst_fn);
3176
3177 static uword
3178 snat_hairpin_src_fn (vlib_main_t * vm,
3179                      vlib_node_runtime_t * node,
3180                      vlib_frame_t * frame)
3181 {
3182   u32 n_left_from, * from, * to_next;
3183   snat_in2out_next_t next_index;
3184   u32 pkts_processed = 0;
3185   snat_main_t *sm = &snat_main;
3186
3187   from = vlib_frame_vector_args (frame);
3188   n_left_from = frame->n_vectors;
3189   next_index = node->cached_next_index;
3190
3191   while (n_left_from > 0)
3192     {
3193       u32 n_left_to_next;
3194
3195       vlib_get_next_frame (vm, node, next_index,
3196                            to_next, n_left_to_next);
3197
3198       while (n_left_from > 0 && n_left_to_next > 0)
3199         {
3200           u32 bi0;
3201           vlib_buffer_t * b0;
3202           u32 next0;
3203
3204           /* speculatively enqueue b0 to the current next frame */
3205           bi0 = from[0];
3206           to_next[0] = bi0;
3207           from += 1;
3208           to_next += 1;
3209           n_left_from -= 1;
3210           n_left_to_next -= 1;
3211
3212           b0 = vlib_get_buffer (vm, bi0);
3213           next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT;
3214
3215           if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) & SNAT_FLAG_HAIRPINNING))
3216             {
3217               if (PREDICT_TRUE (sm->num_workers > 1))
3218                 next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
3219               else
3220                 next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
3221             }
3222
3223           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3224
3225           /* verify speculative enqueue, maybe switch current next frame */
3226           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3227                                            to_next, n_left_to_next,
3228                                            bi0, next0);
3229          }
3230
3231       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3232     }
3233
3234   vlib_node_increment_counter (vm, snat_hairpin_src_node.index,
3235                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3236                                pkts_processed);
3237   return frame->n_vectors;
3238 }
3239
3240 VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
3241   .function = snat_hairpin_src_fn,
3242   .name = "snat-hairpin-src",
3243   .vector_size = sizeof (u32),
3244   .type = VLIB_NODE_TYPE_INTERNAL,
3245   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3246   .error_strings = snat_in2out_error_strings,
3247   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
3248   .next_nodes = {
3249      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
3250      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "snat-in2out-output",
3251      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
3252      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "snat-in2out-output-worker-handoff",
3253   },
3254 };
3255
3256 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node,
3257                               snat_hairpin_src_fn);
3258
3259 static uword
3260 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
3261                                 vlib_node_runtime_t * node,
3262                                 vlib_frame_t * frame)
3263 {
3264   u32 n_left_from, * from, * to_next;
3265   snat_in2out_next_t next_index;
3266   u32 pkts_processed = 0;
3267   snat_main_t * sm = &snat_main;
3268   u32 stats_node_index;
3269
3270   stats_node_index = snat_in2out_fast_node.index;
3271
3272   from = vlib_frame_vector_args (frame);
3273   n_left_from = frame->n_vectors;
3274   next_index = node->cached_next_index;
3275
3276   while (n_left_from > 0)
3277     {
3278       u32 n_left_to_next;
3279
3280       vlib_get_next_frame (vm, node, next_index,
3281                            to_next, n_left_to_next);
3282
3283       while (n_left_from > 0 && n_left_to_next > 0)
3284         {
3285           u32 bi0;
3286           vlib_buffer_t * b0;
3287           u32 next0;
3288           u32 sw_if_index0;
3289           ip4_header_t * ip0;
3290           ip_csum_t sum0;
3291           u32 new_addr0, old_addr0;
3292           u16 old_port0, new_port0;
3293           udp_header_t * udp0;
3294           tcp_header_t * tcp0;
3295           icmp46_header_t * icmp0;
3296           snat_session_key_t key0, sm0;
3297           u32 proto0;
3298           u32 rx_fib_index0;
3299
3300           /* speculatively enqueue b0 to the current next frame */
3301           bi0 = from[0];
3302           to_next[0] = bi0;
3303           from += 1;
3304           to_next += 1;
3305           n_left_from -= 1;
3306           n_left_to_next -= 1;
3307
3308           b0 = vlib_get_buffer (vm, bi0);
3309           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3310
3311           ip0 = vlib_buffer_get_current (b0);
3312           udp0 = ip4_next_header (ip0);
3313           tcp0 = (tcp_header_t *) udp0;
3314           icmp0 = (icmp46_header_t *) udp0;
3315
3316           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3317           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3318
3319           if (PREDICT_FALSE(ip0->ttl == 1))
3320             {
3321               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3322               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3323                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3324                                            0);
3325               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3326               goto trace0;
3327             }
3328
3329           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3330
3331           if (PREDICT_FALSE (proto0 == ~0))
3332               goto trace0;
3333
3334           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3335             {
3336               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
3337                                   rx_fib_index0, node, next0, ~0, 0, 0);
3338               goto trace0;
3339             }
3340
3341           key0.addr = ip0->src_address;
3342           key0.port = udp0->src_port;
3343           key0.fib_index = rx_fib_index0;
3344
3345           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0))
3346             {
3347               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3348               next0= SNAT_IN2OUT_NEXT_DROP;
3349               goto trace0;
3350             }
3351
3352           new_addr0 = sm0.addr.as_u32;
3353           new_port0 = sm0.port;
3354           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
3355           old_addr0 = ip0->src_address.as_u32;
3356           ip0->src_address.as_u32 = new_addr0;
3357
3358           sum0 = ip0->checksum;
3359           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3360                                  ip4_header_t,
3361                                  src_address /* changed member */);
3362           ip0->checksum = ip_csum_fold (sum0);
3363
3364           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
3365             {
3366               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3367                 {
3368                   old_port0 = tcp0->src_port;
3369                   tcp0->src_port = new_port0;
3370
3371                   sum0 = tcp0->checksum;
3372                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3373                                          ip4_header_t,
3374                                          dst_address /* changed member */);
3375                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
3376                                          ip4_header_t /* cheat */,
3377                                          length /* changed member */);
3378                   tcp0->checksum = ip_csum_fold(sum0);
3379                 }
3380               else
3381                 {
3382                   old_port0 = udp0->src_port;
3383                   udp0->src_port = new_port0;
3384                   udp0->checksum = 0;
3385                 }
3386             }
3387           else
3388             {
3389               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3390                 {
3391                   sum0 = tcp0->checksum;
3392                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3393                                          ip4_header_t,
3394                                          dst_address /* changed member */);
3395                   tcp0->checksum = ip_csum_fold(sum0);
3396                 }
3397             }
3398
3399           /* Hairpinning */
3400           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3401
3402         trace0:
3403           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3404                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3405             {
3406               snat_in2out_trace_t *t =
3407                  vlib_add_trace (vm, node, b0, sizeof (*t));
3408               t->sw_if_index = sw_if_index0;
3409               t->next_index = next0;
3410             }
3411
3412           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3413
3414           /* verify speculative enqueue, maybe switch current next frame */
3415           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3416                                            to_next, n_left_to_next,
3417                                            bi0, next0);
3418         }
3419
3420       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3421     }
3422
3423   vlib_node_increment_counter (vm, stats_node_index,
3424                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3425                                pkts_processed);
3426   return frame->n_vectors;
3427 }
3428
3429
3430 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
3431   .function = snat_in2out_fast_static_map_fn,
3432   .name = "snat-in2out-fast",
3433   .vector_size = sizeof (u32),
3434   .format_trace = format_snat_in2out_fast_trace,
3435   .type = VLIB_NODE_TYPE_INTERNAL,
3436   
3437   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3438   .error_strings = snat_in2out_error_strings,
3439
3440   .runtime_data_bytes = sizeof (snat_runtime_t),
3441   
3442   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3443
3444   /* edit / add dispositions here */
3445   .next_nodes = {
3446     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3447     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3448     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
3449     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3450   },
3451 };
3452
3453 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);