NAT: session number limitation to avoid running out of memory crash (VPP-984)
[vpp.git] / src / plugins / nat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <nat/nat.h>
25 #include <nat/nat_ipfix_logging.h>
26 #include <nat/nat_det.h>
27
28 #include <vppinfra/hash.h>
29 #include <vppinfra/error.h>
30 #include <vppinfra/elog.h>
31
32 typedef struct {
33   u32 sw_if_index;
34   u32 next_index;
35   u32 session_index;
36   u32 is_slow_path;
37 } snat_in2out_trace_t;
38
39 typedef struct {
40   u32 next_worker_index;
41   u8 do_handoff;
42 } snat_in2out_worker_handoff_trace_t;
43
44 /* packet trace format function */
45 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
46 {
47   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
50   char * tag;
51
52   tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
53
54   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
55               t->sw_if_index, t->next_index, t->session_index);
56
57   return s;
58 }
59
60 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
61 {
62   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
63   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
64   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
65
66   s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
67               t->sw_if_index, t->next_index);
68
69   return s;
70 }
71
72 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
73 {
74   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
75   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
76   snat_in2out_worker_handoff_trace_t * t =
77     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
78   char * m;
79
80   m = t->do_handoff ? "next worker" : "same worker";
81   s = format (s, "NAT44_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
82
83   return s;
84 }
85
86 vlib_node_registration_t snat_in2out_node;
87 vlib_node_registration_t snat_in2out_slowpath_node;
88 vlib_node_registration_t snat_in2out_fast_node;
89 vlib_node_registration_t snat_in2out_worker_handoff_node;
90 vlib_node_registration_t snat_det_in2out_node;
91 vlib_node_registration_t snat_in2out_output_node;
92 vlib_node_registration_t snat_in2out_output_slowpath_node;
93 vlib_node_registration_t snat_in2out_output_worker_handoff_node;
94 vlib_node_registration_t snat_hairpin_dst_node;
95 vlib_node_registration_t snat_hairpin_src_node;
96
97
/* X-macro list of the in2out error counters: _(symbol suffix, description).
 * Expanded below into the error enum and the matching string table, so the
 * two always stay in the same order. */
#define foreach_snat_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(IN2OUT_PACKETS, "Good in2out packets processed")      \
_(OUT_OF_PORTS, "Out of ports")                         \
_(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(NO_TRANSLATION, "No translation")                     \
_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")

/* Error codes SNAT_IN2OUT_ERROR_<symbol>; SNAT_IN2OUT_N_ERROR is the count. */
typedef enum
{
#define _(name, text) SNAT_IN2OUT_ERROR_##name,
  foreach_snat_in2out_error
#undef _
    SNAT_IN2OUT_N_ERROR,
} snat_in2out_error_t;

/* Descriptions indexed by snat_in2out_error_t. */
static char *snat_in2out_error_strings[] = {
#define _(name, text) text,
  foreach_snat_in2out_error
#undef _
};
119
/* Next-node indices returned by the in2out processing code in this file. */
typedef enum
{
  SNAT_IN2OUT_NEXT_LOOKUP = 0,
  SNAT_IN2OUT_NEXT_DROP,        /* returned whenever a packet must be discarded */
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_N_NEXT,           /* number of next indices, keep last */
} snat_in2out_next_t;
127
/* Next-node indices of the hairpin source node. */
typedef enum
{
  SNAT_HAIRPIN_SRC_NEXT_DROP = 0,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
  SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
  SNAT_HAIRPIN_SRC_N_NEXT,      /* number of next indices, keep last */
} snat_hairpin_next_t;
135
136 /**
137  * @brief Check if packet should be translated
138  *
139  * Packets aimed at outside interface and external addresss with active session
140  * should be translated.
141  *
142  * @param sm            NAT main
143  * @param rt            NAT runtime data
144  * @param sw_if_index0  index of the inside interface
145  * @param ip0           IPv4 header
146  * @param proto0        NAT protocol
147  * @param rx_fib_index0 RX FIB index
148  *
149  * @returns 0 if packet should be translated otherwise 1
150  */
151 static inline int
152 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
153                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
154                          u32 rx_fib_index0)
155 {
156   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
157   fib_prefix_t pfx = {
158     .fp_proto = FIB_PROTOCOL_IP4,
159     .fp_len = 32,
160     .fp_addr = {
161         .ip4.as_u32 = ip0->dst_address.as_u32,
162     },
163   };
164
165   /* Don't NAT packet aimed at the intfc address */
166   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
167                                       ip0->dst_address.as_u32)))
168     return 1;
169
170   fei = fib_table_lookup (rx_fib_index0, &pfx);
171   if (FIB_NODE_INDEX_INVALID != fei)
172     {
173       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
174       if (sw_if_index == ~0)
175         {
176           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
177           if (FIB_NODE_INDEX_INVALID != fei)
178             sw_if_index = fib_entry_get_resolving_interface (fei);
179         }
180       snat_interface_t *i;
181       pool_foreach (i, sm->interfaces,
182       ({
183         /* NAT packet aimed at outside interface */
184         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
185           return 0;
186       }));
187     }
188
189   return 1;
190 }
191
192 static inline int
193 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
194                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
195                     u32 rx_fib_index0, u32 thread_index)
196 {
197   udp_header_t * udp0 = ip4_next_header (ip0);
198   snat_session_key_t key0, sm0;
199   clib_bihash_kv_8_8_t kv0, value0;
200
201   key0.addr = ip0->dst_address;
202   key0.port = udp0->dst_port;
203   key0.protocol = proto0;
204   key0.fib_index = sm->outside_fib_index;
205   kv0.key = key0.as_u64;
206
207   /* NAT packet aimed at external address if */
208   /* has active sessions */
209   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
210                               &value0))
211     {
212       /* or is static mappings */
213       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
214         return 0;
215     }
216   else
217     return 0;
218
219   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
220                                  rx_fib_index0);
221 }
222
223 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
224                       ip4_header_t * ip0,
225                       u32 rx_fib_index0,
226                       snat_session_key_t * key0,
227                       snat_session_t ** sessionp,
228                       vlib_node_runtime_t * node,
229                       u32 next0,
230                       u32 thread_index)
231 {
232   snat_user_t *u;
233   snat_user_key_t user_key;
234   snat_session_t *s;
235   clib_bihash_kv_8_8_t kv0, value0;
236   u32 oldest_per_user_translation_list_index;
237   dlist_elt_t * oldest_per_user_translation_list_elt;
238   dlist_elt_t * per_user_translation_list_elt;
239   dlist_elt_t * per_user_list_head_elt;
240   u32 session_index;
241   snat_session_key_t key1;
242   u32 address_index = ~0;
243   u32 outside_fib_index;
244   uword * p;
245
246   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
247     {
248       b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
249       return SNAT_IN2OUT_NEXT_DROP;
250     }
251
252   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
253   if (! p)
254     {
255       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
256       return SNAT_IN2OUT_NEXT_DROP;
257     }
258   outside_fib_index = p[0];
259
260   key1.protocol = key0->protocol;
261   user_key.addr = ip0->src_address;
262   user_key.fib_index = rx_fib_index0;
263   kv0.key = user_key.as_u64;
264
265   /* Ever heard of the "user" = src ip4 address before? */
266   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].user_hash,
267                               &kv0, &value0))
268     {
269       /* no, make a new one */
270       pool_get (sm->per_thread_data[thread_index].users, u);
271       memset (u, 0, sizeof (*u));
272       u->addr = ip0->src_address;
273       u->fib_index = rx_fib_index0;
274
275       pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);
276
277       u->sessions_per_user_list_head_index = per_user_list_head_elt -
278         sm->per_thread_data[thread_index].list_pool;
279
280       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
281                        u->sessions_per_user_list_head_index);
282
283       kv0.value = u - sm->per_thread_data[thread_index].users;
284
285       /* add user */
286       clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].user_hash,
287                                &kv0, 1 /* is_add */);
288     }
289   else
290     {
291       u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
292                              value0.value);
293     }
294
295   /* Over quota? Recycle the least recently used dynamic translation */
296   if (u->nsessions >= sm->max_translations_per_user)
297     {
298       /* Remove the oldest dynamic translation */
299       do {
300           oldest_per_user_translation_list_index =
301             clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
302                                     u->sessions_per_user_list_head_index);
303
304           ASSERT (oldest_per_user_translation_list_index != ~0);
305
306           /* add it back to the end of the LRU list */
307           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
308                               u->sessions_per_user_list_head_index,
309                               oldest_per_user_translation_list_index);
310           /* Get the list element */
311           oldest_per_user_translation_list_elt =
312             pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
313                                oldest_per_user_translation_list_index);
314
315           /* Get the session index from the list element */
316           session_index = oldest_per_user_translation_list_elt->value;
317
318           /* Get the session */
319           s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
320                                  session_index);
321       } while (snat_is_session_static (s));
322
323       if (snat_is_unk_proto_session (s))
324         {
325           clib_bihash_kv_16_8_t up_kv;
326           nat_ed_ses_key_t key;
327
328           /* Remove from lookup tables */
329           key.l_addr = s->in2out.addr;
330           key.r_addr = s->ext_host_addr;
331           key.fib_index = s->in2out.fib_index;
332           key.proto = s->in2out.port;
333           key.rsvd = 0;
334           key.l_port = 0;
335           up_kv.key[0] = key.as_u64[0];
336           up_kv.key[1] = key.as_u64[1];
337           if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &up_kv, 0))
338             clib_warning ("in2out key del failed");
339
340           key.l_addr = s->out2in.addr;
341           key.fib_index = s->out2in.fib_index;
342           up_kv.key[0] = key.as_u64[0];
343           up_kv.key[1] = key.as_u64[1];
344           if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &up_kv, 0))
345             clib_warning ("out2in key del failed");
346         }
347       else
348         {
349           /* Remove in2out, out2in keys */
350           kv0.key = s->in2out.as_u64;
351           if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out,
352                                        &kv0, 0 /* is_add */))
353               clib_warning ("in2out key delete failed");
354           kv0.key = s->out2in.as_u64;
355           if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in,
356                                        &kv0, 0 /* is_add */))
357               clib_warning ("out2in key delete failed");
358
359           /* log NAT event */
360           snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
361                                               s->out2in.addr.as_u32,
362                                               s->in2out.protocol,
363                                               s->in2out.port,
364                                               s->out2in.port,
365                                               s->in2out.fib_index);
366
367           snat_free_outside_address_and_port
368             (sm, thread_index, &s->out2in, s->outside_address_index);
369         }
370       s->outside_address_index = ~0;
371
372       if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, thread_index,
373                                                &key1, &address_index))
374         {
375           ASSERT(0);
376
377           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
378           return SNAT_IN2OUT_NEXT_DROP;
379         }
380       s->outside_address_index = address_index;
381     }
382   else
383     {
384       u8 static_mapping = 1;
385
386       /* First try to match static mapping by local address and port */
387       if (snat_static_mapping_match (sm, *key0, &key1, 0, 0))
388         {
389           static_mapping = 0;
390           /* Try to create dynamic translation */
391           if (snat_alloc_outside_address_and_port (sm, rx_fib_index0,
392                                                    thread_index, &key1,
393                                                    &address_index))
394             {
395               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
396               return SNAT_IN2OUT_NEXT_DROP;
397             }
398         }
399
400       /* Create a new session */
401       pool_get (sm->per_thread_data[thread_index].sessions, s);
402       memset (s, 0, sizeof (*s));
403
404       s->outside_address_index = address_index;
405
406       if (static_mapping)
407         {
408           u->nstaticsessions++;
409           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
410         }
411       else
412         {
413           u->nsessions++;
414         }
415
416       /* Create list elts */
417       pool_get (sm->per_thread_data[thread_index].list_pool,
418                 per_user_translation_list_elt);
419       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
420                        per_user_translation_list_elt -
421                        sm->per_thread_data[thread_index].list_pool);
422
423       per_user_translation_list_elt->value =
424         s - sm->per_thread_data[thread_index].sessions;
425       s->per_user_index = per_user_translation_list_elt -
426                           sm->per_thread_data[thread_index].list_pool;
427       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
428
429       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
430                           s->per_user_list_head_index,
431                           per_user_translation_list_elt -
432                           sm->per_thread_data[thread_index].list_pool);
433    }
434
435   s->in2out = *key0;
436   s->out2in = key1;
437   s->out2in.protocol = key0->protocol;
438   s->out2in.fib_index = outside_fib_index;
439   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
440   *sessionp = s;
441
442   /* Add to translation hashes */
443   kv0.key = s->in2out.as_u64;
444   kv0.value = s - sm->per_thread_data[thread_index].sessions;
445   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
446                                1 /* is_add */))
447       clib_warning ("in2out key add failed");
448
449   kv0.key = s->out2in.as_u64;
450   kv0.value = s - sm->per_thread_data[thread_index].sessions;
451
452   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
453                                1 /* is_add */))
454       clib_warning ("out2in key add failed");
455
456   /* log NAT event */
457   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
458                                       s->out2in.addr.as_u32,
459                                       s->in2out.protocol,
460                                       s->in2out.port,
461                                       s->out2in.port,
462                                       s->in2out.fib_index);
463   return next0;
464 }
465
466 static_always_inline
467 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
468                                  snat_session_key_t *p_key0)
469 {
470   icmp46_header_t *icmp0;
471   snat_session_key_t key0;
472   icmp_echo_header_t *echo0, *inner_echo0 = 0;
473   ip4_header_t *inner_ip0 = 0;
474   void *l4_header = 0;
475   icmp46_header_t *inner_icmp0;
476
477   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
478   echo0 = (icmp_echo_header_t *)(icmp0+1);
479
480   if (!icmp_is_error_message (icmp0))
481     {
482       key0.protocol = SNAT_PROTOCOL_ICMP;
483       key0.addr = ip0->src_address;
484       key0.port = echo0->identifier;
485     }
486   else
487     {
488       inner_ip0 = (ip4_header_t *)(echo0+1);
489       l4_header = ip4_next_header (inner_ip0);
490       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
491       key0.addr = inner_ip0->dst_address;
492       switch (key0.protocol)
493         {
494         case SNAT_PROTOCOL_ICMP:
495           inner_icmp0 = (icmp46_header_t*)l4_header;
496           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
497           key0.port = inner_echo0->identifier;
498           break;
499         case SNAT_PROTOCOL_UDP:
500         case SNAT_PROTOCOL_TCP:
501           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
502           break;
503         default:
504           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
505         }
506     }
507   *p_key0 = key0;
508   return -1; /* success */
509 }
510
511 /**
512  * Get address and port values to be used for ICMP packet translation
513  * and create session if needed
514  *
515  * @param[in,out] sm             NAT main
516  * @param[in,out] node           NAT node runtime
517  * @param[in] thread_index       thread index
518  * @param[in,out] b0             buffer containing packet to be translated
519  * @param[out] p_proto           protocol used for matching
520  * @param[out] p_value           address and port after NAT translation
521  * @param[out] p_dont_translate  if packet should not be translated
522  * @param d                      optional parameter
523  * @param e                      optional parameter
524  */
525 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
526                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
527                            snat_session_key_t *p_value,
528                            u8 *p_dont_translate, void *d, void *e)
529 {
530   ip4_header_t *ip0;
531   icmp46_header_t *icmp0;
532   u32 sw_if_index0;
533   u32 rx_fib_index0;
534   snat_session_key_t key0;
535   snat_session_t *s0 = 0;
536   u8 dont_translate = 0;
537   clib_bihash_kv_8_8_t kv0, value0;
538   u32 next0 = ~0;
539   int err;
540   u32 iph_offset0 = 0;
541
542   if (PREDICT_FALSE(vnet_buffer(b0)->sw_if_index[VLIB_TX] != ~0))
543     {
544       iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
545     }
546   ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
547   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
548   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
549   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
550
551   err = icmp_get_key (ip0, &key0);
552   if (err != -1)
553     {
554       b0->error = node->errors[err];
555       next0 = SNAT_IN2OUT_NEXT_DROP;
556       goto out;
557     }
558   key0.fib_index = rx_fib_index0;
559
560   kv0.key = key0.as_u64;
561
562   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
563                               &value0))
564     {
565       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
566           IP_PROTOCOL_ICMP, rx_fib_index0, thread_index) &&
567           vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0))
568         {
569           dont_translate = 1;
570           goto out;
571         }
572
573       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
574         {
575           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
576           next0 = SNAT_IN2OUT_NEXT_DROP;
577           goto out;
578         }
579
580       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
581                          &s0, node, next0, thread_index);
582
583       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
584         goto out;
585     }
586   else
587     {
588       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
589                         icmp0->type != ICMP4_echo_reply &&
590                         !icmp_is_error_message (icmp0)))
591         {
592           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
593           next0 = SNAT_IN2OUT_NEXT_DROP;
594           goto out;
595         }
596
597       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
598                               value0.value);
599     }
600
601 out:
602   *p_proto = key0.protocol;
603   if (s0)
604     *p_value = s0->out2in;
605   *p_dont_translate = dont_translate;
606   if (d)
607     *(snat_session_t**)d = s0;
608   return next0;
609 }
610
611 /**
612  * Get address and port values to be used for ICMP packet translation
613  *
614  * @param[in] sm                 NAT main
615  * @param[in,out] node           NAT node runtime
616  * @param[in] thread_index       thread index
617  * @param[in,out] b0             buffer containing packet to be translated
618  * @param[out] p_proto           protocol used for matching
619  * @param[out] p_value           address and port after NAT translation
620  * @param[out] p_dont_translate  if packet should not be translated
621  * @param d                      optional parameter
622  * @param e                      optional parameter
623  */
624 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
625                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
626                            snat_session_key_t *p_value,
627                            u8 *p_dont_translate, void *d, void *e)
628 {
629   ip4_header_t *ip0;
630   icmp46_header_t *icmp0;
631   u32 sw_if_index0;
632   u32 rx_fib_index0;
633   snat_session_key_t key0;
634   snat_session_key_t sm0;
635   u8 dont_translate = 0;
636   u8 is_addr_only;
637   u32 next0 = ~0;
638   int err;
639
640   ip0 = vlib_buffer_get_current (b0);
641   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
642   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
643   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
644
645   err = icmp_get_key (ip0, &key0);
646   if (err != -1)
647     {
648       b0->error = node->errors[err];
649       next0 = SNAT_IN2OUT_NEXT_DROP;
650       goto out2;
651     }
652   key0.fib_index = rx_fib_index0;
653
654   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only))
655     {
656       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
657           IP_PROTOCOL_ICMP, rx_fib_index0)))
658         {
659           dont_translate = 1;
660           goto out;
661         }
662
663       if (icmp_is_error_message (icmp0))
664         {
665           next0 = SNAT_IN2OUT_NEXT_DROP;
666           goto out;
667         }
668
669       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
670       next0 = SNAT_IN2OUT_NEXT_DROP;
671       goto out;
672     }
673
674   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
675                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
676                     !icmp_is_error_message (icmp0)))
677     {
678       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
679       next0 = SNAT_IN2OUT_NEXT_DROP;
680       goto out;
681     }
682
683 out:
684   *p_value = sm0;
685 out2:
686   *p_proto = key0.protocol;
687   *p_dont_translate = dont_translate;
688   return next0;
689 }
690
691 static inline u32 icmp_in2out (snat_main_t *sm,
692                                vlib_buffer_t * b0,
693                                ip4_header_t * ip0,
694                                icmp46_header_t * icmp0,
695                                u32 sw_if_index0,
696                                u32 rx_fib_index0,
697                                vlib_node_runtime_t * node,
698                                u32 next0,
699                                u32 thread_index,
700                                void *d,
701                                void *e)
702 {
703   snat_session_key_t sm0;
704   u8 protocol;
705   icmp_echo_header_t *echo0, *inner_echo0 = 0;
706   ip4_header_t *inner_ip0;
707   void *l4_header = 0;
708   icmp46_header_t *inner_icmp0;
709   u8 dont_translate;
710   u32 new_addr0, old_addr0;
711   u16 old_id0, new_id0;
712   ip_csum_t sum0;
713   u16 checksum0;
714   u32 next0_tmp;
715
716   echo0 = (icmp_echo_header_t *)(icmp0+1);
717
718   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0,
719                                        &protocol, &sm0, &dont_translate, d, e);
720   if (next0_tmp != ~0)
721     next0 = next0_tmp;
722   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
723     goto out;
724
725   sum0 = ip_incremental_checksum (0, icmp0,
726                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
727   checksum0 = ~ip_csum_fold (sum0);
728   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
729     {
730       next0 = SNAT_IN2OUT_NEXT_DROP;
731       goto out;
732     }
733
734   old_addr0 = ip0->src_address.as_u32;
735   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
736   if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
737     vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
738
739   sum0 = ip0->checksum;
740   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
741                          src_address /* changed member */);
742   ip0->checksum = ip_csum_fold (sum0);
743
744   if (!icmp_is_error_message (icmp0))
745     {
746       new_id0 = sm0.port;
747       if (PREDICT_FALSE(new_id0 != echo0->identifier))
748         {
749           old_id0 = echo0->identifier;
750           new_id0 = sm0.port;
751           echo0->identifier = new_id0;
752
753           sum0 = icmp0->checksum;
754           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
755                                  identifier);
756           icmp0->checksum = ip_csum_fold (sum0);
757         }
758     }
759   else
760     {
761       inner_ip0 = (ip4_header_t *)(echo0+1);
762       l4_header = ip4_next_header (inner_ip0);
763
764       if (!ip4_header_checksum_is_valid (inner_ip0))
765         {
766           next0 = SNAT_IN2OUT_NEXT_DROP;
767           goto out;
768         }
769
770       old_addr0 = inner_ip0->dst_address.as_u32;
771       inner_ip0->dst_address = sm0.addr;
772       new_addr0 = inner_ip0->dst_address.as_u32;
773
774       sum0 = icmp0->checksum;
775       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
776                              dst_address /* changed member */);
777       icmp0->checksum = ip_csum_fold (sum0);
778
779       switch (protocol)
780         {
781           case SNAT_PROTOCOL_ICMP:
782             inner_icmp0 = (icmp46_header_t*)l4_header;
783             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
784
785             old_id0 = inner_echo0->identifier;
786             new_id0 = sm0.port;
787             inner_echo0->identifier = new_id0;
788
789             sum0 = icmp0->checksum;
790             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
791                                    identifier);
792             icmp0->checksum = ip_csum_fold (sum0);
793             break;
794           case SNAT_PROTOCOL_UDP:
795           case SNAT_PROTOCOL_TCP:
796             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
797             new_id0 = sm0.port;
798             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
799
800             sum0 = icmp0->checksum;
801             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
802                                    dst_port);
803             icmp0->checksum = ip_csum_fold (sum0);
804             break;
805           default:
806             ASSERT(0);
807         }
808     }
809
810 out:
811   return next0;
812 }
813
814 /**
815  * @brief Hairpinning
816  *
817  * Hairpinning allows two endpoints on the internal side of the NAT to
818  * communicate even if they only use each other's external IP addresses
819  * and ports.
820  *
821  * @param sm     NAT main.
822  * @param b0     Vlib buffer.
823  * @param ip0    IP header.
824  * @param udp0   UDP header.
825  * @param tcp0   TCP header.
826  * @param proto0 NAT protocol.
827  */
828 static inline void
829 snat_hairpinning (snat_main_t *sm,
830                   vlib_buffer_t * b0,
831                   ip4_header_t * ip0,
832                   udp_header_t * udp0,
833                   tcp_header_t * tcp0,
834                   u32 proto0)
835 {
836   snat_session_key_t key0, sm0;
837   snat_session_t * s0;
838   clib_bihash_kv_8_8_t kv0, value0;
839   ip_csum_t sum0;
840   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
841   u16 new_dst_port0, old_dst_port0;
842
843   key0.addr = ip0->dst_address;
844   key0.port = udp0->dst_port;
845   key0.protocol = proto0;
846   key0.fib_index = sm->outside_fib_index;
847   kv0.key = key0.as_u64;
848
849   /* Check if destination is static mappings */
850   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
851     {
852       new_dst_addr0 = sm0.addr.as_u32;
853       new_dst_port0 = sm0.port;
854       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
855     }
856   /* or active session */
857   else
858     {
859       if (sm->num_workers > 1)
860         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
861       else
862         ti = sm->num_workers;
863
864       if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
865         {
866           si = value0.value;
867
868           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
869           new_dst_addr0 = s0->in2out.addr.as_u32;
870           new_dst_port0 = s0->in2out.port;
871           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
872         }
873     }
874
875   /* Destination is behind the same NAT, use internal address and port */
876   if (new_dst_addr0)
877     {
878       old_dst_addr0 = ip0->dst_address.as_u32;
879       ip0->dst_address.as_u32 = new_dst_addr0;
880       sum0 = ip0->checksum;
881       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
882                              ip4_header_t, dst_address);
883       ip0->checksum = ip_csum_fold (sum0);
884
885       old_dst_port0 = tcp0->dst;
886       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
887         {
888           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
889             {
890               tcp0->dst = new_dst_port0;
891               sum0 = tcp0->checksum;
892               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
893                                      ip4_header_t, dst_address);
894               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
895                                      ip4_header_t /* cheat */, length);
896               tcp0->checksum = ip_csum_fold(sum0);
897             }
898           else
899             {
900               udp0->dst_port = new_dst_port0;
901               udp0->checksum = 0;
902             }
903         }
904       else
905         {
906           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
907             {
908               sum0 = tcp0->checksum;
909               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
910                                      ip4_header_t, dst_address);
911               tcp0->checksum = ip_csum_fold(sum0);
912             }
913         }
914     }
915 }
916
917 static inline void
918 snat_icmp_hairpinning (snat_main_t *sm,
919                        vlib_buffer_t * b0,
920                        ip4_header_t * ip0,
921                        icmp46_header_t * icmp0)
922 {
923   snat_session_key_t key0, sm0;
924   clib_bihash_kv_8_8_t kv0, value0;
925   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
926   ip_csum_t sum0;
927   snat_session_t *s0;
928
929   if (!icmp_is_error_message (icmp0))
930     {
931       icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
932       u16 icmp_id0 = echo0->identifier;
933       key0.addr = ip0->dst_address;
934       key0.port = icmp_id0;
935       key0.protocol = SNAT_PROTOCOL_ICMP;
936       key0.fib_index = sm->outside_fib_index;
937       kv0.key = key0.as_u64;
938
939       if (sm->num_workers > 1)
940         ti = (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread;
941       else
942         ti = sm->num_workers;
943
944       /* Check if destination is in active sessions */
945       if (clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0,
946                                   &value0))
947         {
948           /* or static mappings */
949           if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
950             {
951               new_dst_addr0 = sm0.addr.as_u32;
952               vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
953             }
954         }
955       else
956         {
957           si = value0.value;
958
959           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
960           new_dst_addr0 = s0->in2out.addr.as_u32;
961           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
962           echo0->identifier = s0->in2out.port;
963           sum0 = icmp0->checksum;
964           sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
965                                  icmp_echo_header_t, identifier);
966           icmp0->checksum = ip_csum_fold (sum0);
967         }
968
969       /* Destination is behind the same NAT, use internal address and port */
970       if (new_dst_addr0)
971         {
972           old_dst_addr0 = ip0->dst_address.as_u32;
973           ip0->dst_address.as_u32 = new_dst_addr0;
974           sum0 = ip0->checksum;
975           sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
976                                  ip4_header_t, dst_address);
977           ip0->checksum = ip_csum_fold (sum0);
978         }
979     }
980
981 }
982
983 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
984                                          vlib_buffer_t * b0,
985                                          ip4_header_t * ip0,
986                                          icmp46_header_t * icmp0,
987                                          u32 sw_if_index0,
988                                          u32 rx_fib_index0,
989                                          vlib_node_runtime_t * node,
990                                          u32 next0,
991                                          f64 now,
992                                          u32 thread_index,
993                                          snat_session_t ** p_s0)
994 {
995   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
996                       next0, thread_index, p_s0, 0);
997   snat_session_t * s0 = *p_s0;
998   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
999     {
1000       /* Hairpinning */
1001       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == 0)
1002         snat_icmp_hairpinning(sm, b0, ip0, icmp0);
1003       /* Accounting */
1004       s0->last_heard = now;
1005       s0->total_pkts++;
1006       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
1007       /* Per-user LRU list maintenance for dynamic translations */
1008       if (!snat_is_session_static (s0))
1009         {
1010           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1011                              s0->per_user_index);
1012           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1013                               s0->per_user_list_head_index,
1014                               s0->per_user_index);
1015         }
1016     }
1017   return next0;
1018 }
1019 static inline void
1020 snat_hairpinning_unknown_proto (snat_main_t *sm,
1021                                 vlib_buffer_t * b,
1022                                 ip4_header_t * ip)
1023 {
1024   u32 old_addr, new_addr = 0, ti = 0;
1025   clib_bihash_kv_8_8_t kv, value;
1026   clib_bihash_kv_16_8_t s_kv, s_value;
1027   nat_ed_ses_key_t key;
1028   snat_session_key_t m_key;
1029   snat_static_mapping_t *m;
1030   ip_csum_t sum;
1031   snat_session_t *s;
1032
1033   old_addr = ip->dst_address.as_u32;
1034   key.l_addr.as_u32 = ip->dst_address.as_u32;
1035   key.r_addr.as_u32 = ip->src_address.as_u32;
1036   key.fib_index = sm->outside_fib_index;
1037   key.proto = ip->protocol;
1038   key.rsvd = 0;
1039   key.l_port = 0;
1040   s_kv.key[0] = key.as_u64[0];
1041   s_kv.key[1] = key.as_u64[1];
1042   if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1043     {
1044       m_key.addr = ip->dst_address;
1045       m_key.fib_index = sm->outside_fib_index;
1046       m_key.port = 0;
1047       m_key.protocol = 0;
1048       kv.key = m_key.as_u64;
1049       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1050         return;
1051
1052       m = pool_elt_at_index (sm->static_mappings, value.value);
1053       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1054         vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
1055       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
1056     }
1057   else
1058     {
1059       if (sm->num_workers > 1)
1060         ti = sm->worker_out2in_cb (ip, sm->outside_fib_index);
1061       else
1062         ti = sm->num_workers;
1063
1064       s = pool_elt_at_index (sm->per_thread_data[ti].sessions, s_value.value);
1065       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1066         vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
1067       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
1068     }
1069   sum = ip->checksum;
1070   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
1071   ip->checksum = ip_csum_fold (sum);
1072 }
1073
1074 static snat_session_t *
1075 snat_in2out_unknown_proto (snat_main_t *sm,
1076                            vlib_buffer_t * b,
1077                            ip4_header_t * ip,
1078                            u32 rx_fib_index,
1079                            u32 thread_index,
1080                            f64 now,
1081                            vlib_main_t * vm,
1082                            vlib_node_runtime_t * node)
1083 {
1084   clib_bihash_kv_8_8_t kv, value;
1085   clib_bihash_kv_16_8_t s_kv, s_value;
1086   snat_static_mapping_t *m;
1087   snat_session_key_t m_key;
1088   u32 old_addr, new_addr = 0;
1089   ip_csum_t sum;
1090   snat_user_key_t u_key;
1091   snat_user_t *u;
1092   dlist_elt_t *head, *elt, *oldest;
1093   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1094   u32 elt_index, head_index, ses_index, oldest_index;
1095   snat_session_t * s;
1096   nat_ed_ses_key_t key;
1097   u32 address_index = ~0;
1098   int i;
1099   u8 is_sm = 0;
1100
1101   old_addr = ip->src_address.as_u32;
1102
1103   key.l_addr = ip->src_address;
1104   key.r_addr = ip->dst_address;
1105   key.fib_index = rx_fib_index;
1106   key.proto = ip->protocol;
1107   key.rsvd = 0;
1108   key.l_port = 0;
1109   s_kv.key[0] = key.as_u64[0];
1110   s_kv.key[1] = key.as_u64[1];
1111
1112   if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
1113     {
1114       s = pool_elt_at_index (tsm->sessions, s_value.value);
1115       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1116     }
1117   else
1118     {
1119       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
1120         {
1121           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
1122           return 0;
1123         }
1124
1125       u_key.addr = ip->src_address;
1126       u_key.fib_index = rx_fib_index;
1127       kv.key = u_key.as_u64;
1128
1129       /* Ever heard of the "user" = src ip4 address before? */
1130       if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1131         {
1132           /* no, make a new one */
1133           pool_get (tsm->users, u);
1134           memset (u, 0, sizeof (*u));
1135           u->addr = ip->src_address;
1136           u->fib_index = rx_fib_index;
1137
1138           pool_get (tsm->list_pool, head);
1139           u->sessions_per_user_list_head_index = head - tsm->list_pool;
1140
1141           clib_dlist_init (tsm->list_pool,
1142                            u->sessions_per_user_list_head_index);
1143
1144           kv.value = u - tsm->users;
1145
1146           /* add user */
1147           clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1);
1148         }
1149       else
1150         {
1151           u = pool_elt_at_index (tsm->users, value.value);
1152         }
1153
1154       m_key.addr = ip->src_address;
1155       m_key.port = 0;
1156       m_key.protocol = 0;
1157       m_key.fib_index = rx_fib_index;
1158       kv.key = m_key.as_u64;
1159
1160       /* Try to find static mapping first */
1161       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
1162         {
1163           m = pool_elt_at_index (sm->static_mappings, value.value);
1164           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
1165           is_sm = 1;
1166           goto create_ses;
1167         }
1168       /* Fallback to 3-tuple key */
1169       else
1170         {
1171           /* Choose same out address as for TCP/UDP session to same destination */
1172           if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1173             {
1174               head_index = u->sessions_per_user_list_head_index;
1175               head = pool_elt_at_index (tsm->list_pool, head_index);
1176               elt_index = head->next;
1177               elt = pool_elt_at_index (tsm->list_pool, elt_index);
1178               ses_index = elt->value;
1179               while (ses_index != ~0)
1180                 {
1181                   s =  pool_elt_at_index (tsm->sessions, ses_index);
1182                   elt_index = elt->next;
1183                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1184                   ses_index = elt->value;
1185
1186                   if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
1187                     {
1188                       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1189                       address_index = s->outside_address_index;
1190
1191                       key.fib_index = sm->outside_fib_index;
1192                       key.l_addr.as_u32 = new_addr;
1193                       s_kv.key[0] = key.as_u64[0];
1194                       s_kv.key[1] = key.as_u64[1];
1195                       if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1196                         break;
1197
1198                       goto create_ses;
1199                     }
1200                 }
1201             }
1202           key.fib_index = sm->outside_fib_index;
1203           for (i = 0; i < vec_len (sm->addresses); i++)
1204             {
1205               key.l_addr.as_u32 = sm->addresses[i].addr.as_u32;
1206               s_kv.key[0] = key.as_u64[0];
1207               s_kv.key[1] = key.as_u64[1];
1208               if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1209                 {
1210                   new_addr = ip->src_address.as_u32 = key.l_addr.as_u32;
1211                   address_index = i;
1212                   goto create_ses;
1213                 }
1214             }
1215           return 0;
1216         }
1217
1218 create_ses:
1219       /* Over quota? Recycle the least recently used dynamic translation */
1220       if (u->nsessions >= sm->max_translations_per_user && !is_sm)
1221         {
1222           /* Remove the oldest dynamic translation */
1223           do {
1224               oldest_index = clib_dlist_remove_head (
1225                 tsm->list_pool, u->sessions_per_user_list_head_index);
1226
1227               ASSERT (oldest_index != ~0);
1228
1229               /* add it back to the end of the LRU list */
1230               clib_dlist_addtail (tsm->list_pool,
1231                                   u->sessions_per_user_list_head_index,
1232                                   oldest_index);
1233               /* Get the list element */
1234               oldest = pool_elt_at_index (tsm->list_pool, oldest_index);
1235
1236               /* Get the session index from the list element */
1237               ses_index = oldest->value;
1238
1239               /* Get the session */
1240               s = pool_elt_at_index (tsm->sessions, ses_index);
1241           } while (snat_is_session_static (s));
1242
1243           if (snat_is_unk_proto_session (s))
1244             {
1245               /* Remove from lookup tables */
1246               key.l_addr = s->in2out.addr;
1247               key.r_addr = s->ext_host_addr;
1248               key.fib_index = s->in2out.fib_index;
1249               key.proto = s->in2out.port;
1250               s_kv.key[0] = key.as_u64[0];
1251               s_kv.key[1] = key.as_u64[1];
1252               if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 0))
1253                 clib_warning ("in2out key del failed");
1254
1255               key.l_addr = s->out2in.addr;
1256               key.fib_index = s->out2in.fib_index;
1257               s_kv.key[0] = key.as_u64[0];
1258               s_kv.key[1] = key.as_u64[1];
1259               if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 0))
1260                 clib_warning ("out2in key del failed");
1261             }
1262           else
1263             {
1264               /* log NAT event */
1265               snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
1266                                                   s->out2in.addr.as_u32,
1267                                                   s->in2out.protocol,
1268                                                   s->in2out.port,
1269                                                   s->out2in.port,
1270                                                   s->in2out.fib_index);
1271
1272               snat_free_outside_address_and_port (sm, thread_index, &s->out2in,
1273                                                   s->outside_address_index);
1274
1275               /* Remove in2out, out2in keys */
1276               kv.key = s->in2out.as_u64;
1277               if (clib_bihash_add_del_8_8 (
1278                     &sm->per_thread_data[thread_index].in2out, &kv, 0))
1279                 clib_warning ("in2out key del failed");
1280               kv.key = s->out2in.as_u64;
1281               if (clib_bihash_add_del_8_8 (
1282                     &sm->per_thread_data[thread_index].out2in, &kv, 0))
1283                 clib_warning ("out2in key del failed");
1284             }
1285         }
1286       else
1287         {
1288           /* Create a new session */
1289           pool_get (tsm->sessions, s);
1290           memset (s, 0, sizeof (*s));
1291
1292           /* Create list elts */
1293           pool_get (tsm->list_pool, elt);
1294           clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
1295           elt->value = s - tsm->sessions;
1296           s->per_user_index = elt - tsm->list_pool;
1297           s->per_user_list_head_index = u->sessions_per_user_list_head_index;
1298           clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1299                               s->per_user_index);
1300         }
1301
1302       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1303       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
1304       s->outside_address_index = address_index;
1305       s->out2in.addr.as_u32 = new_addr;
1306       s->out2in.fib_index = sm->outside_fib_index;
1307       s->in2out.addr.as_u32 = old_addr;
1308       s->in2out.fib_index = rx_fib_index;
1309       s->in2out.port = s->out2in.port = ip->protocol;
1310       if (is_sm)
1311         {
1312           u->nstaticsessions++;
1313           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1314         }
1315       else
1316         {
1317           u->nsessions++;
1318         }
1319
1320       /* Add to lookup tables */
1321       key.l_addr.as_u32 = old_addr;
1322       key.r_addr = ip->dst_address;
1323       key.proto = ip->protocol;
1324       key.fib_index = rx_fib_index;
1325       s_kv.key[0] = key.as_u64[0];
1326       s_kv.key[1] = key.as_u64[1];
1327       s_kv.value = s - tsm->sessions;
1328       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1329         clib_warning ("in2out key add failed");
1330
1331       key.l_addr.as_u32 = new_addr;
1332       key.fib_index = sm->outside_fib_index;
1333       s_kv.key[0] = key.as_u64[0];
1334       s_kv.key[1] = key.as_u64[1];
1335       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1336         clib_warning ("out2in key add failed");
1337   }
1338
1339   /* Update IP checksum */
1340   sum = ip->checksum;
1341   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1342   ip->checksum = ip_csum_fold (sum);
1343
1344   /* Accounting */
1345   s->last_heard = now;
1346   s->total_pkts++;
1347   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1348   /* Per-user LRU list maintenance */
1349   clib_dlist_remove (tsm->list_pool, s->per_user_index);
1350   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1351                       s->per_user_index);
1352
1353   /* Hairpinning */
1354   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1355     snat_hairpinning_unknown_proto(sm, b, ip);
1356
1357   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1358     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1359
1360   return s;
1361 }
1362
1363 static snat_session_t *
1364 snat_in2out_lb (snat_main_t *sm,
1365                 vlib_buffer_t * b,
1366                 ip4_header_t * ip,
1367                 u32 rx_fib_index,
1368                 u32 thread_index,
1369                 f64 now,
1370                 vlib_main_t * vm,
1371                 vlib_node_runtime_t * node)
1372 {
1373   nat_ed_ses_key_t key;
1374   clib_bihash_kv_16_8_t s_kv, s_value;
1375   udp_header_t *udp = ip4_next_header (ip);
1376   tcp_header_t *tcp = (tcp_header_t *) udp;
1377   snat_session_t *s = 0;
1378   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1379   u32 old_addr, new_addr;
1380   u16 new_port, old_port;
1381   ip_csum_t sum;
1382   u32 proto = ip_proto_to_snat_proto (ip->protocol);
1383   snat_session_key_t e_key, l_key;
1384   clib_bihash_kv_8_8_t kv, value;
1385   snat_user_key_t u_key;
1386   snat_user_t *u;
1387   dlist_elt_t *head, *elt;
1388
1389   old_addr = ip->src_address.as_u32;
1390
1391   key.l_addr = ip->src_address;
1392   key.r_addr = ip->dst_address;
1393   key.fib_index = rx_fib_index;
1394   key.proto = ip->protocol;
1395   key.rsvd = 0;
1396   key.l_port = udp->src_port;
1397   s_kv.key[0] = key.as_u64[0];
1398   s_kv.key[1] = key.as_u64[1];
1399
1400   if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
1401     {
1402       s = pool_elt_at_index (tsm->sessions, s_value.value);
1403     }
1404   else
1405     {
1406       if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index)))
1407         {
1408           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
1409           return 0;
1410         }
1411
1412       l_key.addr = ip->src_address;
1413       l_key.port = udp->src_port;
1414       l_key.protocol = proto;
1415       l_key.fib_index = rx_fib_index;
1416       if (snat_static_mapping_match(sm, l_key, &e_key, 0, 0))
1417         return 0;
1418
1419       u_key.addr = ip->src_address;
1420       u_key.fib_index = rx_fib_index;
1421       kv.key = u_key.as_u64;
1422
1423       /* Ever heard of the "user" = src ip4 address before? */
1424       if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1425         {
1426           /* no, make a new one */
1427           pool_get (tsm->users, u);
1428           memset (u, 0, sizeof (*u));
1429           u->addr = ip->src_address;
1430           u->fib_index = rx_fib_index;
1431
1432           pool_get (tsm->list_pool, head);
1433           u->sessions_per_user_list_head_index = head - tsm->list_pool;
1434
1435           clib_dlist_init (tsm->list_pool,
1436                            u->sessions_per_user_list_head_index);
1437
1438           kv.value = u - tsm->users;
1439
1440           /* add user */
1441           if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1))
1442             clib_warning ("user key add failed");
1443         }
1444       else
1445         {
1446           u = pool_elt_at_index (tsm->users, value.value);
1447         }
1448
1449       /* Create a new session */
1450       pool_get (tsm->sessions, s);
1451       memset (s, 0, sizeof (*s));
1452
1453       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1454       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1455       s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
1456       s->outside_address_index = ~0;
1457       s->in2out = l_key;
1458       s->out2in = e_key;
1459       u->nstaticsessions++;
1460
1461       /* Create list elts */
1462       pool_get (tsm->list_pool, elt);
1463       clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
1464       elt->value = s - tsm->sessions;
1465       s->per_user_index = elt - tsm->list_pool;
1466       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
1467       clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1468                           s->per_user_index);
1469
1470       /* Add to lookup tables */
1471       s_kv.value = s - tsm->sessions;
1472       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1473         clib_warning ("in2out-ed key add failed");
1474
1475       key.l_addr = e_key.addr;
1476       key.fib_index = e_key.fib_index;
1477       key.l_port = e_key.port;
1478       s_kv.key[0] = key.as_u64[0];
1479       s_kv.key[1] = key.as_u64[1];
1480       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1481         clib_warning ("out2in-ed key add failed");
1482     }
1483
1484   new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1485
1486   /* Update IP checksum */
1487   sum = ip->checksum;
1488   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1489   ip->checksum = ip_csum_fold (sum);
1490
1491   if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
1492     {
1493       old_port = tcp->src_port;
1494       tcp->src_port = s->out2in.port;
1495       new_port = tcp->src_port;
1496
1497       sum = tcp->checksum;
1498       sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1499       sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
1500       tcp->checksum = ip_csum_fold(sum);
1501     }
1502   else
1503     {
1504       udp->src_port = s->out2in.port;
1505       udp->checksum = 0;
1506     }
1507
1508   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1509     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1510
1511   /* Accounting */
1512   s->last_heard = now;
1513   s->total_pkts++;
1514   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1515   return s;
1516 }
1517
1518 static inline uword
1519 snat_in2out_node_fn_inline (vlib_main_t * vm,
1520                             vlib_node_runtime_t * node,
1521                             vlib_frame_t * frame, int is_slow_path,
1522                             int is_output_feature)
1523 {
1524   u32 n_left_from, * from, * to_next;
1525   snat_in2out_next_t next_index;
1526   u32 pkts_processed = 0;
1527   snat_main_t * sm = &snat_main;
1528   f64 now = vlib_time_now (vm);
1529   u32 stats_node_index;
1530   u32 thread_index = vlib_get_thread_index ();
1531
1532   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
1533     snat_in2out_node.index;
1534
1535   from = vlib_frame_vector_args (frame);
1536   n_left_from = frame->n_vectors;
1537   next_index = node->cached_next_index;
1538
1539   while (n_left_from > 0)
1540     {
1541       u32 n_left_to_next;
1542
1543       vlib_get_next_frame (vm, node, next_index,
1544                            to_next, n_left_to_next);
1545
1546       while (n_left_from >= 4 && n_left_to_next >= 2)
1547         {
1548           u32 bi0, bi1;
1549           vlib_buffer_t * b0, * b1;
1550           u32 next0, next1;
1551           u32 sw_if_index0, sw_if_index1;
1552           ip4_header_t * ip0, * ip1;
1553           ip_csum_t sum0, sum1;
1554           u32 new_addr0, old_addr0, new_addr1, old_addr1;
1555           u16 old_port0, new_port0, old_port1, new_port1;
1556           udp_header_t * udp0, * udp1;
1557           tcp_header_t * tcp0, * tcp1;
1558           icmp46_header_t * icmp0, * icmp1;
1559           snat_session_key_t key0, key1;
1560           u32 rx_fib_index0, rx_fib_index1;
1561           u32 proto0, proto1;
1562           snat_session_t * s0 = 0, * s1 = 0;
1563           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1564           u32 iph_offset0 = 0, iph_offset1 = 0;
1565
1566           /* Prefetch next iteration. */
1567           {
1568             vlib_buffer_t * p2, * p3;
1569
1570             p2 = vlib_get_buffer (vm, from[2]);
1571             p3 = vlib_get_buffer (vm, from[3]);
1572
1573             vlib_prefetch_buffer_header (p2, LOAD);
1574             vlib_prefetch_buffer_header (p3, LOAD);
1575
1576             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1577             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1578           }
1579
1580           /* speculatively enqueue b0 and b1 to the current next frame */
1581           to_next[0] = bi0 = from[0];
1582           to_next[1] = bi1 = from[1];
1583           from += 2;
1584           to_next += 2;
1585           n_left_from -= 2;
1586           n_left_to_next -= 2;
1587
1588           b0 = vlib_get_buffer (vm, bi0);
1589           b1 = vlib_get_buffer (vm, bi1);
1590
1591           if (is_output_feature)
1592             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1593
1594           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1595                  iph_offset0);
1596
1597           udp0 = ip4_next_header (ip0);
1598           tcp0 = (tcp_header_t *) udp0;
1599           icmp0 = (icmp46_header_t *) udp0;
1600
1601           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1602           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1603                                    sw_if_index0);
1604
1605           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1606
1607           if (PREDICT_FALSE(ip0->ttl == 1))
1608             {
1609               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1610               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1611                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1612                                            0);
1613               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1614               goto trace00;
1615             }
1616
1617           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1618
1619           /* Next configured feature, probably ip4-lookup */
1620           if (is_slow_path)
1621             {
1622               if (PREDICT_FALSE (proto0 == ~0))
1623                 {
1624                   s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1625                                                   thread_index, now, vm, node);
1626                   if (!s0)
1627                     next0 = SNAT_IN2OUT_NEXT_DROP;
1628                   goto trace00;
1629                 }
1630
1631               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1632                 {
1633                   next0 = icmp_in2out_slow_path
1634                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
1635                      node, next0, now, thread_index, &s0);
1636                   goto trace00;
1637                 }
1638             }
1639           else
1640             {
1641               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1642                 {
1643                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1644                   goto trace00;
1645                 }
1646             }
1647
1648           key0.addr = ip0->src_address;
1649           key0.port = udp0->src_port;
1650           key0.protocol = proto0;
1651           key0.fib_index = rx_fib_index0;
1652
1653           kv0.key = key0.as_u64;
1654
1655           if (PREDICT_FALSE (clib_bihash_search_8_8 (
1656               &sm->per_thread_data[thread_index].in2out, &kv0, &value0) != 0))
1657             {
1658               if (is_slow_path)
1659                 {
1660                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1661                       ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature)
1662                     goto trace00;
1663
1664                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1665                                      &s0, node, next0, thread_index);
1666                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1667                     goto trace00;
1668                 }
1669               else
1670                 {
1671                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1672                   goto trace00;
1673                 }
1674             }
1675           else
1676             {
1677               if (PREDICT_FALSE (value0.value == ~0ULL))
1678                 {
1679                   if (is_slow_path)
1680                     {
1681                       s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
1682                                           thread_index, now, vm, node);
1683                       if (!s0)
1684                         next0 = SNAT_IN2OUT_NEXT_DROP;
1685                       goto trace00;
1686                     }
1687                   else
1688                     {
1689                       next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1690                       goto trace00;
1691                     }
1692                 }
1693               else
1694                 {
1695                   s0 = pool_elt_at_index (
1696                     sm->per_thread_data[thread_index].sessions,
1697                     value0.value);
1698                 }
1699             }
1700
1701           old_addr0 = ip0->src_address.as_u32;
1702           ip0->src_address = s0->out2in.addr;
1703           new_addr0 = ip0->src_address.as_u32;
1704           if (!is_output_feature)
1705             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1706
1707           sum0 = ip0->checksum;
1708           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1709                                  ip4_header_t,
1710                                  src_address /* changed member */);
1711           ip0->checksum = ip_csum_fold (sum0);
1712
1713           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1714             {
1715               old_port0 = tcp0->src_port;
1716               tcp0->src_port = s0->out2in.port;
1717               new_port0 = tcp0->src_port;
1718
1719               sum0 = tcp0->checksum;
1720               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1721                                      ip4_header_t,
1722                                      dst_address /* changed member */);
1723               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1724                                      ip4_header_t /* cheat */,
1725                                      length /* changed member */);
1726               tcp0->checksum = ip_csum_fold(sum0);
1727             }
1728           else
1729             {
1730               old_port0 = udp0->src_port;
1731               udp0->src_port = s0->out2in.port;
1732               udp0->checksum = 0;
1733             }
1734
1735           /* Hairpinning */
1736           if (!is_output_feature)
1737             snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1738
1739           /* Accounting */
1740           s0->last_heard = now;
1741           s0->total_pkts++;
1742           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1743           /* Per-user LRU list maintenance for dynamic translation */
1744           if (!snat_is_session_static (s0))
1745             {
1746               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1747                                  s0->per_user_index);
1748               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1749                                   s0->per_user_list_head_index,
1750                                   s0->per_user_index);
1751             }
1752         trace00:
1753
1754           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1755                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1756             {
1757               snat_in2out_trace_t *t =
1758                  vlib_add_trace (vm, node, b0, sizeof (*t));
1759               t->is_slow_path = is_slow_path;
1760               t->sw_if_index = sw_if_index0;
1761               t->next_index = next0;
1762                   t->session_index = ~0;
1763               if (s0)
1764                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1765             }
1766
1767           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1768
1769           if (is_output_feature)
1770             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1771
1772           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1773                  iph_offset1);
1774
1775           udp1 = ip4_next_header (ip1);
1776           tcp1 = (tcp_header_t *) udp1;
1777           icmp1 = (icmp46_header_t *) udp1;
1778
1779           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1780           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1781                                    sw_if_index1);
1782
1783           if (PREDICT_FALSE(ip1->ttl == 1))
1784             {
1785               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1786               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1787                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1788                                            0);
1789               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1790               goto trace01;
1791             }
1792
1793           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1794
1795           /* Next configured feature, probably ip4-lookup */
1796           if (is_slow_path)
1797             {
1798               if (PREDICT_FALSE (proto1 == ~0))
1799                 {
1800                   s1 = snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1,
1801                                                   thread_index, now, vm, node);
1802                   if (!s1)
1803                     next1 = SNAT_IN2OUT_NEXT_DROP;
1804                   goto trace01;
1805                 }
1806
1807               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1808                 {
1809                   next1 = icmp_in2out_slow_path
1810                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1811                      next1, now, thread_index, &s1);
1812                   goto trace01;
1813                 }
1814             }
1815           else
1816             {
1817               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1818                 {
1819                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1820                   goto trace01;
1821                 }
1822             }
1823
1824           key1.addr = ip1->src_address;
1825           key1.port = udp1->src_port;
1826           key1.protocol = proto1;
1827           key1.fib_index = rx_fib_index1;
1828
1829           kv1.key = key1.as_u64;
1830
1831             if (PREDICT_FALSE(clib_bihash_search_8_8 (
1832                 &sm->per_thread_data[thread_index].in2out, &kv1, &value1) != 0))
1833             {
1834               if (is_slow_path)
1835                 {
1836                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
1837                       ip1, proto1, rx_fib_index1, thread_index)) && !is_output_feature)
1838                     goto trace01;
1839
1840                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1841                                      &s1, node, next1, thread_index);
1842                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1843                     goto trace01;
1844                 }
1845               else
1846                 {
1847                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1848                   goto trace01;
1849                 }
1850             }
1851           else
1852             {
1853               if (PREDICT_FALSE (value1.value == ~0ULL))
1854                 {
1855                   if (is_slow_path)
1856                     {
1857                       s1 = snat_in2out_lb(sm, b1, ip1, rx_fib_index1,
1858                                           thread_index, now, vm, node);
1859                       if (!s1)
1860                         next1 = SNAT_IN2OUT_NEXT_DROP;
1861                       goto trace01;
1862                     }
1863                   else
1864                     {
1865                       next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1866                       goto trace01;
1867                     }
1868                 }
1869               else
1870                 {
1871                   s1 = pool_elt_at_index (
1872                     sm->per_thread_data[thread_index].sessions,
1873                     value1.value);
1874                 }
1875             }
1876
1877           old_addr1 = ip1->src_address.as_u32;
1878           ip1->src_address = s1->out2in.addr;
1879           new_addr1 = ip1->src_address.as_u32;
1880           if (!is_output_feature)
1881             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1882
1883           sum1 = ip1->checksum;
1884           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1885                                  ip4_header_t,
1886                                  src_address /* changed member */);
1887           ip1->checksum = ip_csum_fold (sum1);
1888
1889           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1890             {
1891               old_port1 = tcp1->src_port;
1892               tcp1->src_port = s1->out2in.port;
1893               new_port1 = tcp1->src_port;
1894
1895               sum1 = tcp1->checksum;
1896               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1897                                      ip4_header_t,
1898                                      dst_address /* changed member */);
1899               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1900                                      ip4_header_t /* cheat */,
1901                                      length /* changed member */);
1902               tcp1->checksum = ip_csum_fold(sum1);
1903             }
1904           else
1905             {
1906               old_port1 = udp1->src_port;
1907               udp1->src_port = s1->out2in.port;
1908               udp1->checksum = 0;
1909             }
1910
1911           /* Hairpinning */
1912           if (!is_output_feature)
1913             snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
1914
1915           /* Accounting */
1916           s1->last_heard = now;
1917           s1->total_pkts++;
1918           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1919           /* Per-user LRU list maintenance for dynamic translation */
1920           if (!snat_is_session_static (s1))
1921             {
1922               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1923                                  s1->per_user_index);
1924               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1925                                   s1->per_user_list_head_index,
1926                                   s1->per_user_index);
1927             }
1928         trace01:
1929
1930           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1931                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1932             {
1933               snat_in2out_trace_t *t =
1934                  vlib_add_trace (vm, node, b1, sizeof (*t));
1935               t->sw_if_index = sw_if_index1;
1936               t->next_index = next1;
1937               t->session_index = ~0;
1938               if (s1)
1939                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1940             }
1941
1942           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1943
1944           /* verify speculative enqueues, maybe switch current next frame */
1945           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1946                                            to_next, n_left_to_next,
1947                                            bi0, bi1, next0, next1);
1948         }
1949
1950       while (n_left_from > 0 && n_left_to_next > 0)
1951         {
1952           u32 bi0;
1953           vlib_buffer_t * b0;
1954           u32 next0;
1955           u32 sw_if_index0;
1956           ip4_header_t * ip0;
1957           ip_csum_t sum0;
1958           u32 new_addr0, old_addr0;
1959           u16 old_port0, new_port0;
1960           udp_header_t * udp0;
1961           tcp_header_t * tcp0;
1962           icmp46_header_t * icmp0;
1963           snat_session_key_t key0;
1964           u32 rx_fib_index0;
1965           u32 proto0;
1966           snat_session_t * s0 = 0;
1967           clib_bihash_kv_8_8_t kv0, value0;
1968           u32 iph_offset0 = 0;
1969
1970           /* speculatively enqueue b0 to the current next frame */
1971           bi0 = from[0];
1972           to_next[0] = bi0;
1973           from += 1;
1974           to_next += 1;
1975           n_left_from -= 1;
1976           n_left_to_next -= 1;
1977
1978           b0 = vlib_get_buffer (vm, bi0);
1979           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1980
1981           if (is_output_feature)
1982             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1983
1984           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1985                  iph_offset0);
1986
1987           udp0 = ip4_next_header (ip0);
1988           tcp0 = (tcp_header_t *) udp0;
1989           icmp0 = (icmp46_header_t *) udp0;
1990
1991           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1992           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1993                                    sw_if_index0);
1994
1995           if (PREDICT_FALSE(ip0->ttl == 1))
1996             {
1997               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1998               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1999                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2000                                            0);
2001               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2002               goto trace0;
2003             }
2004
2005           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2006
2007           /* Next configured feature, probably ip4-lookup */
2008           if (is_slow_path)
2009             {
2010               if (PREDICT_FALSE (proto0 == ~0))
2011                 {
2012                   s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
2013                                                   thread_index, now, vm, node);
2014                   if (!s0)
2015                     next0 = SNAT_IN2OUT_NEXT_DROP;
2016                   goto trace0;
2017                 }
2018
2019               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2020                 {
2021                   next0 = icmp_in2out_slow_path
2022                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
2023                      next0, now, thread_index, &s0);
2024                   goto trace0;
2025                 }
2026             }
2027           else
2028             {
2029               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
2030                 {
2031                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2032                   goto trace0;
2033                 }
2034             }
2035
2036           key0.addr = ip0->src_address;
2037           key0.port = udp0->src_port;
2038           key0.protocol = proto0;
2039           key0.fib_index = rx_fib_index0;
2040
2041           kv0.key = key0.as_u64;
2042
2043           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out,
2044                                       &kv0, &value0))
2045             {
2046               if (is_slow_path)
2047                 {
2048                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
2049                       ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature)
2050                     goto trace0;
2051
2052                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
2053                                      &s0, node, next0, thread_index);
2054
2055                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
2056                     goto trace0;
2057                 }
2058               else
2059                 {
2060                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2061                   goto trace0;
2062                 }
2063             }
2064           else
2065             {
2066               if (PREDICT_FALSE (value0.value == ~0ULL))
2067                 {
2068                   if (is_slow_path)
2069                     {
2070                       s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
2071                                           thread_index, now, vm, node);
2072                       if (!s0)
2073                         next0 = SNAT_IN2OUT_NEXT_DROP;
2074                       goto trace0;
2075                     }
2076                   else
2077                     {
2078                       next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2079                       goto trace0;
2080                     }
2081                 }
2082               else
2083                 {
2084                   s0 = pool_elt_at_index (
2085                     sm->per_thread_data[thread_index].sessions,
2086                     value0.value);
2087                 }
2088             }
2089
2090           old_addr0 = ip0->src_address.as_u32;
2091           ip0->src_address = s0->out2in.addr;
2092           new_addr0 = ip0->src_address.as_u32;
2093           if (!is_output_feature)
2094             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
2095
2096           sum0 = ip0->checksum;
2097           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2098                                  ip4_header_t,
2099                                  src_address /* changed member */);
2100           ip0->checksum = ip_csum_fold (sum0);
2101
2102           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2103             {
2104               old_port0 = tcp0->src_port;
2105               tcp0->src_port = s0->out2in.port;
2106               new_port0 = tcp0->src_port;
2107
2108               sum0 = tcp0->checksum;
2109               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2110                                      ip4_header_t,
2111                                      dst_address /* changed member */);
2112               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2113                                      ip4_header_t /* cheat */,
2114                                      length /* changed member */);
2115               tcp0->checksum = ip_csum_fold(sum0);
2116             }
2117           else
2118             {
2119               old_port0 = udp0->src_port;
2120               udp0->src_port = s0->out2in.port;
2121               udp0->checksum = 0;
2122             }
2123
2124           /* Hairpinning */
2125           if (!is_output_feature)
2126             snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
2127
2128           /* Accounting */
2129           s0->last_heard = now;
2130           s0->total_pkts++;
2131           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
2132           /* Per-user LRU list maintenance for dynamic translation */
2133           if (!snat_is_session_static (s0))
2134             {
2135               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
2136                                  s0->per_user_index);
2137               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
2138                                   s0->per_user_list_head_index,
2139                                   s0->per_user_index);
2140             }
2141
2142         trace0:
2143           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2144                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2145             {
2146               snat_in2out_trace_t *t =
2147                  vlib_add_trace (vm, node, b0, sizeof (*t));
2148               t->is_slow_path = is_slow_path;
2149               t->sw_if_index = sw_if_index0;
2150               t->next_index = next0;
2151                   t->session_index = ~0;
2152               if (s0)
2153                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
2154             }
2155
2156           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2157
2158           /* verify speculative enqueue, maybe switch current next frame */
2159           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2160                                            to_next, n_left_to_next,
2161                                            bi0, next0);
2162         }
2163
2164       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2165     }
2166
2167   vlib_node_increment_counter (vm, stats_node_index,
2168                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2169                                pkts_processed);
2170   return frame->n_vectors;
2171 }
2172
/*
 * Node function registered for the "nat44-in2out" graph node.
 *
 * Thin wrapper around snat_in2out_node_fn_inline() with is_slow_path = 0
 * and is_output_feature = 0.  With is_slow_path off, the worker does not
 * create sessions itself: ICMP packets, packets of an unknown protocol and
 * packets with no usable in2out session are forwarded to
 * SNAT_IN2OUT_NEXT_SLOW_PATH instead.
 *
 * Returns the worker's return value (frame->n_vectors).
 */
2173 static uword
2174 snat_in2out_fast_path_fn (vlib_main_t * vm,
2175                           vlib_node_runtime_t * node,
2176                           vlib_frame_t * frame)
2177 {
2178   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0 /* is_output_feature */);
2179 }
2180
/*
 * Graph node "nat44-in2out": fast-path source NAT (inside to outside).
 *
 * Next-node arcs as used by the shared worker:
 *   DROP       - packet could not be translated (e.g. no session created)
 *   LOOKUP     - default disposition after translation ("ip4-lookup")
 *   SLOW_PATH  - hand-off to "nat44-in2out-slowpath" for ICMP, unknown
 *                protocols and in2out lookup misses
 *   ICMP_ERROR - packets arriving with TTL == 1 (time exceeded)
 *
 * Trace records are rendered by format_snat_in2out_trace().
 */
2181 VLIB_REGISTER_NODE (snat_in2out_node) = {
2182   .function = snat_in2out_fast_path_fn,
2183   .name = "nat44-in2out",
2184   .vector_size = sizeof (u32),
2185   .format_trace = format_snat_in2out_trace,
2186   .type = VLIB_NODE_TYPE_INTERNAL,
2187
2188   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2189   .error_strings = snat_in2out_error_strings,
2190
2191   .runtime_data_bytes = sizeof (snat_runtime_t),
2192
2193   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2194
2195   /* Next-node arcs, indexed by SNAT_IN2OUT_NEXT_*; edit / add dispositions here */
2196   .next_nodes = {
2197     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2198     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2199     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2200     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2201   },
2202 };
2203
/* NOTE(review): presumably emits per-CPU-architecture variants of the node
 * function - confirm against the macro definition. */
2204 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
2205
/*
 * Node function registered for the "nat44-in2out-output" graph node.
 *
 * Thin wrapper around snat_in2out_node_fn_inline() with is_slow_path = 0
 * and is_output_feature = 1.  With is_output_feature set, the worker
 * locates the IP header at vnet_buffer (b)->ip.save_rewrite_length bytes
 * past the current buffer position, leaves sw_if_index[VLIB_TX] untouched
 * after translation and skips hairpinning.  As in the other fast path,
 * anything needing session creation is sent to SNAT_IN2OUT_NEXT_SLOW_PATH.
 *
 * Returns the worker's return value (frame->n_vectors).
 */
2206 static uword
2207 snat_in2out_output_fast_path_fn (vlib_main_t * vm,
2208                                  vlib_node_runtime_t * node,
2209                                  vlib_frame_t * frame)
2210 {
2211   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1 /* is_output_feature */);
2212 }
2213
/*
 * Graph node "nat44-in2out-output": fast-path in2out NAT run as an output
 * feature.
 *
 * Same arc layout as "nat44-in2out", except that translated packets
 * (SNAT_IN2OUT_NEXT_LOOKUP) continue to "interface-output" and the slow
 * path hand-off goes to "nat44-in2out-output-slowpath".
 *
 * Trace records are rendered by format_snat_in2out_trace().
 */
2214 VLIB_REGISTER_NODE (snat_in2out_output_node) = {
2215   .function = snat_in2out_output_fast_path_fn,
2216   .name = "nat44-in2out-output",
2217   .vector_size = sizeof (u32),
2218   .format_trace = format_snat_in2out_trace,
2219   .type = VLIB_NODE_TYPE_INTERNAL,
2220
2221   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2222   .error_strings = snat_in2out_error_strings,
2223
2224   .runtime_data_bytes = sizeof (snat_runtime_t),
2225
2226   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2227
2228   /* Next-node arcs, indexed by SNAT_IN2OUT_NEXT_*; edit / add dispositions here */
2229   .next_nodes = {
2230     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2231     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2232     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2233     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2234   },
2235 };
2236
/* NOTE(review): presumably emits per-CPU-architecture variants of the node
 * function - confirm against the macro definition. */
2237 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
2238                               snat_in2out_output_fast_path_fn);
2239
/*
 * Node function registered for the "nat44-in2out-slowpath" graph node.
 *
 * Thin wrapper around snat_in2out_node_fn_inline() with is_slow_path = 1
 * and is_output_feature = 0.  With is_slow_path set, the worker handles
 * the cases the fast path hands off: unknown protocols go through
 * snat_in2out_unknown_proto(), ICMP through icmp_in2out_slow_path(),
 * in2out lookup misses through slow_path() (unless snat_not_translate()
 * says the packet should bypass NAT), and entries whose stored value is
 * ~0ULL through snat_in2out_lb().
 *
 * Returns the worker's return value (frame->n_vectors).
 */
2240 static uword
2241 snat_in2out_slow_path_fn (vlib_main_t * vm,
2242                           vlib_node_runtime_t * node,
2243                           vlib_frame_t * frame)
2244 {
2245   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0 /* is_output_feature */);
2246 }
2247
/*
 * Graph node "nat44-in2out-slowpath": target of the SLOW_PATH arc of
 * "nat44-in2out".
 *
 * NOTE(review): the SLOW_PATH arc below points back at this node.  In the
 * visible part of the shared worker SNAT_IN2OUT_NEXT_SLOW_PATH is only
 * assigned when is_slow_path is 0, so the arc presumably exists just to
 * keep the next-index layout identical across all in2out nodes - confirm.
 *
 * Trace records are rendered by format_snat_in2out_trace().
 */
2248 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
2249   .function = snat_in2out_slow_path_fn,
2250   .name = "nat44-in2out-slowpath",
2251   .vector_size = sizeof (u32),
2252   .format_trace = format_snat_in2out_trace,
2253   .type = VLIB_NODE_TYPE_INTERNAL,
2254
2255   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2256   .error_strings = snat_in2out_error_strings,
2257
2258   .runtime_data_bytes = sizeof (snat_runtime_t),
2259
2260   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2261
2262   /* Next-node arcs, indexed by SNAT_IN2OUT_NEXT_*; edit / add dispositions here */
2263   .next_nodes = {
2264     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2265     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2266     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2267     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2268   },
2269 };
2270
/* NOTE(review): presumably emits per-CPU-architecture variants of the node
 * function - confirm against the macro definition. */
2271 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
2272                               snat_in2out_slow_path_fn);
2273
/*
 * Node function registered for the "nat44-in2out-output-slowpath" graph
 * node.
 *
 * Thin wrapper around snat_in2out_node_fn_inline() with is_slow_path = 1
 * and is_output_feature = 1: the worker performs slow-path handling
 * (session creation, ICMP, unknown protocols) itself, reads the IP header
 * at the saved rewrite-length offset, does not rewrite
 * sw_if_index[VLIB_TX], skips hairpinning, and does not take the
 * snat_not_translate() bypass (that check is gated on !is_output_feature).
 *
 * Returns the worker's return value (frame->n_vectors).
 */
2274 static uword
2275 snat_in2out_output_slow_path_fn (vlib_main_t * vm,
2276                                  vlib_node_runtime_t * node,
2277                                  vlib_frame_t * frame)
2278 {
2279   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1 /* is_output_feature */);
2280 }
2281
/*
 * Graph node "nat44-in2out-output-slowpath": target of the SLOW_PATH arc
 * of "nat44-in2out-output".  Translated packets continue to
 * "interface-output".
 *
 * NOTE(review): the SLOW_PATH arc below points back at this node.  In the
 * visible part of the shared worker SNAT_IN2OUT_NEXT_SLOW_PATH is only
 * assigned when is_slow_path is 0, so the arc presumably exists just to
 * keep the next-index layout identical across all in2out nodes - confirm.
 *
 * Trace records are rendered by format_snat_in2out_trace().
 */
2282 VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
2283   .function = snat_in2out_output_slow_path_fn,
2284   .name = "nat44-in2out-output-slowpath",
2285   .vector_size = sizeof (u32),
2286   .format_trace = format_snat_in2out_trace,
2287   .type = VLIB_NODE_TYPE_INTERNAL,
2288
2289   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2290   .error_strings = snat_in2out_error_strings,
2291
2292   .runtime_data_bytes = sizeof (snat_runtime_t),
2293
2294   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2295
2296   /* Next-node arcs, indexed by SNAT_IN2OUT_NEXT_*; edit / add dispositions here */
2297   .next_nodes = {
2298     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2299     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2300     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2301     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2302   },
2303 };
2304
/* NOTE(review): presumably emits per-CPU-architecture variants of the node
 * function - confirm against the macro definition. */
2305 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
2306                               snat_in2out_output_slow_path_fn);
2307
2308 /**************************/
2309 /*** deterministic mode ***/
2310 /**************************/
2311 static uword
2312 snat_det_in2out_node_fn (vlib_main_t * vm,
2313                          vlib_node_runtime_t * node,
2314                          vlib_frame_t * frame)
2315 {
2316   u32 n_left_from, * from, * to_next;
2317   snat_in2out_next_t next_index;
2318   u32 pkts_processed = 0;
2319   snat_main_t * sm = &snat_main;
2320   u32 now = (u32) vlib_time_now (vm);
2321   u32 thread_index = vlib_get_thread_index ();
2322
2323   from = vlib_frame_vector_args (frame);
2324   n_left_from = frame->n_vectors;
2325   next_index = node->cached_next_index;
2326
2327   while (n_left_from > 0)
2328     {
2329       u32 n_left_to_next;
2330
2331       vlib_get_next_frame (vm, node, next_index,
2332                            to_next, n_left_to_next);
2333
2334       while (n_left_from >= 4 && n_left_to_next >= 2)
2335         {
2336           u32 bi0, bi1;
2337           vlib_buffer_t * b0, * b1;
2338           u32 next0, next1;
2339           u32 sw_if_index0, sw_if_index1;
2340           ip4_header_t * ip0, * ip1;
2341           ip_csum_t sum0, sum1;
2342           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
2343           u16 old_port0, new_port0, lo_port0, i0;
2344           u16 old_port1, new_port1, lo_port1, i1;
2345           udp_header_t * udp0, * udp1;
2346           tcp_header_t * tcp0, * tcp1;
2347           u32 proto0, proto1;
2348           snat_det_out_key_t key0, key1;
2349           snat_det_map_t * dm0, * dm1;
2350           snat_det_session_t * ses0 = 0, * ses1 = 0;
2351           u32 rx_fib_index0, rx_fib_index1;
2352           icmp46_header_t * icmp0, * icmp1;
2353
2354           /* Prefetch next iteration. */
2355           {
2356             vlib_buffer_t * p2, * p3;
2357
2358             p2 = vlib_get_buffer (vm, from[2]);
2359             p3 = vlib_get_buffer (vm, from[3]);
2360
2361             vlib_prefetch_buffer_header (p2, LOAD);
2362             vlib_prefetch_buffer_header (p3, LOAD);
2363
2364             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2365             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2366           }
2367
2368           /* speculatively enqueue b0 and b1 to the current next frame */
2369           to_next[0] = bi0 = from[0];
2370           to_next[1] = bi1 = from[1];
2371           from += 2;
2372           to_next += 2;
2373           n_left_from -= 2;
2374           n_left_to_next -= 2;
2375
2376           b0 = vlib_get_buffer (vm, bi0);
2377           b1 = vlib_get_buffer (vm, bi1);
2378
2379           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2380           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
2381
2382           ip0 = vlib_buffer_get_current (b0);
2383           udp0 = ip4_next_header (ip0);
2384           tcp0 = (tcp_header_t *) udp0;
2385
2386           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2387
2388           if (PREDICT_FALSE(ip0->ttl == 1))
2389             {
2390               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2391               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2392                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2393                                            0);
2394               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2395               goto trace0;
2396             }
2397
2398           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2399
2400           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2401             {
2402               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2403               icmp0 = (icmp46_header_t *) udp0;
2404
2405               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2406                                   rx_fib_index0, node, next0, thread_index,
2407                                   &ses0, &dm0);
2408               goto trace0;
2409             }
2410
2411           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2412           if (PREDICT_FALSE(!dm0))
2413             {
2414               clib_warning("no match for internal host %U",
2415                            format_ip4_address, &ip0->src_address);
2416               next0 = SNAT_IN2OUT_NEXT_DROP;
2417               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2418               goto trace0;
2419             }
2420
2421           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2422
2423           key0.ext_host_addr = ip0->dst_address;
2424           key0.ext_host_port = tcp0->dst;
2425
2426           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2427           if (PREDICT_FALSE(!ses0))
2428             {
2429               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2430                 {
2431                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2432                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2433
2434                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2435                     continue;
2436
2437                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2438                   break;
2439                 }
2440               if (PREDICT_FALSE(!ses0))
2441                 {
2442                   /* too many sessions for user, send ICMP error packet */
2443
2444                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2445                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2446                                                ICMP4_destination_unreachable_destination_unreachable_host,
2447                                                0);
2448                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2449                   goto trace0;
2450                 }
2451             }
2452
2453           new_port0 = ses0->out.out_port;
2454
2455           old_addr0.as_u32 = ip0->src_address.as_u32;
2456           ip0->src_address.as_u32 = new_addr0.as_u32;
2457           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2458
2459           sum0 = ip0->checksum;
2460           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2461                                  ip4_header_t,
2462                                  src_address /* changed member */);
2463           ip0->checksum = ip_csum_fold (sum0);
2464
2465           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2466             {
2467               if (tcp0->flags & TCP_FLAG_SYN)
2468                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2469               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2470                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2471               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2472                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2473               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2474                 snat_det_ses_close(dm0, ses0);
2475               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2476                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2477               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2478                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2479
2480               old_port0 = tcp0->src;
2481               tcp0->src = new_port0;
2482
2483               sum0 = tcp0->checksum;
2484               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2485                                      ip4_header_t,
2486                                      dst_address /* changed member */);
2487               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2488                                      ip4_header_t /* cheat */,
2489                                      length /* changed member */);
2490               tcp0->checksum = ip_csum_fold(sum0);
2491             }
2492           else
2493             {
2494               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2495               old_port0 = udp0->src_port;
2496               udp0->src_port = new_port0;
2497               udp0->checksum = 0;
2498             }
2499
2500           switch(ses0->state)
2501             {
2502             case SNAT_SESSION_UDP_ACTIVE:
2503                 ses0->expire = now + sm->udp_timeout;
2504                 break;
2505             case SNAT_SESSION_TCP_SYN_SENT:
2506             case SNAT_SESSION_TCP_FIN_WAIT:
2507             case SNAT_SESSION_TCP_CLOSE_WAIT:
2508             case SNAT_SESSION_TCP_LAST_ACK:
2509                 ses0->expire = now + sm->tcp_transitory_timeout;
2510                 break;
2511             case SNAT_SESSION_TCP_ESTABLISHED:
2512                 ses0->expire = now + sm->tcp_established_timeout;
2513                 break;
2514             }
2515
2516         trace0:
2517           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2518                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2519             {
2520               snat_in2out_trace_t *t =
2521                  vlib_add_trace (vm, node, b0, sizeof (*t));
2522               t->is_slow_path = 0;
2523               t->sw_if_index = sw_if_index0;
2524               t->next_index = next0;
2525               t->session_index = ~0;
2526               if (ses0)
2527                 t->session_index = ses0 - dm0->sessions;
2528             }
2529
2530           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2531
2532           ip1 = vlib_buffer_get_current (b1);
2533           udp1 = ip4_next_header (ip1);
2534           tcp1 = (tcp_header_t *) udp1;
2535
2536           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2537
2538           if (PREDICT_FALSE(ip1->ttl == 1))
2539             {
2540               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2541               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2542                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2543                                            0);
2544               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2545               goto trace1;
2546             }
2547
2548           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2549
2550           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
2551             {
2552               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
2553               icmp1 = (icmp46_header_t *) udp1;
2554
2555               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
2556                                   rx_fib_index1, node, next1, thread_index,
2557                                   &ses1, &dm1);
2558               goto trace1;
2559             }
2560
2561           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
2562           if (PREDICT_FALSE(!dm1))
2563             {
2564               clib_warning("no match for internal host %U",
2565                            format_ip4_address, &ip0->src_address);
2566               next1 = SNAT_IN2OUT_NEXT_DROP;
2567               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2568               goto trace1;
2569             }
2570
2571           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
2572
2573           key1.ext_host_addr = ip1->dst_address;
2574           key1.ext_host_port = tcp1->dst;
2575
2576           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
2577           if (PREDICT_FALSE(!ses1))
2578             {
2579               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
2580                 {
2581                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
2582                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
2583
2584                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
2585                     continue;
2586
2587                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
2588                   break;
2589                 }
2590               if (PREDICT_FALSE(!ses1))
2591                 {
2592                   /* too many sessions for user, send ICMP error packet */
2593
2594                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2595                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
2596                                                ICMP4_destination_unreachable_destination_unreachable_host,
2597                                                0);
2598                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2599                   goto trace1;
2600                 }
2601             }
2602
2603           new_port1 = ses1->out.out_port;
2604
2605           old_addr1.as_u32 = ip1->src_address.as_u32;
2606           ip1->src_address.as_u32 = new_addr1.as_u32;
2607           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2608
2609           sum1 = ip1->checksum;
2610           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2611                                  ip4_header_t,
2612                                  src_address /* changed member */);
2613           ip1->checksum = ip_csum_fold (sum1);
2614
2615           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
2616             {
2617               if (tcp1->flags & TCP_FLAG_SYN)
2618                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
2619               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
2620                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2621               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
2622                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
2623               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
2624                 snat_det_ses_close(dm1, ses1);
2625               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2626                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
2627               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
2628                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2629
2630               old_port1 = tcp1->src;
2631               tcp1->src = new_port1;
2632
2633               sum1 = tcp1->checksum;
2634               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2635                                      ip4_header_t,
2636                                      dst_address /* changed member */);
2637               sum1 = ip_csum_update (sum1, old_port1, new_port1,
2638                                      ip4_header_t /* cheat */,
2639                                      length /* changed member */);
2640               tcp1->checksum = ip_csum_fold(sum1);
2641             }
2642           else
2643             {
2644               ses1->state = SNAT_SESSION_UDP_ACTIVE;
2645               old_port1 = udp1->src_port;
2646               udp1->src_port = new_port1;
2647               udp1->checksum = 0;
2648             }
2649
2650           switch(ses1->state)
2651             {
2652             case SNAT_SESSION_UDP_ACTIVE:
2653                 ses1->expire = now + sm->udp_timeout;
2654                 break;
2655             case SNAT_SESSION_TCP_SYN_SENT:
2656             case SNAT_SESSION_TCP_FIN_WAIT:
2657             case SNAT_SESSION_TCP_CLOSE_WAIT:
2658             case SNAT_SESSION_TCP_LAST_ACK:
2659                 ses1->expire = now + sm->tcp_transitory_timeout;
2660                 break;
2661             case SNAT_SESSION_TCP_ESTABLISHED:
2662                 ses1->expire = now + sm->tcp_established_timeout;
2663                 break;
2664             }
2665
2666         trace1:
2667           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2668                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2669             {
2670               snat_in2out_trace_t *t =
2671                  vlib_add_trace (vm, node, b1, sizeof (*t));
2672               t->is_slow_path = 0;
2673               t->sw_if_index = sw_if_index1;
2674               t->next_index = next1;
2675               t->session_index = ~0;
2676               if (ses1)
2677                 t->session_index = ses1 - dm1->sessions;
2678             }
2679
2680           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
2681
2682           /* verify speculative enqueues, maybe switch current next frame */
2683           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2684                                            to_next, n_left_to_next,
2685                                            bi0, bi1, next0, next1);
2686          }
2687
2688       while (n_left_from > 0 && n_left_to_next > 0)
2689         {
2690           u32 bi0;
2691           vlib_buffer_t * b0;
2692           u32 next0;
2693           u32 sw_if_index0;
2694           ip4_header_t * ip0;
2695           ip_csum_t sum0;
2696           ip4_address_t new_addr0, old_addr0;
2697           u16 old_port0, new_port0, lo_port0, i0;
2698           udp_header_t * udp0;
2699           tcp_header_t * tcp0;
2700           u32 proto0;
2701           snat_det_out_key_t key0;
2702           snat_det_map_t * dm0;
2703           snat_det_session_t * ses0 = 0;
2704           u32 rx_fib_index0;
2705           icmp46_header_t * icmp0;
2706
2707           /* speculatively enqueue b0 to the current next frame */
2708           bi0 = from[0];
2709           to_next[0] = bi0;
2710           from += 1;
2711           to_next += 1;
2712           n_left_from -= 1;
2713           n_left_to_next -= 1;
2714
2715           b0 = vlib_get_buffer (vm, bi0);
2716           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2717
2718           ip0 = vlib_buffer_get_current (b0);
2719           udp0 = ip4_next_header (ip0);
2720           tcp0 = (tcp_header_t *) udp0;
2721
2722           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2723
2724           if (PREDICT_FALSE(ip0->ttl == 1))
2725             {
2726               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2727               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2728                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2729                                            0);
2730               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2731               goto trace00;
2732             }
2733
2734           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2735
2736           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2737             {
2738               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2739               icmp0 = (icmp46_header_t *) udp0;
2740
2741               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2742                                   rx_fib_index0, node, next0, thread_index,
2743                                   &ses0, &dm0);
2744               goto trace00;
2745             }
2746
2747           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2748           if (PREDICT_FALSE(!dm0))
2749             {
2750               clib_warning("no match for internal host %U",
2751                            format_ip4_address, &ip0->src_address);
2752               next0 = SNAT_IN2OUT_NEXT_DROP;
2753               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2754               goto trace00;
2755             }
2756
2757           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2758
2759           key0.ext_host_addr = ip0->dst_address;
2760           key0.ext_host_port = tcp0->dst;
2761
2762           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2763           if (PREDICT_FALSE(!ses0))
2764             {
2765               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2766                 {
2767                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2768                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2769
2770                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2771                     continue;
2772
2773                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2774                   break;
2775                 }
2776               if (PREDICT_FALSE(!ses0))
2777                 {
2778                   /* too many sessions for user, send ICMP error packet */
2779
2780                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2781                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2782                                                ICMP4_destination_unreachable_destination_unreachable_host,
2783                                                0);
2784                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2785                   goto trace00;
2786                 }
2787             }
2788
2789           new_port0 = ses0->out.out_port;
2790
2791           old_addr0.as_u32 = ip0->src_address.as_u32;
2792           ip0->src_address.as_u32 = new_addr0.as_u32;
2793           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2794
2795           sum0 = ip0->checksum;
2796           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2797                                  ip4_header_t,
2798                                  src_address /* changed member */);
2799           ip0->checksum = ip_csum_fold (sum0);
2800
2801           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2802             {
2803               if (tcp0->flags & TCP_FLAG_SYN)
2804                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2805               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2806                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2807               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2808                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2809               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2810                 snat_det_ses_close(dm0, ses0);
2811               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2812                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2813               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2814                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2815
2816               old_port0 = tcp0->src;
2817               tcp0->src = new_port0;
2818
2819               sum0 = tcp0->checksum;
2820               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2821                                      ip4_header_t,
2822                                      dst_address /* changed member */);
2823               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2824                                      ip4_header_t /* cheat */,
2825                                      length /* changed member */);
2826               tcp0->checksum = ip_csum_fold(sum0);
2827             }
2828           else
2829             {
2830               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2831               old_port0 = udp0->src_port;
2832               udp0->src_port = new_port0;
2833               udp0->checksum = 0;
2834             }
2835
2836           switch(ses0->state)
2837             {
2838             case SNAT_SESSION_UDP_ACTIVE:
2839                 ses0->expire = now + sm->udp_timeout;
2840                 break;
2841             case SNAT_SESSION_TCP_SYN_SENT:
2842             case SNAT_SESSION_TCP_FIN_WAIT:
2843             case SNAT_SESSION_TCP_CLOSE_WAIT:
2844             case SNAT_SESSION_TCP_LAST_ACK:
2845                 ses0->expire = now + sm->tcp_transitory_timeout;
2846                 break;
2847             case SNAT_SESSION_TCP_ESTABLISHED:
2848                 ses0->expire = now + sm->tcp_established_timeout;
2849                 break;
2850             }
2851
2852         trace00:
2853           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2854                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2855             {
2856               snat_in2out_trace_t *t =
2857                  vlib_add_trace (vm, node, b0, sizeof (*t));
2858               t->is_slow_path = 0;
2859               t->sw_if_index = sw_if_index0;
2860               t->next_index = next0;
2861               t->session_index = ~0;
2862               if (ses0)
2863                 t->session_index = ses0 - dm0->sessions;
2864             }
2865
2866           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2867
2868           /* verify speculative enqueue, maybe switch current next frame */
2869           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2870                                            to_next, n_left_to_next,
2871                                            bi0, next0);
2872         }
2873
2874       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2875     }
2876
2877   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
2878                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2879                                pkts_processed);
2880   return frame->n_vectors;
2881 }
2882
2883 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
2884   .function = snat_det_in2out_node_fn,
2885   .name = "nat44-det-in2out",
2886   .vector_size = sizeof (u32),
2887   .format_trace = format_snat_in2out_trace,
2888   .type = VLIB_NODE_TYPE_INTERNAL,
2889
2890   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2891   .error_strings = snat_in2out_error_strings,
2892
2893   .runtime_data_bytes = sizeof (snat_runtime_t),
2894
2895   .n_next_nodes = 3,
2896
2897   /* edit / add dispositions here */
2898   .next_nodes = {
2899     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2900     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2901     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2902   },
2903 };
2904
2905 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
2906
2907 /**
2908  * Get address and port values to be used for ICMP packet translation
2909  * and create session if needed
2910  *
2911  * @param[in,out] sm             NAT main
2912  * @param[in,out] node           NAT node runtime
2913  * @param[in] thread_index       thread index
2914  * @param[in,out] b0             buffer containing packet to be translated
2915  * @param[out] p_proto           protocol used for matching
2916  * @param[out] p_value           address and port after NAT translation
2917  * @param[out] p_dont_translate  if packet should not be translated
2918  * @param d                      optional parameter
2919  * @param e                      optional parameter
2920  */
2921 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
2922                           u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
2923                           snat_session_key_t *p_value,
2924                           u8 *p_dont_translate, void *d, void *e)
2925 {
2926   ip4_header_t *ip0;
2927   icmp46_header_t *icmp0;
2928   u32 sw_if_index0;
2929   u32 rx_fib_index0;
2930   u8 protocol;
2931   snat_det_out_key_t key0;
2932   u8 dont_translate = 0;
2933   u32 next0 = ~0;
2934   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2935   ip4_header_t *inner_ip0;
2936   void *l4_header = 0;
2937   icmp46_header_t *inner_icmp0;
2938   snat_det_map_t * dm0 = 0;
2939   ip4_address_t new_addr0;
2940   u16 lo_port0, i0;
2941   snat_det_session_t * ses0 = 0;
2942   ip4_address_t in_addr;
2943   u16 in_port;
2944
2945   ip0 = vlib_buffer_get_current (b0);
2946   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2947   echo0 = (icmp_echo_header_t *)(icmp0+1);
2948   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2949   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
2950
2951   if (!icmp_is_error_message (icmp0))
2952     {
2953       protocol = SNAT_PROTOCOL_ICMP;
2954       in_addr = ip0->src_address;
2955       in_port = echo0->identifier;
2956     }
2957   else
2958     {
2959       inner_ip0 = (ip4_header_t *)(echo0+1);
2960       l4_header = ip4_next_header (inner_ip0);
2961       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2962       in_addr = inner_ip0->dst_address;
2963       switch (protocol)
2964         {
2965         case SNAT_PROTOCOL_ICMP:
2966           inner_icmp0 = (icmp46_header_t*)l4_header;
2967           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2968           in_port = inner_echo0->identifier;
2969           break;
2970         case SNAT_PROTOCOL_UDP:
2971         case SNAT_PROTOCOL_TCP:
2972           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2973           break;
2974         default:
2975           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
2976           next0 = SNAT_IN2OUT_NEXT_DROP;
2977           goto out;
2978         }
2979     }
2980
2981   dm0 = snat_det_map_by_user(sm, &in_addr);
2982   if (PREDICT_FALSE(!dm0))
2983     {
2984       clib_warning("no match for internal host %U",
2985                    format_ip4_address, &in_addr);
2986       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2987           IP_PROTOCOL_ICMP, rx_fib_index0)))
2988         {
2989           dont_translate = 1;
2990           goto out;
2991         }
2992       next0 = SNAT_IN2OUT_NEXT_DROP;
2993       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2994       goto out;
2995     }
2996
2997   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
2998
2999   key0.ext_host_addr = ip0->dst_address;
3000   key0.ext_host_port = 0;
3001
3002   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
3003   if (PREDICT_FALSE(!ses0))
3004     {
3005       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
3006           IP_PROTOCOL_ICMP, rx_fib_index0)))
3007         {
3008           dont_translate = 1;
3009           goto out;
3010         }
3011       if (icmp0->type != ICMP4_echo_request)
3012         {
3013           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3014           next0 = SNAT_IN2OUT_NEXT_DROP;
3015           goto out;
3016         }
3017       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
3018         {
3019           key0.out_port = clib_host_to_net_u16 (lo_port0 +
3020             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
3021
3022           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
3023             continue;
3024
3025           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
3026           break;
3027         }
3028       if (PREDICT_FALSE(!ses0))
3029         {
3030           next0 = SNAT_IN2OUT_NEXT_DROP;
3031           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
3032           goto out;
3033         }
3034     }
3035
3036   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
3037                     !icmp_is_error_message (icmp0)))
3038     {
3039       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3040       next0 = SNAT_IN2OUT_NEXT_DROP;
3041       goto out;
3042     }
3043
3044   u32 now = (u32) vlib_time_now (sm->vlib_main);
3045
3046   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
3047   ses0->expire = now + sm->icmp_timeout;
3048
3049 out:
3050   *p_proto = protocol;
3051   if (ses0)
3052     {
3053       p_value->addr = new_addr0;
3054       p_value->fib_index = sm->outside_fib_index;
3055       p_value->port = ses0->out.out_port;
3056     }
3057   *p_dont_translate = dont_translate;
3058   if (d)
3059     *(snat_det_session_t**)d = ses0;
3060   if (e)
3061     *(snat_det_map_t**)e = dm0;
3062   return next0;
3063 }
3064
3065 /**********************/
3066 /*** worker handoff ***/
3067 /**********************/
3068 static inline uword
3069 snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
3070                                       vlib_node_runtime_t * node,
3071                                       vlib_frame_t * frame,
3072                                       u8 is_output)
3073 {
3074   snat_main_t *sm = &snat_main;
3075   vlib_thread_main_t *tm = vlib_get_thread_main ();
3076   u32 n_left_from, *from, *to_next = 0;
3077   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
3078   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
3079     = 0;
3080   vlib_frame_queue_elt_t *hf = 0;
3081   vlib_frame_t *f = 0;
3082   int i;
3083   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
3084   u32 next_worker_index = 0;
3085   u32 current_worker_index = ~0;
3086   u32 thread_index = vlib_get_thread_index ();
3087   u32 fq_index;
3088   u32 to_node_index;
3089
3090   ASSERT (vec_len (sm->workers));
3091
3092   if (is_output)
3093     {
3094       fq_index = sm->fq_in2out_output_index;
3095       to_node_index = sm->in2out_output_node_index;
3096     }
3097   else
3098     {
3099       fq_index = sm->fq_in2out_index;
3100       to_node_index = sm->in2out_node_index;
3101     }
3102
3103   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
3104     {
3105       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
3106
3107       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
3108                                sm->first_worker_index + sm->num_workers - 1,
3109                                (vlib_frame_queue_t *) (~0));
3110     }
3111
3112   from = vlib_frame_vector_args (frame);
3113   n_left_from = frame->n_vectors;
3114
3115   while (n_left_from > 0)
3116     {
3117       u32 bi0;
3118       vlib_buffer_t *b0;
3119       u32 sw_if_index0;
3120       u32 rx_fib_index0;
3121       ip4_header_t * ip0;
3122       u8 do_handoff;
3123
3124       bi0 = from[0];
3125       from += 1;
3126       n_left_from -= 1;
3127
3128       b0 = vlib_get_buffer (vm, bi0);
3129
3130       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
3131       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3132
3133       ip0 = vlib_buffer_get_current (b0);
3134
3135       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
3136
3137       if (PREDICT_FALSE (next_worker_index != thread_index))
3138         {
3139           do_handoff = 1;
3140
3141           if (next_worker_index != current_worker_index)
3142             {
3143               if (hf)
3144                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3145
3146               hf = vlib_get_worker_handoff_queue_elt (fq_index,
3147                                                       next_worker_index,
3148                                                       handoff_queue_elt_by_worker_index);
3149
3150               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
3151               to_next_worker = &hf->buffer_index[hf->n_vectors];
3152               current_worker_index = next_worker_index;
3153             }
3154
3155           /* enqueue to correct worker thread */
3156           to_next_worker[0] = bi0;
3157           to_next_worker++;
3158           n_left_to_next_worker--;
3159
3160           if (n_left_to_next_worker == 0)
3161             {
3162               hf->n_vectors = VLIB_FRAME_SIZE;
3163               vlib_put_frame_queue_elt (hf);
3164               current_worker_index = ~0;
3165               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
3166               hf = 0;
3167             }
3168         }
3169       else
3170         {
3171           do_handoff = 0;
3172           /* if this is 1st frame */
3173           if (!f)
3174             {
3175               f = vlib_get_frame_to_node (vm, to_node_index);
3176               to_next = vlib_frame_vector_args (f);
3177             }
3178
3179           to_next[0] = bi0;
3180           to_next += 1;
3181           f->n_vectors++;
3182         }
3183
3184       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
3185                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3186         {
3187           snat_in2out_worker_handoff_trace_t *t =
3188             vlib_add_trace (vm, node, b0, sizeof (*t));
3189           t->next_worker_index = next_worker_index;
3190           t->do_handoff = do_handoff;
3191         }
3192     }
3193
3194   if (f)
3195     vlib_put_frame_to_node (vm, to_node_index, f);
3196
3197   if (hf)
3198     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3199
3200   /* Ship frames to the worker nodes */
3201   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
3202     {
3203       if (handoff_queue_elt_by_worker_index[i])
3204         {
3205           hf = handoff_queue_elt_by_worker_index[i];
3206           /*
3207            * It works better to let the handoff node
3208            * rate-adapt, always ship the handoff queue element.
3209            */
3210           if (1 || hf->n_vectors == hf->last_n_vectors)
3211             {
3212               vlib_put_frame_queue_elt (hf);
3213               handoff_queue_elt_by_worker_index[i] = 0;
3214             }
3215           else
3216             hf->last_n_vectors = hf->n_vectors;
3217         }
3218       congested_handoff_queue_by_worker_index[i] =
3219         (vlib_frame_queue_t *) (~0);
3220     }
3221   hf = 0;
3222   current_worker_index = ~0;
3223   return frame->n_vectors;
3224 }
3225
3226 static uword
3227 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
3228                                vlib_node_runtime_t * node,
3229                                vlib_frame_t * frame)
3230 {
3231   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
3232 }
3233
3234 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
3235   .function = snat_in2out_worker_handoff_fn,
3236   .name = "nat44-in2out-worker-handoff",
3237   .vector_size = sizeof (u32),
3238   .format_trace = format_snat_in2out_worker_handoff_trace,
3239   .type = VLIB_NODE_TYPE_INTERNAL,
3240
3241   .n_next_nodes = 1,
3242
3243   .next_nodes = {
3244     [0] = "error-drop",
3245   },
3246 };
3247
3248 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
3249                               snat_in2out_worker_handoff_fn);
3250
3251 static uword
3252 snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
3253                                       vlib_node_runtime_t * node,
3254                                       vlib_frame_t * frame)
3255 {
3256   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
3257 }
3258
3259 VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
3260   .function = snat_in2out_output_worker_handoff_fn,
3261   .name = "nat44-in2out-output-worker-handoff",
3262   .vector_size = sizeof (u32),
3263   .format_trace = format_snat_in2out_worker_handoff_trace,
3264   .type = VLIB_NODE_TYPE_INTERNAL,
3265
3266   .n_next_nodes = 1,
3267
3268   .next_nodes = {
3269     [0] = "error-drop",
3270   },
3271 };
3272
3273 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
3274                               snat_in2out_output_worker_handoff_fn);
3275
3276 static_always_inline int
3277 is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
3278 {
3279   snat_address_t * ap;
3280   clib_bihash_kv_8_8_t kv, value;
3281   snat_session_key_t m_key;
3282
3283   vec_foreach (ap, sm->addresses)
3284     {
3285       if (ap->addr.as_u32 == dst_addr->as_u32)
3286         return 1;
3287     }
3288
3289   m_key.addr.as_u32 = dst_addr->as_u32;
3290   m_key.fib_index = sm->outside_fib_index;
3291   m_key.port = 0;
3292   m_key.protocol = 0;
3293   kv.key = m_key.as_u64;
3294   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
3295     return 1;
3296
3297   return 0;
3298 }
3299
3300 static uword
3301 snat_hairpin_dst_fn (vlib_main_t * vm,
3302                      vlib_node_runtime_t * node,
3303                      vlib_frame_t * frame)
3304 {
3305   u32 n_left_from, * from, * to_next;
3306   snat_in2out_next_t next_index;
3307   u32 pkts_processed = 0;
3308   snat_main_t * sm = &snat_main;
3309
3310   from = vlib_frame_vector_args (frame);
3311   n_left_from = frame->n_vectors;
3312   next_index = node->cached_next_index;
3313
3314   while (n_left_from > 0)
3315     {
3316       u32 n_left_to_next;
3317
3318       vlib_get_next_frame (vm, node, next_index,
3319                            to_next, n_left_to_next);
3320
3321       while (n_left_from > 0 && n_left_to_next > 0)
3322         {
3323           u32 bi0;
3324           vlib_buffer_t * b0;
3325           u32 next0;
3326           ip4_header_t * ip0;
3327           u32 proto0;
3328
3329           /* speculatively enqueue b0 to the current next frame */
3330           bi0 = from[0];
3331           to_next[0] = bi0;
3332           from += 1;
3333           to_next += 1;
3334           n_left_from -= 1;
3335           n_left_to_next -= 1;
3336
3337           b0 = vlib_get_buffer (vm, bi0);
3338           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3339           ip0 = vlib_buffer_get_current (b0);
3340
3341           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3342
3343           vnet_buffer (b0)->snat.flags = 0;
3344           if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
3345             {
3346               if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
3347                 {
3348                   udp_header_t * udp0 = ip4_next_header (ip0);
3349                   tcp_header_t * tcp0 = (tcp_header_t *) udp0;
3350
3351                   snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3352                 }
3353               else if (proto0 == SNAT_PROTOCOL_ICMP)
3354                 {
3355                   icmp46_header_t * icmp0 = ip4_next_header (ip0);
3356
3357                   snat_icmp_hairpinning (sm, b0, ip0, icmp0);
3358                 }
3359               else
3360                 {
3361                   snat_hairpinning_unknown_proto (sm, b0, ip0);
3362                 }
3363
3364               vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
3365               clib_warning("is hairpinning");
3366             }
3367
3368           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3369
3370           /* verify speculative enqueue, maybe switch current next frame */
3371           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3372                                            to_next, n_left_to_next,
3373                                            bi0, next0);
3374          }
3375
3376       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3377     }
3378
3379   vlib_node_increment_counter (vm, snat_hairpin_dst_node.index,
3380                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3381                                pkts_processed);
3382   return frame->n_vectors;
3383 }
3384
3385 VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
3386   .function = snat_hairpin_dst_fn,
3387   .name = "nat44-hairpin-dst",
3388   .vector_size = sizeof (u32),
3389   .type = VLIB_NODE_TYPE_INTERNAL,
3390   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3391   .error_strings = snat_in2out_error_strings,
3392   .n_next_nodes = 2,
3393   .next_nodes = {
3394     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3395     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3396   },
3397 };
3398
3399 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node,
3400                               snat_hairpin_dst_fn);
3401
3402 static uword
3403 snat_hairpin_src_fn (vlib_main_t * vm,
3404                      vlib_node_runtime_t * node,
3405                      vlib_frame_t * frame)
3406 {
3407   u32 n_left_from, * from, * to_next;
3408   snat_in2out_next_t next_index;
3409   u32 pkts_processed = 0;
3410   snat_main_t *sm = &snat_main;
3411
3412   from = vlib_frame_vector_args (frame);
3413   n_left_from = frame->n_vectors;
3414   next_index = node->cached_next_index;
3415
3416   while (n_left_from > 0)
3417     {
3418       u32 n_left_to_next;
3419
3420       vlib_get_next_frame (vm, node, next_index,
3421                            to_next, n_left_to_next);
3422
3423       while (n_left_from > 0 && n_left_to_next > 0)
3424         {
3425           u32 bi0;
3426           vlib_buffer_t * b0;
3427           u32 next0;
3428           snat_interface_t *i;
3429           u32 sw_if_index0;
3430
3431           /* speculatively enqueue b0 to the current next frame */
3432           bi0 = from[0];
3433           to_next[0] = bi0;
3434           from += 1;
3435           to_next += 1;
3436           n_left_from -= 1;
3437           n_left_to_next -= 1;
3438
3439           b0 = vlib_get_buffer (vm, bi0);
3440           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3441           next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT;
3442
3443           pool_foreach (i, sm->output_feature_interfaces,
3444           ({
3445             /* Only packets from NAT inside interface */
3446             if ((i->is_inside == 1) && (sw_if_index0 == i->sw_if_index))
3447               {
3448                 if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) &
3449                                     SNAT_FLAG_HAIRPINNING))
3450                   {
3451                     if (PREDICT_TRUE (sm->num_workers > 1))
3452                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
3453                     else
3454                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
3455                   }
3456                 break;
3457               }
3458           }));
3459
3460           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3461
3462           /* verify speculative enqueue, maybe switch current next frame */
3463           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3464                                            to_next, n_left_to_next,
3465                                            bi0, next0);
3466          }
3467
3468       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3469     }
3470
3471   vlib_node_increment_counter (vm, snat_hairpin_src_node.index,
3472                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3473                                pkts_processed);
3474   return frame->n_vectors;
3475 }
3476
3477 VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
3478   .function = snat_hairpin_src_fn,
3479   .name = "nat44-hairpin-src",
3480   .vector_size = sizeof (u32),
3481   .type = VLIB_NODE_TYPE_INTERNAL,
3482   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3483   .error_strings = snat_in2out_error_strings,
3484   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
3485   .next_nodes = {
3486      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
3487      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-in2out-output",
3488      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
3489      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
3490   },
3491 };
3492
3493 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node,
3494                               snat_hairpin_src_fn);
3495
3496 static uword
3497 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
3498                                 vlib_node_runtime_t * node,
3499                                 vlib_frame_t * frame)
3500 {
3501   u32 n_left_from, * from, * to_next;
3502   snat_in2out_next_t next_index;
3503   u32 pkts_processed = 0;
3504   snat_main_t * sm = &snat_main;
3505   u32 stats_node_index;
3506
3507   stats_node_index = snat_in2out_fast_node.index;
3508
3509   from = vlib_frame_vector_args (frame);
3510   n_left_from = frame->n_vectors;
3511   next_index = node->cached_next_index;
3512
3513   while (n_left_from > 0)
3514     {
3515       u32 n_left_to_next;
3516
3517       vlib_get_next_frame (vm, node, next_index,
3518                            to_next, n_left_to_next);
3519
3520       while (n_left_from > 0 && n_left_to_next > 0)
3521         {
3522           u32 bi0;
3523           vlib_buffer_t * b0;
3524           u32 next0;
3525           u32 sw_if_index0;
3526           ip4_header_t * ip0;
3527           ip_csum_t sum0;
3528           u32 new_addr0, old_addr0;
3529           u16 old_port0, new_port0;
3530           udp_header_t * udp0;
3531           tcp_header_t * tcp0;
3532           icmp46_header_t * icmp0;
3533           snat_session_key_t key0, sm0;
3534           u32 proto0;
3535           u32 rx_fib_index0;
3536
3537           /* speculatively enqueue b0 to the current next frame */
3538           bi0 = from[0];
3539           to_next[0] = bi0;
3540           from += 1;
3541           to_next += 1;
3542           n_left_from -= 1;
3543           n_left_to_next -= 1;
3544
3545           b0 = vlib_get_buffer (vm, bi0);
3546           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3547
3548           ip0 = vlib_buffer_get_current (b0);
3549           udp0 = ip4_next_header (ip0);
3550           tcp0 = (tcp_header_t *) udp0;
3551           icmp0 = (icmp46_header_t *) udp0;
3552
3553           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3554           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3555
3556           if (PREDICT_FALSE(ip0->ttl == 1))
3557             {
3558               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3559               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3560                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3561                                            0);
3562               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3563               goto trace0;
3564             }
3565
3566           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3567
3568           if (PREDICT_FALSE (proto0 == ~0))
3569               goto trace0;
3570
3571           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3572             {
3573               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
3574                                   rx_fib_index0, node, next0, ~0, 0, 0);
3575               goto trace0;
3576             }
3577
3578           key0.addr = ip0->src_address;
3579           key0.protocol = proto0;
3580           key0.port = udp0->src_port;
3581           key0.fib_index = rx_fib_index0;
3582
3583           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0))
3584             {
3585               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3586               next0= SNAT_IN2OUT_NEXT_DROP;
3587               goto trace0;
3588             }
3589
3590           new_addr0 = sm0.addr.as_u32;
3591           new_port0 = sm0.port;
3592           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
3593           old_addr0 = ip0->src_address.as_u32;
3594           ip0->src_address.as_u32 = new_addr0;
3595
3596           sum0 = ip0->checksum;
3597           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3598                                  ip4_header_t,
3599                                  src_address /* changed member */);
3600           ip0->checksum = ip_csum_fold (sum0);
3601
3602           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
3603             {
3604               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3605                 {
3606                   old_port0 = tcp0->src_port;
3607                   tcp0->src_port = new_port0;
3608
3609                   sum0 = tcp0->checksum;
3610                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3611                                          ip4_header_t,
3612                                          dst_address /* changed member */);
3613                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
3614                                          ip4_header_t /* cheat */,
3615                                          length /* changed member */);
3616                   tcp0->checksum = ip_csum_fold(sum0);
3617                 }
3618               else
3619                 {
3620                   old_port0 = udp0->src_port;
3621                   udp0->src_port = new_port0;
3622                   udp0->checksum = 0;
3623                 }
3624             }
3625           else
3626             {
3627               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3628                 {
3629                   sum0 = tcp0->checksum;
3630                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3631                                          ip4_header_t,
3632                                          dst_address /* changed member */);
3633                   tcp0->checksum = ip_csum_fold(sum0);
3634                 }
3635             }
3636
3637           /* Hairpinning */
3638           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3639
3640         trace0:
3641           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3642                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3643             {
3644               snat_in2out_trace_t *t =
3645                  vlib_add_trace (vm, node, b0, sizeof (*t));
3646               t->sw_if_index = sw_if_index0;
3647               t->next_index = next0;
3648             }
3649
3650           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3651
3652           /* verify speculative enqueue, maybe switch current next frame */
3653           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3654                                            to_next, n_left_to_next,
3655                                            bi0, next0);
3656         }
3657
3658       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3659     }
3660
3661   vlib_node_increment_counter (vm, stats_node_index,
3662                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3663                                pkts_processed);
3664   return frame->n_vectors;
3665 }
3666
3667
3668 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
3669   .function = snat_in2out_fast_static_map_fn,
3670   .name = "nat44-in2out-fast",
3671   .vector_size = sizeof (u32),
3672   .format_trace = format_snat_in2out_fast_trace,
3673   .type = VLIB_NODE_TYPE_INTERNAL,
3674
3675   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3676   .error_strings = snat_in2out_error_strings,
3677
3678   .runtime_data_bytes = sizeof (snat_runtime_t),
3679
3680   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3681
3682   /* edit / add dispositions here */
3683   .next_nodes = {
3684     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3685     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3686     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
3687     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3688   },
3689 };
3690
3691 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);