NAT: fixed ICMP broken translation for GRE tunnel interface (VPP-1008)
[vpp.git] / src / plugins / nat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <nat/nat.h>
25 #include <nat/nat_ipfix_logging.h>
26 #include <nat/nat_det.h>
27
28 #include <vppinfra/hash.h>
29 #include <vppinfra/error.h>
30 #include <vppinfra/elog.h>
31
32 typedef struct {
33   u32 sw_if_index;
34   u32 next_index;
35   u32 session_index;
36   u32 is_slow_path;
37 } snat_in2out_trace_t;
38
39 typedef struct {
40   u32 next_worker_index;
41   u8 do_handoff;
42 } snat_in2out_worker_handoff_trace_t;
43
44 /* packet trace format function */
45 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
46 {
47   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
50   char * tag;
51
52   tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
53
54   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
55               t->sw_if_index, t->next_index, t->session_index);
56
57   return s;
58 }
59
60 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
61 {
62   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
63   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
64   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
65
66   s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
67               t->sw_if_index, t->next_index);
68
69   return s;
70 }
71
72 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
73 {
74   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
75   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
76   snat_in2out_worker_handoff_trace_t * t =
77     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
78   char * m;
79
80   m = t->do_handoff ? "next worker" : "same worker";
81   s = format (s, "NAT44_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
82
83   return s;
84 }
85
86 vlib_node_registration_t snat_in2out_node;
87 vlib_node_registration_t snat_in2out_slowpath_node;
88 vlib_node_registration_t snat_in2out_fast_node;
89 vlib_node_registration_t snat_in2out_worker_handoff_node;
90 vlib_node_registration_t snat_det_in2out_node;
91 vlib_node_registration_t snat_in2out_output_node;
92 vlib_node_registration_t snat_in2out_output_slowpath_node;
93 vlib_node_registration_t snat_in2out_output_worker_handoff_node;
94 vlib_node_registration_t snat_hairpin_dst_node;
95 vlib_node_registration_t snat_hairpin_src_node;
96
97
/*
 * X-macro listing the in2out error counters as _(SYMBOL, "description").
 * It is expanded once into snat_in2out_error_t and once into
 * snat_in2out_error_strings, so the two always stay in the same order.
 */
#define foreach_snat_in2out_error                               \
  _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")               \
  _(IN2OUT_PACKETS, "Good in2out packets processed")            \
  _(OUT_OF_PORTS, "Out of ports")                               \
  _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")                \
  _(BAD_ICMP_TYPE, "unsupported ICMP type")                     \
  _(NO_TRANSLATION, "No translation")                           \
  _(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")
106
107 typedef enum {
108 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
109   foreach_snat_in2out_error
110 #undef _
111   SNAT_IN2OUT_N_ERROR,
112 } snat_in2out_error_t;
113
114 static char * snat_in2out_error_strings[] = {
115 #define _(sym,string) string,
116   foreach_snat_in2out_error
117 #undef _
118 };
119
/*
 * Next-node indices returned by the in2out code paths in this file;
 * SNAT_IN2OUT_N_NEXT is the number of entries.
 */
typedef enum
{
  SNAT_IN2OUT_NEXT_LOOKUP,
  SNAT_IN2OUT_NEXT_DROP,	/* returned by the helpers below on any error */
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_N_NEXT,
} snat_in2out_next_t;
127
/*
 * Next-node indices with the SNAT_HAIRPIN_SRC_ prefix (presumably used by
 * snat_hairpin_src_node -- confirm against the node registration);
 * SNAT_HAIRPIN_SRC_N_NEXT is the number of entries.
 */
typedef enum
{
  SNAT_HAIRPIN_SRC_NEXT_DROP,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
  SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
  SNAT_HAIRPIN_SRC_N_NEXT,
} snat_hairpin_next_t;
135
136 /**
137  * @brief Check if packet should be translated
138  *
139  * Packets aimed at outside interface and external addresss with active session
140  * should be translated.
141  *
142  * @param sm            NAT main
143  * @param rt            NAT runtime data
144  * @param sw_if_index0  index of the inside interface
145  * @param ip0           IPv4 header
146  * @param proto0        NAT protocol
147  * @param rx_fib_index0 RX FIB index
148  *
149  * @returns 0 if packet should be translated otherwise 1
150  */
151 static inline int
152 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
153                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
154                          u32 rx_fib_index0)
155 {
156   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
157   fib_prefix_t pfx = {
158     .fp_proto = FIB_PROTOCOL_IP4,
159     .fp_len = 32,
160     .fp_addr = {
161         .ip4.as_u32 = ip0->dst_address.as_u32,
162     },
163   };
164
165   /* Don't NAT packet aimed at the intfc address */
166   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
167                                       ip0->dst_address.as_u32)))
168     return 1;
169
170   fei = fib_table_lookup (rx_fib_index0, &pfx);
171   if (FIB_NODE_INDEX_INVALID != fei)
172     {
173       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
174       if (sw_if_index == ~0)
175         {
176           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
177           if (FIB_NODE_INDEX_INVALID != fei)
178             sw_if_index = fib_entry_get_resolving_interface (fei);
179         }
180       snat_interface_t *i;
181       pool_foreach (i, sm->interfaces,
182       ({
183         /* NAT packet aimed at outside interface */
184         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
185           return 0;
186       }));
187     }
188
189   return 1;
190 }
191
192 static inline int
193 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
194                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
195                     u32 rx_fib_index0, u32 thread_index)
196 {
197   udp_header_t * udp0 = ip4_next_header (ip0);
198   snat_session_key_t key0, sm0;
199   clib_bihash_kv_8_8_t kv0, value0;
200
201   key0.addr = ip0->dst_address;
202   key0.port = udp0->dst_port;
203   key0.protocol = proto0;
204   key0.fib_index = sm->outside_fib_index;
205   kv0.key = key0.as_u64;
206
207   /* NAT packet aimed at external address if */
208   /* has active sessions */
209   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
210                               &value0))
211     {
212       /* or is static mappings */
213       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
214         return 0;
215     }
216   else
217     return 0;
218
219   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
220                                  rx_fib_index0);
221 }
222
223 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
224                       ip4_header_t * ip0,
225                       u32 rx_fib_index0,
226                       snat_session_key_t * key0,
227                       snat_session_t ** sessionp,
228                       vlib_node_runtime_t * node,
229                       u32 next0,
230                       u32 thread_index)
231 {
232   snat_user_t *u;
233   snat_user_key_t user_key;
234   snat_session_t *s;
235   clib_bihash_kv_8_8_t kv0, value0;
236   u32 oldest_per_user_translation_list_index;
237   dlist_elt_t * oldest_per_user_translation_list_elt;
238   dlist_elt_t * per_user_translation_list_elt;
239   dlist_elt_t * per_user_list_head_elt;
240   u32 session_index;
241   snat_session_key_t key1;
242   u32 address_index = ~0;
243   u32 outside_fib_index;
244   uword * p;
245
246   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
247     {
248       b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
249       return SNAT_IN2OUT_NEXT_DROP;
250     }
251
252   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
253   if (! p)
254     {
255       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
256       return SNAT_IN2OUT_NEXT_DROP;
257     }
258   outside_fib_index = p[0];
259
260   key1.protocol = key0->protocol;
261   user_key.addr = ip0->src_address;
262   user_key.fib_index = rx_fib_index0;
263   kv0.key = user_key.as_u64;
264
265   /* Ever heard of the "user" = src ip4 address before? */
266   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].user_hash,
267                               &kv0, &value0))
268     {
269       /* no, make a new one */
270       pool_get (sm->per_thread_data[thread_index].users, u);
271       memset (u, 0, sizeof (*u));
272       u->addr = ip0->src_address;
273       u->fib_index = rx_fib_index0;
274
275       pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);
276
277       u->sessions_per_user_list_head_index = per_user_list_head_elt -
278         sm->per_thread_data[thread_index].list_pool;
279
280       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
281                        u->sessions_per_user_list_head_index);
282
283       kv0.value = u - sm->per_thread_data[thread_index].users;
284
285       /* add user */
286       clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].user_hash,
287                                &kv0, 1 /* is_add */);
288     }
289   else
290     {
291       u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
292                              value0.value);
293     }
294
295   /* Over quota? Recycle the least recently used dynamic translation */
296   if (u->nsessions >= sm->max_translations_per_user)
297     {
298       /* Remove the oldest dynamic translation */
299       do {
300           oldest_per_user_translation_list_index =
301             clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
302                                     u->sessions_per_user_list_head_index);
303
304           ASSERT (oldest_per_user_translation_list_index != ~0);
305
306           /* add it back to the end of the LRU list */
307           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
308                               u->sessions_per_user_list_head_index,
309                               oldest_per_user_translation_list_index);
310           /* Get the list element */
311           oldest_per_user_translation_list_elt =
312             pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
313                                oldest_per_user_translation_list_index);
314
315           /* Get the session index from the list element */
316           session_index = oldest_per_user_translation_list_elt->value;
317
318           /* Get the session */
319           s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
320                                  session_index);
321       } while (snat_is_session_static (s));
322
323       if (snat_is_unk_proto_session (s))
324         {
325           clib_bihash_kv_16_8_t up_kv;
326           nat_ed_ses_key_t key;
327
328           /* Remove from lookup tables */
329           key.l_addr = s->in2out.addr;
330           key.r_addr = s->ext_host_addr;
331           key.fib_index = s->in2out.fib_index;
332           key.proto = s->in2out.port;
333           key.rsvd = 0;
334           key.l_port = 0;
335           up_kv.key[0] = key.as_u64[0];
336           up_kv.key[1] = key.as_u64[1];
337           if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &up_kv, 0))
338             clib_warning ("in2out key del failed");
339
340           key.l_addr = s->out2in.addr;
341           key.fib_index = s->out2in.fib_index;
342           up_kv.key[0] = key.as_u64[0];
343           up_kv.key[1] = key.as_u64[1];
344           if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &up_kv, 0))
345             clib_warning ("out2in key del failed");
346         }
347       else
348         {
349           /* Remove in2out, out2in keys */
350           kv0.key = s->in2out.as_u64;
351           if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out,
352                                        &kv0, 0 /* is_add */))
353               clib_warning ("in2out key delete failed");
354           kv0.key = s->out2in.as_u64;
355           if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in,
356                                        &kv0, 0 /* is_add */))
357               clib_warning ("out2in key delete failed");
358
359           /* log NAT event */
360           snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
361                                               s->out2in.addr.as_u32,
362                                               s->in2out.protocol,
363                                               s->in2out.port,
364                                               s->out2in.port,
365                                               s->in2out.fib_index);
366
367           snat_free_outside_address_and_port
368             (sm, thread_index, &s->out2in, s->outside_address_index);
369         }
370       s->outside_address_index = ~0;
371
372       if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, thread_index,
373                                                &key1, &address_index))
374         {
375           ASSERT(0);
376
377           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
378           return SNAT_IN2OUT_NEXT_DROP;
379         }
380       s->outside_address_index = address_index;
381     }
382   else
383     {
384       u8 static_mapping = 1;
385
386       /* First try to match static mapping by local address and port */
387       if (snat_static_mapping_match (sm, *key0, &key1, 0, 0))
388         {
389           static_mapping = 0;
390           /* Try to create dynamic translation */
391           if (snat_alloc_outside_address_and_port (sm, rx_fib_index0,
392                                                    thread_index, &key1,
393                                                    &address_index))
394             {
395               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
396               return SNAT_IN2OUT_NEXT_DROP;
397             }
398         }
399
400       /* Create a new session */
401       pool_get (sm->per_thread_data[thread_index].sessions, s);
402       memset (s, 0, sizeof (*s));
403
404       s->outside_address_index = address_index;
405
406       if (static_mapping)
407         {
408           u->nstaticsessions++;
409           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
410         }
411       else
412         {
413           u->nsessions++;
414         }
415
416       /* Create list elts */
417       pool_get (sm->per_thread_data[thread_index].list_pool,
418                 per_user_translation_list_elt);
419       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
420                        per_user_translation_list_elt -
421                        sm->per_thread_data[thread_index].list_pool);
422
423       per_user_translation_list_elt->value =
424         s - sm->per_thread_data[thread_index].sessions;
425       s->per_user_index = per_user_translation_list_elt -
426                           sm->per_thread_data[thread_index].list_pool;
427       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
428
429       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
430                           s->per_user_list_head_index,
431                           per_user_translation_list_elt -
432                           sm->per_thread_data[thread_index].list_pool);
433    }
434
435   s->in2out = *key0;
436   s->out2in = key1;
437   s->out2in.protocol = key0->protocol;
438   s->out2in.fib_index = outside_fib_index;
439   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
440   *sessionp = s;
441
442   /* Add to translation hashes */
443   kv0.key = s->in2out.as_u64;
444   kv0.value = s - sm->per_thread_data[thread_index].sessions;
445   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
446                                1 /* is_add */))
447       clib_warning ("in2out key add failed");
448
449   kv0.key = s->out2in.as_u64;
450   kv0.value = s - sm->per_thread_data[thread_index].sessions;
451
452   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
453                                1 /* is_add */))
454       clib_warning ("out2in key add failed");
455
456   /* log NAT event */
457   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
458                                       s->out2in.addr.as_u32,
459                                       s->in2out.protocol,
460                                       s->in2out.port,
461                                       s->out2in.port,
462                                       s->in2out.fib_index);
463   return next0;
464 }
465
466 static_always_inline
467 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
468                                  snat_session_key_t *p_key0)
469 {
470   icmp46_header_t *icmp0;
471   snat_session_key_t key0;
472   icmp_echo_header_t *echo0, *inner_echo0 = 0;
473   ip4_header_t *inner_ip0 = 0;
474   void *l4_header = 0;
475   icmp46_header_t *inner_icmp0;
476
477   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
478   echo0 = (icmp_echo_header_t *)(icmp0+1);
479
480   if (!icmp_is_error_message (icmp0))
481     {
482       key0.protocol = SNAT_PROTOCOL_ICMP;
483       key0.addr = ip0->src_address;
484       key0.port = echo0->identifier;
485     }
486   else
487     {
488       inner_ip0 = (ip4_header_t *)(echo0+1);
489       l4_header = ip4_next_header (inner_ip0);
490       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
491       key0.addr = inner_ip0->dst_address;
492       switch (key0.protocol)
493         {
494         case SNAT_PROTOCOL_ICMP:
495           inner_icmp0 = (icmp46_header_t*)l4_header;
496           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
497           key0.port = inner_echo0->identifier;
498           break;
499         case SNAT_PROTOCOL_UDP:
500         case SNAT_PROTOCOL_TCP:
501           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
502           break;
503         default:
504           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
505         }
506     }
507   *p_key0 = key0;
508   return -1; /* success */
509 }
510
511 /**
512  * Get address and port values to be used for ICMP packet translation
513  * and create session if needed
514  *
515  * @param[in,out] sm             NAT main
516  * @param[in,out] node           NAT node runtime
517  * @param[in] thread_index       thread index
518  * @param[in,out] b0             buffer containing packet to be translated
519  * @param[out] p_proto           protocol used for matching
520  * @param[out] p_value           address and port after NAT translation
521  * @param[out] p_dont_translate  if packet should not be translated
522  * @param d                      optional parameter
523  * @param e                      optional parameter
524  */
525 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
526                            u32 thread_index, vlib_buffer_t *b0,
527                            ip4_header_t *ip0, u8 *p_proto,
528                            snat_session_key_t *p_value,
529                            u8 *p_dont_translate, void *d, void *e)
530 {
531   icmp46_header_t *icmp0;
532   u32 sw_if_index0;
533   u32 rx_fib_index0;
534   snat_session_key_t key0;
535   snat_session_t *s0 = 0;
536   u8 dont_translate = 0;
537   clib_bihash_kv_8_8_t kv0, value0;
538   u32 next0 = ~0;
539   int err;
540
541   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
542   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
543   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
544
545   err = icmp_get_key (ip0, &key0);
546   if (err != -1)
547     {
548       b0->error = node->errors[err];
549       next0 = SNAT_IN2OUT_NEXT_DROP;
550       goto out;
551     }
552   key0.fib_index = rx_fib_index0;
553
554   kv0.key = key0.as_u64;
555
556   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
557                               &value0))
558     {
559       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
560           IP_PROTOCOL_ICMP, rx_fib_index0, thread_index) &&
561           vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0))
562         {
563           dont_translate = 1;
564           goto out;
565         }
566
567       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
568         {
569           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
570           next0 = SNAT_IN2OUT_NEXT_DROP;
571           goto out;
572         }
573
574       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
575                          &s0, node, next0, thread_index);
576
577       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
578         goto out;
579     }
580   else
581     {
582       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
583                         icmp0->type != ICMP4_echo_reply &&
584                         !icmp_is_error_message (icmp0)))
585         {
586           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
587           next0 = SNAT_IN2OUT_NEXT_DROP;
588           goto out;
589         }
590
591       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
592                               value0.value);
593     }
594
595 out:
596   *p_proto = key0.protocol;
597   if (s0)
598     *p_value = s0->out2in;
599   *p_dont_translate = dont_translate;
600   if (d)
601     *(snat_session_t**)d = s0;
602   return next0;
603 }
604
605 /**
606  * Get address and port values to be used for ICMP packet translation
607  *
608  * @param[in] sm                 NAT main
609  * @param[in,out] node           NAT node runtime
610  * @param[in] thread_index       thread index
611  * @param[in,out] b0             buffer containing packet to be translated
612  * @param[out] p_proto           protocol used for matching
613  * @param[out] p_value           address and port after NAT translation
614  * @param[out] p_dont_translate  if packet should not be translated
615  * @param d                      optional parameter
616  * @param e                      optional parameter
617  */
618 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
619                            u32 thread_index, vlib_buffer_t *b0,
620                            ip4_header_t *ip0, u8 *p_proto,
621                            snat_session_key_t *p_value,
622                            u8 *p_dont_translate, void *d, void *e)
623 {
624   icmp46_header_t *icmp0;
625   u32 sw_if_index0;
626   u32 rx_fib_index0;
627   snat_session_key_t key0;
628   snat_session_key_t sm0;
629   u8 dont_translate = 0;
630   u8 is_addr_only;
631   u32 next0 = ~0;
632   int err;
633
634   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
635   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
636   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
637
638   err = icmp_get_key (ip0, &key0);
639   if (err != -1)
640     {
641       b0->error = node->errors[err];
642       next0 = SNAT_IN2OUT_NEXT_DROP;
643       goto out2;
644     }
645   key0.fib_index = rx_fib_index0;
646
647   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only))
648     {
649       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
650           IP_PROTOCOL_ICMP, rx_fib_index0)))
651         {
652           dont_translate = 1;
653           goto out;
654         }
655
656       if (icmp_is_error_message (icmp0))
657         {
658           next0 = SNAT_IN2OUT_NEXT_DROP;
659           goto out;
660         }
661
662       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
663       next0 = SNAT_IN2OUT_NEXT_DROP;
664       goto out;
665     }
666
667   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
668                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
669                     !icmp_is_error_message (icmp0)))
670     {
671       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
672       next0 = SNAT_IN2OUT_NEXT_DROP;
673       goto out;
674     }
675
676 out:
677   *p_value = sm0;
678 out2:
679   *p_proto = key0.protocol;
680   *p_dont_translate = dont_translate;
681   return next0;
682 }
683
684 static inline u32 icmp_in2out (snat_main_t *sm,
685                                vlib_buffer_t * b0,
686                                ip4_header_t * ip0,
687                                icmp46_header_t * icmp0,
688                                u32 sw_if_index0,
689                                u32 rx_fib_index0,
690                                vlib_node_runtime_t * node,
691                                u32 next0,
692                                u32 thread_index,
693                                void *d,
694                                void *e)
695 {
696   snat_session_key_t sm0;
697   u8 protocol;
698   icmp_echo_header_t *echo0, *inner_echo0 = 0;
699   ip4_header_t *inner_ip0;
700   void *l4_header = 0;
701   icmp46_header_t *inner_icmp0;
702   u8 dont_translate;
703   u32 new_addr0, old_addr0;
704   u16 old_id0, new_id0;
705   ip_csum_t sum0;
706   u16 checksum0;
707   u32 next0_tmp;
708
709   echo0 = (icmp_echo_header_t *)(icmp0+1);
710
711   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0, ip0,
712                                        &protocol, &sm0, &dont_translate, d, e);
713   if (next0_tmp != ~0)
714     next0 = next0_tmp;
715   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
716     goto out;
717
718   sum0 = ip_incremental_checksum (0, icmp0,
719                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
720   checksum0 = ~ip_csum_fold (sum0);
721   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
722     {
723       next0 = SNAT_IN2OUT_NEXT_DROP;
724       goto out;
725     }
726
727   old_addr0 = ip0->src_address.as_u32;
728   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
729   if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
730     vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
731
732   sum0 = ip0->checksum;
733   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
734                          src_address /* changed member */);
735   ip0->checksum = ip_csum_fold (sum0);
736
737   if (!icmp_is_error_message (icmp0))
738     {
739       new_id0 = sm0.port;
740       if (PREDICT_FALSE(new_id0 != echo0->identifier))
741         {
742           old_id0 = echo0->identifier;
743           new_id0 = sm0.port;
744           echo0->identifier = new_id0;
745
746           sum0 = icmp0->checksum;
747           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
748                                  identifier);
749           icmp0->checksum = ip_csum_fold (sum0);
750         }
751     }
752   else
753     {
754       inner_ip0 = (ip4_header_t *)(echo0+1);
755       l4_header = ip4_next_header (inner_ip0);
756
757       if (!ip4_header_checksum_is_valid (inner_ip0))
758         {
759           next0 = SNAT_IN2OUT_NEXT_DROP;
760           goto out;
761         }
762
763       old_addr0 = inner_ip0->dst_address.as_u32;
764       inner_ip0->dst_address = sm0.addr;
765       new_addr0 = inner_ip0->dst_address.as_u32;
766
767       sum0 = icmp0->checksum;
768       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
769                              dst_address /* changed member */);
770       icmp0->checksum = ip_csum_fold (sum0);
771
772       switch (protocol)
773         {
774           case SNAT_PROTOCOL_ICMP:
775             inner_icmp0 = (icmp46_header_t*)l4_header;
776             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
777
778             old_id0 = inner_echo0->identifier;
779             new_id0 = sm0.port;
780             inner_echo0->identifier = new_id0;
781
782             sum0 = icmp0->checksum;
783             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
784                                    identifier);
785             icmp0->checksum = ip_csum_fold (sum0);
786             break;
787           case SNAT_PROTOCOL_UDP:
788           case SNAT_PROTOCOL_TCP:
789             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
790             new_id0 = sm0.port;
791             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
792
793             sum0 = icmp0->checksum;
794             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
795                                    dst_port);
796             icmp0->checksum = ip_csum_fold (sum0);
797             break;
798           default:
799             ASSERT(0);
800         }
801     }
802
803 out:
804   return next0;
805 }
806
807 /**
808  * @brief Hairpinning
809  *
810  * Hairpinning allows two endpoints on the internal side of the NAT to
811  * communicate even if they only use each other's external IP addresses
812  * and ports.
813  *
814  * @param sm     NAT main.
815  * @param b0     Vlib buffer.
816  * @param ip0    IP header.
817  * @param udp0   UDP header.
818  * @param tcp0   TCP header.
819  * @param proto0 NAT protocol.
820  */
821 static inline void
822 snat_hairpinning (snat_main_t *sm,
823                   vlib_buffer_t * b0,
824                   ip4_header_t * ip0,
825                   udp_header_t * udp0,
826                   tcp_header_t * tcp0,
827                   u32 proto0)
828 {
829   snat_session_key_t key0, sm0;
830   snat_session_t * s0;
831   clib_bihash_kv_8_8_t kv0, value0;
832   ip_csum_t sum0;
833   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
834   u16 new_dst_port0, old_dst_port0;
835
836   key0.addr = ip0->dst_address;
837   key0.port = udp0->dst_port;
838   key0.protocol = proto0;
839   key0.fib_index = sm->outside_fib_index;
840   kv0.key = key0.as_u64;
841
842   /* Check if destination is static mappings */
843   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
844     {
845       new_dst_addr0 = sm0.addr.as_u32;
846       new_dst_port0 = sm0.port;
847       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
848     }
849   /* or active session */
850   else
851     {
852       if (sm->num_workers > 1)
853         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
854       else
855         ti = sm->num_workers;
856
857       if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
858         {
859           si = value0.value;
860
861           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
862           new_dst_addr0 = s0->in2out.addr.as_u32;
863           new_dst_port0 = s0->in2out.port;
864           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
865         }
866     }
867
868   /* Destination is behind the same NAT, use internal address and port */
869   if (new_dst_addr0)
870     {
871       old_dst_addr0 = ip0->dst_address.as_u32;
872       ip0->dst_address.as_u32 = new_dst_addr0;
873       sum0 = ip0->checksum;
874       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
875                              ip4_header_t, dst_address);
876       ip0->checksum = ip_csum_fold (sum0);
877
878       old_dst_port0 = tcp0->dst;
879       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
880         {
881           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
882             {
883               tcp0->dst = new_dst_port0;
884               sum0 = tcp0->checksum;
885               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
886                                      ip4_header_t, dst_address);
887               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
888                                      ip4_header_t /* cheat */, length);
889               tcp0->checksum = ip_csum_fold(sum0);
890             }
891           else
892             {
893               udp0->dst_port = new_dst_port0;
894               udp0->checksum = 0;
895             }
896         }
897       else
898         {
899           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
900             {
901               sum0 = tcp0->checksum;
902               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
903                                      ip4_header_t, dst_address);
904               tcp0->checksum = ip_csum_fold(sum0);
905             }
906         }
907     }
908 }
909
910 static inline void
911 snat_icmp_hairpinning (snat_main_t *sm,
912                        vlib_buffer_t * b0,
913                        ip4_header_t * ip0,
914                        icmp46_header_t * icmp0)
915 {
916   snat_session_key_t key0, sm0;
917   clib_bihash_kv_8_8_t kv0, value0;
918   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
919   ip_csum_t sum0;
920   snat_session_t *s0;
921
922   if (!icmp_is_error_message (icmp0))
923     {
924       icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
925       u16 icmp_id0 = echo0->identifier;
926       key0.addr = ip0->dst_address;
927       key0.port = icmp_id0;
928       key0.protocol = SNAT_PROTOCOL_ICMP;
929       key0.fib_index = sm->outside_fib_index;
930       kv0.key = key0.as_u64;
931
932       if (sm->num_workers > 1)
933         ti = (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread;
934       else
935         ti = sm->num_workers;
936
937       /* Check if destination is in active sessions */
938       if (clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0,
939                                   &value0))
940         {
941           /* or static mappings */
942           if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
943             {
944               new_dst_addr0 = sm0.addr.as_u32;
945               vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
946             }
947         }
948       else
949         {
950           si = value0.value;
951
952           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
953           new_dst_addr0 = s0->in2out.addr.as_u32;
954           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
955           echo0->identifier = s0->in2out.port;
956           sum0 = icmp0->checksum;
957           sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
958                                  icmp_echo_header_t, identifier);
959           icmp0->checksum = ip_csum_fold (sum0);
960         }
961
962       /* Destination is behind the same NAT, use internal address and port */
963       if (new_dst_addr0)
964         {
965           old_dst_addr0 = ip0->dst_address.as_u32;
966           ip0->dst_address.as_u32 = new_dst_addr0;
967           sum0 = ip0->checksum;
968           sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
969                                  ip4_header_t, dst_address);
970           ip0->checksum = ip_csum_fold (sum0);
971         }
972     }
973
974 }
975
976 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
977                                          vlib_buffer_t * b0,
978                                          ip4_header_t * ip0,
979                                          icmp46_header_t * icmp0,
980                                          u32 sw_if_index0,
981                                          u32 rx_fib_index0,
982                                          vlib_node_runtime_t * node,
983                                          u32 next0,
984                                          f64 now,
985                                          u32 thread_index,
986                                          snat_session_t ** p_s0)
987 {
988   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
989                       next0, thread_index, p_s0, 0);
990   snat_session_t * s0 = *p_s0;
991   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
992     {
993       /* Hairpinning */
994       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == 0)
995         snat_icmp_hairpinning(sm, b0, ip0, icmp0);
996       /* Accounting */
997       s0->last_heard = now;
998       s0->total_pkts++;
999       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
1000       /* Per-user LRU list maintenance for dynamic translations */
1001       if (!snat_is_session_static (s0))
1002         {
1003           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1004                              s0->per_user_index);
1005           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1006                               s0->per_user_list_head_index,
1007                               s0->per_user_index);
1008         }
1009     }
1010   return next0;
1011 }
1012 static inline void
1013 snat_hairpinning_unknown_proto (snat_main_t *sm,
1014                                 vlib_buffer_t * b,
1015                                 ip4_header_t * ip)
1016 {
1017   u32 old_addr, new_addr = 0, ti = 0;
1018   clib_bihash_kv_8_8_t kv, value;
1019   clib_bihash_kv_16_8_t s_kv, s_value;
1020   nat_ed_ses_key_t key;
1021   snat_session_key_t m_key;
1022   snat_static_mapping_t *m;
1023   ip_csum_t sum;
1024   snat_session_t *s;
1025
1026   old_addr = ip->dst_address.as_u32;
1027   key.l_addr.as_u32 = ip->dst_address.as_u32;
1028   key.r_addr.as_u32 = ip->src_address.as_u32;
1029   key.fib_index = sm->outside_fib_index;
1030   key.proto = ip->protocol;
1031   key.rsvd = 0;
1032   key.l_port = 0;
1033   s_kv.key[0] = key.as_u64[0];
1034   s_kv.key[1] = key.as_u64[1];
1035   if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1036     {
1037       m_key.addr = ip->dst_address;
1038       m_key.fib_index = sm->outside_fib_index;
1039       m_key.port = 0;
1040       m_key.protocol = 0;
1041       kv.key = m_key.as_u64;
1042       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1043         return;
1044
1045       m = pool_elt_at_index (sm->static_mappings, value.value);
1046       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1047         vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
1048       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
1049     }
1050   else
1051     {
1052       if (sm->num_workers > 1)
1053         ti = sm->worker_out2in_cb (ip, sm->outside_fib_index);
1054       else
1055         ti = sm->num_workers;
1056
1057       s = pool_elt_at_index (sm->per_thread_data[ti].sessions, s_value.value);
1058       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1059         vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
1060       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
1061     }
1062   sum = ip->checksum;
1063   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
1064   ip->checksum = ip_csum_fold (sum);
1065 }
1066
1067 static snat_session_t *
1068 snat_in2out_unknown_proto (snat_main_t *sm,
1069                            vlib_buffer_t * b,
1070                            ip4_header_t * ip,
1071                            u32 rx_fib_index,
1072                            u32 thread_index,
1073                            f64 now,
1074                            vlib_main_t * vm,
1075                            vlib_node_runtime_t * node)
1076 {
1077   clib_bihash_kv_8_8_t kv, value;
1078   clib_bihash_kv_16_8_t s_kv, s_value;
1079   snat_static_mapping_t *m;
1080   snat_session_key_t m_key;
1081   u32 old_addr, new_addr = 0;
1082   ip_csum_t sum;
1083   snat_user_key_t u_key;
1084   snat_user_t *u;
1085   dlist_elt_t *head, *elt, *oldest;
1086   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1087   u32 elt_index, head_index, ses_index, oldest_index;
1088   snat_session_t * s;
1089   nat_ed_ses_key_t key;
1090   u32 address_index = ~0;
1091   int i;
1092   u8 is_sm = 0;
1093
1094   old_addr = ip->src_address.as_u32;
1095
1096   key.l_addr = ip->src_address;
1097   key.r_addr = ip->dst_address;
1098   key.fib_index = rx_fib_index;
1099   key.proto = ip->protocol;
1100   key.rsvd = 0;
1101   key.l_port = 0;
1102   s_kv.key[0] = key.as_u64[0];
1103   s_kv.key[1] = key.as_u64[1];
1104
1105   if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
1106     {
1107       s = pool_elt_at_index (tsm->sessions, s_value.value);
1108       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1109     }
1110   else
1111     {
1112       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
1113         {
1114           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
1115           return 0;
1116         }
1117
1118       u_key.addr = ip->src_address;
1119       u_key.fib_index = rx_fib_index;
1120       kv.key = u_key.as_u64;
1121
1122       /* Ever heard of the "user" = src ip4 address before? */
1123       if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1124         {
1125           /* no, make a new one */
1126           pool_get (tsm->users, u);
1127           memset (u, 0, sizeof (*u));
1128           u->addr = ip->src_address;
1129           u->fib_index = rx_fib_index;
1130
1131           pool_get (tsm->list_pool, head);
1132           u->sessions_per_user_list_head_index = head - tsm->list_pool;
1133
1134           clib_dlist_init (tsm->list_pool,
1135                            u->sessions_per_user_list_head_index);
1136
1137           kv.value = u - tsm->users;
1138
1139           /* add user */
1140           clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1);
1141         }
1142       else
1143         {
1144           u = pool_elt_at_index (tsm->users, value.value);
1145         }
1146
1147       m_key.addr = ip->src_address;
1148       m_key.port = 0;
1149       m_key.protocol = 0;
1150       m_key.fib_index = rx_fib_index;
1151       kv.key = m_key.as_u64;
1152
1153       /* Try to find static mapping first */
1154       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
1155         {
1156           m = pool_elt_at_index (sm->static_mappings, value.value);
1157           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
1158           is_sm = 1;
1159           goto create_ses;
1160         }
1161       /* Fallback to 3-tuple key */
1162       else
1163         {
1164           /* Choose same out address as for TCP/UDP session to same destination */
1165           if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1166             {
1167               head_index = u->sessions_per_user_list_head_index;
1168               head = pool_elt_at_index (tsm->list_pool, head_index);
1169               elt_index = head->next;
1170               elt = pool_elt_at_index (tsm->list_pool, elt_index);
1171               ses_index = elt->value;
1172               while (ses_index != ~0)
1173                 {
1174                   s =  pool_elt_at_index (tsm->sessions, ses_index);
1175                   elt_index = elt->next;
1176                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1177                   ses_index = elt->value;
1178
1179                   if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
1180                     {
1181                       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1182                       address_index = s->outside_address_index;
1183
1184                       key.fib_index = sm->outside_fib_index;
1185                       key.l_addr.as_u32 = new_addr;
1186                       s_kv.key[0] = key.as_u64[0];
1187                       s_kv.key[1] = key.as_u64[1];
1188                       if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1189                         break;
1190
1191                       goto create_ses;
1192                     }
1193                 }
1194             }
1195           key.fib_index = sm->outside_fib_index;
1196           for (i = 0; i < vec_len (sm->addresses); i++)
1197             {
1198               key.l_addr.as_u32 = sm->addresses[i].addr.as_u32;
1199               s_kv.key[0] = key.as_u64[0];
1200               s_kv.key[1] = key.as_u64[1];
1201               if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1202                 {
1203                   new_addr = ip->src_address.as_u32 = key.l_addr.as_u32;
1204                   address_index = i;
1205                   goto create_ses;
1206                 }
1207             }
1208           return 0;
1209         }
1210
1211 create_ses:
1212       /* Over quota? Recycle the least recently used dynamic translation */
1213       if (u->nsessions >= sm->max_translations_per_user && !is_sm)
1214         {
1215           /* Remove the oldest dynamic translation */
1216           do {
1217               oldest_index = clib_dlist_remove_head (
1218                 tsm->list_pool, u->sessions_per_user_list_head_index);
1219
1220               ASSERT (oldest_index != ~0);
1221
1222               /* add it back to the end of the LRU list */
1223               clib_dlist_addtail (tsm->list_pool,
1224                                   u->sessions_per_user_list_head_index,
1225                                   oldest_index);
1226               /* Get the list element */
1227               oldest = pool_elt_at_index (tsm->list_pool, oldest_index);
1228
1229               /* Get the session index from the list element */
1230               ses_index = oldest->value;
1231
1232               /* Get the session */
1233               s = pool_elt_at_index (tsm->sessions, ses_index);
1234           } while (snat_is_session_static (s));
1235
1236           if (snat_is_unk_proto_session (s))
1237             {
1238               /* Remove from lookup tables */
1239               key.l_addr = s->in2out.addr;
1240               key.r_addr = s->ext_host_addr;
1241               key.fib_index = s->in2out.fib_index;
1242               key.proto = s->in2out.port;
1243               s_kv.key[0] = key.as_u64[0];
1244               s_kv.key[1] = key.as_u64[1];
1245               if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 0))
1246                 clib_warning ("in2out key del failed");
1247
1248               key.l_addr = s->out2in.addr;
1249               key.fib_index = s->out2in.fib_index;
1250               s_kv.key[0] = key.as_u64[0];
1251               s_kv.key[1] = key.as_u64[1];
1252               if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 0))
1253                 clib_warning ("out2in key del failed");
1254             }
1255           else
1256             {
1257               /* log NAT event */
1258               snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
1259                                                   s->out2in.addr.as_u32,
1260                                                   s->in2out.protocol,
1261                                                   s->in2out.port,
1262                                                   s->out2in.port,
1263                                                   s->in2out.fib_index);
1264
1265               snat_free_outside_address_and_port (sm, thread_index, &s->out2in,
1266                                                   s->outside_address_index);
1267
1268               /* Remove in2out, out2in keys */
1269               kv.key = s->in2out.as_u64;
1270               if (clib_bihash_add_del_8_8 (
1271                     &sm->per_thread_data[thread_index].in2out, &kv, 0))
1272                 clib_warning ("in2out key del failed");
1273               kv.key = s->out2in.as_u64;
1274               if (clib_bihash_add_del_8_8 (
1275                     &sm->per_thread_data[thread_index].out2in, &kv, 0))
1276                 clib_warning ("out2in key del failed");
1277             }
1278         }
1279       else
1280         {
1281           /* Create a new session */
1282           pool_get (tsm->sessions, s);
1283           memset (s, 0, sizeof (*s));
1284
1285           /* Create list elts */
1286           pool_get (tsm->list_pool, elt);
1287           clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
1288           elt->value = s - tsm->sessions;
1289           s->per_user_index = elt - tsm->list_pool;
1290           s->per_user_list_head_index = u->sessions_per_user_list_head_index;
1291           clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1292                               s->per_user_index);
1293         }
1294
1295       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1296       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
1297       s->outside_address_index = address_index;
1298       s->out2in.addr.as_u32 = new_addr;
1299       s->out2in.fib_index = sm->outside_fib_index;
1300       s->in2out.addr.as_u32 = old_addr;
1301       s->in2out.fib_index = rx_fib_index;
1302       s->in2out.port = s->out2in.port = ip->protocol;
1303       if (is_sm)
1304         {
1305           u->nstaticsessions++;
1306           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1307         }
1308       else
1309         {
1310           u->nsessions++;
1311         }
1312
1313       /* Add to lookup tables */
1314       key.l_addr.as_u32 = old_addr;
1315       key.r_addr = ip->dst_address;
1316       key.proto = ip->protocol;
1317       key.fib_index = rx_fib_index;
1318       s_kv.key[0] = key.as_u64[0];
1319       s_kv.key[1] = key.as_u64[1];
1320       s_kv.value = s - tsm->sessions;
1321       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1322         clib_warning ("in2out key add failed");
1323
1324       key.l_addr.as_u32 = new_addr;
1325       key.fib_index = sm->outside_fib_index;
1326       s_kv.key[0] = key.as_u64[0];
1327       s_kv.key[1] = key.as_u64[1];
1328       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1329         clib_warning ("out2in key add failed");
1330   }
1331
1332   /* Update IP checksum */
1333   sum = ip->checksum;
1334   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1335   ip->checksum = ip_csum_fold (sum);
1336
1337   /* Accounting */
1338   s->last_heard = now;
1339   s->total_pkts++;
1340   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1341   /* Per-user LRU list maintenance */
1342   clib_dlist_remove (tsm->list_pool, s->per_user_index);
1343   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1344                       s->per_user_index);
1345
1346   /* Hairpinning */
1347   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1348     snat_hairpinning_unknown_proto(sm, b, ip);
1349
1350   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1351     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1352
1353   return s;
1354 }
1355
1356 static snat_session_t *
1357 snat_in2out_lb (snat_main_t *sm,
1358                 vlib_buffer_t * b,
1359                 ip4_header_t * ip,
1360                 u32 rx_fib_index,
1361                 u32 thread_index,
1362                 f64 now,
1363                 vlib_main_t * vm,
1364                 vlib_node_runtime_t * node)
1365 {
1366   nat_ed_ses_key_t key;
1367   clib_bihash_kv_16_8_t s_kv, s_value;
1368   udp_header_t *udp = ip4_next_header (ip);
1369   tcp_header_t *tcp = (tcp_header_t *) udp;
1370   snat_session_t *s = 0;
1371   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1372   u32 old_addr, new_addr;
1373   u16 new_port, old_port;
1374   ip_csum_t sum;
1375   u32 proto = ip_proto_to_snat_proto (ip->protocol);
1376   snat_session_key_t e_key, l_key;
1377   clib_bihash_kv_8_8_t kv, value;
1378   snat_user_key_t u_key;
1379   snat_user_t *u;
1380   dlist_elt_t *head, *elt;
1381
1382   old_addr = ip->src_address.as_u32;
1383
1384   key.l_addr = ip->src_address;
1385   key.r_addr = ip->dst_address;
1386   key.fib_index = rx_fib_index;
1387   key.proto = ip->protocol;
1388   key.rsvd = 0;
1389   key.l_port = udp->src_port;
1390   s_kv.key[0] = key.as_u64[0];
1391   s_kv.key[1] = key.as_u64[1];
1392
1393   if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
1394     {
1395       s = pool_elt_at_index (tsm->sessions, s_value.value);
1396     }
1397   else
1398     {
1399       if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index)))
1400         {
1401           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
1402           return 0;
1403         }
1404
1405       l_key.addr = ip->src_address;
1406       l_key.port = udp->src_port;
1407       l_key.protocol = proto;
1408       l_key.fib_index = rx_fib_index;
1409       if (snat_static_mapping_match(sm, l_key, &e_key, 0, 0))
1410         return 0;
1411
1412       u_key.addr = ip->src_address;
1413       u_key.fib_index = rx_fib_index;
1414       kv.key = u_key.as_u64;
1415
1416       /* Ever heard of the "user" = src ip4 address before? */
1417       if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1418         {
1419           /* no, make a new one */
1420           pool_get (tsm->users, u);
1421           memset (u, 0, sizeof (*u));
1422           u->addr = ip->src_address;
1423           u->fib_index = rx_fib_index;
1424
1425           pool_get (tsm->list_pool, head);
1426           u->sessions_per_user_list_head_index = head - tsm->list_pool;
1427
1428           clib_dlist_init (tsm->list_pool,
1429                            u->sessions_per_user_list_head_index);
1430
1431           kv.value = u - tsm->users;
1432
1433           /* add user */
1434           if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1))
1435             clib_warning ("user key add failed");
1436         }
1437       else
1438         {
1439           u = pool_elt_at_index (tsm->users, value.value);
1440         }
1441
1442       /* Create a new session */
1443       pool_get (tsm->sessions, s);
1444       memset (s, 0, sizeof (*s));
1445
1446       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1447       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1448       s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
1449       s->outside_address_index = ~0;
1450       s->in2out = l_key;
1451       s->out2in = e_key;
1452       u->nstaticsessions++;
1453
1454       /* Create list elts */
1455       pool_get (tsm->list_pool, elt);
1456       clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
1457       elt->value = s - tsm->sessions;
1458       s->per_user_index = elt - tsm->list_pool;
1459       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
1460       clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1461                           s->per_user_index);
1462
1463       /* Add to lookup tables */
1464       s_kv.value = s - tsm->sessions;
1465       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1466         clib_warning ("in2out-ed key add failed");
1467
1468       key.l_addr = e_key.addr;
1469       key.fib_index = e_key.fib_index;
1470       key.l_port = e_key.port;
1471       s_kv.key[0] = key.as_u64[0];
1472       s_kv.key[1] = key.as_u64[1];
1473       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1474         clib_warning ("out2in-ed key add failed");
1475     }
1476
1477   new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1478
1479   /* Update IP checksum */
1480   sum = ip->checksum;
1481   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1482   ip->checksum = ip_csum_fold (sum);
1483
1484   if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
1485     {
1486       old_port = tcp->src_port;
1487       tcp->src_port = s->out2in.port;
1488       new_port = tcp->src_port;
1489
1490       sum = tcp->checksum;
1491       sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1492       sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
1493       tcp->checksum = ip_csum_fold(sum);
1494     }
1495   else
1496     {
1497       udp->src_port = s->out2in.port;
1498       udp->checksum = 0;
1499     }
1500
1501   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1502     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1503
1504   /* Accounting */
1505   s->last_heard = now;
1506   s->total_pkts++;
1507   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1508   return s;
1509 }
1510
1511 static inline uword
1512 snat_in2out_node_fn_inline (vlib_main_t * vm,
1513                             vlib_node_runtime_t * node,
1514                             vlib_frame_t * frame, int is_slow_path,
1515                             int is_output_feature)
1516 {
1517   u32 n_left_from, * from, * to_next;
1518   snat_in2out_next_t next_index;
1519   u32 pkts_processed = 0;
1520   snat_main_t * sm = &snat_main;
1521   f64 now = vlib_time_now (vm);
1522   u32 stats_node_index;
1523   u32 thread_index = vlib_get_thread_index ();
1524
1525   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
1526     snat_in2out_node.index;
1527
1528   from = vlib_frame_vector_args (frame);
1529   n_left_from = frame->n_vectors;
1530   next_index = node->cached_next_index;
1531
1532   while (n_left_from > 0)
1533     {
1534       u32 n_left_to_next;
1535
1536       vlib_get_next_frame (vm, node, next_index,
1537                            to_next, n_left_to_next);
1538
1539       while (n_left_from >= 4 && n_left_to_next >= 2)
1540         {
1541           u32 bi0, bi1;
1542           vlib_buffer_t * b0, * b1;
1543           u32 next0, next1;
1544           u32 sw_if_index0, sw_if_index1;
1545           ip4_header_t * ip0, * ip1;
1546           ip_csum_t sum0, sum1;
1547           u32 new_addr0, old_addr0, new_addr1, old_addr1;
1548           u16 old_port0, new_port0, old_port1, new_port1;
1549           udp_header_t * udp0, * udp1;
1550           tcp_header_t * tcp0, * tcp1;
1551           icmp46_header_t * icmp0, * icmp1;
1552           snat_session_key_t key0, key1;
1553           u32 rx_fib_index0, rx_fib_index1;
1554           u32 proto0, proto1;
1555           snat_session_t * s0 = 0, * s1 = 0;
1556           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1557           u32 iph_offset0 = 0, iph_offset1 = 0;
1558
1559           /* Prefetch next iteration. */
1560           {
1561             vlib_buffer_t * p2, * p3;
1562
1563             p2 = vlib_get_buffer (vm, from[2]);
1564             p3 = vlib_get_buffer (vm, from[3]);
1565
1566             vlib_prefetch_buffer_header (p2, LOAD);
1567             vlib_prefetch_buffer_header (p3, LOAD);
1568
1569             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1570             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1571           }
1572
1573           /* speculatively enqueue b0 and b1 to the current next frame */
1574           to_next[0] = bi0 = from[0];
1575           to_next[1] = bi1 = from[1];
1576           from += 2;
1577           to_next += 2;
1578           n_left_from -= 2;
1579           n_left_to_next -= 2;
1580
1581           b0 = vlib_get_buffer (vm, bi0);
1582           b1 = vlib_get_buffer (vm, bi1);
1583
1584           if (is_output_feature)
1585             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1586
1587           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1588                  iph_offset0);
1589
1590           udp0 = ip4_next_header (ip0);
1591           tcp0 = (tcp_header_t *) udp0;
1592           icmp0 = (icmp46_header_t *) udp0;
1593
1594           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1595           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1596                                    sw_if_index0);
1597
1598           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1599
1600           if (PREDICT_FALSE(ip0->ttl == 1))
1601             {
1602               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1603               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1604                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1605                                            0);
1606               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1607               goto trace00;
1608             }
1609
1610           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1611
1612           /* Next configured feature, probably ip4-lookup */
1613           if (is_slow_path)
1614             {
1615               if (PREDICT_FALSE (proto0 == ~0))
1616                 {
1617                   s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1618                                                   thread_index, now, vm, node);
1619                   if (!s0)
1620                     next0 = SNAT_IN2OUT_NEXT_DROP;
1621                   goto trace00;
1622                 }
1623
1624               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1625                 {
1626                   next0 = icmp_in2out_slow_path
1627                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
1628                      node, next0, now, thread_index, &s0);
1629                   goto trace00;
1630                 }
1631             }
1632           else
1633             {
1634               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1635                 {
1636                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1637                   goto trace00;
1638                 }
1639             }
1640
1641           key0.addr = ip0->src_address;
1642           key0.port = udp0->src_port;
1643           key0.protocol = proto0;
1644           key0.fib_index = rx_fib_index0;
1645
1646           kv0.key = key0.as_u64;
1647
1648           if (PREDICT_FALSE (clib_bihash_search_8_8 (
1649               &sm->per_thread_data[thread_index].in2out, &kv0, &value0) != 0))
1650             {
1651               if (is_slow_path)
1652                 {
1653                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1654                       ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature)
1655                     goto trace00;
1656
1657                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1658                                      &s0, node, next0, thread_index);
1659                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1660                     goto trace00;
1661                 }
1662               else
1663                 {
1664                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1665                   goto trace00;
1666                 }
1667             }
1668           else
1669             {
1670               if (PREDICT_FALSE (value0.value == ~0ULL))
1671                 {
1672                   if (is_slow_path)
1673                     {
1674                       s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
1675                                           thread_index, now, vm, node);
1676                       if (!s0)
1677                         next0 = SNAT_IN2OUT_NEXT_DROP;
1678                       goto trace00;
1679                     }
1680                   else
1681                     {
1682                       next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1683                       goto trace00;
1684                     }
1685                 }
1686               else
1687                 {
1688                   s0 = pool_elt_at_index (
1689                     sm->per_thread_data[thread_index].sessions,
1690                     value0.value);
1691                 }
1692             }
1693
1694           old_addr0 = ip0->src_address.as_u32;
1695           ip0->src_address = s0->out2in.addr;
1696           new_addr0 = ip0->src_address.as_u32;
1697           if (!is_output_feature)
1698             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1699
1700           sum0 = ip0->checksum;
1701           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1702                                  ip4_header_t,
1703                                  src_address /* changed member */);
1704           ip0->checksum = ip_csum_fold (sum0);
1705
1706           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1707             {
1708               old_port0 = tcp0->src_port;
1709               tcp0->src_port = s0->out2in.port;
1710               new_port0 = tcp0->src_port;
1711
1712               sum0 = tcp0->checksum;
1713               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1714                                      ip4_header_t,
1715                                      dst_address /* changed member */);
1716               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1717                                      ip4_header_t /* cheat */,
1718                                      length /* changed member */);
1719               tcp0->checksum = ip_csum_fold(sum0);
1720             }
1721           else
1722             {
1723               old_port0 = udp0->src_port;
1724               udp0->src_port = s0->out2in.port;
1725               udp0->checksum = 0;
1726             }
1727
1728           /* Hairpinning */
1729           if (!is_output_feature)
1730             snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1731
1732           /* Accounting */
1733           s0->last_heard = now;
1734           s0->total_pkts++;
1735           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1736           /* Per-user LRU list maintenance for dynamic translation */
1737           if (!snat_is_session_static (s0))
1738             {
1739               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1740                                  s0->per_user_index);
1741               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1742                                   s0->per_user_list_head_index,
1743                                   s0->per_user_index);
1744             }
1745         trace00:
1746
1747           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1748                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1749             {
1750               snat_in2out_trace_t *t =
1751                  vlib_add_trace (vm, node, b0, sizeof (*t));
1752               t->is_slow_path = is_slow_path;
1753               t->sw_if_index = sw_if_index0;
1754               t->next_index = next0;
1755                   t->session_index = ~0;
1756               if (s0)
1757                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1758             }
1759
1760           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1761
1762           if (is_output_feature)
1763             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1764
1765           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1766                  iph_offset1);
1767
1768           udp1 = ip4_next_header (ip1);
1769           tcp1 = (tcp_header_t *) udp1;
1770           icmp1 = (icmp46_header_t *) udp1;
1771
1772           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1773           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1774                                    sw_if_index1);
1775
1776           if (PREDICT_FALSE(ip1->ttl == 1))
1777             {
1778               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1779               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1780                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1781                                            0);
1782               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1783               goto trace01;
1784             }
1785
1786           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1787
1788           /* Next configured feature, probably ip4-lookup */
1789           if (is_slow_path)
1790             {
1791               if (PREDICT_FALSE (proto1 == ~0))
1792                 {
1793                   s1 = snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1,
1794                                                   thread_index, now, vm, node);
1795                   if (!s1)
1796                     next1 = SNAT_IN2OUT_NEXT_DROP;
1797                   goto trace01;
1798                 }
1799
1800               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1801                 {
1802                   next1 = icmp_in2out_slow_path
1803                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1804                      next1, now, thread_index, &s1);
1805                   goto trace01;
1806                 }
1807             }
1808           else
1809             {
1810               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1811                 {
1812                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1813                   goto trace01;
1814                 }
1815             }
1816
1817           key1.addr = ip1->src_address;
1818           key1.port = udp1->src_port;
1819           key1.protocol = proto1;
1820           key1.fib_index = rx_fib_index1;
1821
1822           kv1.key = key1.as_u64;
1823
1824             if (PREDICT_FALSE(clib_bihash_search_8_8 (
1825                 &sm->per_thread_data[thread_index].in2out, &kv1, &value1) != 0))
1826             {
1827               if (is_slow_path)
1828                 {
1829                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
1830                       ip1, proto1, rx_fib_index1, thread_index)) && !is_output_feature)
1831                     goto trace01;
1832
1833                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1834                                      &s1, node, next1, thread_index);
1835                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1836                     goto trace01;
1837                 }
1838               else
1839                 {
1840                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1841                   goto trace01;
1842                 }
1843             }
1844           else
1845             {
1846               if (PREDICT_FALSE (value1.value == ~0ULL))
1847                 {
1848                   if (is_slow_path)
1849                     {
1850                       s1 = snat_in2out_lb(sm, b1, ip1, rx_fib_index1,
1851                                           thread_index, now, vm, node);
1852                       if (!s1)
1853                         next1 = SNAT_IN2OUT_NEXT_DROP;
1854                       goto trace01;
1855                     }
1856                   else
1857                     {
1858                       next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1859                       goto trace01;
1860                     }
1861                 }
1862               else
1863                 {
1864                   s1 = pool_elt_at_index (
1865                     sm->per_thread_data[thread_index].sessions,
1866                     value1.value);
1867                 }
1868             }
1869
1870           old_addr1 = ip1->src_address.as_u32;
1871           ip1->src_address = s1->out2in.addr;
1872           new_addr1 = ip1->src_address.as_u32;
1873           if (!is_output_feature)
1874             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1875
1876           sum1 = ip1->checksum;
1877           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1878                                  ip4_header_t,
1879                                  src_address /* changed member */);
1880           ip1->checksum = ip_csum_fold (sum1);
1881
1882           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1883             {
1884               old_port1 = tcp1->src_port;
1885               tcp1->src_port = s1->out2in.port;
1886               new_port1 = tcp1->src_port;
1887
1888               sum1 = tcp1->checksum;
1889               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1890                                      ip4_header_t,
1891                                      dst_address /* changed member */);
1892               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1893                                      ip4_header_t /* cheat */,
1894                                      length /* changed member */);
1895               tcp1->checksum = ip_csum_fold(sum1);
1896             }
1897           else
1898             {
1899               old_port1 = udp1->src_port;
1900               udp1->src_port = s1->out2in.port;
1901               udp1->checksum = 0;
1902             }
1903
1904           /* Hairpinning */
1905           if (!is_output_feature)
1906             snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
1907
1908           /* Accounting */
1909           s1->last_heard = now;
1910           s1->total_pkts++;
1911           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1912           /* Per-user LRU list maintenance for dynamic translation */
1913           if (!snat_is_session_static (s1))
1914             {
1915               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1916                                  s1->per_user_index);
1917               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1918                                   s1->per_user_list_head_index,
1919                                   s1->per_user_index);
1920             }
1921         trace01:
1922
1923           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1924                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1925             {
1926               snat_in2out_trace_t *t =
1927                  vlib_add_trace (vm, node, b1, sizeof (*t));
1928               t->sw_if_index = sw_if_index1;
1929               t->next_index = next1;
1930               t->session_index = ~0;
1931               if (s1)
1932                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1933             }
1934
1935           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1936
1937           /* verify speculative enqueues, maybe switch current next frame */
1938           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1939                                            to_next, n_left_to_next,
1940                                            bi0, bi1, next0, next1);
1941         }
1942
1943       while (n_left_from > 0 && n_left_to_next > 0)
1944         {
1945           u32 bi0;
1946           vlib_buffer_t * b0;
1947           u32 next0;
1948           u32 sw_if_index0;
1949           ip4_header_t * ip0;
1950           ip_csum_t sum0;
1951           u32 new_addr0, old_addr0;
1952           u16 old_port0, new_port0;
1953           udp_header_t * udp0;
1954           tcp_header_t * tcp0;
1955           icmp46_header_t * icmp0;
1956           snat_session_key_t key0;
1957           u32 rx_fib_index0;
1958           u32 proto0;
1959           snat_session_t * s0 = 0;
1960           clib_bihash_kv_8_8_t kv0, value0;
1961           u32 iph_offset0 = 0;
1962
1963           /* speculatively enqueue b0 to the current next frame */
1964           bi0 = from[0];
1965           to_next[0] = bi0;
1966           from += 1;
1967           to_next += 1;
1968           n_left_from -= 1;
1969           n_left_to_next -= 1;
1970
1971           b0 = vlib_get_buffer (vm, bi0);
1972           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1973
1974           if (is_output_feature)
1975             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1976
1977           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1978                  iph_offset0);
1979
1980           udp0 = ip4_next_header (ip0);
1981           tcp0 = (tcp_header_t *) udp0;
1982           icmp0 = (icmp46_header_t *) udp0;
1983
1984           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1985           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1986                                    sw_if_index0);
1987
1988           if (PREDICT_FALSE(ip0->ttl == 1))
1989             {
1990               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1991               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1992                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1993                                            0);
1994               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1995               goto trace0;
1996             }
1997
1998           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1999
2000           /* Next configured feature, probably ip4-lookup */
2001           if (is_slow_path)
2002             {
2003               if (PREDICT_FALSE (proto0 == ~0))
2004                 {
2005                   s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
2006                                                   thread_index, now, vm, node);
2007                   if (!s0)
2008                     next0 = SNAT_IN2OUT_NEXT_DROP;
2009                   goto trace0;
2010                 }
2011
2012               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2013                 {
2014                   next0 = icmp_in2out_slow_path
2015                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
2016                      next0, now, thread_index, &s0);
2017                   goto trace0;
2018                 }
2019             }
2020           else
2021             {
2022               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
2023                 {
2024                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2025                   goto trace0;
2026                 }
2027             }
2028
2029           key0.addr = ip0->src_address;
2030           key0.port = udp0->src_port;
2031           key0.protocol = proto0;
2032           key0.fib_index = rx_fib_index0;
2033
2034           kv0.key = key0.as_u64;
2035
2036           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out,
2037                                       &kv0, &value0))
2038             {
2039               if (is_slow_path)
2040                 {
2041                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
2042                       ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature)
2043                     goto trace0;
2044
2045                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
2046                                      &s0, node, next0, thread_index);
2047
2048                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
2049                     goto trace0;
2050                 }
2051               else
2052                 {
2053                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2054                   goto trace0;
2055                 }
2056             }
2057           else
2058             {
2059               if (PREDICT_FALSE (value0.value == ~0ULL))
2060                 {
2061                   if (is_slow_path)
2062                     {
2063                       s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
2064                                           thread_index, now, vm, node);
2065                       if (!s0)
2066                         next0 = SNAT_IN2OUT_NEXT_DROP;
2067                       goto trace0;
2068                     }
2069                   else
2070                     {
2071                       next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2072                       goto trace0;
2073                     }
2074                 }
2075               else
2076                 {
2077                   s0 = pool_elt_at_index (
2078                     sm->per_thread_data[thread_index].sessions,
2079                     value0.value);
2080                 }
2081             }
2082
2083           old_addr0 = ip0->src_address.as_u32;
2084           ip0->src_address = s0->out2in.addr;
2085           new_addr0 = ip0->src_address.as_u32;
2086           if (!is_output_feature)
2087             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
2088
2089           sum0 = ip0->checksum;
2090           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2091                                  ip4_header_t,
2092                                  src_address /* changed member */);
2093           ip0->checksum = ip_csum_fold (sum0);
2094
2095           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2096             {
2097               old_port0 = tcp0->src_port;
2098               tcp0->src_port = s0->out2in.port;
2099               new_port0 = tcp0->src_port;
2100
2101               sum0 = tcp0->checksum;
2102               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2103                                      ip4_header_t,
2104                                      dst_address /* changed member */);
2105               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2106                                      ip4_header_t /* cheat */,
2107                                      length /* changed member */);
2108               tcp0->checksum = ip_csum_fold(sum0);
2109             }
2110           else
2111             {
2112               old_port0 = udp0->src_port;
2113               udp0->src_port = s0->out2in.port;
2114               udp0->checksum = 0;
2115             }
2116
2117           /* Hairpinning */
2118           if (!is_output_feature)
2119             snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
2120
2121           /* Accounting */
2122           s0->last_heard = now;
2123           s0->total_pkts++;
2124           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
2125           /* Per-user LRU list maintenance for dynamic translation */
2126           if (!snat_is_session_static (s0))
2127             {
2128               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
2129                                  s0->per_user_index);
2130               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
2131                                   s0->per_user_list_head_index,
2132                                   s0->per_user_index);
2133             }
2134
2135         trace0:
2136           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2137                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2138             {
2139               snat_in2out_trace_t *t =
2140                  vlib_add_trace (vm, node, b0, sizeof (*t));
2141               t->is_slow_path = is_slow_path;
2142               t->sw_if_index = sw_if_index0;
2143               t->next_index = next0;
2144                   t->session_index = ~0;
2145               if (s0)
2146                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
2147             }
2148
2149           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2150
2151           /* verify speculative enqueue, maybe switch current next frame */
2152           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2153                                            to_next, n_left_to_next,
2154                                            bi0, next0);
2155         }
2156
2157       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2158     }
2159
2160   vlib_node_increment_counter (vm, stats_node_index,
2161                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2162                                pkts_processed);
2163   return frame->n_vectors;
2164 }
2165
2166 static uword
2167 snat_in2out_fast_path_fn (vlib_main_t * vm,
2168                           vlib_node_runtime_t * node,
2169                           vlib_frame_t * frame)
2170 {
2171   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0);
2172 }
2173
2174 VLIB_REGISTER_NODE (snat_in2out_node) = {
2175   .function = snat_in2out_fast_path_fn,
2176   .name = "nat44-in2out",
2177   .vector_size = sizeof (u32),
2178   .format_trace = format_snat_in2out_trace,
2179   .type = VLIB_NODE_TYPE_INTERNAL,
2180
2181   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2182   .error_strings = snat_in2out_error_strings,
2183
2184   .runtime_data_bytes = sizeof (snat_runtime_t),
2185
2186   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2187
2188   /* edit / add dispositions here */
2189   .next_nodes = {
2190     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2191     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2192     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2193     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2194   },
2195 };
2196
2197 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
2198
2199 static uword
2200 snat_in2out_output_fast_path_fn (vlib_main_t * vm,
2201                                  vlib_node_runtime_t * node,
2202                                  vlib_frame_t * frame)
2203 {
2204   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1);
2205 }
2206
2207 VLIB_REGISTER_NODE (snat_in2out_output_node) = {
2208   .function = snat_in2out_output_fast_path_fn,
2209   .name = "nat44-in2out-output",
2210   .vector_size = sizeof (u32),
2211   .format_trace = format_snat_in2out_trace,
2212   .type = VLIB_NODE_TYPE_INTERNAL,
2213
2214   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2215   .error_strings = snat_in2out_error_strings,
2216
2217   .runtime_data_bytes = sizeof (snat_runtime_t),
2218
2219   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2220
2221   /* edit / add dispositions here */
2222   .next_nodes = {
2223     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2224     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2225     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2226     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2227   },
2228 };
2229
2230 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
2231                               snat_in2out_output_fast_path_fn);
2232
2233 static uword
2234 snat_in2out_slow_path_fn (vlib_main_t * vm,
2235                           vlib_node_runtime_t * node,
2236                           vlib_frame_t * frame)
2237 {
2238   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0);
2239 }
2240
2241 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
2242   .function = snat_in2out_slow_path_fn,
2243   .name = "nat44-in2out-slowpath",
2244   .vector_size = sizeof (u32),
2245   .format_trace = format_snat_in2out_trace,
2246   .type = VLIB_NODE_TYPE_INTERNAL,
2247
2248   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2249   .error_strings = snat_in2out_error_strings,
2250
2251   .runtime_data_bytes = sizeof (snat_runtime_t),
2252
2253   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2254
2255   /* edit / add dispositions here */
2256   .next_nodes = {
2257     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2258     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2259     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2260     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2261   },
2262 };
2263
2264 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
2265                               snat_in2out_slow_path_fn);
2266
2267 static uword
2268 snat_in2out_output_slow_path_fn (vlib_main_t * vm,
2269                                  vlib_node_runtime_t * node,
2270                                  vlib_frame_t * frame)
2271 {
2272   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1);
2273 }
2274
2275 VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
2276   .function = snat_in2out_output_slow_path_fn,
2277   .name = "nat44-in2out-output-slowpath",
2278   .vector_size = sizeof (u32),
2279   .format_trace = format_snat_in2out_trace,
2280   .type = VLIB_NODE_TYPE_INTERNAL,
2281
2282   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2283   .error_strings = snat_in2out_error_strings,
2284
2285   .runtime_data_bytes = sizeof (snat_runtime_t),
2286
2287   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2288
2289   /* edit / add dispositions here */
2290   .next_nodes = {
2291     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2292     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2293     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2294     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2295   },
2296 };
2297
2298 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
2299                               snat_in2out_output_slow_path_fn);
2300
2301 /**************************/
2302 /*** deterministic mode ***/
2303 /**************************/
2304 static uword
2305 snat_det_in2out_node_fn (vlib_main_t * vm,
2306                          vlib_node_runtime_t * node,
2307                          vlib_frame_t * frame)
2308 {
2309   u32 n_left_from, * from, * to_next;
2310   snat_in2out_next_t next_index;
2311   u32 pkts_processed = 0;
2312   snat_main_t * sm = &snat_main;
2313   u32 now = (u32) vlib_time_now (vm);
2314   u32 thread_index = vlib_get_thread_index ();
2315
2316   from = vlib_frame_vector_args (frame);
2317   n_left_from = frame->n_vectors;
2318   next_index = node->cached_next_index;
2319
2320   while (n_left_from > 0)
2321     {
2322       u32 n_left_to_next;
2323
2324       vlib_get_next_frame (vm, node, next_index,
2325                            to_next, n_left_to_next);
2326
2327       while (n_left_from >= 4 && n_left_to_next >= 2)
2328         {
2329           u32 bi0, bi1;
2330           vlib_buffer_t * b0, * b1;
2331           u32 next0, next1;
2332           u32 sw_if_index0, sw_if_index1;
2333           ip4_header_t * ip0, * ip1;
2334           ip_csum_t sum0, sum1;
2335           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
2336           u16 old_port0, new_port0, lo_port0, i0;
2337           u16 old_port1, new_port1, lo_port1, i1;
2338           udp_header_t * udp0, * udp1;
2339           tcp_header_t * tcp0, * tcp1;
2340           u32 proto0, proto1;
2341           snat_det_out_key_t key0, key1;
2342           snat_det_map_t * dm0, * dm1;
2343           snat_det_session_t * ses0 = 0, * ses1 = 0;
2344           u32 rx_fib_index0, rx_fib_index1;
2345           icmp46_header_t * icmp0, * icmp1;
2346
2347           /* Prefetch next iteration. */
2348           {
2349             vlib_buffer_t * p2, * p3;
2350
2351             p2 = vlib_get_buffer (vm, from[2]);
2352             p3 = vlib_get_buffer (vm, from[3]);
2353
2354             vlib_prefetch_buffer_header (p2, LOAD);
2355             vlib_prefetch_buffer_header (p3, LOAD);
2356
2357             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2358             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2359           }
2360
2361           /* speculatively enqueue b0 and b1 to the current next frame */
2362           to_next[0] = bi0 = from[0];
2363           to_next[1] = bi1 = from[1];
2364           from += 2;
2365           to_next += 2;
2366           n_left_from -= 2;
2367           n_left_to_next -= 2;
2368
2369           b0 = vlib_get_buffer (vm, bi0);
2370           b1 = vlib_get_buffer (vm, bi1);
2371
2372           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2373           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
2374
2375           ip0 = vlib_buffer_get_current (b0);
2376           udp0 = ip4_next_header (ip0);
2377           tcp0 = (tcp_header_t *) udp0;
2378
2379           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2380
2381           if (PREDICT_FALSE(ip0->ttl == 1))
2382             {
2383               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2384               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2385                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2386                                            0);
2387               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2388               goto trace0;
2389             }
2390
2391           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2392
2393           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2394             {
2395               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2396               icmp0 = (icmp46_header_t *) udp0;
2397
2398               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2399                                   rx_fib_index0, node, next0, thread_index,
2400                                   &ses0, &dm0);
2401               goto trace0;
2402             }
2403
2404           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2405           if (PREDICT_FALSE(!dm0))
2406             {
2407               clib_warning("no match for internal host %U",
2408                            format_ip4_address, &ip0->src_address);
2409               next0 = SNAT_IN2OUT_NEXT_DROP;
2410               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2411               goto trace0;
2412             }
2413
2414           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2415
2416           key0.ext_host_addr = ip0->dst_address;
2417           key0.ext_host_port = tcp0->dst;
2418
2419           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2420           if (PREDICT_FALSE(!ses0))
2421             {
2422               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2423                 {
2424                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2425                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2426
2427                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2428                     continue;
2429
2430                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2431                   break;
2432                 }
2433               if (PREDICT_FALSE(!ses0))
2434                 {
2435                   /* too many sessions for user, send ICMP error packet */
2436
2437                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2438                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2439                                                ICMP4_destination_unreachable_destination_unreachable_host,
2440                                                0);
2441                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2442                   goto trace0;
2443                 }
2444             }
2445
2446           new_port0 = ses0->out.out_port;
2447
2448           old_addr0.as_u32 = ip0->src_address.as_u32;
2449           ip0->src_address.as_u32 = new_addr0.as_u32;
2450           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2451
2452           sum0 = ip0->checksum;
2453           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2454                                  ip4_header_t,
2455                                  src_address /* changed member */);
2456           ip0->checksum = ip_csum_fold (sum0);
2457
2458           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2459             {
2460               if (tcp0->flags & TCP_FLAG_SYN)
2461                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2462               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2463                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2464               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2465                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2466               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2467                 snat_det_ses_close(dm0, ses0);
2468               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2469                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2470               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2471                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2472
2473               old_port0 = tcp0->src;
2474               tcp0->src = new_port0;
2475
2476               sum0 = tcp0->checksum;
2477               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2478                                      ip4_header_t,
2479                                      dst_address /* changed member */);
2480               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2481                                      ip4_header_t /* cheat */,
2482                                      length /* changed member */);
2483               tcp0->checksum = ip_csum_fold(sum0);
2484             }
2485           else
2486             {
2487               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2488               old_port0 = udp0->src_port;
2489               udp0->src_port = new_port0;
2490               udp0->checksum = 0;
2491             }
2492
2493           switch(ses0->state)
2494             {
2495             case SNAT_SESSION_UDP_ACTIVE:
2496                 ses0->expire = now + sm->udp_timeout;
2497                 break;
2498             case SNAT_SESSION_TCP_SYN_SENT:
2499             case SNAT_SESSION_TCP_FIN_WAIT:
2500             case SNAT_SESSION_TCP_CLOSE_WAIT:
2501             case SNAT_SESSION_TCP_LAST_ACK:
2502                 ses0->expire = now + sm->tcp_transitory_timeout;
2503                 break;
2504             case SNAT_SESSION_TCP_ESTABLISHED:
2505                 ses0->expire = now + sm->tcp_established_timeout;
2506                 break;
2507             }
2508
2509         trace0:
2510           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2511                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2512             {
2513               snat_in2out_trace_t *t =
2514                  vlib_add_trace (vm, node, b0, sizeof (*t));
2515               t->is_slow_path = 0;
2516               t->sw_if_index = sw_if_index0;
2517               t->next_index = next0;
2518               t->session_index = ~0;
2519               if (ses0)
2520                 t->session_index = ses0 - dm0->sessions;
2521             }
2522
2523           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2524
2525           ip1 = vlib_buffer_get_current (b1);
2526           udp1 = ip4_next_header (ip1);
2527           tcp1 = (tcp_header_t *) udp1;
2528
2529           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2530
2531           if (PREDICT_FALSE(ip1->ttl == 1))
2532             {
2533               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2534               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2535                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2536                                            0);
2537               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2538               goto trace1;
2539             }
2540
2541           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2542
2543           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
2544             {
2545               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
2546               icmp1 = (icmp46_header_t *) udp1;
2547
2548               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
2549                                   rx_fib_index1, node, next1, thread_index,
2550                                   &ses1, &dm1);
2551               goto trace1;
2552             }
2553
2554           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
2555           if (PREDICT_FALSE(!dm1))
2556             {
2557               clib_warning("no match for internal host %U",
2558                            format_ip4_address, &ip0->src_address);
2559               next1 = SNAT_IN2OUT_NEXT_DROP;
2560               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2561               goto trace1;
2562             }
2563
2564           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
2565
2566           key1.ext_host_addr = ip1->dst_address;
2567           key1.ext_host_port = tcp1->dst;
2568
2569           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
2570           if (PREDICT_FALSE(!ses1))
2571             {
2572               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
2573                 {
2574                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
2575                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
2576
2577                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
2578                     continue;
2579
2580                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
2581                   break;
2582                 }
2583               if (PREDICT_FALSE(!ses1))
2584                 {
2585                   /* too many sessions for user, send ICMP error packet */
2586
2587                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2588                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
2589                                                ICMP4_destination_unreachable_destination_unreachable_host,
2590                                                0);
2591                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2592                   goto trace1;
2593                 }
2594             }
2595
2596           new_port1 = ses1->out.out_port;
2597
2598           old_addr1.as_u32 = ip1->src_address.as_u32;
2599           ip1->src_address.as_u32 = new_addr1.as_u32;
2600           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2601
2602           sum1 = ip1->checksum;
2603           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2604                                  ip4_header_t,
2605                                  src_address /* changed member */);
2606           ip1->checksum = ip_csum_fold (sum1);
2607
2608           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
2609             {
2610               if (tcp1->flags & TCP_FLAG_SYN)
2611                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
2612               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
2613                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2614               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
2615                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
2616               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
2617                 snat_det_ses_close(dm1, ses1);
2618               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2619                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
2620               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
2621                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2622
2623               old_port1 = tcp1->src;
2624               tcp1->src = new_port1;
2625
2626               sum1 = tcp1->checksum;
2627               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2628                                      ip4_header_t,
2629                                      dst_address /* changed member */);
2630               sum1 = ip_csum_update (sum1, old_port1, new_port1,
2631                                      ip4_header_t /* cheat */,
2632                                      length /* changed member */);
2633               tcp1->checksum = ip_csum_fold(sum1);
2634             }
2635           else
2636             {
2637               ses1->state = SNAT_SESSION_UDP_ACTIVE;
2638               old_port1 = udp1->src_port;
2639               udp1->src_port = new_port1;
2640               udp1->checksum = 0;
2641             }
2642
2643           switch(ses1->state)
2644             {
2645             case SNAT_SESSION_UDP_ACTIVE:
2646                 ses1->expire = now + sm->udp_timeout;
2647                 break;
2648             case SNAT_SESSION_TCP_SYN_SENT:
2649             case SNAT_SESSION_TCP_FIN_WAIT:
2650             case SNAT_SESSION_TCP_CLOSE_WAIT:
2651             case SNAT_SESSION_TCP_LAST_ACK:
2652                 ses1->expire = now + sm->tcp_transitory_timeout;
2653                 break;
2654             case SNAT_SESSION_TCP_ESTABLISHED:
2655                 ses1->expire = now + sm->tcp_established_timeout;
2656                 break;
2657             }
2658
2659         trace1:
2660           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2661                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2662             {
2663               snat_in2out_trace_t *t =
2664                  vlib_add_trace (vm, node, b1, sizeof (*t));
2665               t->is_slow_path = 0;
2666               t->sw_if_index = sw_if_index1;
2667               t->next_index = next1;
2668               t->session_index = ~0;
2669               if (ses1)
2670                 t->session_index = ses1 - dm1->sessions;
2671             }
2672
2673           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
2674
2675           /* verify speculative enqueues, maybe switch current next frame */
2676           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2677                                            to_next, n_left_to_next,
2678                                            bi0, bi1, next0, next1);
2679          }
2680
2681       while (n_left_from > 0 && n_left_to_next > 0)
2682         {
2683           u32 bi0;
2684           vlib_buffer_t * b0;
2685           u32 next0;
2686           u32 sw_if_index0;
2687           ip4_header_t * ip0;
2688           ip_csum_t sum0;
2689           ip4_address_t new_addr0, old_addr0;
2690           u16 old_port0, new_port0, lo_port0, i0;
2691           udp_header_t * udp0;
2692           tcp_header_t * tcp0;
2693           u32 proto0;
2694           snat_det_out_key_t key0;
2695           snat_det_map_t * dm0;
2696           snat_det_session_t * ses0 = 0;
2697           u32 rx_fib_index0;
2698           icmp46_header_t * icmp0;
2699
2700           /* speculatively enqueue b0 to the current next frame */
2701           bi0 = from[0];
2702           to_next[0] = bi0;
2703           from += 1;
2704           to_next += 1;
2705           n_left_from -= 1;
2706           n_left_to_next -= 1;
2707
2708           b0 = vlib_get_buffer (vm, bi0);
2709           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2710
2711           ip0 = vlib_buffer_get_current (b0);
2712           udp0 = ip4_next_header (ip0);
2713           tcp0 = (tcp_header_t *) udp0;
2714
2715           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2716
2717           if (PREDICT_FALSE(ip0->ttl == 1))
2718             {
2719               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2720               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2721                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2722                                            0);
2723               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2724               goto trace00;
2725             }
2726
2727           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2728
2729           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2730             {
2731               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2732               icmp0 = (icmp46_header_t *) udp0;
2733
2734               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2735                                   rx_fib_index0, node, next0, thread_index,
2736                                   &ses0, &dm0);
2737               goto trace00;
2738             }
2739
2740           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2741           if (PREDICT_FALSE(!dm0))
2742             {
2743               clib_warning("no match for internal host %U",
2744                            format_ip4_address, &ip0->src_address);
2745               next0 = SNAT_IN2OUT_NEXT_DROP;
2746               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2747               goto trace00;
2748             }
2749
2750           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2751
2752           key0.ext_host_addr = ip0->dst_address;
2753           key0.ext_host_port = tcp0->dst;
2754
2755           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2756           if (PREDICT_FALSE(!ses0))
2757             {
2758               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2759                 {
2760                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2761                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2762
2763                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2764                     continue;
2765
2766                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2767                   break;
2768                 }
2769               if (PREDICT_FALSE(!ses0))
2770                 {
2771                   /* too many sessions for user, send ICMP error packet */
2772
2773                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2774                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2775                                                ICMP4_destination_unreachable_destination_unreachable_host,
2776                                                0);
2777                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2778                   goto trace00;
2779                 }
2780             }
2781
2782           new_port0 = ses0->out.out_port;
2783
2784           old_addr0.as_u32 = ip0->src_address.as_u32;
2785           ip0->src_address.as_u32 = new_addr0.as_u32;
2786           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2787
2788           sum0 = ip0->checksum;
2789           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2790                                  ip4_header_t,
2791                                  src_address /* changed member */);
2792           ip0->checksum = ip_csum_fold (sum0);
2793
2794           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2795             {
2796               if (tcp0->flags & TCP_FLAG_SYN)
2797                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2798               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2799                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2800               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2801                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2802               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2803                 snat_det_ses_close(dm0, ses0);
2804               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2805                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2806               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2807                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2808
2809               old_port0 = tcp0->src;
2810               tcp0->src = new_port0;
2811
2812               sum0 = tcp0->checksum;
2813               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2814                                      ip4_header_t,
2815                                      dst_address /* changed member */);
2816               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2817                                      ip4_header_t /* cheat */,
2818                                      length /* changed member */);
2819               tcp0->checksum = ip_csum_fold(sum0);
2820             }
2821           else
2822             {
2823               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2824               old_port0 = udp0->src_port;
2825               udp0->src_port = new_port0;
2826               udp0->checksum = 0;
2827             }
2828
2829           switch(ses0->state)
2830             {
2831             case SNAT_SESSION_UDP_ACTIVE:
2832                 ses0->expire = now + sm->udp_timeout;
2833                 break;
2834             case SNAT_SESSION_TCP_SYN_SENT:
2835             case SNAT_SESSION_TCP_FIN_WAIT:
2836             case SNAT_SESSION_TCP_CLOSE_WAIT:
2837             case SNAT_SESSION_TCP_LAST_ACK:
2838                 ses0->expire = now + sm->tcp_transitory_timeout;
2839                 break;
2840             case SNAT_SESSION_TCP_ESTABLISHED:
2841                 ses0->expire = now + sm->tcp_established_timeout;
2842                 break;
2843             }
2844
2845         trace00:
2846           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2847                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2848             {
2849               snat_in2out_trace_t *t =
2850                  vlib_add_trace (vm, node, b0, sizeof (*t));
2851               t->is_slow_path = 0;
2852               t->sw_if_index = sw_if_index0;
2853               t->next_index = next0;
2854               t->session_index = ~0;
2855               if (ses0)
2856                 t->session_index = ses0 - dm0->sessions;
2857             }
2858
2859           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2860
2861           /* verify speculative enqueue, maybe switch current next frame */
2862           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2863                                            to_next, n_left_to_next,
2864                                            bi0, next0);
2865         }
2866
2867       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2868     }
2869
2870   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
2871                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2872                                pkts_processed);
2873   return frame->n_vectors;
2874 }
2875
2876 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
2877   .function = snat_det_in2out_node_fn,
2878   .name = "nat44-det-in2out",
2879   .vector_size = sizeof (u32),
2880   .format_trace = format_snat_in2out_trace,
2881   .type = VLIB_NODE_TYPE_INTERNAL,
2882
2883   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2884   .error_strings = snat_in2out_error_strings,
2885
2886   .runtime_data_bytes = sizeof (snat_runtime_t),
2887
2888   .n_next_nodes = 3,
2889
2890   /* edit / add dispositions here */
2891   .next_nodes = {
2892     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2893     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2894     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2895   },
2896 };
2897
2898 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
2899
2900 /**
2901  * Get address and port values to be used for ICMP packet translation
2902  * and create session if needed
2903  *
2904  * @param[in,out] sm             NAT main
2905  * @param[in,out] node           NAT node runtime
2906  * @param[in] thread_index       thread index
2907  * @param[in,out] b0             buffer containing packet to be translated
2908  * @param[out] p_proto           protocol used for matching
2909  * @param[out] p_value           address and port after NAT translation
2910  * @param[out] p_dont_translate  if packet should not be translated
2911  * @param d                      optional parameter
2912  * @param e                      optional parameter
2913  */
2914 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
2915                           u32 thread_index, vlib_buffer_t *b0,
2916                           ip4_header_t *ip0, u8 *p_proto,
2917                           snat_session_key_t *p_value,
2918                           u8 *p_dont_translate, void *d, void *e)
2919 {
2920   icmp46_header_t *icmp0;
2921   u32 sw_if_index0;
2922   u32 rx_fib_index0;
2923   u8 protocol;
2924   snat_det_out_key_t key0;
2925   u8 dont_translate = 0;
2926   u32 next0 = ~0;
2927   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2928   ip4_header_t *inner_ip0;
2929   void *l4_header = 0;
2930   icmp46_header_t *inner_icmp0;
2931   snat_det_map_t * dm0 = 0;
2932   ip4_address_t new_addr0;
2933   u16 lo_port0, i0;
2934   snat_det_session_t * ses0 = 0;
2935   ip4_address_t in_addr;
2936   u16 in_port;
2937
2938   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2939   echo0 = (icmp_echo_header_t *)(icmp0+1);
2940   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2941   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
2942
2943   if (!icmp_is_error_message (icmp0))
2944     {
2945       protocol = SNAT_PROTOCOL_ICMP;
2946       in_addr = ip0->src_address;
2947       in_port = echo0->identifier;
2948     }
2949   else
2950     {
2951       inner_ip0 = (ip4_header_t *)(echo0+1);
2952       l4_header = ip4_next_header (inner_ip0);
2953       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2954       in_addr = inner_ip0->dst_address;
2955       switch (protocol)
2956         {
2957         case SNAT_PROTOCOL_ICMP:
2958           inner_icmp0 = (icmp46_header_t*)l4_header;
2959           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2960           in_port = inner_echo0->identifier;
2961           break;
2962         case SNAT_PROTOCOL_UDP:
2963         case SNAT_PROTOCOL_TCP:
2964           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2965           break;
2966         default:
2967           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
2968           next0 = SNAT_IN2OUT_NEXT_DROP;
2969           goto out;
2970         }
2971     }
2972
2973   dm0 = snat_det_map_by_user(sm, &in_addr);
2974   if (PREDICT_FALSE(!dm0))
2975     {
2976       clib_warning("no match for internal host %U",
2977                    format_ip4_address, &in_addr);
2978       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2979           IP_PROTOCOL_ICMP, rx_fib_index0)))
2980         {
2981           dont_translate = 1;
2982           goto out;
2983         }
2984       next0 = SNAT_IN2OUT_NEXT_DROP;
2985       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2986       goto out;
2987     }
2988
2989   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
2990
2991   key0.ext_host_addr = ip0->dst_address;
2992   key0.ext_host_port = 0;
2993
2994   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
2995   if (PREDICT_FALSE(!ses0))
2996     {
2997       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2998           IP_PROTOCOL_ICMP, rx_fib_index0)))
2999         {
3000           dont_translate = 1;
3001           goto out;
3002         }
3003       if (icmp0->type != ICMP4_echo_request)
3004         {
3005           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3006           next0 = SNAT_IN2OUT_NEXT_DROP;
3007           goto out;
3008         }
3009       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
3010         {
3011           key0.out_port = clib_host_to_net_u16 (lo_port0 +
3012             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
3013
3014           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
3015             continue;
3016
3017           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
3018           break;
3019         }
3020       if (PREDICT_FALSE(!ses0))
3021         {
3022           next0 = SNAT_IN2OUT_NEXT_DROP;
3023           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
3024           goto out;
3025         }
3026     }
3027
3028   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
3029                     !icmp_is_error_message (icmp0)))
3030     {
3031       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3032       next0 = SNAT_IN2OUT_NEXT_DROP;
3033       goto out;
3034     }
3035
3036   u32 now = (u32) vlib_time_now (sm->vlib_main);
3037
3038   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
3039   ses0->expire = now + sm->icmp_timeout;
3040
3041 out:
3042   *p_proto = protocol;
3043   if (ses0)
3044     {
3045       p_value->addr = new_addr0;
3046       p_value->fib_index = sm->outside_fib_index;
3047       p_value->port = ses0->out.out_port;
3048     }
3049   *p_dont_translate = dont_translate;
3050   if (d)
3051     *(snat_det_session_t**)d = ses0;
3052   if (e)
3053     *(snat_det_map_t**)e = dm0;
3054   return next0;
3055 }
3056
3057 /**********************/
3058 /*** worker handoff ***/
3059 /**********************/
3060 static inline uword
3061 snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
3062                                       vlib_node_runtime_t * node,
3063                                       vlib_frame_t * frame,
3064                                       u8 is_output)
3065 {
3066   snat_main_t *sm = &snat_main;
3067   vlib_thread_main_t *tm = vlib_get_thread_main ();
3068   u32 n_left_from, *from, *to_next = 0;
3069   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
3070   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
3071     = 0;
3072   vlib_frame_queue_elt_t *hf = 0;
3073   vlib_frame_t *f = 0;
3074   int i;
3075   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
3076   u32 next_worker_index = 0;
3077   u32 current_worker_index = ~0;
3078   u32 thread_index = vlib_get_thread_index ();
3079   u32 fq_index;
3080   u32 to_node_index;
3081
3082   ASSERT (vec_len (sm->workers));
3083
3084   if (is_output)
3085     {
3086       fq_index = sm->fq_in2out_output_index;
3087       to_node_index = sm->in2out_output_node_index;
3088     }
3089   else
3090     {
3091       fq_index = sm->fq_in2out_index;
3092       to_node_index = sm->in2out_node_index;
3093     }
3094
3095   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
3096     {
3097       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
3098
3099       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
3100                                sm->first_worker_index + sm->num_workers - 1,
3101                                (vlib_frame_queue_t *) (~0));
3102     }
3103
3104   from = vlib_frame_vector_args (frame);
3105   n_left_from = frame->n_vectors;
3106
3107   while (n_left_from > 0)
3108     {
3109       u32 bi0;
3110       vlib_buffer_t *b0;
3111       u32 sw_if_index0;
3112       u32 rx_fib_index0;
3113       ip4_header_t * ip0;
3114       u8 do_handoff;
3115
3116       bi0 = from[0];
3117       from += 1;
3118       n_left_from -= 1;
3119
3120       b0 = vlib_get_buffer (vm, bi0);
3121
3122       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
3123       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3124
3125       ip0 = vlib_buffer_get_current (b0);
3126
3127       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
3128
3129       if (PREDICT_FALSE (next_worker_index != thread_index))
3130         {
3131           do_handoff = 1;
3132
3133           if (next_worker_index != current_worker_index)
3134             {
3135               if (hf)
3136                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3137
3138               hf = vlib_get_worker_handoff_queue_elt (fq_index,
3139                                                       next_worker_index,
3140                                                       handoff_queue_elt_by_worker_index);
3141
3142               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
3143               to_next_worker = &hf->buffer_index[hf->n_vectors];
3144               current_worker_index = next_worker_index;
3145             }
3146
3147           /* enqueue to correct worker thread */
3148           to_next_worker[0] = bi0;
3149           to_next_worker++;
3150           n_left_to_next_worker--;
3151
3152           if (n_left_to_next_worker == 0)
3153             {
3154               hf->n_vectors = VLIB_FRAME_SIZE;
3155               vlib_put_frame_queue_elt (hf);
3156               current_worker_index = ~0;
3157               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
3158               hf = 0;
3159             }
3160         }
3161       else
3162         {
3163           do_handoff = 0;
3164           /* if this is 1st frame */
3165           if (!f)
3166             {
3167               f = vlib_get_frame_to_node (vm, to_node_index);
3168               to_next = vlib_frame_vector_args (f);
3169             }
3170
3171           to_next[0] = bi0;
3172           to_next += 1;
3173           f->n_vectors++;
3174         }
3175
3176       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
3177                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3178         {
3179           snat_in2out_worker_handoff_trace_t *t =
3180             vlib_add_trace (vm, node, b0, sizeof (*t));
3181           t->next_worker_index = next_worker_index;
3182           t->do_handoff = do_handoff;
3183         }
3184     }
3185
3186   if (f)
3187     vlib_put_frame_to_node (vm, to_node_index, f);
3188
3189   if (hf)
3190     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3191
3192   /* Ship frames to the worker nodes */
3193   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
3194     {
3195       if (handoff_queue_elt_by_worker_index[i])
3196         {
3197           hf = handoff_queue_elt_by_worker_index[i];
3198           /*
3199            * It works better to let the handoff node
3200            * rate-adapt, always ship the handoff queue element.
3201            */
3202           if (1 || hf->n_vectors == hf->last_n_vectors)
3203             {
3204               vlib_put_frame_queue_elt (hf);
3205               handoff_queue_elt_by_worker_index[i] = 0;
3206             }
3207           else
3208             hf->last_n_vectors = hf->n_vectors;
3209         }
3210       congested_handoff_queue_by_worker_index[i] =
3211         (vlib_frame_queue_t *) (~0);
3212     }
3213   hf = 0;
3214   current_worker_index = ~0;
3215   return frame->n_vectors;
3216 }
3217
3218 static uword
3219 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
3220                                vlib_node_runtime_t * node,
3221                                vlib_frame_t * frame)
3222 {
3223   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
3224 }
3225
3226 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
3227   .function = snat_in2out_worker_handoff_fn,
3228   .name = "nat44-in2out-worker-handoff",
3229   .vector_size = sizeof (u32),
3230   .format_trace = format_snat_in2out_worker_handoff_trace,
3231   .type = VLIB_NODE_TYPE_INTERNAL,
3232
3233   .n_next_nodes = 1,
3234
3235   .next_nodes = {
3236     [0] = "error-drop",
3237   },
3238 };
3239
3240 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
3241                               snat_in2out_worker_handoff_fn);
3242
3243 static uword
3244 snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
3245                                       vlib_node_runtime_t * node,
3246                                       vlib_frame_t * frame)
3247 {
3248   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
3249 }
3250
3251 VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
3252   .function = snat_in2out_output_worker_handoff_fn,
3253   .name = "nat44-in2out-output-worker-handoff",
3254   .vector_size = sizeof (u32),
3255   .format_trace = format_snat_in2out_worker_handoff_trace,
3256   .type = VLIB_NODE_TYPE_INTERNAL,
3257
3258   .n_next_nodes = 1,
3259
3260   .next_nodes = {
3261     [0] = "error-drop",
3262   },
3263 };
3264
3265 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
3266                               snat_in2out_output_worker_handoff_fn);
3267
3268 static_always_inline int
3269 is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
3270 {
3271   snat_address_t * ap;
3272   clib_bihash_kv_8_8_t kv, value;
3273   snat_session_key_t m_key;
3274
3275   vec_foreach (ap, sm->addresses)
3276     {
3277       if (ap->addr.as_u32 == dst_addr->as_u32)
3278         return 1;
3279     }
3280
3281   m_key.addr.as_u32 = dst_addr->as_u32;
3282   m_key.fib_index = sm->outside_fib_index;
3283   m_key.port = 0;
3284   m_key.protocol = 0;
3285   kv.key = m_key.as_u64;
3286   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
3287     return 1;
3288
3289   return 0;
3290 }
3291
3292 static uword
3293 snat_hairpin_dst_fn (vlib_main_t * vm,
3294                      vlib_node_runtime_t * node,
3295                      vlib_frame_t * frame)
3296 {
3297   u32 n_left_from, * from, * to_next;
3298   snat_in2out_next_t next_index;
3299   u32 pkts_processed = 0;
3300   snat_main_t * sm = &snat_main;
3301
3302   from = vlib_frame_vector_args (frame);
3303   n_left_from = frame->n_vectors;
3304   next_index = node->cached_next_index;
3305
3306   while (n_left_from > 0)
3307     {
3308       u32 n_left_to_next;
3309
3310       vlib_get_next_frame (vm, node, next_index,
3311                            to_next, n_left_to_next);
3312
3313       while (n_left_from > 0 && n_left_to_next > 0)
3314         {
3315           u32 bi0;
3316           vlib_buffer_t * b0;
3317           u32 next0;
3318           ip4_header_t * ip0;
3319           u32 proto0;
3320
3321           /* speculatively enqueue b0 to the current next frame */
3322           bi0 = from[0];
3323           to_next[0] = bi0;
3324           from += 1;
3325           to_next += 1;
3326           n_left_from -= 1;
3327           n_left_to_next -= 1;
3328
3329           b0 = vlib_get_buffer (vm, bi0);
3330           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3331           ip0 = vlib_buffer_get_current (b0);
3332
3333           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3334
3335           vnet_buffer (b0)->snat.flags = 0;
3336           if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
3337             {
3338               if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
3339                 {
3340                   udp_header_t * udp0 = ip4_next_header (ip0);
3341                   tcp_header_t * tcp0 = (tcp_header_t *) udp0;
3342
3343                   snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3344                 }
3345               else if (proto0 == SNAT_PROTOCOL_ICMP)
3346                 {
3347                   icmp46_header_t * icmp0 = ip4_next_header (ip0);
3348
3349                   snat_icmp_hairpinning (sm, b0, ip0, icmp0);
3350                 }
3351               else
3352                 {
3353                   snat_hairpinning_unknown_proto (sm, b0, ip0);
3354                 }
3355
3356               vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
3357               clib_warning("is hairpinning");
3358             }
3359
3360           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3361
3362           /* verify speculative enqueue, maybe switch current next frame */
3363           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3364                                            to_next, n_left_to_next,
3365                                            bi0, next0);
3366          }
3367
3368       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3369     }
3370
3371   vlib_node_increment_counter (vm, snat_hairpin_dst_node.index,
3372                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3373                                pkts_processed);
3374   return frame->n_vectors;
3375 }
3376
3377 VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
3378   .function = snat_hairpin_dst_fn,
3379   .name = "nat44-hairpin-dst",
3380   .vector_size = sizeof (u32),
3381   .type = VLIB_NODE_TYPE_INTERNAL,
3382   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3383   .error_strings = snat_in2out_error_strings,
3384   .n_next_nodes = 2,
3385   .next_nodes = {
3386     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3387     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3388   },
3389 };
3390
3391 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node,
3392                               snat_hairpin_dst_fn);
3393
3394 static uword
3395 snat_hairpin_src_fn (vlib_main_t * vm,
3396                      vlib_node_runtime_t * node,
3397                      vlib_frame_t * frame)
3398 {
3399   u32 n_left_from, * from, * to_next;
3400   snat_in2out_next_t next_index;
3401   u32 pkts_processed = 0;
3402   snat_main_t *sm = &snat_main;
3403
3404   from = vlib_frame_vector_args (frame);
3405   n_left_from = frame->n_vectors;
3406   next_index = node->cached_next_index;
3407
3408   while (n_left_from > 0)
3409     {
3410       u32 n_left_to_next;
3411
3412       vlib_get_next_frame (vm, node, next_index,
3413                            to_next, n_left_to_next);
3414
3415       while (n_left_from > 0 && n_left_to_next > 0)
3416         {
3417           u32 bi0;
3418           vlib_buffer_t * b0;
3419           u32 next0;
3420           snat_interface_t *i;
3421           u32 sw_if_index0;
3422
3423           /* speculatively enqueue b0 to the current next frame */
3424           bi0 = from[0];
3425           to_next[0] = bi0;
3426           from += 1;
3427           to_next += 1;
3428           n_left_from -= 1;
3429           n_left_to_next -= 1;
3430
3431           b0 = vlib_get_buffer (vm, bi0);
3432           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3433           next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT;
3434
3435           pool_foreach (i, sm->output_feature_interfaces,
3436           ({
3437             /* Only packets from NAT inside interface */
3438             if ((i->is_inside == 1) && (sw_if_index0 == i->sw_if_index))
3439               {
3440                 if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) &
3441                                     SNAT_FLAG_HAIRPINNING))
3442                   {
3443                     if (PREDICT_TRUE (sm->num_workers > 1))
3444                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
3445                     else
3446                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
3447                   }
3448                 break;
3449               }
3450           }));
3451
3452           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3453
3454           /* verify speculative enqueue, maybe switch current next frame */
3455           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3456                                            to_next, n_left_to_next,
3457                                            bi0, next0);
3458          }
3459
3460       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3461     }
3462
3463   vlib_node_increment_counter (vm, snat_hairpin_src_node.index,
3464                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3465                                pkts_processed);
3466   return frame->n_vectors;
3467 }
3468
3469 VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
3470   .function = snat_hairpin_src_fn,
3471   .name = "nat44-hairpin-src",
3472   .vector_size = sizeof (u32),
3473   .type = VLIB_NODE_TYPE_INTERNAL,
3474   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3475   .error_strings = snat_in2out_error_strings,
3476   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
3477   .next_nodes = {
3478      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
3479      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-in2out-output",
3480      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
3481      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
3482   },
3483 };
3484
3485 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node,
3486                               snat_hairpin_src_fn);
3487
3488 static uword
3489 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
3490                                 vlib_node_runtime_t * node,
3491                                 vlib_frame_t * frame)
3492 {
3493   u32 n_left_from, * from, * to_next;
3494   snat_in2out_next_t next_index;
3495   u32 pkts_processed = 0;
3496   snat_main_t * sm = &snat_main;
3497   u32 stats_node_index;
3498
3499   stats_node_index = snat_in2out_fast_node.index;
3500
3501   from = vlib_frame_vector_args (frame);
3502   n_left_from = frame->n_vectors;
3503   next_index = node->cached_next_index;
3504
3505   while (n_left_from > 0)
3506     {
3507       u32 n_left_to_next;
3508
3509       vlib_get_next_frame (vm, node, next_index,
3510                            to_next, n_left_to_next);
3511
3512       while (n_left_from > 0 && n_left_to_next > 0)
3513         {
3514           u32 bi0;
3515           vlib_buffer_t * b0;
3516           u32 next0;
3517           u32 sw_if_index0;
3518           ip4_header_t * ip0;
3519           ip_csum_t sum0;
3520           u32 new_addr0, old_addr0;
3521           u16 old_port0, new_port0;
3522           udp_header_t * udp0;
3523           tcp_header_t * tcp0;
3524           icmp46_header_t * icmp0;
3525           snat_session_key_t key0, sm0;
3526           u32 proto0;
3527           u32 rx_fib_index0;
3528
3529           /* speculatively enqueue b0 to the current next frame */
3530           bi0 = from[0];
3531           to_next[0] = bi0;
3532           from += 1;
3533           to_next += 1;
3534           n_left_from -= 1;
3535           n_left_to_next -= 1;
3536
3537           b0 = vlib_get_buffer (vm, bi0);
3538           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3539
3540           ip0 = vlib_buffer_get_current (b0);
3541           udp0 = ip4_next_header (ip0);
3542           tcp0 = (tcp_header_t *) udp0;
3543           icmp0 = (icmp46_header_t *) udp0;
3544
3545           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3546           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3547
3548           if (PREDICT_FALSE(ip0->ttl == 1))
3549             {
3550               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3551               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3552                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3553                                            0);
3554               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3555               goto trace0;
3556             }
3557
3558           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3559
3560           if (PREDICT_FALSE (proto0 == ~0))
3561               goto trace0;
3562
3563           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3564             {
3565               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
3566                                   rx_fib_index0, node, next0, ~0, 0, 0);
3567               goto trace0;
3568             }
3569
3570           key0.addr = ip0->src_address;
3571           key0.protocol = proto0;
3572           key0.port = udp0->src_port;
3573           key0.fib_index = rx_fib_index0;
3574
3575           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0))
3576             {
3577               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3578               next0= SNAT_IN2OUT_NEXT_DROP;
3579               goto trace0;
3580             }
3581
3582           new_addr0 = sm0.addr.as_u32;
3583           new_port0 = sm0.port;
3584           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
3585           old_addr0 = ip0->src_address.as_u32;
3586           ip0->src_address.as_u32 = new_addr0;
3587
3588           sum0 = ip0->checksum;
3589           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3590                                  ip4_header_t,
3591                                  src_address /* changed member */);
3592           ip0->checksum = ip_csum_fold (sum0);
3593
3594           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
3595             {
3596               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3597                 {
3598                   old_port0 = tcp0->src_port;
3599                   tcp0->src_port = new_port0;
3600
3601                   sum0 = tcp0->checksum;
3602                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3603                                          ip4_header_t,
3604                                          dst_address /* changed member */);
3605                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
3606                                          ip4_header_t /* cheat */,
3607                                          length /* changed member */);
3608                   tcp0->checksum = ip_csum_fold(sum0);
3609                 }
3610               else
3611                 {
3612                   old_port0 = udp0->src_port;
3613                   udp0->src_port = new_port0;
3614                   udp0->checksum = 0;
3615                 }
3616             }
3617           else
3618             {
3619               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3620                 {
3621                   sum0 = tcp0->checksum;
3622                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3623                                          ip4_header_t,
3624                                          dst_address /* changed member */);
3625                   tcp0->checksum = ip_csum_fold(sum0);
3626                 }
3627             }
3628
3629           /* Hairpinning */
3630           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3631
3632         trace0:
3633           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3634                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3635             {
3636               snat_in2out_trace_t *t =
3637                  vlib_add_trace (vm, node, b0, sizeof (*t));
3638               t->sw_if_index = sw_if_index0;
3639               t->next_index = next0;
3640             }
3641
3642           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3643
3644           /* verify speculative enqueue, maybe switch current next frame */
3645           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3646                                            to_next, n_left_to_next,
3647                                            bi0, next0);
3648         }
3649
3650       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3651     }
3652
3653   vlib_node_increment_counter (vm, stats_node_index,
3654                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3655                                pkts_processed);
3656   return frame->n_vectors;
3657 }
3658
3659
3660 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
3661   .function = snat_in2out_fast_static_map_fn,
3662   .name = "nat44-in2out-fast",
3663   .vector_size = sizeof (u32),
3664   .format_trace = format_snat_in2out_fast_trace,
3665   .type = VLIB_NODE_TYPE_INTERNAL,
3666
3667   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3668   .error_strings = snat_in2out_error_strings,
3669
3670   .runtime_data_bytes = sizeof (snat_runtime_t),
3671
3672   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3673
3674   /* edit / add dispositions here */
3675   .next_nodes = {
3676     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3677     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3678     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
3679     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3680   },
3681 };
3682
3683 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);