Fix static mapping lookup issue for NAT plugin
[vpp.git] / src / plugins / nat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <nat/nat.h>
25 #include <nat/nat_ipfix_logging.h>
26 #include <nat/nat_det.h>
27
28 #include <vppinfra/hash.h>
29 #include <vppinfra/error.h>
30 #include <vppinfra/elog.h>
31
/* Per-packet trace record printed by format_snat_in2out_trace() and
   format_snat_in2out_fast_trace(). */
32 typedef struct {
33   u32 sw_if_index;     /* printed as "sw_if_index" */
34   u32 next_index;      /* printed as "next index" */
35   u32 session_index;   /* printed as "session"; not printed by the fast formatter */
36   u32 is_slow_path;    /* non-zero selects the SLOW_PATH tag, zero the FAST_PATH tag */
37 } snat_in2out_trace_t;
38
/* Trace record printed by format_snat_in2out_worker_handoff_trace(). */
39 typedef struct {
40   u32 next_worker_index;   /* worker index shown after the handoff verdict */
41   u8 do_handoff;           /* non-zero prints "next worker", zero "same worker" */
42 } snat_in2out_worker_handoff_trace_t;
43
44 /* packet trace format function */
45 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
46 {
47   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
50   char * tag;
51
52   tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
53
54   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
55               t->sw_if_index, t->next_index, t->session_index);
56
57   return s;
58 }
59
60 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
61 {
62   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
63   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
64   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
65
66   s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
67               t->sw_if_index, t->next_index);
68
69   return s;
70 }
71
72 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
73 {
74   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
75   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
76   snat_in2out_worker_handoff_trace_t * t =
77     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
78   char * m;
79
80   m = t->do_handoff ? "next worker" : "same worker";
81   s = format (s, "NAT44_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
82
83   return s;
84 }
85
/* Graph node registration objects declared for this file.
   NOTE(review): the initialised definitions are not visible in this part of
   the source - presumably they follow the node functions; confirm. */
86 vlib_node_registration_t snat_in2out_node;
87 vlib_node_registration_t snat_in2out_slowpath_node;
88 vlib_node_registration_t snat_in2out_fast_node;
89 vlib_node_registration_t snat_in2out_worker_handoff_node;
90 vlib_node_registration_t snat_det_in2out_node;
91 vlib_node_registration_t snat_in2out_output_node;
92 vlib_node_registration_t snat_in2out_output_slowpath_node;
93 vlib_node_registration_t snat_in2out_output_worker_handoff_node;
94 vlib_node_registration_t snat_hairpin_dst_node;
95 vlib_node_registration_t snat_hairpin_src_node;
96
97
/* X-macro listing every in2out error as _(symbol, description).  The user
   defines _() before expanding it; this file expands it twice, once for the
   error enum and once for the description string table. */
98 #define foreach_snat_in2out_error                       \
99 _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
100 _(IN2OUT_PACKETS, "Good in2out packets processed")      \
101 _(OUT_OF_PORTS, "Out of ports")                         \
102 _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
103 _(BAD_ICMP_TYPE, "unsupported ICMP type")               \
104 _(NO_TRANSLATION, "No translation")
105
/* Error codes: one SNAT_IN2OUT_ERROR_<symbol> per foreach_snat_in2out_error
   entry, in list order, followed by the entry count. */
106 typedef enum {
107 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
108   foreach_snat_in2out_error
109 #undef _
110   SNAT_IN2OUT_N_ERROR,   /* number of error codes, not an error itself */
111 } snat_in2out_error_t;
112
/* Description strings generated from foreach_snat_in2out_error, so entry i
   describes the i-th snat_in2out_error_t value. */
113 static char * snat_in2out_error_strings[] = {
114 #define _(sym,string) string,
115   foreach_snat_in2out_error
116 #undef _
117 };
118
/* Values used as the "next0" result by the in2out code in this file (for
   example slow_path() returns SNAT_IN2OUT_NEXT_DROP on failure).
   NOTE(review): presumably these index the node's next-node table - the node
   registration is not visible here, confirm. */
119 typedef enum {
120   SNAT_IN2OUT_NEXT_LOOKUP,
121   SNAT_IN2OUT_NEXT_DROP,
122   SNAT_IN2OUT_NEXT_ICMP_ERROR,
123   SNAT_IN2OUT_NEXT_SLOW_PATH,
124   SNAT_IN2OUT_N_NEXT,      /* count of the values above */
125 } snat_in2out_next_t;
126
/* Next-index values named after the hairpin source node.
   NOTE(review): no user of this enum is visible in this part of the file;
   presumably consumed by the snat_hairpin_src node - confirm. */
127 typedef enum {
128   SNAT_HAIRPIN_SRC_NEXT_DROP,
129   SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
130   SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
131   SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
132   SNAT_HAIRPIN_SRC_N_NEXT,   /* count of the values above */
133 } snat_hairpin_next_t;
134
135 /**
136  * @brief Check if packet should be translated
137  *
138  * Packets aimed at outside interface and external addresss with active session
139  * should be translated.
140  *
141  * @param sm            NAT main
142  * @param rt            NAT runtime data
143  * @param sw_if_index0  index of the inside interface
144  * @param ip0           IPv4 header
145  * @param proto0        NAT protocol
146  * @param rx_fib_index0 RX FIB index
147  *
148  * @returns 0 if packet should be translated otherwise 1
149  */
150 static inline int
151 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
152                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
153                          u32 rx_fib_index0)
154 {
155   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
156   fib_prefix_t pfx = {
157     .fp_proto = FIB_PROTOCOL_IP4,
158     .fp_len = 32,
159     .fp_addr = {
160         .ip4.as_u32 = ip0->dst_address.as_u32,
161     },
162   };
163
164   /* Don't NAT packet aimed at the intfc address */
165   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
166                                       ip0->dst_address.as_u32)))
167     return 1;
168
169   fei = fib_table_lookup (rx_fib_index0, &pfx);
170   if (FIB_NODE_INDEX_INVALID != fei)
171     {
172       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
173       if (sw_if_index == ~0)
174         {
175           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
176           if (FIB_NODE_INDEX_INVALID != fei)
177             sw_if_index = fib_entry_get_resolving_interface (fei);
178         }
179       snat_interface_t *i;
180       pool_foreach (i, sm->interfaces,
181       ({
182         /* NAT packet aimed at outside interface */
183         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
184           return 0;
185       }));
186     }
187
188   return 1;
189 }
190
191 static inline int
192 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
193                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
194                     u32 rx_fib_index0)
195 {
196   udp_header_t * udp0 = ip4_next_header (ip0);
197   snat_session_key_t key0, sm0;
198   clib_bihash_kv_8_8_t kv0, value0;
199
200   key0.addr = ip0->dst_address;
201   key0.port = udp0->dst_port;
202   key0.protocol = proto0;
203   key0.fib_index = sm->outside_fib_index;
204   kv0.key = key0.as_u64;
205
206   /* NAT packet aimed at external address if */
207   /* has active sessions */
208   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
209     {
210       /* or is static mappings */
211       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
212         return 0;
213     }
214   else
215     return 0;
216
217   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
218                                  rx_fib_index0);
219 }
220
221 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
222                       ip4_header_t * ip0,
223                       u32 rx_fib_index0,
224                       snat_session_key_t * key0,
225                       snat_session_t ** sessionp,
226                       vlib_node_runtime_t * node,
227                       u32 next0,
228                       u32 thread_index)
229 {
230   snat_user_t *u;
231   snat_user_key_t user_key;
232   snat_session_t *s;
233   clib_bihash_kv_8_8_t kv0, value0;
234   u32 oldest_per_user_translation_list_index;
235   dlist_elt_t * oldest_per_user_translation_list_elt;
236   dlist_elt_t * per_user_translation_list_elt;
237   dlist_elt_t * per_user_list_head_elt;
238   u32 session_index;
239   snat_session_key_t key1;
240   u32 address_index = ~0;
241   u32 outside_fib_index;
242   uword * p;
243   snat_worker_key_t worker_by_out_key;
244
245   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
246   if (! p)
247     {
248       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
249       return SNAT_IN2OUT_NEXT_DROP;
250     }
251   outside_fib_index = p[0];
252
253   key1.protocol = key0->protocol;
254   user_key.addr = ip0->src_address;
255   user_key.fib_index = rx_fib_index0;
256   kv0.key = user_key.as_u64;
257
258   /* Ever heard of the "user" = src ip4 address before? */
259   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
260     {
261       /* no, make a new one */
262       pool_get (sm->per_thread_data[thread_index].users, u);
263       memset (u, 0, sizeof (*u));
264       u->addr = ip0->src_address;
265       u->fib_index = rx_fib_index0;
266
267       pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);
268
269       u->sessions_per_user_list_head_index = per_user_list_head_elt -
270         sm->per_thread_data[thread_index].list_pool;
271
272       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
273                        u->sessions_per_user_list_head_index);
274
275       kv0.value = u - sm->per_thread_data[thread_index].users;
276
277       /* add user */
278       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
279     }
280   else
281     {
282       u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
283                              value0.value);
284     }
285
286   /* Over quota? Recycle the least recently used dynamic translation */
287   if (u->nsessions >= sm->max_translations_per_user)
288     {
289       /* Remove the oldest dynamic translation */
290       do {
291           oldest_per_user_translation_list_index =
292             clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
293                                     u->sessions_per_user_list_head_index);
294
295           ASSERT (oldest_per_user_translation_list_index != ~0);
296
297           /* add it back to the end of the LRU list */
298           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
299                               u->sessions_per_user_list_head_index,
300                               oldest_per_user_translation_list_index);
301           /* Get the list element */
302           oldest_per_user_translation_list_elt =
303             pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
304                                oldest_per_user_translation_list_index);
305
306           /* Get the session index from the list element */
307           session_index = oldest_per_user_translation_list_elt->value;
308
309           /* Get the session */
310           s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
311                                  session_index);
312       } while (snat_is_session_static (s));
313
314       if (snat_is_unk_proto_session (s))
315         {
316           clib_bihash_kv_16_8_t up_kv;
317           snat_unk_proto_ses_key_t key;
318
319           /* Remove from lookup tables */
320           key.l_addr = s->in2out.addr;
321           key.r_addr = s->ext_host_addr;
322           key.fib_index = s->in2out.fib_index;
323           key.proto = s->in2out.port;
324           up_kv.key[0] = key.as_u64[0];
325           up_kv.key[1] = key.as_u64[1];
326           if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &up_kv, 0))
327             clib_warning ("in2out key del failed");
328
329           key.l_addr = s->out2in.addr;
330           key.fib_index = s->out2in.fib_index;
331           up_kv.key[0] = key.as_u64[0];
332           up_kv.key[1] = key.as_u64[1];
333           if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &up_kv, 0))
334             clib_warning ("out2in key del failed");
335         }
336       else
337         {
338           /* Remove in2out, out2in keys */
339           kv0.key = s->in2out.as_u64;
340           if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
341               clib_warning ("in2out key delete failed");
342           kv0.key = s->out2in.as_u64;
343           if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
344               clib_warning ("out2in key delete failed");
345
346           /* log NAT event */
347           snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
348                                               s->out2in.addr.as_u32,
349                                               s->in2out.protocol,
350                                               s->in2out.port,
351                                               s->out2in.port,
352                                               s->in2out.fib_index);
353
354           snat_free_outside_address_and_port
355             (sm, &s->out2in, s->outside_address_index);
356         }
357       s->outside_address_index = ~0;
358
359       if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, thread_index,
360                                                &key1, &address_index))
361         {
362           ASSERT(0);
363
364           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
365           return SNAT_IN2OUT_NEXT_DROP;
366         }
367       s->outside_address_index = address_index;
368     }
369   else
370     {
371       u8 static_mapping = 1;
372
373       /* First try to match static mapping by local address and port */
374       if (snat_static_mapping_match (sm, *key0, &key1, 0, 0))
375         {
376           static_mapping = 0;
377           /* Try to create dynamic translation */
378           if (snat_alloc_outside_address_and_port (sm, rx_fib_index0,
379                                                    thread_index, &key1,
380                                                    &address_index))
381             {
382               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
383               return SNAT_IN2OUT_NEXT_DROP;
384             }
385         }
386
387       /* Create a new session */
388       pool_get (sm->per_thread_data[thread_index].sessions, s);
389       memset (s, 0, sizeof (*s));
390
391       s->outside_address_index = address_index;
392
393       if (static_mapping)
394         {
395           u->nstaticsessions++;
396           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
397         }
398       else
399         {
400           u->nsessions++;
401         }
402
403       /* Create list elts */
404       pool_get (sm->per_thread_data[thread_index].list_pool,
405                 per_user_translation_list_elt);
406       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
407                        per_user_translation_list_elt -
408                        sm->per_thread_data[thread_index].list_pool);
409
410       per_user_translation_list_elt->value =
411         s - sm->per_thread_data[thread_index].sessions;
412       s->per_user_index = per_user_translation_list_elt -
413                           sm->per_thread_data[thread_index].list_pool;
414       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
415
416       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
417                           s->per_user_list_head_index,
418                           per_user_translation_list_elt -
419                           sm->per_thread_data[thread_index].list_pool);
420    }
421
422   s->in2out = *key0;
423   s->out2in = key1;
424   s->out2in.protocol = key0->protocol;
425   s->out2in.fib_index = outside_fib_index;
426   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
427   *sessionp = s;
428
429   /* Add to translation hashes */
430   kv0.key = s->in2out.as_u64;
431   kv0.value = s - sm->per_thread_data[thread_index].sessions;
432   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
433       clib_warning ("in2out key add failed");
434
435   kv0.key = s->out2in.as_u64;
436   kv0.value = s - sm->per_thread_data[thread_index].sessions;
437
438   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
439       clib_warning ("out2in key add failed");
440
441   /* Add to translated packets worker lookup */
442   worker_by_out_key.addr = s->out2in.addr;
443   worker_by_out_key.port = s->out2in.port;
444   worker_by_out_key.fib_index = s->out2in.fib_index;
445   kv0.key = worker_by_out_key.as_u64;
446   kv0.value = thread_index;
447   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
448
449   /* log NAT event */
450   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
451                                       s->out2in.addr.as_u32,
452                                       s->in2out.protocol,
453                                       s->in2out.port,
454                                       s->out2in.port,
455                                       s->in2out.fib_index);
456   return next0;
457 }
458
459 static_always_inline
460 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
461                                  snat_session_key_t *p_key0)
462 {
463   icmp46_header_t *icmp0;
464   snat_session_key_t key0;
465   icmp_echo_header_t *echo0, *inner_echo0 = 0;
466   ip4_header_t *inner_ip0 = 0;
467   void *l4_header = 0;
468   icmp46_header_t *inner_icmp0;
469
470   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
471   echo0 = (icmp_echo_header_t *)(icmp0+1);
472
473   if (!icmp_is_error_message (icmp0))
474     {
475       key0.protocol = SNAT_PROTOCOL_ICMP;
476       key0.addr = ip0->src_address;
477       key0.port = echo0->identifier;
478     }
479   else
480     {
481       inner_ip0 = (ip4_header_t *)(echo0+1);
482       l4_header = ip4_next_header (inner_ip0);
483       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
484       key0.addr = inner_ip0->dst_address;
485       switch (key0.protocol)
486         {
487         case SNAT_PROTOCOL_ICMP:
488           inner_icmp0 = (icmp46_header_t*)l4_header;
489           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
490           key0.port = inner_echo0->identifier;
491           break;
492         case SNAT_PROTOCOL_UDP:
493         case SNAT_PROTOCOL_TCP:
494           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
495           break;
496         default:
497           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
498         }
499     }
500   *p_key0 = key0;
501   return -1; /* success */
502 }
503
504 /**
505  * Get address and port values to be used for ICMP packet translation
506  * and create session if needed
507  *
508  * @param[in,out] sm             NAT main
509  * @param[in,out] node           NAT node runtime
510  * @param[in] thread_index       thread index
511  * @param[in,out] b0             buffer containing packet to be translated
512  * @param[out] p_proto           protocol used for matching
513  * @param[out] p_value           address and port after NAT translation
514  * @param[out] p_dont_translate  if packet should not be translated
515  * @param d                      optional parameter
516  * @param e                      optional parameter
517  */
518 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
519                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
520                            snat_session_key_t *p_value,
521                            u8 *p_dont_translate, void *d, void *e)
522 {
523   ip4_header_t *ip0;
524   icmp46_header_t *icmp0;
525   u32 sw_if_index0;
526   u32 rx_fib_index0;
527   snat_session_key_t key0;
528   snat_session_t *s0 = 0;
529   u8 dont_translate = 0;
530   clib_bihash_kv_8_8_t kv0, value0;
531   u32 next0 = ~0;
532   int err;
533   u32 iph_offset0 = 0;
534
535   if (PREDICT_FALSE(vnet_buffer(b0)->sw_if_index[VLIB_TX] != ~0))
536     {
537       iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
538     }
539   ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
540   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
541   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
542   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
543
544   err = icmp_get_key (ip0, &key0);
545   if (err != -1)
546     {
547       b0->error = node->errors[err];
548       next0 = SNAT_IN2OUT_NEXT_DROP;
549       goto out;
550     }
551   key0.fib_index = rx_fib_index0;
552
553   kv0.key = key0.as_u64;
554
555   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
556     {
557       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
558           IP_PROTOCOL_ICMP, rx_fib_index0) &&
559           vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0))
560         {
561           dont_translate = 1;
562           goto out;
563         }
564
565       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
566         {
567           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
568           next0 = SNAT_IN2OUT_NEXT_DROP;
569           goto out;
570         }
571
572       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
573                          &s0, node, next0, thread_index);
574
575       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
576         goto out;
577     }
578   else
579     {
580       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
581                         icmp0->type != ICMP4_echo_reply &&
582                         !icmp_is_error_message (icmp0)))
583         {
584           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
585           next0 = SNAT_IN2OUT_NEXT_DROP;
586           goto out;
587         }
588
589       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
590                               value0.value);
591     }
592
593 out:
594   *p_proto = key0.protocol;
595   if (s0)
596     *p_value = s0->out2in;
597   *p_dont_translate = dont_translate;
598   if (d)
599     *(snat_session_t**)d = s0;
600   return next0;
601 }
602
603 /**
604  * Get address and port values to be used for ICMP packet translation
605  *
606  * @param[in] sm                 NAT main
607  * @param[in,out] node           NAT node runtime
608  * @param[in] thread_index       thread index
609  * @param[in,out] b0             buffer containing packet to be translated
610  * @param[out] p_proto           protocol used for matching
611  * @param[out] p_value           address and port after NAT translation
612  * @param[out] p_dont_translate  if packet should not be translated
613  * @param d                      optional parameter
614  * @param e                      optional parameter
615  */
616 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
617                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
618                            snat_session_key_t *p_value,
619                            u8 *p_dont_translate, void *d, void *e)
620 {
621   ip4_header_t *ip0;
622   icmp46_header_t *icmp0;
623   u32 sw_if_index0;
624   u32 rx_fib_index0;
625   snat_session_key_t key0;
626   snat_session_key_t sm0;
627   u8 dont_translate = 0;
628   u8 is_addr_only;
629   u32 next0 = ~0;
630   int err;
631
632   ip0 = vlib_buffer_get_current (b0);
633   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
634   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
635   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
636
637   err = icmp_get_key (ip0, &key0);
638   if (err != -1)
639     {
640       b0->error = node->errors[err];
641       next0 = SNAT_IN2OUT_NEXT_DROP;
642       goto out2;
643     }
644   key0.fib_index = rx_fib_index0;
645
646   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only))
647     {
648       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
649           IP_PROTOCOL_ICMP, rx_fib_index0)))
650         {
651           dont_translate = 1;
652           goto out;
653         }
654
655       if (icmp_is_error_message (icmp0))
656         {
657           next0 = SNAT_IN2OUT_NEXT_DROP;
658           goto out;
659         }
660
661       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
662       next0 = SNAT_IN2OUT_NEXT_DROP;
663       goto out;
664     }
665
666   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
667                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
668                     !icmp_is_error_message (icmp0)))
669     {
670       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
671       next0 = SNAT_IN2OUT_NEXT_DROP;
672       goto out;
673     }
674
675 out:
676   *p_value = sm0;
677 out2:
678   *p_proto = key0.protocol;
679   *p_dont_translate = dont_translate;
680   return next0;
681 }
682
683 static inline u32 icmp_in2out (snat_main_t *sm,
684                                vlib_buffer_t * b0,
685                                ip4_header_t * ip0,
686                                icmp46_header_t * icmp0,
687                                u32 sw_if_index0,
688                                u32 rx_fib_index0,
689                                vlib_node_runtime_t * node,
690                                u32 next0,
691                                u32 thread_index,
692                                void *d,
693                                void *e)
694 {
695   snat_session_key_t sm0;
696   u8 protocol;
697   icmp_echo_header_t *echo0, *inner_echo0 = 0;
698   ip4_header_t *inner_ip0;
699   void *l4_header = 0;
700   icmp46_header_t *inner_icmp0;
701   u8 dont_translate;
702   u32 new_addr0, old_addr0;
703   u16 old_id0, new_id0;
704   ip_csum_t sum0;
705   u16 checksum0;
706   u32 next0_tmp;
707
708   echo0 = (icmp_echo_header_t *)(icmp0+1);
709
710   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0,
711                                        &protocol, &sm0, &dont_translate, d, e);
712   if (next0_tmp != ~0)
713     next0 = next0_tmp;
714   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
715     goto out;
716
717   sum0 = ip_incremental_checksum (0, icmp0,
718                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
719   checksum0 = ~ip_csum_fold (sum0);
720   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
721     {
722       next0 = SNAT_IN2OUT_NEXT_DROP;
723       goto out;
724     }
725
726   old_addr0 = ip0->src_address.as_u32;
727   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
728   if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
729     vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
730
731   sum0 = ip0->checksum;
732   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
733                          src_address /* changed member */);
734   ip0->checksum = ip_csum_fold (sum0);
735
736   if (!icmp_is_error_message (icmp0))
737     {
738       new_id0 = sm0.port;
739       if (PREDICT_FALSE(new_id0 != echo0->identifier))
740         {
741           old_id0 = echo0->identifier;
742           new_id0 = sm0.port;
743           echo0->identifier = new_id0;
744
745           sum0 = icmp0->checksum;
746           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
747                                  identifier);
748           icmp0->checksum = ip_csum_fold (sum0);
749         }
750     }
751   else
752     {
753       inner_ip0 = (ip4_header_t *)(echo0+1);
754       l4_header = ip4_next_header (inner_ip0);
755
756       if (!ip4_header_checksum_is_valid (inner_ip0))
757         {
758           next0 = SNAT_IN2OUT_NEXT_DROP;
759           goto out;
760         }
761
762       old_addr0 = inner_ip0->dst_address.as_u32;
763       inner_ip0->dst_address = sm0.addr;
764       new_addr0 = inner_ip0->dst_address.as_u32;
765
766       sum0 = icmp0->checksum;
767       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
768                              dst_address /* changed member */);
769       icmp0->checksum = ip_csum_fold (sum0);
770
771       switch (protocol)
772         {
773           case SNAT_PROTOCOL_ICMP:
774             inner_icmp0 = (icmp46_header_t*)l4_header;
775             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
776
777             old_id0 = inner_echo0->identifier;
778             new_id0 = sm0.port;
779             inner_echo0->identifier = new_id0;
780
781             sum0 = icmp0->checksum;
782             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
783                                    identifier);
784             icmp0->checksum = ip_csum_fold (sum0);
785             break;
786           case SNAT_PROTOCOL_UDP:
787           case SNAT_PROTOCOL_TCP:
788             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
789             new_id0 = sm0.port;
790             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
791
792             sum0 = icmp0->checksum;
793             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
794                                    dst_port);
795             icmp0->checksum = ip_csum_fold (sum0);
796             break;
797           default:
798             ASSERT(0);
799         }
800     }
801
802 out:
803   return next0;
804 }
805
806 /**
807  * @brief Hairpinning
808  *
809  * Hairpinning allows two endpoints on the internal side of the NAT to
810  * communicate even if they only use each other's external IP addresses
811  * and ports.
812  *
813  * @param sm     NAT main.
814  * @param b0     Vlib buffer.
815  * @param ip0    IP header.
816  * @param udp0   UDP header.
817  * @param tcp0   TCP header.
818  * @param proto0 NAT protocol.
819  */
820 static inline void
821 snat_hairpinning (snat_main_t *sm,
822                   vlib_buffer_t * b0,
823                   ip4_header_t * ip0,
824                   udp_header_t * udp0,
825                   tcp_header_t * tcp0,
826                   u32 proto0)
827 {
828   snat_session_key_t key0, sm0;
829   snat_worker_key_t k0;
830   snat_session_t * s0;
831   clib_bihash_kv_8_8_t kv0, value0;
832   ip_csum_t sum0;
833   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
834   u16 new_dst_port0, old_dst_port0;
835
836   key0.addr = ip0->dst_address;
837   key0.port = udp0->dst_port;
838   key0.protocol = proto0;
839   key0.fib_index = sm->outside_fib_index;
840   kv0.key = key0.as_u64;
841
842   /* Check if destination is in active sessions */
843   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
844     {
845       /* or static mappings */
846       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
847         {
848           new_dst_addr0 = sm0.addr.as_u32;
849           new_dst_port0 = sm0.port;
850           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
851         }
852     }
853   else
854     {
855       si = value0.value;
856       if (sm->num_workers > 1)
857         {
858           k0.addr = ip0->dst_address;
859           k0.port = udp0->dst_port;
860           k0.fib_index = sm->outside_fib_index;
861           kv0.key = k0.as_u64;
862           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
863             ASSERT(0);
864           else
865             ti = value0.value;
866         }
867       else
868         ti = sm->num_workers;
869
870       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
871       new_dst_addr0 = s0->in2out.addr.as_u32;
872       new_dst_port0 = s0->in2out.port;
873       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
874     }
875
876   /* Destination is behind the same NAT, use internal address and port */
877   if (new_dst_addr0)
878     {
879       old_dst_addr0 = ip0->dst_address.as_u32;
880       ip0->dst_address.as_u32 = new_dst_addr0;
881       sum0 = ip0->checksum;
882       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
883                              ip4_header_t, dst_address);
884       ip0->checksum = ip_csum_fold (sum0);
885
886       old_dst_port0 = tcp0->dst;
887       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
888         {
889           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
890             {
891               tcp0->dst = new_dst_port0;
892               sum0 = tcp0->checksum;
893               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
894                                      ip4_header_t, dst_address);
895               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
896                                      ip4_header_t /* cheat */, length);
897               tcp0->checksum = ip_csum_fold(sum0);
898             }
899           else
900             {
901               udp0->dst_port = new_dst_port0;
902               udp0->checksum = 0;
903             }
904         }
905       else
906         {
907           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
908             {
909               sum0 = tcp0->checksum;
910               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
911                                      ip4_header_t, dst_address);
912               tcp0->checksum = ip_csum_fold(sum0);
913             }
914         }
915     }
916 }
917
918 static inline void
919 snat_icmp_hairpinning (snat_main_t *sm,
920                        vlib_buffer_t * b0,
921                        ip4_header_t * ip0,
922                        icmp46_header_t * icmp0)
923 {
924   snat_session_key_t key0, sm0;
925   clib_bihash_kv_8_8_t kv0, value0;
926   snat_worker_key_t k0;
927   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
928   ip_csum_t sum0;
929   snat_session_t *s0;
930
931   if (!icmp_is_error_message (icmp0))
932     {
933       icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
934       u16 icmp_id0 = echo0->identifier;
935       key0.addr = ip0->dst_address;
936       key0.port = icmp_id0;
937       key0.protocol = SNAT_PROTOCOL_ICMP;
938       key0.fib_index = sm->outside_fib_index;
939       kv0.key = key0.as_u64;
940
941       /* Check if destination is in active sessions */
942       if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
943         {
944           /* or static mappings */
945           if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
946             {
947               new_dst_addr0 = sm0.addr.as_u32;
948               vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
949             }
950         }
951       else
952         {
953           si = value0.value;
954           if (sm->num_workers > 1)
955             {
956               k0.addr = ip0->dst_address;
957               k0.port = icmp_id0;
958               k0.fib_index = sm->outside_fib_index;
959               kv0.key = k0.as_u64;
960               if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
961                 ASSERT(0);
962               else
963                 ti = value0.value;
964             }
965           else
966             ti = sm->num_workers;
967
968           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
969           new_dst_addr0 = s0->in2out.addr.as_u32;
970           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
971           echo0->identifier = s0->in2out.port;
972           sum0 = icmp0->checksum;
973           sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
974                                  icmp_echo_header_t, identifier);
975           icmp0->checksum = ip_csum_fold (sum0);
976         }
977
978       /* Destination is behind the same NAT, use internal address and port */
979       if (new_dst_addr0)
980         {
981           old_dst_addr0 = ip0->dst_address.as_u32;
982           ip0->dst_address.as_u32 = new_dst_addr0;
983           sum0 = ip0->checksum;
984           sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
985                                  ip4_header_t, dst_address);
986           ip0->checksum = ip_csum_fold (sum0);
987         }
988     }
989
990 }
991
992 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
993                                          vlib_buffer_t * b0,
994                                          ip4_header_t * ip0,
995                                          icmp46_header_t * icmp0,
996                                          u32 sw_if_index0,
997                                          u32 rx_fib_index0,
998                                          vlib_node_runtime_t * node,
999                                          u32 next0,
1000                                          f64 now,
1001                                          u32 thread_index,
1002                                          snat_session_t ** p_s0)
1003 {
1004   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1005                       next0, thread_index, p_s0, 0);
1006   snat_session_t * s0 = *p_s0;
1007   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
1008     {
1009       /* Hairpinning */
1010       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == 0)
1011         snat_icmp_hairpinning(sm, b0, ip0, icmp0);
1012       /* Accounting */
1013       s0->last_heard = now;
1014       s0->total_pkts++;
1015       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
1016       /* Per-user LRU list maintenance for dynamic translations */
1017       if (!snat_is_session_static (s0))
1018         {
1019           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1020                              s0->per_user_index);
1021           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1022                               s0->per_user_list_head_index,
1023                               s0->per_user_index);
1024         }
1025     }
1026   return next0;
1027 }
1028 static inline void
1029 snat_hairpinning_unknown_proto (snat_main_t *sm,
1030                                 vlib_buffer_t * b,
1031                                 ip4_header_t * ip)
1032 {
1033   u32 old_addr, new_addr = 0, ti = 0;
1034   clib_bihash_kv_8_8_t kv, value;
1035   clib_bihash_kv_16_8_t s_kv, s_value;
1036   snat_unk_proto_ses_key_t key;
1037   snat_session_key_t m_key;
1038   snat_worker_key_t w_key;
1039   snat_static_mapping_t *m;
1040   ip_csum_t sum;
1041   snat_session_t *s;
1042
1043   old_addr = ip->dst_address.as_u32;
1044   key.l_addr.as_u32 = ip->dst_address.as_u32;
1045   key.r_addr.as_u32 = ip->src_address.as_u32;
1046   key.fib_index = sm->outside_fib_index;
1047   key.proto = ip->protocol;
1048   key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0;
1049   s_kv.key[0] = key.as_u64[0];
1050   s_kv.key[1] = key.as_u64[1];
1051   if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value))
1052     {
1053       m_key.addr = ip->dst_address;
1054       m_key.fib_index = sm->outside_fib_index;
1055       m_key.port = 0;
1056       m_key.protocol = 0;
1057       kv.key = m_key.as_u64;
1058       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1059         return;
1060
1061       m = pool_elt_at_index (sm->static_mappings, value.value);
1062       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1063         vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
1064       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
1065     }
1066   else
1067     {
1068       if (sm->num_workers > 1)
1069         {
1070           w_key.addr = ip->dst_address;
1071           w_key.port = 0;
1072           w_key.fib_index = sm->outside_fib_index;
1073           kv.key = w_key.as_u64;
1074           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv, &value))
1075             return;
1076           else
1077             ti = value.value;
1078         }
1079       else
1080         ti = sm->num_workers;
1081
1082       s = pool_elt_at_index (sm->per_thread_data[ti].sessions, s_value.value);
1083       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1084         vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
1085       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
1086     }
1087   sum = ip->checksum;
1088   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
1089   ip->checksum = ip_csum_fold (sum);
1090 }
1091
1092 static void
1093 snat_in2out_unknown_proto (snat_main_t *sm,
1094                            vlib_buffer_t * b,
1095                            ip4_header_t * ip,
1096                            u32 rx_fib_index,
1097                            u32 thread_index,
1098                            f64 now,
1099                            vlib_main_t * vm)
1100 {
1101   clib_bihash_kv_8_8_t kv, value;
1102   clib_bihash_kv_16_8_t s_kv, s_value;
1103   snat_static_mapping_t *m;
1104   snat_session_key_t m_key;
1105   u32 old_addr, new_addr = 0;
1106   ip_csum_t sum;
1107   snat_user_key_t u_key;
1108   snat_user_t *u;
1109   dlist_elt_t *head, *elt, *oldest;
1110   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1111   u32 elt_index, head_index, ses_index, oldest_index;
1112   snat_session_t * s;
1113   snat_unk_proto_ses_key_t key;
1114   u32 address_index = ~0;
1115   int i;
1116   u8 is_sm = 0;
1117
1118   old_addr = ip->src_address.as_u32;
1119
1120   key.l_addr = ip->src_address;
1121   key.r_addr = ip->dst_address;
1122   key.fib_index = rx_fib_index;
1123   key.proto = ip->protocol;
1124   key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0;
1125   s_kv.key[0] = key.as_u64[0];
1126   s_kv.key[1] = key.as_u64[1];
1127
1128   if (!clib_bihash_search_16_8 (&sm->in2out_unk_proto, &s_kv, &s_value))
1129     {
1130       s = pool_elt_at_index (tsm->sessions, s_value.value);
1131       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1132     }
1133   else
1134     {
1135       u_key.addr = ip->src_address;
1136       u_key.fib_index = rx_fib_index;
1137       kv.key = u_key.as_u64;
1138
1139       /* Ever heard of the "user" = src ip4 address before? */
1140       if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
1141         {
1142           /* no, make a new one */
1143           pool_get (tsm->users, u);
1144           memset (u, 0, sizeof (*u));
1145           u->addr = ip->src_address;
1146           u->fib_index = rx_fib_index;
1147
1148           pool_get (tsm->list_pool, head);
1149           u->sessions_per_user_list_head_index = head - tsm->list_pool;
1150
1151           clib_dlist_init (tsm->list_pool,
1152                            u->sessions_per_user_list_head_index);
1153
1154           kv.value = u - tsm->users;
1155
1156           /* add user */
1157           clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1);
1158         }
1159       else
1160         {
1161           u = pool_elt_at_index (tsm->users, value.value);
1162         }
1163
1164       m_key.addr = ip->src_address;
1165       m_key.port = 0;
1166       m_key.protocol = 0;
1167       m_key.fib_index = rx_fib_index;
1168       kv.key = m_key.as_u64;
1169
1170       /* Try to find static mapping first */
1171       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
1172         {
1173           m = pool_elt_at_index (sm->static_mappings, value.value);
1174           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
1175           is_sm = 1;
1176           goto create_ses;
1177         }
1178       /* Fallback to 3-tuple key */
1179       else
1180         {
1181           /* Choose same out address as for TCP/UDP session to same destination */
1182           if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
1183             {
1184               head_index = u->sessions_per_user_list_head_index;
1185               head = pool_elt_at_index (tsm->list_pool, head_index);
1186               elt_index = head->next;
1187               elt = pool_elt_at_index (tsm->list_pool, elt_index);
1188               ses_index = elt->value;
1189               while (ses_index != ~0)
1190                 {
1191                   s =  pool_elt_at_index (tsm->sessions, ses_index);
1192                   elt_index = elt->next;
1193                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1194                   ses_index = elt->value;
1195
1196                   if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
1197                     {
1198                       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1199                       address_index = s->outside_address_index;
1200
1201                       key.fib_index = sm->outside_fib_index;
1202                       key.l_addr.as_u32 = new_addr;
1203                       s_kv.key[0] = key.as_u64[0];
1204                       s_kv.key[1] = key.as_u64[1];
1205                       if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value))
1206                         break;
1207
1208                       goto create_ses;
1209                     }
1210                 }
1211             }
1212           key.fib_index = sm->outside_fib_index;
1213           for (i = 0; i < vec_len (sm->addresses); i++)
1214             {
1215               key.l_addr.as_u32 = sm->addresses[i].addr.as_u32;
1216               s_kv.key[0] = key.as_u64[0];
1217               s_kv.key[1] = key.as_u64[1];
1218               if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value))
1219                 {
1220                   new_addr = ip->src_address.as_u32 = key.l_addr.as_u32;
1221                   address_index = i;
1222                   goto create_ses;
1223                 }
1224             }
1225           return;
1226         }
1227
1228 create_ses:
1229       /* Over quota? Recycle the least recently used dynamic translation */
1230       if (u->nsessions >= sm->max_translations_per_user && !is_sm)
1231         {
1232           /* Remove the oldest dynamic translation */
1233           do {
1234               oldest_index = clib_dlist_remove_head (
1235                 tsm->list_pool, u->sessions_per_user_list_head_index);
1236
1237               ASSERT (oldest_index != ~0);
1238
1239               /* add it back to the end of the LRU list */
1240               clib_dlist_addtail (tsm->list_pool,
1241                                   u->sessions_per_user_list_head_index,
1242                                   oldest_index);
1243               /* Get the list element */
1244               oldest = pool_elt_at_index (tsm->list_pool, oldest_index);
1245
1246               /* Get the session index from the list element */
1247               ses_index = oldest->value;
1248
1249               /* Get the session */
1250               s = pool_elt_at_index (tsm->sessions, ses_index);
1251           } while (snat_is_session_static (s));
1252
1253           if (snat_is_unk_proto_session (s))
1254             {
1255               /* Remove from lookup tables */
1256               key.l_addr = s->in2out.addr;
1257               key.r_addr = s->ext_host_addr;
1258               key.fib_index = s->in2out.fib_index;
1259               key.proto = s->in2out.port;
1260               s_kv.key[0] = key.as_u64[0];
1261               s_kv.key[1] = key.as_u64[1];
1262               if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 0))
1263                 clib_warning ("in2out key del failed");
1264
1265               key.l_addr = s->out2in.addr;
1266               key.fib_index = s->out2in.fib_index;
1267               s_kv.key[0] = key.as_u64[0];
1268               s_kv.key[1] = key.as_u64[1];
1269               if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 0))
1270                 clib_warning ("out2in key del failed");
1271             }
1272           else
1273             {
1274               /* log NAT event */
1275               snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
1276                                                   s->out2in.addr.as_u32,
1277                                                   s->in2out.protocol,
1278                                                   s->in2out.port,
1279                                                   s->out2in.port,
1280                                                   s->in2out.fib_index);
1281
1282               snat_free_outside_address_and_port (sm, &s->out2in,
1283                                                   s->outside_address_index);
1284
1285               /* Remove in2out, out2in keys */
1286               kv.key = s->in2out.as_u64;
1287               if (clib_bihash_add_del_8_8 (&sm->in2out, &kv, 0))
1288                 clib_warning ("in2out key del failed");
1289               kv.key = s->out2in.as_u64;
1290               if (clib_bihash_add_del_8_8 (&sm->out2in, &kv, 0))
1291                 clib_warning ("out2in key del failed");
1292             }
1293         }
1294       else
1295         {
1296           /* Create a new session */
1297           pool_get (tsm->sessions, s);
1298           memset (s, 0, sizeof (*s));
1299
1300           /* Create list elts */
1301           pool_get (tsm->list_pool, elt);
1302           clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
1303           elt->value = s - tsm->sessions;
1304           s->per_user_index = elt - tsm->list_pool;
1305           s->per_user_list_head_index = u->sessions_per_user_list_head_index;
1306           clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1307                               s->per_user_index);
1308         }
1309
1310       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1311       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
1312       s->outside_address_index = address_index;
1313       s->out2in.addr.as_u32 = new_addr;
1314       s->out2in.fib_index = sm->outside_fib_index;
1315       s->in2out.addr.as_u32 = old_addr;
1316       s->in2out.fib_index = rx_fib_index;
1317       s->in2out.port = s->out2in.port = ip->protocol;
1318       if (is_sm)
1319         {
1320           u->nstaticsessions++;
1321           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1322         }
1323       else
1324         {
1325           u->nsessions++;
1326         }
1327
1328       /* Add to lookup tables */
1329       key.l_addr.as_u32 = old_addr;
1330       key.r_addr = ip->dst_address;
1331       key.proto = ip->protocol;
1332       key.fib_index = rx_fib_index;
1333       s_kv.key[0] = key.as_u64[0];
1334       s_kv.key[1] = key.as_u64[1];
1335       s_kv.value = s - tsm->sessions;
1336       if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 1))
1337         clib_warning ("in2out key add failed");
1338
1339       key.l_addr.as_u32 = new_addr;
1340       key.fib_index = sm->outside_fib_index;
1341       s_kv.key[0] = key.as_u64[0];
1342       s_kv.key[1] = key.as_u64[1];
1343       if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 1))
1344         clib_warning ("out2in key add failed");
1345   }
1346
1347   /* Update IP checksum */
1348   sum = ip->checksum;
1349   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1350   ip->checksum = ip_csum_fold (sum);
1351
1352   /* Accounting */
1353   s->last_heard = now;
1354   s->total_pkts++;
1355   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1356   /* Per-user LRU list maintenance */
1357   clib_dlist_remove (tsm->list_pool, s->per_user_index);
1358   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1359                       s->per_user_index);
1360
1361   /* Hairpinning */
1362   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1363     snat_hairpinning_unknown_proto(sm, b, ip);
1364
1365   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1366     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1367 }
1368
1369 static inline uword
1370 snat_in2out_node_fn_inline (vlib_main_t * vm,
1371                             vlib_node_runtime_t * node,
1372                             vlib_frame_t * frame, int is_slow_path,
1373                             int is_output_feature)
1374 {
1375   u32 n_left_from, * from, * to_next;
1376   snat_in2out_next_t next_index;
1377   u32 pkts_processed = 0;
1378   snat_main_t * sm = &snat_main;
1379   f64 now = vlib_time_now (vm);
1380   u32 stats_node_index;
1381   u32 thread_index = vlib_get_thread_index ();
1382
1383   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
1384     snat_in2out_node.index;
1385
1386   from = vlib_frame_vector_args (frame);
1387   n_left_from = frame->n_vectors;
1388   next_index = node->cached_next_index;
1389
1390   while (n_left_from > 0)
1391     {
1392       u32 n_left_to_next;
1393
1394       vlib_get_next_frame (vm, node, next_index,
1395                            to_next, n_left_to_next);
1396
1397       while (n_left_from >= 4 && n_left_to_next >= 2)
1398         {
1399           u32 bi0, bi1;
1400           vlib_buffer_t * b0, * b1;
1401           u32 next0, next1;
1402           u32 sw_if_index0, sw_if_index1;
1403           ip4_header_t * ip0, * ip1;
1404           ip_csum_t sum0, sum1;
1405           u32 new_addr0, old_addr0, new_addr1, old_addr1;
1406           u16 old_port0, new_port0, old_port1, new_port1;
1407           udp_header_t * udp0, * udp1;
1408           tcp_header_t * tcp0, * tcp1;
1409           icmp46_header_t * icmp0, * icmp1;
1410           snat_session_key_t key0, key1;
1411           u32 rx_fib_index0, rx_fib_index1;
1412           u32 proto0, proto1;
1413           snat_session_t * s0 = 0, * s1 = 0;
1414           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1415           u32 iph_offset0 = 0, iph_offset1 = 0;
1416
1417           /* Prefetch next iteration. */
1418           {
1419             vlib_buffer_t * p2, * p3;
1420
1421             p2 = vlib_get_buffer (vm, from[2]);
1422             p3 = vlib_get_buffer (vm, from[3]);
1423
1424             vlib_prefetch_buffer_header (p2, LOAD);
1425             vlib_prefetch_buffer_header (p3, LOAD);
1426
1427             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1428             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1429           }
1430
1431           /* speculatively enqueue b0 and b1 to the current next frame */
1432           to_next[0] = bi0 = from[0];
1433           to_next[1] = bi1 = from[1];
1434           from += 2;
1435           to_next += 2;
1436           n_left_from -= 2;
1437           n_left_to_next -= 2;
1438
1439           b0 = vlib_get_buffer (vm, bi0);
1440           b1 = vlib_get_buffer (vm, bi1);
1441
1442           if (is_output_feature)
1443             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1444
1445           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1446                  iph_offset0);
1447
1448           udp0 = ip4_next_header (ip0);
1449           tcp0 = (tcp_header_t *) udp0;
1450           icmp0 = (icmp46_header_t *) udp0;
1451
1452           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1453           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1454                                    sw_if_index0);
1455
1456           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1457
1458           if (PREDICT_FALSE(ip0->ttl == 1))
1459             {
1460               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1461               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1462                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1463                                            0);
1464               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1465               goto trace00;
1466             }
1467
1468           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1469
1470           /* Next configured feature, probably ip4-lookup */
1471           if (is_slow_path)
1472             {
1473               if (PREDICT_FALSE (proto0 == ~0))
1474                 {
1475                   snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1476                                              thread_index, now, vm);
1477                   goto trace00;
1478                 }
1479
1480               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1481                 {
1482                   next0 = icmp_in2out_slow_path
1483                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
1484                      node, next0, now, thread_index, &s0);
1485                   goto trace00;
1486                 }
1487             }
1488           else
1489             {
1490               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1491                 {
1492                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1493                   goto trace00;
1494                 }
1495             }
1496
1497           key0.addr = ip0->src_address;
1498           key0.port = udp0->src_port;
1499           key0.protocol = proto0;
1500           key0.fib_index = rx_fib_index0;
1501
1502           kv0.key = key0.as_u64;
1503
1504           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
1505             {
1506               if (is_slow_path)
1507                 {
1508                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1509                       ip0, proto0, rx_fib_index0)) && !is_output_feature)
1510                     goto trace00;
1511
1512                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1513                                      &s0, node, next0, thread_index);
1514                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1515                     goto trace00;
1516                 }
1517               else
1518                 {
1519                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1520                   goto trace00;
1521                 }
1522             }
1523           else
1524             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1525                                     value0.value);
1526
1527           old_addr0 = ip0->src_address.as_u32;
1528           ip0->src_address = s0->out2in.addr;
1529           new_addr0 = ip0->src_address.as_u32;
1530           if (!is_output_feature)
1531             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1532
1533           sum0 = ip0->checksum;
1534           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1535                                  ip4_header_t,
1536                                  src_address /* changed member */);
1537           ip0->checksum = ip_csum_fold (sum0);
1538
1539           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1540             {
1541               old_port0 = tcp0->src_port;
1542               tcp0->src_port = s0->out2in.port;
1543               new_port0 = tcp0->src_port;
1544
1545               sum0 = tcp0->checksum;
1546               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1547                                      ip4_header_t,
1548                                      dst_address /* changed member */);
1549               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1550                                      ip4_header_t /* cheat */,
1551                                      length /* changed member */);
1552               tcp0->checksum = ip_csum_fold(sum0);
1553             }
1554           else
1555             {
1556               old_port0 = udp0->src_port;
1557               udp0->src_port = s0->out2in.port;
1558               udp0->checksum = 0;
1559             }
1560
1561           /* Hairpinning */
1562           if (!is_output_feature)
1563             snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1564
1565           /* Accounting */
1566           s0->last_heard = now;
1567           s0->total_pkts++;
1568           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1569           /* Per-user LRU list maintenance for dynamic translation */
1570           if (!snat_is_session_static (s0))
1571             {
1572               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1573                                  s0->per_user_index);
1574               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1575                                   s0->per_user_list_head_index,
1576                                   s0->per_user_index);
1577             }
1578         trace00:
1579
1580           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1581                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1582             {
1583               snat_in2out_trace_t *t =
1584                  vlib_add_trace (vm, node, b0, sizeof (*t));
1585               t->is_slow_path = is_slow_path;
1586               t->sw_if_index = sw_if_index0;
1587               t->next_index = next0;
1588                   t->session_index = ~0;
1589               if (s0)
1590                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1591             }
1592
1593           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1594
1595           if (is_output_feature)
1596             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1597
1598           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1599                  iph_offset1);
1600
1601           udp1 = ip4_next_header (ip1);
1602           tcp1 = (tcp_header_t *) udp1;
1603           icmp1 = (icmp46_header_t *) udp1;
1604
1605           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1606           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1607                                    sw_if_index1);
1608
1609           if (PREDICT_FALSE(ip1->ttl == 1))
1610             {
1611               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1612               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1613                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1614                                            0);
1615               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1616               goto trace01;
1617             }
1618
1619           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1620
1621           /* Next configured feature, probably ip4-lookup */
1622           if (is_slow_path)
1623             {
1624               if (PREDICT_FALSE (proto1 == ~0))
1625                 {
1626                   snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1,
1627                                              thread_index, now, vm);
1628                   goto trace01;
1629                 }
1630
1631               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1632                 {
1633                   next1 = icmp_in2out_slow_path
1634                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1635                      next1, now, thread_index, &s1);
1636                   goto trace01;
1637                 }
1638             }
1639           else
1640             {
1641               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1642                 {
1643                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1644                   goto trace01;
1645                 }
1646             }
1647
1648           key1.addr = ip1->src_address;
1649           key1.port = udp1->src_port;
1650           key1.protocol = proto1;
1651           key1.fib_index = rx_fib_index1;
1652
1653           kv1.key = key1.as_u64;
1654
1655             if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
1656             {
1657               if (is_slow_path)
1658                 {
1659                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
1660                       ip1, proto1, rx_fib_index1)) && !is_output_feature)
1661                     goto trace01;
1662
1663                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1664                                      &s1, node, next1, thread_index);
1665                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1666                     goto trace01;
1667                 }
1668               else
1669                 {
1670                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1671                   goto trace01;
1672                 }
1673             }
1674           else
1675             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1676                                     value1.value);
1677
1678           old_addr1 = ip1->src_address.as_u32;
1679           ip1->src_address = s1->out2in.addr;
1680           new_addr1 = ip1->src_address.as_u32;
1681           if (!is_output_feature)
1682             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1683
1684           sum1 = ip1->checksum;
1685           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1686                                  ip4_header_t,
1687                                  src_address /* changed member */);
1688           ip1->checksum = ip_csum_fold (sum1);
1689
1690           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1691             {
1692               old_port1 = tcp1->src_port;
1693               tcp1->src_port = s1->out2in.port;
1694               new_port1 = tcp1->src_port;
1695
1696               sum1 = tcp1->checksum;
1697               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1698                                      ip4_header_t,
1699                                      dst_address /* changed member */);
1700               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1701                                      ip4_header_t /* cheat */,
1702                                      length /* changed member */);
1703               tcp1->checksum = ip_csum_fold(sum1);
1704             }
1705           else
1706             {
1707               old_port1 = udp1->src_port;
1708               udp1->src_port = s1->out2in.port;
1709               udp1->checksum = 0;
1710             }
1711
1712           /* Hairpinning */
1713           if (!is_output_feature)
1714             snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
1715
1716           /* Accounting */
1717           s1->last_heard = now;
1718           s1->total_pkts++;
1719           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1720           /* Per-user LRU list maintenance for dynamic translation */
1721           if (!snat_is_session_static (s1))
1722             {
1723               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1724                                  s1->per_user_index);
1725               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1726                                   s1->per_user_list_head_index,
1727                                   s1->per_user_index);
1728             }
1729         trace01:
1730
1731           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1732                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1733             {
1734               snat_in2out_trace_t *t =
1735                  vlib_add_trace (vm, node, b1, sizeof (*t));
1736               t->sw_if_index = sw_if_index1;
1737               t->next_index = next1;
1738               t->session_index = ~0;
1739               if (s1)
1740                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1741             }
1742
1743           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1744
1745           /* verify speculative enqueues, maybe switch current next frame */
1746           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1747                                            to_next, n_left_to_next,
1748                                            bi0, bi1, next0, next1);
1749         }
1750
1751       while (n_left_from > 0 && n_left_to_next > 0)
1752         {
1753           u32 bi0;
1754           vlib_buffer_t * b0;
1755           u32 next0;
1756           u32 sw_if_index0;
1757           ip4_header_t * ip0;
1758           ip_csum_t sum0;
1759           u32 new_addr0, old_addr0;
1760           u16 old_port0, new_port0;
1761           udp_header_t * udp0;
1762           tcp_header_t * tcp0;
1763           icmp46_header_t * icmp0;
1764           snat_session_key_t key0;
1765           u32 rx_fib_index0;
1766           u32 proto0;
1767           snat_session_t * s0 = 0;
1768           clib_bihash_kv_8_8_t kv0, value0;
1769           u32 iph_offset0 = 0;
1770
1771           /* speculatively enqueue b0 to the current next frame */
1772           bi0 = from[0];
1773           to_next[0] = bi0;
1774           from += 1;
1775           to_next += 1;
1776           n_left_from -= 1;
1777           n_left_to_next -= 1;
1778
1779           b0 = vlib_get_buffer (vm, bi0);
1780           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1781
1782           if (is_output_feature)
1783             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1784
1785           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1786                  iph_offset0);
1787
1788           udp0 = ip4_next_header (ip0);
1789           tcp0 = (tcp_header_t *) udp0;
1790           icmp0 = (icmp46_header_t *) udp0;
1791
1792           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1793           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1794                                    sw_if_index0);
1795
1796           if (PREDICT_FALSE(ip0->ttl == 1))
1797             {
1798               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1799               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1800                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1801                                            0);
1802               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1803               goto trace0;
1804             }
1805
1806           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1807
1808           /* Next configured feature, probably ip4-lookup */
1809           if (is_slow_path)
1810             {
1811               if (PREDICT_FALSE (proto0 == ~0))
1812                 {
1813                   snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1814                                              thread_index, now, vm);
1815                   goto trace0;
1816                 }
1817
1818               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1819                 {
1820                   next0 = icmp_in2out_slow_path
1821                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1822                      next0, now, thread_index, &s0);
1823                   goto trace0;
1824                 }
1825             }
1826           else
1827             {
1828               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1829                 {
1830                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1831                   goto trace0;
1832                 }
1833             }
1834
1835           key0.addr = ip0->src_address;
1836           key0.port = udp0->src_port;
1837           key0.protocol = proto0;
1838           key0.fib_index = rx_fib_index0;
1839
1840           kv0.key = key0.as_u64;
1841
1842           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
1843             {
1844               if (is_slow_path)
1845                 {
1846                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1847                       ip0, proto0, rx_fib_index0)) && !is_output_feature)
1848                     goto trace0;
1849
1850                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1851                                      &s0, node, next0, thread_index);
1852
1853                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1854                     goto trace0;
1855                 }
1856               else
1857                 {
1858                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1859                   goto trace0;
1860                 }
1861             }
1862           else
1863             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1864                                     value0.value);
1865
1866           old_addr0 = ip0->src_address.as_u32;
1867           ip0->src_address = s0->out2in.addr;
1868           new_addr0 = ip0->src_address.as_u32;
1869           if (!is_output_feature)
1870             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1871
1872           sum0 = ip0->checksum;
1873           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1874                                  ip4_header_t,
1875                                  src_address /* changed member */);
1876           ip0->checksum = ip_csum_fold (sum0);
1877
1878           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1879             {
1880               old_port0 = tcp0->src_port;
1881               tcp0->src_port = s0->out2in.port;
1882               new_port0 = tcp0->src_port;
1883
1884               sum0 = tcp0->checksum;
1885               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1886                                      ip4_header_t,
1887                                      dst_address /* changed member */);
1888               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1889                                      ip4_header_t /* cheat */,
1890                                      length /* changed member */);
1891               tcp0->checksum = ip_csum_fold(sum0);
1892             }
1893           else
1894             {
1895               old_port0 = udp0->src_port;
1896               udp0->src_port = s0->out2in.port;
1897               udp0->checksum = 0;
1898             }
1899
1900           /* Hairpinning */
1901           if (!is_output_feature)
1902             snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1903
1904           /* Accounting */
1905           s0->last_heard = now;
1906           s0->total_pkts++;
1907           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1908           /* Per-user LRU list maintenance for dynamic translation */
1909           if (!snat_is_session_static (s0))
1910             {
1911               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1912                                  s0->per_user_index);
1913               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1914                                   s0->per_user_list_head_index,
1915                                   s0->per_user_index);
1916             }
1917
1918         trace0:
1919           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1920                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1921             {
1922               snat_in2out_trace_t *t =
1923                  vlib_add_trace (vm, node, b0, sizeof (*t));
1924               t->is_slow_path = is_slow_path;
1925               t->sw_if_index = sw_if_index0;
1926               t->next_index = next0;
1927                   t->session_index = ~0;
1928               if (s0)
1929                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1930             }
1931
1932           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1933
1934           /* verify speculative enqueue, maybe switch current next frame */
1935           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1936                                            to_next, n_left_to_next,
1937                                            bi0, next0);
1938         }
1939
1940       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1941     }
1942
1943   vlib_node_increment_counter (vm, stats_node_index,
1944                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1945                                pkts_processed);
1946   return frame->n_vectors;
1947 }
1948
1949 static uword
1950 snat_in2out_fast_path_fn (vlib_main_t * vm,
1951                           vlib_node_runtime_t * node,
1952                           vlib_frame_t * frame)
1953 {
1954   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0);
1955 }
1956
1957 VLIB_REGISTER_NODE (snat_in2out_node) = {
1958   .function = snat_in2out_fast_path_fn,
1959   .name = "nat44-in2out",
1960   .vector_size = sizeof (u32),
1961   .format_trace = format_snat_in2out_trace,
1962   .type = VLIB_NODE_TYPE_INTERNAL,
1963
1964   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1965   .error_strings = snat_in2out_error_strings,
1966
1967   .runtime_data_bytes = sizeof (snat_runtime_t),
1968
1969   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1970
1971   /* edit / add dispositions here */
1972   .next_nodes = {
1973     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1974     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1975     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
1976     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1977   },
1978 };
1979
1980 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1981
1982 static uword
1983 snat_in2out_output_fast_path_fn (vlib_main_t * vm,
1984                                  vlib_node_runtime_t * node,
1985                                  vlib_frame_t * frame)
1986 {
1987   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1);
1988 }
1989
1990 VLIB_REGISTER_NODE (snat_in2out_output_node) = {
1991   .function = snat_in2out_output_fast_path_fn,
1992   .name = "nat44-in2out-output",
1993   .vector_size = sizeof (u32),
1994   .format_trace = format_snat_in2out_trace,
1995   .type = VLIB_NODE_TYPE_INTERNAL,
1996
1997   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1998   .error_strings = snat_in2out_error_strings,
1999
2000   .runtime_data_bytes = sizeof (snat_runtime_t),
2001
2002   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2003
2004   /* edit / add dispositions here */
2005   .next_nodes = {
2006     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2007     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2008     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2009     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2010   },
2011 };
2012
2013 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
2014                               snat_in2out_output_fast_path_fn);
2015
2016 static uword
2017 snat_in2out_slow_path_fn (vlib_main_t * vm,
2018                           vlib_node_runtime_t * node,
2019                           vlib_frame_t * frame)
2020 {
2021   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0);
2022 }
2023
2024 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
2025   .function = snat_in2out_slow_path_fn,
2026   .name = "nat44-in2out-slowpath",
2027   .vector_size = sizeof (u32),
2028   .format_trace = format_snat_in2out_trace,
2029   .type = VLIB_NODE_TYPE_INTERNAL,
2030
2031   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2032   .error_strings = snat_in2out_error_strings,
2033
2034   .runtime_data_bytes = sizeof (snat_runtime_t),
2035
2036   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2037
2038   /* edit / add dispositions here */
2039   .next_nodes = {
2040     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2041     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2042     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2043     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2044   },
2045 };
2046
2047 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
2048                               snat_in2out_slow_path_fn);
2049
2050 static uword
2051 snat_in2out_output_slow_path_fn (vlib_main_t * vm,
2052                                  vlib_node_runtime_t * node,
2053                                  vlib_frame_t * frame)
2054 {
2055   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1);
2056 }
2057
2058 VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
2059   .function = snat_in2out_output_slow_path_fn,
2060   .name = "nat44-in2out-output-slowpath",
2061   .vector_size = sizeof (u32),
2062   .format_trace = format_snat_in2out_trace,
2063   .type = VLIB_NODE_TYPE_INTERNAL,
2064
2065   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2066   .error_strings = snat_in2out_error_strings,
2067
2068   .runtime_data_bytes = sizeof (snat_runtime_t),
2069
2070   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2071
2072   /* edit / add dispositions here */
2073   .next_nodes = {
2074     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2075     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2076     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2077     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2078   },
2079 };
2080
2081 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
2082                               snat_in2out_output_slow_path_fn);
2083
2084 /**************************/
2085 /*** deterministic mode ***/
2086 /**************************/
2087 static uword
2088 snat_det_in2out_node_fn (vlib_main_t * vm,
2089                          vlib_node_runtime_t * node,
2090                          vlib_frame_t * frame)
2091 {
2092   u32 n_left_from, * from, * to_next;
2093   snat_in2out_next_t next_index;
2094   u32 pkts_processed = 0;
2095   snat_main_t * sm = &snat_main;
2096   u32 now = (u32) vlib_time_now (vm);
2097   u32 thread_index = vlib_get_thread_index ();
2098
2099   from = vlib_frame_vector_args (frame);
2100   n_left_from = frame->n_vectors;
2101   next_index = node->cached_next_index;
2102
2103   while (n_left_from > 0)
2104     {
2105       u32 n_left_to_next;
2106
2107       vlib_get_next_frame (vm, node, next_index,
2108                            to_next, n_left_to_next);
2109
2110       while (n_left_from >= 4 && n_left_to_next >= 2)
2111         {
2112           u32 bi0, bi1;
2113           vlib_buffer_t * b0, * b1;
2114           u32 next0, next1;
2115           u32 sw_if_index0, sw_if_index1;
2116           ip4_header_t * ip0, * ip1;
2117           ip_csum_t sum0, sum1;
2118           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
2119           u16 old_port0, new_port0, lo_port0, i0;
2120           u16 old_port1, new_port1, lo_port1, i1;
2121           udp_header_t * udp0, * udp1;
2122           tcp_header_t * tcp0, * tcp1;
2123           u32 proto0, proto1;
2124           snat_det_out_key_t key0, key1;
2125           snat_det_map_t * dm0, * dm1;
2126           snat_det_session_t * ses0 = 0, * ses1 = 0;
2127           u32 rx_fib_index0, rx_fib_index1;
2128           icmp46_header_t * icmp0, * icmp1;
2129
2130           /* Prefetch next iteration. */
2131           {
2132             vlib_buffer_t * p2, * p3;
2133
2134             p2 = vlib_get_buffer (vm, from[2]);
2135             p3 = vlib_get_buffer (vm, from[3]);
2136
2137             vlib_prefetch_buffer_header (p2, LOAD);
2138             vlib_prefetch_buffer_header (p3, LOAD);
2139
2140             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2141             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2142           }
2143
2144           /* speculatively enqueue b0 and b1 to the current next frame */
2145           to_next[0] = bi0 = from[0];
2146           to_next[1] = bi1 = from[1];
2147           from += 2;
2148           to_next += 2;
2149           n_left_from -= 2;
2150           n_left_to_next -= 2;
2151
2152           b0 = vlib_get_buffer (vm, bi0);
2153           b1 = vlib_get_buffer (vm, bi1);
2154
2155           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2156           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
2157
2158           ip0 = vlib_buffer_get_current (b0);
2159           udp0 = ip4_next_header (ip0);
2160           tcp0 = (tcp_header_t *) udp0;
2161
2162           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2163
2164           if (PREDICT_FALSE(ip0->ttl == 1))
2165             {
2166               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2167               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2168                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2169                                            0);
2170               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2171               goto trace0;
2172             }
2173
2174           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2175
2176           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2177             {
2178               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2179               icmp0 = (icmp46_header_t *) udp0;
2180
2181               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2182                                   rx_fib_index0, node, next0, thread_index,
2183                                   &ses0, &dm0);
2184               goto trace0;
2185             }
2186
2187           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2188           if (PREDICT_FALSE(!dm0))
2189             {
2190               clib_warning("no match for internal host %U",
2191                            format_ip4_address, &ip0->src_address);
2192               next0 = SNAT_IN2OUT_NEXT_DROP;
2193               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2194               goto trace0;
2195             }
2196
2197           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2198
2199           key0.ext_host_addr = ip0->dst_address;
2200           key0.ext_host_port = tcp0->dst;
2201
2202           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2203           if (PREDICT_FALSE(!ses0))
2204             {
2205               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2206                 {
2207                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2208                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2209
2210                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2211                     continue;
2212
2213                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2214                   break;
2215                 }
2216               if (PREDICT_FALSE(!ses0))
2217                 {
2218                   /* too many sessions for user, send ICMP error packet */
2219
2220                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2221                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2222                                                ICMP4_destination_unreachable_destination_unreachable_host,
2223                                                0);
2224                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2225                   goto trace0;
2226                 }
2227             }
2228
2229           new_port0 = ses0->out.out_port;
2230
2231           old_addr0.as_u32 = ip0->src_address.as_u32;
2232           ip0->src_address.as_u32 = new_addr0.as_u32;
2233           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2234
2235           sum0 = ip0->checksum;
2236           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2237                                  ip4_header_t,
2238                                  src_address /* changed member */);
2239           ip0->checksum = ip_csum_fold (sum0);
2240
2241           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2242             {
2243               if (tcp0->flags & TCP_FLAG_SYN)
2244                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2245               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2246                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2247               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2248                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2249               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2250                 snat_det_ses_close(dm0, ses0);
2251               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2252                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2253               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2254                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2255
2256               old_port0 = tcp0->src;
2257               tcp0->src = new_port0;
2258
2259               sum0 = tcp0->checksum;
2260               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2261                                      ip4_header_t,
2262                                      dst_address /* changed member */);
2263               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2264                                      ip4_header_t /* cheat */,
2265                                      length /* changed member */);
2266               tcp0->checksum = ip_csum_fold(sum0);
2267             }
2268           else
2269             {
2270               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2271               old_port0 = udp0->src_port;
2272               udp0->src_port = new_port0;
2273               udp0->checksum = 0;
2274             }
2275
2276           switch(ses0->state)
2277             {
2278             case SNAT_SESSION_UDP_ACTIVE:
2279                 ses0->expire = now + sm->udp_timeout;
2280                 break;
2281             case SNAT_SESSION_TCP_SYN_SENT:
2282             case SNAT_SESSION_TCP_FIN_WAIT:
2283             case SNAT_SESSION_TCP_CLOSE_WAIT:
2284             case SNAT_SESSION_TCP_LAST_ACK:
2285                 ses0->expire = now + sm->tcp_transitory_timeout;
2286                 break;
2287             case SNAT_SESSION_TCP_ESTABLISHED:
2288                 ses0->expire = now + sm->tcp_established_timeout;
2289                 break;
2290             }
2291
2292         trace0:
2293           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2294                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2295             {
2296               snat_in2out_trace_t *t =
2297                  vlib_add_trace (vm, node, b0, sizeof (*t));
2298               t->is_slow_path = 0;
2299               t->sw_if_index = sw_if_index0;
2300               t->next_index = next0;
2301               t->session_index = ~0;
2302               if (ses0)
2303                 t->session_index = ses0 - dm0->sessions;
2304             }
2305
2306           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2307
2308           ip1 = vlib_buffer_get_current (b1);
2309           udp1 = ip4_next_header (ip1);
2310           tcp1 = (tcp_header_t *) udp1;
2311
2312           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2313
2314           if (PREDICT_FALSE(ip1->ttl == 1))
2315             {
2316               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2317               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2318                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2319                                            0);
2320               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2321               goto trace1;
2322             }
2323
2324           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2325
2326           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
2327             {
2328               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
2329               icmp1 = (icmp46_header_t *) udp1;
2330
2331               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
2332                                   rx_fib_index1, node, next1, thread_index,
2333                                   &ses1, &dm1);
2334               goto trace1;
2335             }
2336
2337           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
2338           if (PREDICT_FALSE(!dm1))
2339             {
2340               clib_warning("no match for internal host %U",
2341                            format_ip4_address, &ip0->src_address);
2342               next1 = SNAT_IN2OUT_NEXT_DROP;
2343               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2344               goto trace1;
2345             }
2346
2347           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
2348
2349           key1.ext_host_addr = ip1->dst_address;
2350           key1.ext_host_port = tcp1->dst;
2351
2352           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
2353           if (PREDICT_FALSE(!ses1))
2354             {
2355               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
2356                 {
2357                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
2358                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
2359
2360                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
2361                     continue;
2362
2363                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
2364                   break;
2365                 }
2366               if (PREDICT_FALSE(!ses1))
2367                 {
2368                   /* too many sessions for user, send ICMP error packet */
2369
2370                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2371                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
2372                                                ICMP4_destination_unreachable_destination_unreachable_host,
2373                                                0);
2374                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2375                   goto trace1;
2376                 }
2377             }
2378
2379           new_port1 = ses1->out.out_port;
2380
2381           old_addr1.as_u32 = ip1->src_address.as_u32;
2382           ip1->src_address.as_u32 = new_addr1.as_u32;
2383           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2384
2385           sum1 = ip1->checksum;
2386           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2387                                  ip4_header_t,
2388                                  src_address /* changed member */);
2389           ip1->checksum = ip_csum_fold (sum1);
2390
2391           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
2392             {
2393               if (tcp1->flags & TCP_FLAG_SYN)
2394                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
2395               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
2396                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2397               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
2398                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
2399               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
2400                 snat_det_ses_close(dm1, ses1);
2401               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2402                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
2403               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
2404                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2405
2406               old_port1 = tcp1->src;
2407               tcp1->src = new_port1;
2408
2409               sum1 = tcp1->checksum;
2410               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2411                                      ip4_header_t,
2412                                      dst_address /* changed member */);
2413               sum1 = ip_csum_update (sum1, old_port1, new_port1,
2414                                      ip4_header_t /* cheat */,
2415                                      length /* changed member */);
2416               tcp1->checksum = ip_csum_fold(sum1);
2417             }
2418           else
2419             {
2420               ses1->state = SNAT_SESSION_UDP_ACTIVE;
2421               old_port1 = udp1->src_port;
2422               udp1->src_port = new_port1;
2423               udp1->checksum = 0;
2424             }
2425
2426           switch(ses1->state)
2427             {
2428             case SNAT_SESSION_UDP_ACTIVE:
2429                 ses1->expire = now + sm->udp_timeout;
2430                 break;
2431             case SNAT_SESSION_TCP_SYN_SENT:
2432             case SNAT_SESSION_TCP_FIN_WAIT:
2433             case SNAT_SESSION_TCP_CLOSE_WAIT:
2434             case SNAT_SESSION_TCP_LAST_ACK:
2435                 ses1->expire = now + sm->tcp_transitory_timeout;
2436                 break;
2437             case SNAT_SESSION_TCP_ESTABLISHED:
2438                 ses1->expire = now + sm->tcp_established_timeout;
2439                 break;
2440             }
2441
2442         trace1:
2443           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2444                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2445             {
2446               snat_in2out_trace_t *t =
2447                  vlib_add_trace (vm, node, b1, sizeof (*t));
2448               t->is_slow_path = 0;
2449               t->sw_if_index = sw_if_index1;
2450               t->next_index = next1;
2451               t->session_index = ~0;
2452               if (ses1)
2453                 t->session_index = ses1 - dm1->sessions;
2454             }
2455
2456           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
2457
2458           /* verify speculative enqueues, maybe switch current next frame */
2459           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2460                                            to_next, n_left_to_next,
2461                                            bi0, bi1, next0, next1);
2462          }
2463
2464       while (n_left_from > 0 && n_left_to_next > 0)
2465         {
2466           u32 bi0;
2467           vlib_buffer_t * b0;
2468           u32 next0;
2469           u32 sw_if_index0;
2470           ip4_header_t * ip0;
2471           ip_csum_t sum0;
2472           ip4_address_t new_addr0, old_addr0;
2473           u16 old_port0, new_port0, lo_port0, i0;
2474           udp_header_t * udp0;
2475           tcp_header_t * tcp0;
2476           u32 proto0;
2477           snat_det_out_key_t key0;
2478           snat_det_map_t * dm0;
2479           snat_det_session_t * ses0 = 0;
2480           u32 rx_fib_index0;
2481           icmp46_header_t * icmp0;
2482
2483           /* speculatively enqueue b0 to the current next frame */
2484           bi0 = from[0];
2485           to_next[0] = bi0;
2486           from += 1;
2487           to_next += 1;
2488           n_left_from -= 1;
2489           n_left_to_next -= 1;
2490
2491           b0 = vlib_get_buffer (vm, bi0);
2492           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2493
2494           ip0 = vlib_buffer_get_current (b0);
2495           udp0 = ip4_next_header (ip0);
2496           tcp0 = (tcp_header_t *) udp0;
2497
2498           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2499
2500           if (PREDICT_FALSE(ip0->ttl == 1))
2501             {
2502               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2503               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2504                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2505                                            0);
2506               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2507               goto trace00;
2508             }
2509
2510           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2511
2512           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2513             {
2514               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2515               icmp0 = (icmp46_header_t *) udp0;
2516
2517               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2518                                   rx_fib_index0, node, next0, thread_index,
2519                                   &ses0, &dm0);
2520               goto trace00;
2521             }
2522
2523           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2524           if (PREDICT_FALSE(!dm0))
2525             {
2526               clib_warning("no match for internal host %U",
2527                            format_ip4_address, &ip0->src_address);
2528               next0 = SNAT_IN2OUT_NEXT_DROP;
2529               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2530               goto trace00;
2531             }
2532
2533           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2534
2535           key0.ext_host_addr = ip0->dst_address;
2536           key0.ext_host_port = tcp0->dst;
2537
2538           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2539           if (PREDICT_FALSE(!ses0))
2540             {
2541               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2542                 {
2543                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2544                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2545
2546                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2547                     continue;
2548
2549                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2550                   break;
2551                 }
2552               if (PREDICT_FALSE(!ses0))
2553                 {
2554                   /* too many sessions for user, send ICMP error packet */
2555
2556                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2557                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2558                                                ICMP4_destination_unreachable_destination_unreachable_host,
2559                                                0);
2560                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2561                   goto trace00;
2562                 }
2563             }
2564
2565           new_port0 = ses0->out.out_port;
2566
2567           old_addr0.as_u32 = ip0->src_address.as_u32;
2568           ip0->src_address.as_u32 = new_addr0.as_u32;
2569           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2570
2571           sum0 = ip0->checksum;
2572           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2573                                  ip4_header_t,
2574                                  src_address /* changed member */);
2575           ip0->checksum = ip_csum_fold (sum0);
2576
2577           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2578             {
2579               if (tcp0->flags & TCP_FLAG_SYN)
2580                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2581               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2582                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2583               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2584                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2585               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2586                 snat_det_ses_close(dm0, ses0);
2587               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2588                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2589               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2590                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2591
2592               old_port0 = tcp0->src;
2593               tcp0->src = new_port0;
2594
2595               sum0 = tcp0->checksum;
2596               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2597                                      ip4_header_t,
2598                                      dst_address /* changed member */);
2599               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2600                                      ip4_header_t /* cheat */,
2601                                      length /* changed member */);
2602               tcp0->checksum = ip_csum_fold(sum0);
2603             }
2604           else
2605             {
2606               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2607               old_port0 = udp0->src_port;
2608               udp0->src_port = new_port0;
2609               udp0->checksum = 0;
2610             }
2611
2612           switch(ses0->state)
2613             {
2614             case SNAT_SESSION_UDP_ACTIVE:
2615                 ses0->expire = now + sm->udp_timeout;
2616                 break;
2617             case SNAT_SESSION_TCP_SYN_SENT:
2618             case SNAT_SESSION_TCP_FIN_WAIT:
2619             case SNAT_SESSION_TCP_CLOSE_WAIT:
2620             case SNAT_SESSION_TCP_LAST_ACK:
2621                 ses0->expire = now + sm->tcp_transitory_timeout;
2622                 break;
2623             case SNAT_SESSION_TCP_ESTABLISHED:
2624                 ses0->expire = now + sm->tcp_established_timeout;
2625                 break;
2626             }
2627
2628         trace00:
2629           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2630                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2631             {
2632               snat_in2out_trace_t *t =
2633                  vlib_add_trace (vm, node, b0, sizeof (*t));
2634               t->is_slow_path = 0;
2635               t->sw_if_index = sw_if_index0;
2636               t->next_index = next0;
2637               t->session_index = ~0;
2638               if (ses0)
2639                 t->session_index = ses0 - dm0->sessions;
2640             }
2641
2642           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2643
2644           /* verify speculative enqueue, maybe switch current next frame */
2645           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2646                                            to_next, n_left_to_next,
2647                                            bi0, next0);
2648         }
2649
2650       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2651     }
2652
2653   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
2654                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2655                                pkts_processed);
2656   return frame->n_vectors;
2657 }
2658
2659 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
2660   .function = snat_det_in2out_node_fn,
2661   .name = "nat44-det-in2out",
2662   .vector_size = sizeof (u32),
2663   .format_trace = format_snat_in2out_trace,
2664   .type = VLIB_NODE_TYPE_INTERNAL,
2665
2666   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2667   .error_strings = snat_in2out_error_strings,
2668
2669   .runtime_data_bytes = sizeof (snat_runtime_t),
2670
2671   .n_next_nodes = 3,
2672
2673   /* edit / add dispositions here */
2674   .next_nodes = {
2675     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2676     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2677     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2678   },
2679 };
2680
2681 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
2682
2683 /**
2684  * Get address and port values to be used for ICMP packet translation
2685  * and create session if needed
2686  *
2687  * @param[in,out] sm             NAT main
2688  * @param[in,out] node           NAT node runtime
2689  * @param[in] thread_index       thread index
2690  * @param[in,out] b0             buffer containing packet to be translated
2691  * @param[out] p_proto           protocol used for matching
2692  * @param[out] p_value           address and port after NAT translation
2693  * @param[out] p_dont_translate  if packet should not be translated
2694  * @param d                      optional parameter
2695  * @param e                      optional parameter
2696  */
2697 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
2698                           u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
2699                           snat_session_key_t *p_value,
2700                           u8 *p_dont_translate, void *d, void *e)
2701 {
2702   ip4_header_t *ip0;
2703   icmp46_header_t *icmp0;
2704   u32 sw_if_index0;
2705   u32 rx_fib_index0;
2706   u8 protocol;
2707   snat_det_out_key_t key0;
2708   u8 dont_translate = 0;
2709   u32 next0 = ~0;
2710   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2711   ip4_header_t *inner_ip0;
2712   void *l4_header = 0;
2713   icmp46_header_t *inner_icmp0;
2714   snat_det_map_t * dm0 = 0;
2715   ip4_address_t new_addr0;
2716   u16 lo_port0, i0;
2717   snat_det_session_t * ses0 = 0;
2718   ip4_address_t in_addr;
2719   u16 in_port;
2720
2721   ip0 = vlib_buffer_get_current (b0);
2722   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2723   echo0 = (icmp_echo_header_t *)(icmp0+1);
2724   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2725   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
2726
2727   if (!icmp_is_error_message (icmp0))
2728     {
2729       protocol = SNAT_PROTOCOL_ICMP;
2730       in_addr = ip0->src_address;
2731       in_port = echo0->identifier;
2732     }
2733   else
2734     {
2735       inner_ip0 = (ip4_header_t *)(echo0+1);
2736       l4_header = ip4_next_header (inner_ip0);
2737       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2738       in_addr = inner_ip0->dst_address;
2739       switch (protocol)
2740         {
2741         case SNAT_PROTOCOL_ICMP:
2742           inner_icmp0 = (icmp46_header_t*)l4_header;
2743           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2744           in_port = inner_echo0->identifier;
2745           break;
2746         case SNAT_PROTOCOL_UDP:
2747         case SNAT_PROTOCOL_TCP:
2748           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2749           break;
2750         default:
2751           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
2752           next0 = SNAT_IN2OUT_NEXT_DROP;
2753           goto out;
2754         }
2755     }
2756
2757   dm0 = snat_det_map_by_user(sm, &in_addr);
2758   if (PREDICT_FALSE(!dm0))
2759     {
2760       clib_warning("no match for internal host %U",
2761                    format_ip4_address, &in_addr);
2762       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2763           IP_PROTOCOL_ICMP, rx_fib_index0)))
2764         {
2765           dont_translate = 1;
2766           goto out;
2767         }
2768       next0 = SNAT_IN2OUT_NEXT_DROP;
2769       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2770       goto out;
2771     }
2772
2773   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
2774
2775   key0.ext_host_addr = ip0->dst_address;
2776   key0.ext_host_port = 0;
2777
2778   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
2779   if (PREDICT_FALSE(!ses0))
2780     {
2781       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2782           IP_PROTOCOL_ICMP, rx_fib_index0)))
2783         {
2784           dont_translate = 1;
2785           goto out;
2786         }
2787       if (icmp0->type != ICMP4_echo_request)
2788         {
2789           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2790           next0 = SNAT_IN2OUT_NEXT_DROP;
2791           goto out;
2792         }
2793       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2794         {
2795           key0.out_port = clib_host_to_net_u16 (lo_port0 +
2796             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
2797
2798           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
2799             continue;
2800
2801           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
2802           break;
2803         }
2804       if (PREDICT_FALSE(!ses0))
2805         {
2806           next0 = SNAT_IN2OUT_NEXT_DROP;
2807           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
2808           goto out;
2809         }
2810     }
2811
2812   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
2813                     !icmp_is_error_message (icmp0)))
2814     {
2815       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
2816       next0 = SNAT_IN2OUT_NEXT_DROP;
2817       goto out;
2818     }
2819
2820   u32 now = (u32) vlib_time_now (sm->vlib_main);
2821
2822   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
2823   ses0->expire = now + sm->icmp_timeout;
2824
2825 out:
2826   *p_proto = protocol;
2827   if (ses0)
2828     {
2829       p_value->addr = new_addr0;
2830       p_value->fib_index = sm->outside_fib_index;
2831       p_value->port = ses0->out.out_port;
2832     }
2833   *p_dont_translate = dont_translate;
2834   if (d)
2835     *(snat_det_session_t**)d = ses0;
2836   if (e)
2837     *(snat_det_map_t**)e = dm0;
2838   return next0;
2839 }
2840
2841 /**********************/
2842 /*** worker handoff ***/
2843 /**********************/
2844 static inline uword
2845 snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
2846                                       vlib_node_runtime_t * node,
2847                                       vlib_frame_t * frame,
2848                                       u8 is_output)
2849 {
2850   snat_main_t *sm = &snat_main;
2851   vlib_thread_main_t *tm = vlib_get_thread_main ();
2852   u32 n_left_from, *from, *to_next = 0;
2853   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
2854   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
2855     = 0;
2856   vlib_frame_queue_elt_t *hf = 0;
2857   vlib_frame_t *f = 0;
2858   int i;
2859   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
2860   u32 next_worker_index = 0;
2861   u32 current_worker_index = ~0;
2862   u32 thread_index = vlib_get_thread_index ();
2863   u32 fq_index;
2864   u32 to_node_index;
2865
2866   ASSERT (vec_len (sm->workers));
2867
2868   if (is_output)
2869     {
2870       fq_index = sm->fq_in2out_output_index;
2871       to_node_index = sm->in2out_output_node_index;
2872     }
2873   else
2874     {
2875       fq_index = sm->fq_in2out_index;
2876       to_node_index = sm->in2out_node_index;
2877     }
2878
2879   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
2880     {
2881       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
2882
2883       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
2884                                sm->first_worker_index + sm->num_workers - 1,
2885                                (vlib_frame_queue_t *) (~0));
2886     }
2887
2888   from = vlib_frame_vector_args (frame);
2889   n_left_from = frame->n_vectors;
2890
2891   while (n_left_from > 0)
2892     {
2893       u32 bi0;
2894       vlib_buffer_t *b0;
2895       u32 sw_if_index0;
2896       u32 rx_fib_index0;
2897       ip4_header_t * ip0;
2898       u8 do_handoff;
2899
2900       bi0 = from[0];
2901       from += 1;
2902       n_left_from -= 1;
2903
2904       b0 = vlib_get_buffer (vm, bi0);
2905
2906       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
2907       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2908
2909       ip0 = vlib_buffer_get_current (b0);
2910
2911       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
2912
2913       if (PREDICT_FALSE (next_worker_index != thread_index))
2914         {
2915           do_handoff = 1;
2916
2917           if (next_worker_index != current_worker_index)
2918             {
2919               if (hf)
2920                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2921
2922               hf = vlib_get_worker_handoff_queue_elt (fq_index,
2923                                                       next_worker_index,
2924                                                       handoff_queue_elt_by_worker_index);
2925
2926               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2927               to_next_worker = &hf->buffer_index[hf->n_vectors];
2928               current_worker_index = next_worker_index;
2929             }
2930
2931           /* enqueue to correct worker thread */
2932           to_next_worker[0] = bi0;
2933           to_next_worker++;
2934           n_left_to_next_worker--;
2935
2936           if (n_left_to_next_worker == 0)
2937             {
2938               hf->n_vectors = VLIB_FRAME_SIZE;
2939               vlib_put_frame_queue_elt (hf);
2940               current_worker_index = ~0;
2941               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2942               hf = 0;
2943             }
2944         }
2945       else
2946         {
2947           do_handoff = 0;
2948           /* if this is 1st frame */
2949           if (!f)
2950             {
2951               f = vlib_get_frame_to_node (vm, to_node_index);
2952               to_next = vlib_frame_vector_args (f);
2953             }
2954
2955           to_next[0] = bi0;
2956           to_next += 1;
2957           f->n_vectors++;
2958         }
2959
2960       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2961                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2962         {
2963           snat_in2out_worker_handoff_trace_t *t =
2964             vlib_add_trace (vm, node, b0, sizeof (*t));
2965           t->next_worker_index = next_worker_index;
2966           t->do_handoff = do_handoff;
2967         }
2968     }
2969
2970   if (f)
2971     vlib_put_frame_to_node (vm, to_node_index, f);
2972
2973   if (hf)
2974     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2975
2976   /* Ship frames to the worker nodes */
2977   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2978     {
2979       if (handoff_queue_elt_by_worker_index[i])
2980         {
2981           hf = handoff_queue_elt_by_worker_index[i];
2982           /*
2983            * It works better to let the handoff node
2984            * rate-adapt, always ship the handoff queue element.
2985            */
2986           if (1 || hf->n_vectors == hf->last_n_vectors)
2987             {
2988               vlib_put_frame_queue_elt (hf);
2989               handoff_queue_elt_by_worker_index[i] = 0;
2990             }
2991           else
2992             hf->last_n_vectors = hf->n_vectors;
2993         }
2994       congested_handoff_queue_by_worker_index[i] =
2995         (vlib_frame_queue_t *) (~0);
2996     }
2997   hf = 0;
2998   current_worker_index = ~0;
2999   return frame->n_vectors;
3000 }
3001
3002 static uword
3003 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
3004                                vlib_node_runtime_t * node,
3005                                vlib_frame_t * frame)
3006 {
3007   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
3008 }
3009
3010 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
3011   .function = snat_in2out_worker_handoff_fn,
3012   .name = "nat44-in2out-worker-handoff",
3013   .vector_size = sizeof (u32),
3014   .format_trace = format_snat_in2out_worker_handoff_trace,
3015   .type = VLIB_NODE_TYPE_INTERNAL,
3016
3017   .n_next_nodes = 1,
3018
3019   .next_nodes = {
3020     [0] = "error-drop",
3021   },
3022 };
3023
3024 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
3025                               snat_in2out_worker_handoff_fn);
3026
3027 static uword
3028 snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
3029                                       vlib_node_runtime_t * node,
3030                                       vlib_frame_t * frame)
3031 {
3032   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
3033 }
3034
3035 VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
3036   .function = snat_in2out_output_worker_handoff_fn,
3037   .name = "nat44-in2out-output-worker-handoff",
3038   .vector_size = sizeof (u32),
3039   .format_trace = format_snat_in2out_worker_handoff_trace,
3040   .type = VLIB_NODE_TYPE_INTERNAL,
3041
3042   .n_next_nodes = 1,
3043
3044   .next_nodes = {
3045     [0] = "error-drop",
3046   },
3047 };
3048
3049 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
3050                               snat_in2out_output_worker_handoff_fn);
3051
3052 static_always_inline int
3053 is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
3054 {
3055   snat_address_t * ap;
3056   clib_bihash_kv_8_8_t kv, value;
3057   snat_session_key_t m_key;
3058
3059   vec_foreach (ap, sm->addresses)
3060     {
3061       if (ap->addr.as_u32 == dst_addr->as_u32)
3062         return 1;
3063     }
3064
3065   m_key.addr.as_u32 = dst_addr->as_u32;
3066   m_key.fib_index = sm->outside_fib_index;
3067   m_key.port = 0;
3068   m_key.protocol = 0;
3069   kv.key = m_key.as_u64;
3070   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
3071     return 1;
3072
3073   return 0;
3074 }
3075
3076 static uword
3077 snat_hairpin_dst_fn (vlib_main_t * vm,
3078                      vlib_node_runtime_t * node,
3079                      vlib_frame_t * frame)
3080 {
3081   u32 n_left_from, * from, * to_next;
3082   snat_in2out_next_t next_index;
3083   u32 pkts_processed = 0;
3084   snat_main_t * sm = &snat_main;
3085
3086   from = vlib_frame_vector_args (frame);
3087   n_left_from = frame->n_vectors;
3088   next_index = node->cached_next_index;
3089
3090   while (n_left_from > 0)
3091     {
3092       u32 n_left_to_next;
3093
3094       vlib_get_next_frame (vm, node, next_index,
3095                            to_next, n_left_to_next);
3096
3097       while (n_left_from > 0 && n_left_to_next > 0)
3098         {
3099           u32 bi0;
3100           vlib_buffer_t * b0;
3101           u32 next0;
3102           ip4_header_t * ip0;
3103           u32 proto0;
3104
3105           /* speculatively enqueue b0 to the current next frame */
3106           bi0 = from[0];
3107           to_next[0] = bi0;
3108           from += 1;
3109           to_next += 1;
3110           n_left_from -= 1;
3111           n_left_to_next -= 1;
3112
3113           b0 = vlib_get_buffer (vm, bi0);
3114           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3115           ip0 = vlib_buffer_get_current (b0);
3116
3117           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3118
3119           vnet_buffer (b0)->snat.flags = 0;
3120           if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
3121             {
3122               if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
3123                 {
3124                   udp_header_t * udp0 = ip4_next_header (ip0);
3125                   tcp_header_t * tcp0 = (tcp_header_t *) udp0;
3126
3127                   snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3128                 }
3129               else if (proto0 == SNAT_PROTOCOL_ICMP)
3130                 {
3131                   icmp46_header_t * icmp0 = ip4_next_header (ip0);
3132
3133                   snat_icmp_hairpinning (sm, b0, ip0, icmp0);
3134                 }
3135               else
3136                 {
3137                   snat_hairpinning_unknown_proto (sm, b0, ip0);
3138                 }
3139
3140               vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
3141               clib_warning("is hairpinning");
3142             }
3143
3144           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3145
3146           /* verify speculative enqueue, maybe switch current next frame */
3147           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3148                                            to_next, n_left_to_next,
3149                                            bi0, next0);
3150          }
3151
3152       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3153     }
3154
3155   vlib_node_increment_counter (vm, snat_hairpin_dst_node.index,
3156                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3157                                pkts_processed);
3158   return frame->n_vectors;
3159 }
3160
3161 VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
3162   .function = snat_hairpin_dst_fn,
3163   .name = "nat44-hairpin-dst",
3164   .vector_size = sizeof (u32),
3165   .type = VLIB_NODE_TYPE_INTERNAL,
3166   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3167   .error_strings = snat_in2out_error_strings,
3168   .n_next_nodes = 2,
3169   .next_nodes = {
3170     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3171     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3172   },
3173 };
3174
3175 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node,
3176                               snat_hairpin_dst_fn);
3177
3178 static uword
3179 snat_hairpin_src_fn (vlib_main_t * vm,
3180                      vlib_node_runtime_t * node,
3181                      vlib_frame_t * frame)
3182 {
3183   u32 n_left_from, * from, * to_next;
3184   snat_in2out_next_t next_index;
3185   u32 pkts_processed = 0;
3186   snat_main_t *sm = &snat_main;
3187
3188   from = vlib_frame_vector_args (frame);
3189   n_left_from = frame->n_vectors;
3190   next_index = node->cached_next_index;
3191
3192   while (n_left_from > 0)
3193     {
3194       u32 n_left_to_next;
3195
3196       vlib_get_next_frame (vm, node, next_index,
3197                            to_next, n_left_to_next);
3198
3199       while (n_left_from > 0 && n_left_to_next > 0)
3200         {
3201           u32 bi0;
3202           vlib_buffer_t * b0;
3203           u32 next0;
3204
3205           /* speculatively enqueue b0 to the current next frame */
3206           bi0 = from[0];
3207           to_next[0] = bi0;
3208           from += 1;
3209           to_next += 1;
3210           n_left_from -= 1;
3211           n_left_to_next -= 1;
3212
3213           b0 = vlib_get_buffer (vm, bi0);
3214           next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT;
3215
3216           if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) & SNAT_FLAG_HAIRPINNING))
3217             {
3218               if (PREDICT_TRUE (sm->num_workers > 1))
3219                 next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
3220               else
3221                 next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
3222             }
3223
3224           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3225
3226           /* verify speculative enqueue, maybe switch current next frame */
3227           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3228                                            to_next, n_left_to_next,
3229                                            bi0, next0);
3230          }
3231
3232       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3233     }
3234
3235   vlib_node_increment_counter (vm, snat_hairpin_src_node.index,
3236                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3237                                pkts_processed);
3238   return frame->n_vectors;
3239 }
3240
3241 VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
3242   .function = snat_hairpin_src_fn,
3243   .name = "nat44-hairpin-src",
3244   .vector_size = sizeof (u32),
3245   .type = VLIB_NODE_TYPE_INTERNAL,
3246   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3247   .error_strings = snat_in2out_error_strings,
3248   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
3249   .next_nodes = {
3250      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
3251      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-in2out-output",
3252      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
3253      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
3254   },
3255 };
3256
3257 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node,
3258                               snat_hairpin_src_fn);
3259
3260 static uword
3261 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
3262                                 vlib_node_runtime_t * node,
3263                                 vlib_frame_t * frame)
3264 {
3265   u32 n_left_from, * from, * to_next;
3266   snat_in2out_next_t next_index;
3267   u32 pkts_processed = 0;
3268   snat_main_t * sm = &snat_main;
3269   u32 stats_node_index;
3270
3271   stats_node_index = snat_in2out_fast_node.index;
3272
3273   from = vlib_frame_vector_args (frame);
3274   n_left_from = frame->n_vectors;
3275   next_index = node->cached_next_index;
3276
3277   while (n_left_from > 0)
3278     {
3279       u32 n_left_to_next;
3280
3281       vlib_get_next_frame (vm, node, next_index,
3282                            to_next, n_left_to_next);
3283
3284       while (n_left_from > 0 && n_left_to_next > 0)
3285         {
3286           u32 bi0;
3287           vlib_buffer_t * b0;
3288           u32 next0;
3289           u32 sw_if_index0;
3290           ip4_header_t * ip0;
3291           ip_csum_t sum0;
3292           u32 new_addr0, old_addr0;
3293           u16 old_port0, new_port0;
3294           udp_header_t * udp0;
3295           tcp_header_t * tcp0;
3296           icmp46_header_t * icmp0;
3297           snat_session_key_t key0, sm0;
3298           u32 proto0;
3299           u32 rx_fib_index0;
3300
3301           /* speculatively enqueue b0 to the current next frame */
3302           bi0 = from[0];
3303           to_next[0] = bi0;
3304           from += 1;
3305           to_next += 1;
3306           n_left_from -= 1;
3307           n_left_to_next -= 1;
3308
3309           b0 = vlib_get_buffer (vm, bi0);
3310           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3311
3312           ip0 = vlib_buffer_get_current (b0);
3313           udp0 = ip4_next_header (ip0);
3314           tcp0 = (tcp_header_t *) udp0;
3315           icmp0 = (icmp46_header_t *) udp0;
3316
3317           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3318           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3319
3320           if (PREDICT_FALSE(ip0->ttl == 1))
3321             {
3322               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3323               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3324                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3325                                            0);
3326               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3327               goto trace0;
3328             }
3329
3330           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3331
3332           if (PREDICT_FALSE (proto0 == ~0))
3333               goto trace0;
3334
3335           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3336             {
3337               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
3338                                   rx_fib_index0, node, next0, ~0, 0, 0);
3339               goto trace0;
3340             }
3341
3342           key0.addr = ip0->src_address;
3343           key0.protocol = proto0;
3344           key0.port = udp0->src_port;
3345           key0.fib_index = rx_fib_index0;
3346
3347           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0))
3348             {
3349               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3350               next0= SNAT_IN2OUT_NEXT_DROP;
3351               goto trace0;
3352             }
3353
3354           new_addr0 = sm0.addr.as_u32;
3355           new_port0 = sm0.port;
3356           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
3357           old_addr0 = ip0->src_address.as_u32;
3358           ip0->src_address.as_u32 = new_addr0;
3359
3360           sum0 = ip0->checksum;
3361           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3362                                  ip4_header_t,
3363                                  src_address /* changed member */);
3364           ip0->checksum = ip_csum_fold (sum0);
3365
3366           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
3367             {
3368               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3369                 {
3370                   old_port0 = tcp0->src_port;
3371                   tcp0->src_port = new_port0;
3372
3373                   sum0 = tcp0->checksum;
3374                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3375                                          ip4_header_t,
3376                                          dst_address /* changed member */);
3377                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
3378                                          ip4_header_t /* cheat */,
3379                                          length /* changed member */);
3380                   tcp0->checksum = ip_csum_fold(sum0);
3381                 }
3382               else
3383                 {
3384                   old_port0 = udp0->src_port;
3385                   udp0->src_port = new_port0;
3386                   udp0->checksum = 0;
3387                 }
3388             }
3389           else
3390             {
3391               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3392                 {
3393                   sum0 = tcp0->checksum;
3394                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3395                                          ip4_header_t,
3396                                          dst_address /* changed member */);
3397                   tcp0->checksum = ip_csum_fold(sum0);
3398                 }
3399             }
3400
3401           /* Hairpinning */
3402           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3403
3404         trace0:
3405           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3406                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3407             {
3408               snat_in2out_trace_t *t =
3409                  vlib_add_trace (vm, node, b0, sizeof (*t));
3410               t->sw_if_index = sw_if_index0;
3411               t->next_index = next0;
3412             }
3413
3414           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3415
3416           /* verify speculative enqueue, maybe switch current next frame */
3417           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3418                                            to_next, n_left_to_next,
3419                                            bi0, next0);
3420         }
3421
3422       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3423     }
3424
3425   vlib_node_increment_counter (vm, stats_node_index,
3426                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3427                                pkts_processed);
3428   return frame->n_vectors;
3429 }
3430
3431
3432 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
3433   .function = snat_in2out_fast_static_map_fn,
3434   .name = "nat44-in2out-fast",
3435   .vector_size = sizeof (u32),
3436   .format_trace = format_snat_in2out_fast_trace,
3437   .type = VLIB_NODE_TYPE_INTERNAL,
3438
3439   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3440   .error_strings = snat_in2out_error_strings,
3441
3442   .runtime_data_bytes = sizeof (snat_runtime_t),
3443
3444   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3445
3446   /* edit / add dispositions here */
3447   .next_nodes = {
3448     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3449     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3450     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
3451     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3452   },
3453 };
3454
3455 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);