NAT: fixed bug in snat_alloc_outside_address_and_port (VPP-981)
[vpp.git] / src / plugins / nat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <nat/nat.h>
25 #include <nat/nat_ipfix_logging.h>
26 #include <nat/nat_det.h>
27
28 #include <vppinfra/hash.h>
29 #include <vppinfra/error.h>
30 #include <vppinfra/elog.h>
31
/* Trace record rendered by format_snat_in2out_trace and
 * format_snat_in2out_fast_trace. */
typedef struct {
  u32 sw_if_index;      /* printed as "sw_if_index" */
  u32 next_index;       /* printed as "next index" */
  u32 session_index;    /* printed as "session" (slow/fast-path formatter only) */
  u32 is_slow_path;     /* non-zero selects the SLOW_PATH tag when formatting */
} snat_in2out_trace_t;
38
/* Trace record rendered by format_snat_in2out_worker_handoff_trace. */
typedef struct {
  u32 next_worker_index;  /* worker index printed after the handoff tag */
  u8 do_handoff;          /* non-zero prints "next worker", zero "same worker" */
} snat_in2out_worker_handoff_trace_t;
43
44 /* packet trace format function */
45 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
46 {
47   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
50   char * tag;
51
52   tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
53
54   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
55               t->sw_if_index, t->next_index, t->session_index);
56
57   return s;
58 }
59
60 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
61 {
62   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
63   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
64   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
65
66   s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
67               t->sw_if_index, t->next_index);
68
69   return s;
70 }
71
72 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
73 {
74   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
75   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
76   snat_in2out_worker_handoff_trace_t * t =
77     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
78   char * m;
79
80   m = t->do_handoff ? "next worker" : "same worker";
81   s = format (s, "NAT44_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
82
83   return s;
84 }
85
/* Node registration objects for the NAT in2out / hairpinning graph nodes.
 * NOTE(review): these are declarations without initializers; presumably each
 * is defined by a VLIB_REGISTER_NODE later in the file - confirm. */
vlib_node_registration_t snat_in2out_node;
vlib_node_registration_t snat_in2out_slowpath_node;
vlib_node_registration_t snat_in2out_fast_node;
vlib_node_registration_t snat_in2out_worker_handoff_node;
vlib_node_registration_t snat_det_in2out_node;
vlib_node_registration_t snat_in2out_output_node;
vlib_node_registration_t snat_in2out_output_slowpath_node;
vlib_node_registration_t snat_in2out_output_worker_handoff_node;
vlib_node_registration_t snat_hairpin_dst_node;
vlib_node_registration_t snat_hairpin_src_node;

96
97
/* X-macro list of in2out error counters: each _(sym, str) entry pairs an
 * error symbol with its human-readable description.  The list is expanded
 * with different definitions of _() to build the error enum and the matching
 * string table, so the two always stay in the same order. */
#define foreach_snat_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(IN2OUT_PACKETS, "Good in2out packets processed")      \
_(OUT_OF_PORTS, "Out of ports")                         \
_(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(NO_TRANSLATION, "No translation")
105
/* Error codes SNAT_IN2OUT_ERROR_<sym>, one per foreach_snat_in2out_error
 * entry; used in this file as indices into node->errors[]. */
typedef enum {
#define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
  foreach_snat_in2out_error
#undef _
  SNAT_IN2OUT_N_ERROR,  /* number of error codes, not an error itself */
} snat_in2out_error_t;
112
/* Description strings generated from foreach_snat_in2out_error, in the same
 * order as the snat_in2out_error_t values. */
static char * snat_in2out_error_strings[] = {
#define _(sym,string) string,
  foreach_snat_in2out_error
#undef _
};
118
/* Next-node indices returned by the in2out processing functions in this file.
 * NOTE(review): the concrete node bound to each index is set in the node
 * registrations, which are not visible here - confirm there. */
typedef enum {
  SNAT_IN2OUT_NEXT_LOOKUP,
  SNAT_IN2OUT_NEXT_DROP,        /* returned on every failure path in this file */
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_N_NEXT,           /* number of next indices */
} snat_in2out_next_t;
126
/* Next-node indices with the SNAT_HAIRPIN_SRC_ prefix.
 * NOTE(review): presumably used by snat_hairpin_src_node; note the typedef
 * name omits the "_src" part of the constant prefix - confirm usage. */
typedef enum {
  SNAT_HAIRPIN_SRC_NEXT_DROP,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
  SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
  SNAT_HAIRPIN_SRC_N_NEXT,      /* number of next indices */
} snat_hairpin_next_t;
134
135 /**
136  * @brief Check if packet should be translated
137  *
138  * Packets aimed at outside interface and external addresss with active session
139  * should be translated.
140  *
141  * @param sm            NAT main
142  * @param rt            NAT runtime data
143  * @param sw_if_index0  index of the inside interface
144  * @param ip0           IPv4 header
145  * @param proto0        NAT protocol
146  * @param rx_fib_index0 RX FIB index
147  *
148  * @returns 0 if packet should be translated otherwise 1
149  */
150 static inline int
151 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
152                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
153                          u32 rx_fib_index0)
154 {
155   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
156   fib_prefix_t pfx = {
157     .fp_proto = FIB_PROTOCOL_IP4,
158     .fp_len = 32,
159     .fp_addr = {
160         .ip4.as_u32 = ip0->dst_address.as_u32,
161     },
162   };
163
164   /* Don't NAT packet aimed at the intfc address */
165   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
166                                       ip0->dst_address.as_u32)))
167     return 1;
168
169   fei = fib_table_lookup (rx_fib_index0, &pfx);
170   if (FIB_NODE_INDEX_INVALID != fei)
171     {
172       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
173       if (sw_if_index == ~0)
174         {
175           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
176           if (FIB_NODE_INDEX_INVALID != fei)
177             sw_if_index = fib_entry_get_resolving_interface (fei);
178         }
179       snat_interface_t *i;
180       pool_foreach (i, sm->interfaces,
181       ({
182         /* NAT packet aimed at outside interface */
183         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
184           return 0;
185       }));
186     }
187
188   return 1;
189 }
190
191 static inline int
192 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
193                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
194                     u32 rx_fib_index0)
195 {
196   udp_header_t * udp0 = ip4_next_header (ip0);
197   snat_session_key_t key0, sm0;
198   clib_bihash_kv_8_8_t kv0, value0;
199
200   key0.addr = ip0->dst_address;
201   key0.port = udp0->dst_port;
202   key0.protocol = proto0;
203   key0.fib_index = sm->outside_fib_index;
204   kv0.key = key0.as_u64;
205
206   /* NAT packet aimed at external address if */
207   /* has active sessions */
208   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
209     {
210       /* or is static mappings */
211       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
212         return 0;
213     }
214   else
215     return 0;
216
217   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
218                                  rx_fib_index0);
219 }
220
/**
 * Create (or recycle) a NAT session for a packet that has no in2out entry.
 *
 * Finds or creates the per-thread user record keyed by (source address,
 * RX FIB index).  If the user already holds max_translations_per_user
 * dynamic sessions, the least recently used non-static session is torn down
 * and reused with a freshly allocated outside address/port.  Otherwise a new
 * session is allocated, using a static mapping when one matches *key0 and a
 * dynamically allocated outside address/port when none does.  The session is
 * then inserted into the in2out, out2in and worker_by_out hash tables and a
 * session-create event is logged.
 *
 * @param sm             NAT main
 * @param b0             buffer; only b0->error is written, on failure
 * @param ip0            IPv4 header (source and destination address are read)
 * @param rx_fib_index0  RX FIB index, part of the user key
 * @param key0           in2out key of the session to create
 * @param sessionp       [out] set to the created/recycled session on success
 * @param node           node runtime, source of the error counters
 * @param next0          value returned unchanged on success
 * @param thread_index   selects the per-thread user/session/list pools
 *
 * @returns next0 on success, SNAT_IN2OUT_NEXT_DROP if the outside VRF is
 *          unknown or no outside address/port could be allocated
 */
static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
                      ip4_header_t * ip0,
                      u32 rx_fib_index0,
                      snat_session_key_t * key0,
                      snat_session_t ** sessionp,
                      vlib_node_runtime_t * node,
                      u32 next0,
                      u32 thread_index)
{
  snat_user_t *u;
  snat_user_key_t user_key;
  snat_session_t *s;
  clib_bihash_kv_8_8_t kv0, value0;
  u32 oldest_per_user_translation_list_index;
  dlist_elt_t * oldest_per_user_translation_list_elt;
  dlist_elt_t * per_user_translation_list_elt;
  dlist_elt_t * per_user_list_head_elt;
  u32 session_index;
  snat_session_key_t key1;
  u32 address_index = ~0;
  u32 outside_fib_index;
  uword * p;
  snat_worker_key_t worker_by_out_key;

  /* Resolve the outside VRF id to a FIB index; drop if it does not exist.
   * NOTE(review): other functions in this file read sm->outside_fib_index
   * directly instead of doing this lookup - confirm both always agree. */
  p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
  if (! p)
    {
      b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
      return SNAT_IN2OUT_NEXT_DROP;
    }
  outside_fib_index = p[0];

  key1.protocol = key0->protocol;
  user_key.addr = ip0->src_address;
  user_key.fib_index = rx_fib_index0;
  kv0.key = user_key.as_u64;

  /* Ever heard of the "user" = src ip4 address before? */
  if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
    {
      /* no, make a new one */
      pool_get (sm->per_thread_data[thread_index].users, u);
      memset (u, 0, sizeof (*u));
      u->addr = ip0->src_address;
      u->fib_index = rx_fib_index0;

      /* Allocate and initialize the head of this user's session list */
      pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);

      u->sessions_per_user_list_head_index = per_user_list_head_elt -
        sm->per_thread_data[thread_index].list_pool;

      clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
                       u->sessions_per_user_list_head_index);

      /* The hash value is the user's index in the per-thread user pool */
      kv0.value = u - sm->per_thread_data[thread_index].users;

      /* add user */
      clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
    }
  else
    {
      u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
                             value0.value);
    }

  /* Over quota? Recycle the least recently used dynamic translation */
  if (u->nsessions >= sm->max_translations_per_user)
    {
      /* Remove the oldest dynamic translation */
      /* Each iteration rotates the list head to the tail; the loop stops at
       * the first session that is not a static-mapping session.
       * NOTE(review): relies on the list containing at least one dynamic
       * session whenever the quota check above is true - confirm for a
       * quota of 0. */
      do {
          oldest_per_user_translation_list_index =
            clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
                                    u->sessions_per_user_list_head_index);

          ASSERT (oldest_per_user_translation_list_index != ~0);

          /* add it back to the end of the LRU list */
          clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                              u->sessions_per_user_list_head_index,
                              oldest_per_user_translation_list_index);
          /* Get the list element */
          oldest_per_user_translation_list_elt =
            pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
                               oldest_per_user_translation_list_index);

          /* Get the session index from the list element */
          session_index = oldest_per_user_translation_list_elt->value;

          /* Get the session */
          s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                 session_index);
      } while (snat_is_session_static (s));

      if (snat_is_unk_proto_session (s))
        {
          clib_bihash_kv_16_8_t up_kv;
          nat_ed_ses_key_t key;

          /* Remove from lookup tables */
          /* Unknown-protocol sessions live in the 16-byte-key in2out_ed /
           * out2in_ed tables; the protocol is read from in2out.port here. */
          key.l_addr = s->in2out.addr;
          key.r_addr = s->ext_host_addr;
          key.fib_index = s->in2out.fib_index;
          key.proto = s->in2out.port;
          key.rsvd = 0;
          key.l_port = 0;
          up_kv.key[0] = key.as_u64[0];
          up_kv.key[1] = key.as_u64[1];
          if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &up_kv, 0))
            clib_warning ("in2out key del failed");

          key.l_addr = s->out2in.addr;
          key.fib_index = s->out2in.fib_index;
          up_kv.key[0] = key.as_u64[0];
          up_kv.key[1] = key.as_u64[1];
          if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &up_kv, 0))
            clib_warning ("out2in key del failed");
        }
      else
        {
          /* Remove in2out, out2in keys */
          kv0.key = s->in2out.as_u64;
          if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
              clib_warning ("in2out key delete failed");
          kv0.key = s->out2in.as_u64;
          if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
              clib_warning ("out2in key delete failed");

          /* log NAT event */
          snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
                                              s->out2in.addr.as_u32,
                                              s->in2out.protocol,
                                              s->in2out.port,
                                              s->out2in.port,
                                              s->in2out.fib_index);

          /* Give the evicted session's outside address/port back */
          snat_free_outside_address_and_port
            (sm, thread_index, &s->out2in, s->outside_address_index);
        }
      s->outside_address_index = ~0;

      /* NOTE(review): this branch never calls snat_static_mapping_match, so
       * an over-quota user always gets a dynamic translation; it also does
       * not delete the evicted session's worker_by_out entry - confirm both
       * are intended. */
      if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, thread_index,
                                               &key1, &address_index))
        {
          /* Not expected to fail right after a port was released above */
          ASSERT(0);

          b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
          return SNAT_IN2OUT_NEXT_DROP;
        }
      s->outside_address_index = address_index;
    }
  else
    {
      u8 static_mapping = 1;

      /* First try to match static mapping by local address and port */
      /* A non-zero return means no static mapping matched */
      if (snat_static_mapping_match (sm, *key0, &key1, 0, 0))
        {
          static_mapping = 0;
          /* Try to create dynamic translation */
          if (snat_alloc_outside_address_and_port (sm, rx_fib_index0,
                                                   thread_index, &key1,
                                                   &address_index))
            {
              b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
              return SNAT_IN2OUT_NEXT_DROP;
            }
        }

      /* Create a new session */
      pool_get (sm->per_thread_data[thread_index].sessions, s);
      memset (s, 0, sizeof (*s));

      /* Stays ~0 for static-mapping sessions (no dynamic allocation made) */
      s->outside_address_index = address_index;

      if (static_mapping)
        {
          u->nstaticsessions++;
          s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
        }
      else
        {
          u->nsessions++;
        }

      /* Create list elts */
      pool_get (sm->per_thread_data[thread_index].list_pool,
                per_user_translation_list_elt);
      clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
                       per_user_translation_list_elt -
                       sm->per_thread_data[thread_index].list_pool);

      /* Link list element and session to each other by pool index */
      per_user_translation_list_elt->value =
        s - sm->per_thread_data[thread_index].sessions;
      s->per_user_index = per_user_translation_list_elt -
                          sm->per_thread_data[thread_index].list_pool;
      s->per_user_list_head_index = u->sessions_per_user_list_head_index;

      clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                          s->per_user_list_head_index,
                          per_user_translation_list_elt -
                          sm->per_thread_data[thread_index].list_pool);
   }

  /* Fill in both directions of the session; protocol and FIB index of the
   * out2in key are overwritten after copying key1. */
  s->in2out = *key0;
  s->out2in = key1;
  s->out2in.protocol = key0->protocol;
  s->out2in.fib_index = outside_fib_index;
  s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
  *sessionp = s;

  /* Add to translation hashes */
  kv0.key = s->in2out.as_u64;
  kv0.value = s - sm->per_thread_data[thread_index].sessions;
  if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
      clib_warning ("in2out key add failed");

  kv0.key = s->out2in.as_u64;
  kv0.value = s - sm->per_thread_data[thread_index].sessions;

  if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
      clib_warning ("out2in key add failed");

  /* Add to translated packets worker lookup */
  /* Maps the outside (address, port, FIB) to the owning thread index */
  worker_by_out_key.addr = s->out2in.addr;
  worker_by_out_key.port = s->out2in.port;
  worker_by_out_key.fib_index = s->out2in.fib_index;
  kv0.key = worker_by_out_key.as_u64;
  kv0.value = thread_index;
  clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);

  /* log NAT event */
  snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
                                      s->out2in.addr.as_u32,
                                      s->in2out.protocol,
                                      s->in2out.port,
                                      s->out2in.port,
                                      s->in2out.fib_index);
  return next0;
}
460
461 static_always_inline
462 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
463                                  snat_session_key_t *p_key0)
464 {
465   icmp46_header_t *icmp0;
466   snat_session_key_t key0;
467   icmp_echo_header_t *echo0, *inner_echo0 = 0;
468   ip4_header_t *inner_ip0 = 0;
469   void *l4_header = 0;
470   icmp46_header_t *inner_icmp0;
471
472   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
473   echo0 = (icmp_echo_header_t *)(icmp0+1);
474
475   if (!icmp_is_error_message (icmp0))
476     {
477       key0.protocol = SNAT_PROTOCOL_ICMP;
478       key0.addr = ip0->src_address;
479       key0.port = echo0->identifier;
480     }
481   else
482     {
483       inner_ip0 = (ip4_header_t *)(echo0+1);
484       l4_header = ip4_next_header (inner_ip0);
485       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
486       key0.addr = inner_ip0->dst_address;
487       switch (key0.protocol)
488         {
489         case SNAT_PROTOCOL_ICMP:
490           inner_icmp0 = (icmp46_header_t*)l4_header;
491           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
492           key0.port = inner_echo0->identifier;
493           break;
494         case SNAT_PROTOCOL_UDP:
495         case SNAT_PROTOCOL_TCP:
496           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
497           break;
498         default:
499           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
500         }
501     }
502   *p_key0 = key0;
503   return -1; /* success */
504 }
505
506 /**
507  * Get address and port values to be used for ICMP packet translation
508  * and create session if needed
509  *
510  * @param[in,out] sm             NAT main
511  * @param[in,out] node           NAT node runtime
512  * @param[in] thread_index       thread index
513  * @param[in,out] b0             buffer containing packet to be translated
514  * @param[out] p_proto           protocol used for matching
515  * @param[out] p_value           address and port after NAT translation
516  * @param[out] p_dont_translate  if packet should not be translated
517  * @param d                      optional parameter
518  * @param e                      optional parameter
519  */
520 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
521                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
522                            snat_session_key_t *p_value,
523                            u8 *p_dont_translate, void *d, void *e)
524 {
525   ip4_header_t *ip0;
526   icmp46_header_t *icmp0;
527   u32 sw_if_index0;
528   u32 rx_fib_index0;
529   snat_session_key_t key0;
530   snat_session_t *s0 = 0;
531   u8 dont_translate = 0;
532   clib_bihash_kv_8_8_t kv0, value0;
533   u32 next0 = ~0;
534   int err;
535   u32 iph_offset0 = 0;
536
537   if (PREDICT_FALSE(vnet_buffer(b0)->sw_if_index[VLIB_TX] != ~0))
538     {
539       iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
540     }
541   ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
542   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
543   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
544   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
545
546   err = icmp_get_key (ip0, &key0);
547   if (err != -1)
548     {
549       b0->error = node->errors[err];
550       next0 = SNAT_IN2OUT_NEXT_DROP;
551       goto out;
552     }
553   key0.fib_index = rx_fib_index0;
554
555   kv0.key = key0.as_u64;
556
557   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
558     {
559       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
560           IP_PROTOCOL_ICMP, rx_fib_index0) &&
561           vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0))
562         {
563           dont_translate = 1;
564           goto out;
565         }
566
567       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
568         {
569           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
570           next0 = SNAT_IN2OUT_NEXT_DROP;
571           goto out;
572         }
573
574       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
575                          &s0, node, next0, thread_index);
576
577       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
578         goto out;
579     }
580   else
581     {
582       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
583                         icmp0->type != ICMP4_echo_reply &&
584                         !icmp_is_error_message (icmp0)))
585         {
586           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
587           next0 = SNAT_IN2OUT_NEXT_DROP;
588           goto out;
589         }
590
591       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
592                               value0.value);
593     }
594
595 out:
596   *p_proto = key0.protocol;
597   if (s0)
598     *p_value = s0->out2in;
599   *p_dont_translate = dont_translate;
600   if (d)
601     *(snat_session_t**)d = s0;
602   return next0;
603 }
604
605 /**
606  * Get address and port values to be used for ICMP packet translation
607  *
608  * @param[in] sm                 NAT main
609  * @param[in,out] node           NAT node runtime
610  * @param[in] thread_index       thread index
611  * @param[in,out] b0             buffer containing packet to be translated
612  * @param[out] p_proto           protocol used for matching
613  * @param[out] p_value           address and port after NAT translation
614  * @param[out] p_dont_translate  if packet should not be translated
615  * @param d                      optional parameter
616  * @param e                      optional parameter
617  */
618 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
619                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
620                            snat_session_key_t *p_value,
621                            u8 *p_dont_translate, void *d, void *e)
622 {
623   ip4_header_t *ip0;
624   icmp46_header_t *icmp0;
625   u32 sw_if_index0;
626   u32 rx_fib_index0;
627   snat_session_key_t key0;
628   snat_session_key_t sm0;
629   u8 dont_translate = 0;
630   u8 is_addr_only;
631   u32 next0 = ~0;
632   int err;
633
634   ip0 = vlib_buffer_get_current (b0);
635   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
636   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
637   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
638
639   err = icmp_get_key (ip0, &key0);
640   if (err != -1)
641     {
642       b0->error = node->errors[err];
643       next0 = SNAT_IN2OUT_NEXT_DROP;
644       goto out2;
645     }
646   key0.fib_index = rx_fib_index0;
647
648   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only))
649     {
650       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
651           IP_PROTOCOL_ICMP, rx_fib_index0)))
652         {
653           dont_translate = 1;
654           goto out;
655         }
656
657       if (icmp_is_error_message (icmp0))
658         {
659           next0 = SNAT_IN2OUT_NEXT_DROP;
660           goto out;
661         }
662
663       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
664       next0 = SNAT_IN2OUT_NEXT_DROP;
665       goto out;
666     }
667
668   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
669                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
670                     !icmp_is_error_message (icmp0)))
671     {
672       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
673       next0 = SNAT_IN2OUT_NEXT_DROP;
674       goto out;
675     }
676
677 out:
678   *p_value = sm0;
679 out2:
680   *p_proto = key0.protocol;
681   *p_dont_translate = dont_translate;
682   return next0;
683 }
684
685 static inline u32 icmp_in2out (snat_main_t *sm,
686                                vlib_buffer_t * b0,
687                                ip4_header_t * ip0,
688                                icmp46_header_t * icmp0,
689                                u32 sw_if_index0,
690                                u32 rx_fib_index0,
691                                vlib_node_runtime_t * node,
692                                u32 next0,
693                                u32 thread_index,
694                                void *d,
695                                void *e)
696 {
697   snat_session_key_t sm0;
698   u8 protocol;
699   icmp_echo_header_t *echo0, *inner_echo0 = 0;
700   ip4_header_t *inner_ip0;
701   void *l4_header = 0;
702   icmp46_header_t *inner_icmp0;
703   u8 dont_translate;
704   u32 new_addr0, old_addr0;
705   u16 old_id0, new_id0;
706   ip_csum_t sum0;
707   u16 checksum0;
708   u32 next0_tmp;
709
710   echo0 = (icmp_echo_header_t *)(icmp0+1);
711
712   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0,
713                                        &protocol, &sm0, &dont_translate, d, e);
714   if (next0_tmp != ~0)
715     next0 = next0_tmp;
716   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
717     goto out;
718
719   sum0 = ip_incremental_checksum (0, icmp0,
720                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
721   checksum0 = ~ip_csum_fold (sum0);
722   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
723     {
724       next0 = SNAT_IN2OUT_NEXT_DROP;
725       goto out;
726     }
727
728   old_addr0 = ip0->src_address.as_u32;
729   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
730   if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
731     vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
732
733   sum0 = ip0->checksum;
734   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
735                          src_address /* changed member */);
736   ip0->checksum = ip_csum_fold (sum0);
737
738   if (!icmp_is_error_message (icmp0))
739     {
740       new_id0 = sm0.port;
741       if (PREDICT_FALSE(new_id0 != echo0->identifier))
742         {
743           old_id0 = echo0->identifier;
744           new_id0 = sm0.port;
745           echo0->identifier = new_id0;
746
747           sum0 = icmp0->checksum;
748           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
749                                  identifier);
750           icmp0->checksum = ip_csum_fold (sum0);
751         }
752     }
753   else
754     {
755       inner_ip0 = (ip4_header_t *)(echo0+1);
756       l4_header = ip4_next_header (inner_ip0);
757
758       if (!ip4_header_checksum_is_valid (inner_ip0))
759         {
760           next0 = SNAT_IN2OUT_NEXT_DROP;
761           goto out;
762         }
763
764       old_addr0 = inner_ip0->dst_address.as_u32;
765       inner_ip0->dst_address = sm0.addr;
766       new_addr0 = inner_ip0->dst_address.as_u32;
767
768       sum0 = icmp0->checksum;
769       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
770                              dst_address /* changed member */);
771       icmp0->checksum = ip_csum_fold (sum0);
772
773       switch (protocol)
774         {
775           case SNAT_PROTOCOL_ICMP:
776             inner_icmp0 = (icmp46_header_t*)l4_header;
777             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
778
779             old_id0 = inner_echo0->identifier;
780             new_id0 = sm0.port;
781             inner_echo0->identifier = new_id0;
782
783             sum0 = icmp0->checksum;
784             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
785                                    identifier);
786             icmp0->checksum = ip_csum_fold (sum0);
787             break;
788           case SNAT_PROTOCOL_UDP:
789           case SNAT_PROTOCOL_TCP:
790             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
791             new_id0 = sm0.port;
792             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
793
794             sum0 = icmp0->checksum;
795             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
796                                    dst_port);
797             icmp0->checksum = ip_csum_fold (sum0);
798             break;
799           default:
800             ASSERT(0);
801         }
802     }
803
804 out:
805   return next0;
806 }
807
808 /**
809  * @brief Hairpinning
810  *
811  * Hairpinning allows two endpoints on the internal side of the NAT to
812  * communicate even if they only use each other's external IP addresses
813  * and ports.
814  *
815  * @param sm     NAT main.
816  * @param b0     Vlib buffer.
817  * @param ip0    IP header.
818  * @param udp0   UDP header.
819  * @param tcp0   TCP header.
820  * @param proto0 NAT protocol.
821  */
822 static inline void
823 snat_hairpinning (snat_main_t *sm,
824                   vlib_buffer_t * b0,
825                   ip4_header_t * ip0,
826                   udp_header_t * udp0,
827                   tcp_header_t * tcp0,
828                   u32 proto0)
829 {
830   snat_session_key_t key0, sm0;
831   snat_worker_key_t k0;
832   snat_session_t * s0;
833   clib_bihash_kv_8_8_t kv0, value0;
834   ip_csum_t sum0;
835   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
836   u16 new_dst_port0, old_dst_port0;
837
838   key0.addr = ip0->dst_address;
839   key0.port = udp0->dst_port;
840   key0.protocol = proto0;
841   key0.fib_index = sm->outside_fib_index;
842   kv0.key = key0.as_u64;
843
844   /* Check if destination is in active sessions */
845   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
846     {
847       /* or static mappings */
848       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
849         {
850           new_dst_addr0 = sm0.addr.as_u32;
851           new_dst_port0 = sm0.port;
852           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
853         }
854     }
855   else
856     {
857       si = value0.value;
858       if (sm->num_workers > 1)
859         {
860           k0.addr = ip0->dst_address;
861           k0.port = udp0->dst_port;
862           k0.fib_index = sm->outside_fib_index;
863           kv0.key = k0.as_u64;
864           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
865             ASSERT(0);
866           else
867             ti = value0.value;
868         }
869       else
870         ti = sm->num_workers;
871
872       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
873       new_dst_addr0 = s0->in2out.addr.as_u32;
874       new_dst_port0 = s0->in2out.port;
875       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
876     }
877
878   /* Destination is behind the same NAT, use internal address and port */
879   if (new_dst_addr0)
880     {
881       old_dst_addr0 = ip0->dst_address.as_u32;
882       ip0->dst_address.as_u32 = new_dst_addr0;
883       sum0 = ip0->checksum;
884       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
885                              ip4_header_t, dst_address);
886       ip0->checksum = ip_csum_fold (sum0);
887
888       old_dst_port0 = tcp0->dst;
889       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
890         {
891           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
892             {
893               tcp0->dst = new_dst_port0;
894               sum0 = tcp0->checksum;
895               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
896                                      ip4_header_t, dst_address);
897               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
898                                      ip4_header_t /* cheat */, length);
899               tcp0->checksum = ip_csum_fold(sum0);
900             }
901           else
902             {
903               udp0->dst_port = new_dst_port0;
904               udp0->checksum = 0;
905             }
906         }
907       else
908         {
909           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
910             {
911               sum0 = tcp0->checksum;
912               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
913                                      ip4_header_t, dst_address);
914               tcp0->checksum = ip_csum_fold(sum0);
915             }
916         }
917     }
918 }
919
920 static inline void
921 snat_icmp_hairpinning (snat_main_t *sm,
922                        vlib_buffer_t * b0,
923                        ip4_header_t * ip0,
924                        icmp46_header_t * icmp0)
925 {
926   snat_session_key_t key0, sm0;
927   clib_bihash_kv_8_8_t kv0, value0;
928   snat_worker_key_t k0;
929   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
930   ip_csum_t sum0;
931   snat_session_t *s0;
932
933   if (!icmp_is_error_message (icmp0))
934     {
935       icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
936       u16 icmp_id0 = echo0->identifier;
937       key0.addr = ip0->dst_address;
938       key0.port = icmp_id0;
939       key0.protocol = SNAT_PROTOCOL_ICMP;
940       key0.fib_index = sm->outside_fib_index;
941       kv0.key = key0.as_u64;
942
943       /* Check if destination is in active sessions */
944       if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
945         {
946           /* or static mappings */
947           if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
948             {
949               new_dst_addr0 = sm0.addr.as_u32;
950               vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
951             }
952         }
953       else
954         {
955           si = value0.value;
956           if (sm->num_workers > 1)
957             {
958               k0.addr = ip0->dst_address;
959               k0.port = icmp_id0;
960               k0.fib_index = sm->outside_fib_index;
961               kv0.key = k0.as_u64;
962               if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
963                 ASSERT(0);
964               else
965                 ti = value0.value;
966             }
967           else
968             ti = sm->num_workers;
969
970           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
971           new_dst_addr0 = s0->in2out.addr.as_u32;
972           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
973           echo0->identifier = s0->in2out.port;
974           sum0 = icmp0->checksum;
975           sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
976                                  icmp_echo_header_t, identifier);
977           icmp0->checksum = ip_csum_fold (sum0);
978         }
979
980       /* Destination is behind the same NAT, use internal address and port */
981       if (new_dst_addr0)
982         {
983           old_dst_addr0 = ip0->dst_address.as_u32;
984           ip0->dst_address.as_u32 = new_dst_addr0;
985           sum0 = ip0->checksum;
986           sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
987                                  ip4_header_t, dst_address);
988           ip0->checksum = ip_csum_fold (sum0);
989         }
990     }
991
992 }
993
994 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
995                                          vlib_buffer_t * b0,
996                                          ip4_header_t * ip0,
997                                          icmp46_header_t * icmp0,
998                                          u32 sw_if_index0,
999                                          u32 rx_fib_index0,
1000                                          vlib_node_runtime_t * node,
1001                                          u32 next0,
1002                                          f64 now,
1003                                          u32 thread_index,
1004                                          snat_session_t ** p_s0)
1005 {
1006   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1007                       next0, thread_index, p_s0, 0);
1008   snat_session_t * s0 = *p_s0;
1009   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
1010     {
1011       /* Hairpinning */
1012       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == 0)
1013         snat_icmp_hairpinning(sm, b0, ip0, icmp0);
1014       /* Accounting */
1015       s0->last_heard = now;
1016       s0->total_pkts++;
1017       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
1018       /* Per-user LRU list maintenance for dynamic translations */
1019       if (!snat_is_session_static (s0))
1020         {
1021           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1022                              s0->per_user_index);
1023           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1024                               s0->per_user_list_head_index,
1025                               s0->per_user_index);
1026         }
1027     }
1028   return next0;
1029 }
1030 static inline void
1031 snat_hairpinning_unknown_proto (snat_main_t *sm,
1032                                 vlib_buffer_t * b,
1033                                 ip4_header_t * ip)
1034 {
1035   u32 old_addr, new_addr = 0, ti = 0;
1036   clib_bihash_kv_8_8_t kv, value;
1037   clib_bihash_kv_16_8_t s_kv, s_value;
1038   nat_ed_ses_key_t key;
1039   snat_session_key_t m_key;
1040   snat_worker_key_t w_key;
1041   snat_static_mapping_t *m;
1042   ip_csum_t sum;
1043   snat_session_t *s;
1044
1045   old_addr = ip->dst_address.as_u32;
1046   key.l_addr.as_u32 = ip->dst_address.as_u32;
1047   key.r_addr.as_u32 = ip->src_address.as_u32;
1048   key.fib_index = sm->outside_fib_index;
1049   key.proto = ip->protocol;
1050   key.rsvd = 0;
1051   key.l_port = 0;
1052   s_kv.key[0] = key.as_u64[0];
1053   s_kv.key[1] = key.as_u64[1];
1054   if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1055     {
1056       m_key.addr = ip->dst_address;
1057       m_key.fib_index = sm->outside_fib_index;
1058       m_key.port = 0;
1059       m_key.protocol = 0;
1060       kv.key = m_key.as_u64;
1061       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1062         return;
1063
1064       m = pool_elt_at_index (sm->static_mappings, value.value);
1065       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1066         vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
1067       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
1068     }
1069   else
1070     {
1071       if (sm->num_workers > 1)
1072         {
1073           w_key.addr = ip->dst_address;
1074           w_key.port = 0;
1075           w_key.fib_index = sm->outside_fib_index;
1076           kv.key = w_key.as_u64;
1077           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv, &value))
1078             return;
1079           else
1080             ti = value.value;
1081         }
1082       else
1083         ti = sm->num_workers;
1084
1085       s = pool_elt_at_index (sm->per_thread_data[ti].sessions, s_value.value);
1086       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1087         vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
1088       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
1089     }
1090   sum = ip->checksum;
1091   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
1092   ip->checksum = ip_csum_fold (sum);
1093 }
1094
1095 static void
1096 snat_in2out_unknown_proto (snat_main_t *sm,
1097                            vlib_buffer_t * b,
1098                            ip4_header_t * ip,
1099                            u32 rx_fib_index,
1100                            u32 thread_index,
1101                            f64 now,
1102                            vlib_main_t * vm)
1103 {
1104   clib_bihash_kv_8_8_t kv, value;
1105   clib_bihash_kv_16_8_t s_kv, s_value;
1106   snat_static_mapping_t *m;
1107   snat_session_key_t m_key;
1108   u32 old_addr, new_addr = 0;
1109   ip_csum_t sum;
1110   snat_user_key_t u_key;
1111   snat_user_t *u;
1112   dlist_elt_t *head, *elt, *oldest;
1113   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1114   u32 elt_index, head_index, ses_index, oldest_index;
1115   snat_session_t * s;
1116   nat_ed_ses_key_t key;
1117   u32 address_index = ~0;
1118   int i;
1119   u8 is_sm = 0;
1120
1121   old_addr = ip->src_address.as_u32;
1122
1123   key.l_addr = ip->src_address;
1124   key.r_addr = ip->dst_address;
1125   key.fib_index = rx_fib_index;
1126   key.proto = ip->protocol;
1127   key.rsvd = 0;
1128   key.l_port = 0;
1129   s_kv.key[0] = key.as_u64[0];
1130   s_kv.key[1] = key.as_u64[1];
1131
1132   if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
1133     {
1134       s = pool_elt_at_index (tsm->sessions, s_value.value);
1135       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1136     }
1137   else
1138     {
1139       u_key.addr = ip->src_address;
1140       u_key.fib_index = rx_fib_index;
1141       kv.key = u_key.as_u64;
1142
1143       /* Ever heard of the "user" = src ip4 address before? */
1144       if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
1145         {
1146           /* no, make a new one */
1147           pool_get (tsm->users, u);
1148           memset (u, 0, sizeof (*u));
1149           u->addr = ip->src_address;
1150           u->fib_index = rx_fib_index;
1151
1152           pool_get (tsm->list_pool, head);
1153           u->sessions_per_user_list_head_index = head - tsm->list_pool;
1154
1155           clib_dlist_init (tsm->list_pool,
1156                            u->sessions_per_user_list_head_index);
1157
1158           kv.value = u - tsm->users;
1159
1160           /* add user */
1161           clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1);
1162         }
1163       else
1164         {
1165           u = pool_elt_at_index (tsm->users, value.value);
1166         }
1167
1168       m_key.addr = ip->src_address;
1169       m_key.port = 0;
1170       m_key.protocol = 0;
1171       m_key.fib_index = rx_fib_index;
1172       kv.key = m_key.as_u64;
1173
1174       /* Try to find static mapping first */
1175       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
1176         {
1177           m = pool_elt_at_index (sm->static_mappings, value.value);
1178           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
1179           is_sm = 1;
1180           goto create_ses;
1181         }
1182       /* Fallback to 3-tuple key */
1183       else
1184         {
1185           /* Choose same out address as for TCP/UDP session to same destination */
1186           if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
1187             {
1188               head_index = u->sessions_per_user_list_head_index;
1189               head = pool_elt_at_index (tsm->list_pool, head_index);
1190               elt_index = head->next;
1191               elt = pool_elt_at_index (tsm->list_pool, elt_index);
1192               ses_index = elt->value;
1193               while (ses_index != ~0)
1194                 {
1195                   s =  pool_elt_at_index (tsm->sessions, ses_index);
1196                   elt_index = elt->next;
1197                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1198                   ses_index = elt->value;
1199
1200                   if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
1201                     {
1202                       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1203                       address_index = s->outside_address_index;
1204
1205                       key.fib_index = sm->outside_fib_index;
1206                       key.l_addr.as_u32 = new_addr;
1207                       s_kv.key[0] = key.as_u64[0];
1208                       s_kv.key[1] = key.as_u64[1];
1209                       if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1210                         break;
1211
1212                       goto create_ses;
1213                     }
1214                 }
1215             }
1216           key.fib_index = sm->outside_fib_index;
1217           for (i = 0; i < vec_len (sm->addresses); i++)
1218             {
1219               key.l_addr.as_u32 = sm->addresses[i].addr.as_u32;
1220               s_kv.key[0] = key.as_u64[0];
1221               s_kv.key[1] = key.as_u64[1];
1222               if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1223                 {
1224                   new_addr = ip->src_address.as_u32 = key.l_addr.as_u32;
1225                   address_index = i;
1226                   goto create_ses;
1227                 }
1228             }
1229           return;
1230         }
1231
1232 create_ses:
1233       /* Over quota? Recycle the least recently used dynamic translation */
1234       if (u->nsessions >= sm->max_translations_per_user && !is_sm)
1235         {
1236           /* Remove the oldest dynamic translation */
1237           do {
1238               oldest_index = clib_dlist_remove_head (
1239                 tsm->list_pool, u->sessions_per_user_list_head_index);
1240
1241               ASSERT (oldest_index != ~0);
1242
1243               /* add it back to the end of the LRU list */
1244               clib_dlist_addtail (tsm->list_pool,
1245                                   u->sessions_per_user_list_head_index,
1246                                   oldest_index);
1247               /* Get the list element */
1248               oldest = pool_elt_at_index (tsm->list_pool, oldest_index);
1249
1250               /* Get the session index from the list element */
1251               ses_index = oldest->value;
1252
1253               /* Get the session */
1254               s = pool_elt_at_index (tsm->sessions, ses_index);
1255           } while (snat_is_session_static (s));
1256
1257           if (snat_is_unk_proto_session (s))
1258             {
1259               /* Remove from lookup tables */
1260               key.l_addr = s->in2out.addr;
1261               key.r_addr = s->ext_host_addr;
1262               key.fib_index = s->in2out.fib_index;
1263               key.proto = s->in2out.port;
1264               s_kv.key[0] = key.as_u64[0];
1265               s_kv.key[1] = key.as_u64[1];
1266               if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 0))
1267                 clib_warning ("in2out key del failed");
1268
1269               key.l_addr = s->out2in.addr;
1270               key.fib_index = s->out2in.fib_index;
1271               s_kv.key[0] = key.as_u64[0];
1272               s_kv.key[1] = key.as_u64[1];
1273               if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 0))
1274                 clib_warning ("out2in key del failed");
1275             }
1276           else
1277             {
1278               /* log NAT event */
1279               snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
1280                                                   s->out2in.addr.as_u32,
1281                                                   s->in2out.protocol,
1282                                                   s->in2out.port,
1283                                                   s->out2in.port,
1284                                                   s->in2out.fib_index);
1285
1286               snat_free_outside_address_and_port (sm, thread_index, &s->out2in,
1287                                                   s->outside_address_index);
1288
1289               /* Remove in2out, out2in keys */
1290               kv.key = s->in2out.as_u64;
1291               if (clib_bihash_add_del_8_8 (&sm->in2out, &kv, 0))
1292                 clib_warning ("in2out key del failed");
1293               kv.key = s->out2in.as_u64;
1294               if (clib_bihash_add_del_8_8 (&sm->out2in, &kv, 0))
1295                 clib_warning ("out2in key del failed");
1296             }
1297         }
1298       else
1299         {
1300           /* Create a new session */
1301           pool_get (tsm->sessions, s);
1302           memset (s, 0, sizeof (*s));
1303
1304           /* Create list elts */
1305           pool_get (tsm->list_pool, elt);
1306           clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
1307           elt->value = s - tsm->sessions;
1308           s->per_user_index = elt - tsm->list_pool;
1309           s->per_user_list_head_index = u->sessions_per_user_list_head_index;
1310           clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1311                               s->per_user_index);
1312         }
1313
1314       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1315       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
1316       s->outside_address_index = address_index;
1317       s->out2in.addr.as_u32 = new_addr;
1318       s->out2in.fib_index = sm->outside_fib_index;
1319       s->in2out.addr.as_u32 = old_addr;
1320       s->in2out.fib_index = rx_fib_index;
1321       s->in2out.port = s->out2in.port = ip->protocol;
1322       if (is_sm)
1323         {
1324           u->nstaticsessions++;
1325           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1326         }
1327       else
1328         {
1329           u->nsessions++;
1330         }
1331
1332       /* Add to lookup tables */
1333       key.l_addr.as_u32 = old_addr;
1334       key.r_addr = ip->dst_address;
1335       key.proto = ip->protocol;
1336       key.fib_index = rx_fib_index;
1337       s_kv.key[0] = key.as_u64[0];
1338       s_kv.key[1] = key.as_u64[1];
1339       s_kv.value = s - tsm->sessions;
1340       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1341         clib_warning ("in2out key add failed");
1342
1343       key.l_addr.as_u32 = new_addr;
1344       key.fib_index = sm->outside_fib_index;
1345       s_kv.key[0] = key.as_u64[0];
1346       s_kv.key[1] = key.as_u64[1];
1347       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1348         clib_warning ("out2in key add failed");
1349   }
1350
1351   /* Update IP checksum */
1352   sum = ip->checksum;
1353   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1354   ip->checksum = ip_csum_fold (sum);
1355
1356   /* Accounting */
1357   s->last_heard = now;
1358   s->total_pkts++;
1359   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1360   /* Per-user LRU list maintenance */
1361   clib_dlist_remove (tsm->list_pool, s->per_user_index);
1362   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1363                       s->per_user_index);
1364
1365   /* Hairpinning */
1366   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1367     snat_hairpinning_unknown_proto(sm, b, ip);
1368
1369   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1370     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1371 }
1372
1373 static void
1374 snat_in2out_lb (snat_main_t *sm,
1375                 vlib_buffer_t * b,
1376                 ip4_header_t * ip,
1377                 u32 rx_fib_index,
1378                 u32 thread_index,
1379                 f64 now,
1380                 vlib_main_t * vm)
1381 {
1382   nat_ed_ses_key_t key;
1383   clib_bihash_kv_16_8_t s_kv, s_value;
1384   udp_header_t *udp = ip4_next_header (ip);
1385   tcp_header_t *tcp = (tcp_header_t *) udp;
1386   snat_session_t *s = 0;
1387   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1388   u32 old_addr, new_addr;
1389   u16 new_port, old_port;
1390   ip_csum_t sum;
1391   u32 proto = ip_proto_to_snat_proto (ip->protocol);
1392   snat_session_key_t e_key, l_key;
1393   clib_bihash_kv_8_8_t kv, value;
1394   snat_user_key_t u_key;
1395   snat_user_t *u;
1396   dlist_elt_t *head, *elt;
1397
1398   old_addr = ip->src_address.as_u32;
1399
1400   key.l_addr = ip->src_address;
1401   key.r_addr = ip->dst_address;
1402   key.fib_index = rx_fib_index;
1403   key.proto = ip->protocol;
1404   key.rsvd = 0;
1405   key.l_port = udp->src_port;
1406   s_kv.key[0] = key.as_u64[0];
1407   s_kv.key[1] = key.as_u64[1];
1408
1409   if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
1410     {
1411       s = pool_elt_at_index (tsm->sessions, s_value.value);
1412     }
1413   else
1414     {
1415       l_key.addr = ip->src_address;
1416       l_key.port = udp->src_port;
1417       l_key.protocol = proto;
1418       l_key.fib_index = rx_fib_index;
1419       if (snat_static_mapping_match(sm, l_key, &e_key, 0, 0))
1420         return;
1421
1422       u_key.addr = ip->src_address;
1423       u_key.fib_index = rx_fib_index;
1424       kv.key = u_key.as_u64;
1425
1426       /* Ever heard of the "user" = src ip4 address before? */
1427       if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
1428         {
1429           /* no, make a new one */
1430           pool_get (tsm->users, u);
1431           memset (u, 0, sizeof (*u));
1432           u->addr = ip->src_address;
1433           u->fib_index = rx_fib_index;
1434
1435           pool_get (tsm->list_pool, head);
1436           u->sessions_per_user_list_head_index = head - tsm->list_pool;
1437
1438           clib_dlist_init (tsm->list_pool,
1439                            u->sessions_per_user_list_head_index);
1440
1441           kv.value = u - tsm->users;
1442
1443           /* add user */
1444           if (clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1))
1445             clib_warning ("user key add failed");
1446         }
1447       else
1448         {
1449           u = pool_elt_at_index (tsm->users, value.value);
1450         }
1451
1452       /* Create a new session */
1453       pool_get (tsm->sessions, s);
1454       memset (s, 0, sizeof (*s));
1455
1456       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1457       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1458       s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
1459       s->outside_address_index = ~0;
1460       s->in2out = l_key;
1461       s->out2in = e_key;
1462       u->nstaticsessions++;
1463
1464       /* Create list elts */
1465       pool_get (tsm->list_pool, elt);
1466       clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
1467       elt->value = s - tsm->sessions;
1468       s->per_user_index = elt - tsm->list_pool;
1469       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
1470       clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1471                           s->per_user_index);
1472
1473       /* Add to lookup tables */
1474       s_kv.value = s - tsm->sessions;
1475       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1476         clib_warning ("in2out-ed key add failed");
1477
1478       key.l_addr = e_key.addr;
1479       key.fib_index = e_key.fib_index;
1480       key.l_port = e_key.port;
1481       s_kv.key[0] = key.as_u64[0];
1482       s_kv.key[1] = key.as_u64[1];
1483       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1484         clib_warning ("out2in-ed key add failed");
1485     }
1486
1487   new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1488
1489   /* Update IP checksum */
1490   sum = ip->checksum;
1491   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1492   ip->checksum = ip_csum_fold (sum);
1493
1494   if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
1495     {
1496       old_port = tcp->src_port;
1497       tcp->src_port = s->out2in.port;
1498       new_port = tcp->src_port;
1499
1500       sum = tcp->checksum;
1501       sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1502       sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
1503       tcp->checksum = ip_csum_fold(sum);
1504     }
1505   else
1506     {
1507       udp->src_port = s->out2in.port;
1508       udp->checksum = 0;
1509     }
1510
1511   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1512     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1513
1514   /* Accounting */
1515   s->last_heard = now;
1516   s->total_pkts++;
1517   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1518 }
1519
1520 static inline uword
1521 snat_in2out_node_fn_inline (vlib_main_t * vm,
1522                             vlib_node_runtime_t * node,
1523                             vlib_frame_t * frame, int is_slow_path,
1524                             int is_output_feature)
1525 {
1526   u32 n_left_from, * from, * to_next;
1527   snat_in2out_next_t next_index;
1528   u32 pkts_processed = 0;
1529   snat_main_t * sm = &snat_main;
1530   f64 now = vlib_time_now (vm);
1531   u32 stats_node_index;
1532   u32 thread_index = vlib_get_thread_index ();
1533
1534   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
1535     snat_in2out_node.index;
1536
1537   from = vlib_frame_vector_args (frame);
1538   n_left_from = frame->n_vectors;
1539   next_index = node->cached_next_index;
1540
1541   while (n_left_from > 0)
1542     {
1543       u32 n_left_to_next;
1544
1545       vlib_get_next_frame (vm, node, next_index,
1546                            to_next, n_left_to_next);
1547
1548       while (n_left_from >= 4 && n_left_to_next >= 2)
1549         {
1550           u32 bi0, bi1;
1551           vlib_buffer_t * b0, * b1;
1552           u32 next0, next1;
1553           u32 sw_if_index0, sw_if_index1;
1554           ip4_header_t * ip0, * ip1;
1555           ip_csum_t sum0, sum1;
1556           u32 new_addr0, old_addr0, new_addr1, old_addr1;
1557           u16 old_port0, new_port0, old_port1, new_port1;
1558           udp_header_t * udp0, * udp1;
1559           tcp_header_t * tcp0, * tcp1;
1560           icmp46_header_t * icmp0, * icmp1;
1561           snat_session_key_t key0, key1;
1562           u32 rx_fib_index0, rx_fib_index1;
1563           u32 proto0, proto1;
1564           snat_session_t * s0 = 0, * s1 = 0;
1565           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1566           u32 iph_offset0 = 0, iph_offset1 = 0;
1567
1568           /* Prefetch next iteration. */
1569           {
1570             vlib_buffer_t * p2, * p3;
1571
1572             p2 = vlib_get_buffer (vm, from[2]);
1573             p3 = vlib_get_buffer (vm, from[3]);
1574
1575             vlib_prefetch_buffer_header (p2, LOAD);
1576             vlib_prefetch_buffer_header (p3, LOAD);
1577
1578             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1579             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1580           }
1581
1582           /* speculatively enqueue b0 and b1 to the current next frame */
1583           to_next[0] = bi0 = from[0];
1584           to_next[1] = bi1 = from[1];
1585           from += 2;
1586           to_next += 2;
1587           n_left_from -= 2;
1588           n_left_to_next -= 2;
1589
1590           b0 = vlib_get_buffer (vm, bi0);
1591           b1 = vlib_get_buffer (vm, bi1);
1592
1593           if (is_output_feature)
1594             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1595
1596           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1597                  iph_offset0);
1598
1599           udp0 = ip4_next_header (ip0);
1600           tcp0 = (tcp_header_t *) udp0;
1601           icmp0 = (icmp46_header_t *) udp0;
1602
1603           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1604           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1605                                    sw_if_index0);
1606
1607           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1608
1609           if (PREDICT_FALSE(ip0->ttl == 1))
1610             {
1611               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1612               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1613                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1614                                            0);
1615               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1616               goto trace00;
1617             }
1618
1619           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1620
1621           /* Next configured feature, probably ip4-lookup */
1622           if (is_slow_path)
1623             {
1624               if (PREDICT_FALSE (proto0 == ~0))
1625                 {
1626                   snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1627                                              thread_index, now, vm);
1628                   goto trace00;
1629                 }
1630
1631               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1632                 {
1633                   next0 = icmp_in2out_slow_path
1634                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
1635                      node, next0, now, thread_index, &s0);
1636                   goto trace00;
1637                 }
1638             }
1639           else
1640             {
1641               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1642                 {
1643                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1644                   goto trace00;
1645                 }
1646             }
1647
1648           key0.addr = ip0->src_address;
1649           key0.port = udp0->src_port;
1650           key0.protocol = proto0;
1651           key0.fib_index = rx_fib_index0;
1652
1653           kv0.key = key0.as_u64;
1654
1655           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
1656             {
1657               if (is_slow_path)
1658                 {
1659                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1660                       ip0, proto0, rx_fib_index0)) && !is_output_feature)
1661                     goto trace00;
1662
1663                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1664                                      &s0, node, next0, thread_index);
1665                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1666                     goto trace00;
1667                 }
1668               else
1669                 {
1670                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1671                   goto trace00;
1672                 }
1673             }
1674           else
1675             {
1676               if (PREDICT_FALSE (value0.value == ~0ULL))
1677                 {
1678                   if (is_slow_path)
1679                     {
1680                       snat_in2out_lb(sm, b0, ip0, rx_fib_index0, thread_index,
1681                                      now, vm);
1682                       goto trace00;
1683                     }
1684                   else
1685                     {
1686                       next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1687                       goto trace00;
1688                     }
1689                 }
1690               else
1691                 {
1692                   s0 = pool_elt_at_index (
1693                     sm->per_thread_data[thread_index].sessions,
1694                     value0.value);
1695                 }
1696             }
1697
1698           old_addr0 = ip0->src_address.as_u32;
1699           ip0->src_address = s0->out2in.addr;
1700           new_addr0 = ip0->src_address.as_u32;
1701           if (!is_output_feature)
1702             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1703
1704           sum0 = ip0->checksum;
1705           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1706                                  ip4_header_t,
1707                                  src_address /* changed member */);
1708           ip0->checksum = ip_csum_fold (sum0);
1709
1710           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1711             {
1712               old_port0 = tcp0->src_port;
1713               tcp0->src_port = s0->out2in.port;
1714               new_port0 = tcp0->src_port;
1715
1716               sum0 = tcp0->checksum;
1717               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1718                                      ip4_header_t,
1719                                      dst_address /* changed member */);
1720               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1721                                      ip4_header_t /* cheat */,
1722                                      length /* changed member */);
1723               tcp0->checksum = ip_csum_fold(sum0);
1724             }
1725           else
1726             {
1727               old_port0 = udp0->src_port;
1728               udp0->src_port = s0->out2in.port;
1729               udp0->checksum = 0;
1730             }
1731
1732           /* Hairpinning */
1733           if (!is_output_feature)
1734             snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1735
1736           /* Accounting */
1737           s0->last_heard = now;
1738           s0->total_pkts++;
1739           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1740           /* Per-user LRU list maintenance for dynamic translation */
1741           if (!snat_is_session_static (s0))
1742             {
1743               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1744                                  s0->per_user_index);
1745               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1746                                   s0->per_user_list_head_index,
1747                                   s0->per_user_index);
1748             }
1749         trace00:
1750
1751           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1752                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1753             {
1754               snat_in2out_trace_t *t =
1755                  vlib_add_trace (vm, node, b0, sizeof (*t));
1756               t->is_slow_path = is_slow_path;
1757               t->sw_if_index = sw_if_index0;
1758               t->next_index = next0;
1759                   t->session_index = ~0;
1760               if (s0)
1761                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1762             }
1763
1764           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1765
1766           if (is_output_feature)
1767             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1768
1769           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1770                  iph_offset1);
1771
1772           udp1 = ip4_next_header (ip1);
1773           tcp1 = (tcp_header_t *) udp1;
1774           icmp1 = (icmp46_header_t *) udp1;
1775
1776           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1777           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1778                                    sw_if_index1);
1779
1780           if (PREDICT_FALSE(ip1->ttl == 1))
1781             {
1782               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1783               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1784                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1785                                            0);
1786               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1787               goto trace01;
1788             }
1789
1790           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1791
1792           /* Next configured feature, probably ip4-lookup */
1793           if (is_slow_path)
1794             {
1795               if (PREDICT_FALSE (proto1 == ~0))
1796                 {
1797                   snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1,
1798                                              thread_index, now, vm);
1799                   goto trace01;
1800                 }
1801
1802               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1803                 {
1804                   next1 = icmp_in2out_slow_path
1805                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1806                      next1, now, thread_index, &s1);
1807                   goto trace01;
1808                 }
1809             }
1810           else
1811             {
1812               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1813                 {
1814                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1815                   goto trace01;
1816                 }
1817             }
1818
1819           key1.addr = ip1->src_address;
1820           key1.port = udp1->src_port;
1821           key1.protocol = proto1;
1822           key1.fib_index = rx_fib_index1;
1823
1824           kv1.key = key1.as_u64;
1825
1826             if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
1827             {
1828               if (is_slow_path)
1829                 {
1830                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
1831                       ip1, proto1, rx_fib_index1)) && !is_output_feature)
1832                     goto trace01;
1833
1834                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1835                                      &s1, node, next1, thread_index);
1836                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1837                     goto trace01;
1838                 }
1839               else
1840                 {
1841                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1842                   goto trace01;
1843                 }
1844             }
1845           else
1846             {
1847               if (PREDICT_FALSE (value1.value == ~0ULL))
1848                 {
1849                   if (is_slow_path)
1850                     {
1851                       snat_in2out_lb(sm, b1, ip1, rx_fib_index1, thread_index,
1852                                      now, vm);
1853                       goto trace01;
1854                     }
1855                   else
1856                     {
1857                       next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1858                       goto trace01;
1859                     }
1860                 }
1861               else
1862                 {
1863                   s1 = pool_elt_at_index (
1864                     sm->per_thread_data[thread_index].sessions,
1865                     value1.value);
1866                 }
1867             }
1868
1869           old_addr1 = ip1->src_address.as_u32;
1870           ip1->src_address = s1->out2in.addr;
1871           new_addr1 = ip1->src_address.as_u32;
1872           if (!is_output_feature)
1873             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1874
1875           sum1 = ip1->checksum;
1876           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1877                                  ip4_header_t,
1878                                  src_address /* changed member */);
1879           ip1->checksum = ip_csum_fold (sum1);
1880
1881           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1882             {
1883               old_port1 = tcp1->src_port;
1884               tcp1->src_port = s1->out2in.port;
1885               new_port1 = tcp1->src_port;
1886
1887               sum1 = tcp1->checksum;
1888               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1889                                      ip4_header_t,
1890                                      dst_address /* changed member */);
1891               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1892                                      ip4_header_t /* cheat */,
1893                                      length /* changed member */);
1894               tcp1->checksum = ip_csum_fold(sum1);
1895             }
1896           else
1897             {
1898               old_port1 = udp1->src_port;
1899               udp1->src_port = s1->out2in.port;
1900               udp1->checksum = 0;
1901             }
1902
1903           /* Hairpinning */
1904           if (!is_output_feature)
1905             snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
1906
1907           /* Accounting */
1908           s1->last_heard = now;
1909           s1->total_pkts++;
1910           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1911           /* Per-user LRU list maintenance for dynamic translation */
1912           if (!snat_is_session_static (s1))
1913             {
1914               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1915                                  s1->per_user_index);
1916               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1917                                   s1->per_user_list_head_index,
1918                                   s1->per_user_index);
1919             }
1920         trace01:
1921
1922           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1923                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1924             {
1925               snat_in2out_trace_t *t =
1926                  vlib_add_trace (vm, node, b1, sizeof (*t));
1927               t->sw_if_index = sw_if_index1;
1928               t->next_index = next1;
1929               t->session_index = ~0;
1930               if (s1)
1931                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1932             }
1933
1934           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1935
1936           /* verify speculative enqueues, maybe switch current next frame */
1937           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1938                                            to_next, n_left_to_next,
1939                                            bi0, bi1, next0, next1);
1940         }
1941
1942       while (n_left_from > 0 && n_left_to_next > 0)
1943         {
1944           u32 bi0;
1945           vlib_buffer_t * b0;
1946           u32 next0;
1947           u32 sw_if_index0;
1948           ip4_header_t * ip0;
1949           ip_csum_t sum0;
1950           u32 new_addr0, old_addr0;
1951           u16 old_port0, new_port0;
1952           udp_header_t * udp0;
1953           tcp_header_t * tcp0;
1954           icmp46_header_t * icmp0;
1955           snat_session_key_t key0;
1956           u32 rx_fib_index0;
1957           u32 proto0;
1958           snat_session_t * s0 = 0;
1959           clib_bihash_kv_8_8_t kv0, value0;
1960           u32 iph_offset0 = 0;
1961
1962           /* speculatively enqueue b0 to the current next frame */
1963           bi0 = from[0];
1964           to_next[0] = bi0;
1965           from += 1;
1966           to_next += 1;
1967           n_left_from -= 1;
1968           n_left_to_next -= 1;
1969
1970           b0 = vlib_get_buffer (vm, bi0);
1971           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1972
1973           if (is_output_feature)
1974             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1975
1976           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1977                  iph_offset0);
1978
1979           udp0 = ip4_next_header (ip0);
1980           tcp0 = (tcp_header_t *) udp0;
1981           icmp0 = (icmp46_header_t *) udp0;
1982
1983           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1984           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1985                                    sw_if_index0);
1986
1987           if (PREDICT_FALSE(ip0->ttl == 1))
1988             {
1989               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1990               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1991                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1992                                            0);
1993               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1994               goto trace0;
1995             }
1996
1997           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1998
1999           /* Next configured feature, probably ip4-lookup */
2000           if (is_slow_path)
2001             {
2002               if (PREDICT_FALSE (proto0 == ~0))
2003                 {
2004                   snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
2005                                              thread_index, now, vm);
2006                   goto trace0;
2007                 }
2008
2009               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2010                 {
2011                   next0 = icmp_in2out_slow_path
2012                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
2013                      next0, now, thread_index, &s0);
2014                   goto trace0;
2015                 }
2016             }
2017           else
2018             {
2019               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
2020                 {
2021                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2022                   goto trace0;
2023                 }
2024             }
2025
2026           key0.addr = ip0->src_address;
2027           key0.port = udp0->src_port;
2028           key0.protocol = proto0;
2029           key0.fib_index = rx_fib_index0;
2030
2031           kv0.key = key0.as_u64;
2032
2033           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
2034             {
2035               if (is_slow_path)
2036                 {
2037                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
2038                       ip0, proto0, rx_fib_index0)) && !is_output_feature)
2039                     goto trace0;
2040
2041                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
2042                                      &s0, node, next0, thread_index);
2043
2044                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
2045                     goto trace0;
2046                 }
2047               else
2048                 {
2049                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2050                   goto trace0;
2051                 }
2052             }
2053           else
2054             {
2055               if (PREDICT_FALSE (value0.value == ~0ULL))
2056                 {
2057                   if (is_slow_path)
2058                     {
2059                       snat_in2out_lb(sm, b0, ip0, rx_fib_index0, thread_index,
2060                                      now, vm);
2061                       goto trace0;
2062                     }
2063                   else
2064                     {
2065                       next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2066                       goto trace0;
2067                     }
2068                 }
2069               else
2070                 {
2071                   s0 = pool_elt_at_index (
2072                     sm->per_thread_data[thread_index].sessions,
2073                     value0.value);
2074                 }
2075             }
2076
2077           old_addr0 = ip0->src_address.as_u32;
2078           ip0->src_address = s0->out2in.addr;
2079           new_addr0 = ip0->src_address.as_u32;
2080           if (!is_output_feature)
2081             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
2082
2083           sum0 = ip0->checksum;
2084           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2085                                  ip4_header_t,
2086                                  src_address /* changed member */);
2087           ip0->checksum = ip_csum_fold (sum0);
2088
2089           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2090             {
2091               old_port0 = tcp0->src_port;
2092               tcp0->src_port = s0->out2in.port;
2093               new_port0 = tcp0->src_port;
2094
2095               sum0 = tcp0->checksum;
2096               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2097                                      ip4_header_t,
2098                                      dst_address /* changed member */);
2099               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2100                                      ip4_header_t /* cheat */,
2101                                      length /* changed member */);
2102               tcp0->checksum = ip_csum_fold(sum0);
2103             }
2104           else
2105             {
2106               old_port0 = udp0->src_port;
2107               udp0->src_port = s0->out2in.port;
2108               udp0->checksum = 0;
2109             }
2110
2111           /* Hairpinning */
2112           if (!is_output_feature)
2113             snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
2114
2115           /* Accounting */
2116           s0->last_heard = now;
2117           s0->total_pkts++;
2118           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
2119           /* Per-user LRU list maintenance for dynamic translation */
2120           if (!snat_is_session_static (s0))
2121             {
2122               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
2123                                  s0->per_user_index);
2124               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
2125                                   s0->per_user_list_head_index,
2126                                   s0->per_user_index);
2127             }
2128
2129         trace0:
2130           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2131                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2132             {
2133               snat_in2out_trace_t *t =
2134                  vlib_add_trace (vm, node, b0, sizeof (*t));
2135               t->is_slow_path = is_slow_path;
2136               t->sw_if_index = sw_if_index0;
2137               t->next_index = next0;
2138                   t->session_index = ~0;
2139               if (s0)
2140                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
2141             }
2142
2143           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2144
2145           /* verify speculative enqueue, maybe switch current next frame */
2146           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2147                                            to_next, n_left_to_next,
2148                                            bi0, next0);
2149         }
2150
2151       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2152     }
2153
2154   vlib_node_increment_counter (vm, stats_node_index,
2155                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2156                                pkts_processed);
2157   return frame->n_vectors;
2158 }
2159
2160 static uword
2161 snat_in2out_fast_path_fn (vlib_main_t * vm,
2162                           vlib_node_runtime_t * node,
2163                           vlib_frame_t * frame)
2164 {
2165   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0);
2166 }
2167
2168 VLIB_REGISTER_NODE (snat_in2out_node) = {
2169   .function = snat_in2out_fast_path_fn,
2170   .name = "nat44-in2out",
2171   .vector_size = sizeof (u32),
2172   .format_trace = format_snat_in2out_trace,
2173   .type = VLIB_NODE_TYPE_INTERNAL,
2174
2175   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2176   .error_strings = snat_in2out_error_strings,
2177
2178   .runtime_data_bytes = sizeof (snat_runtime_t),
2179
2180   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2181
2182   /* edit / add dispositions here */
2183   .next_nodes = {
2184     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2185     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2186     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2187     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2188   },
2189 };
2190
2191 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
2192
2193 static uword
2194 snat_in2out_output_fast_path_fn (vlib_main_t * vm,
2195                                  vlib_node_runtime_t * node,
2196                                  vlib_frame_t * frame)
2197 {
2198   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1);
2199 }
2200
2201 VLIB_REGISTER_NODE (snat_in2out_output_node) = {
2202   .function = snat_in2out_output_fast_path_fn,
2203   .name = "nat44-in2out-output",
2204   .vector_size = sizeof (u32),
2205   .format_trace = format_snat_in2out_trace,
2206   .type = VLIB_NODE_TYPE_INTERNAL,
2207
2208   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2209   .error_strings = snat_in2out_error_strings,
2210
2211   .runtime_data_bytes = sizeof (snat_runtime_t),
2212
2213   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2214
2215   /* edit / add dispositions here */
2216   .next_nodes = {
2217     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2218     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2219     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2220     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2221   },
2222 };
2223
2224 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
2225                               snat_in2out_output_fast_path_fn);
2226
2227 static uword
2228 snat_in2out_slow_path_fn (vlib_main_t * vm,
2229                           vlib_node_runtime_t * node,
2230                           vlib_frame_t * frame)
2231 {
2232   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0);
2233 }
2234
2235 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
2236   .function = snat_in2out_slow_path_fn,
2237   .name = "nat44-in2out-slowpath",
2238   .vector_size = sizeof (u32),
2239   .format_trace = format_snat_in2out_trace,
2240   .type = VLIB_NODE_TYPE_INTERNAL,
2241
2242   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2243   .error_strings = snat_in2out_error_strings,
2244
2245   .runtime_data_bytes = sizeof (snat_runtime_t),
2246
2247   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2248
2249   /* edit / add dispositions here */
2250   .next_nodes = {
2251     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2252     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2253     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2254     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2255   },
2256 };
2257
2258 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
2259                               snat_in2out_slow_path_fn);
2260
2261 static uword
2262 snat_in2out_output_slow_path_fn (vlib_main_t * vm,
2263                                  vlib_node_runtime_t * node,
2264                                  vlib_frame_t * frame)
2265 {
2266   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1);
2267 }
2268
2269 VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
2270   .function = snat_in2out_output_slow_path_fn,
2271   .name = "nat44-in2out-output-slowpath",
2272   .vector_size = sizeof (u32),
2273   .format_trace = format_snat_in2out_trace,
2274   .type = VLIB_NODE_TYPE_INTERNAL,
2275
2276   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2277   .error_strings = snat_in2out_error_strings,
2278
2279   .runtime_data_bytes = sizeof (snat_runtime_t),
2280
2281   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2282
2283   /* edit / add dispositions here */
2284   .next_nodes = {
2285     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2286     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2287     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2288     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2289   },
2290 };
2291
2292 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
2293                               snat_in2out_output_slow_path_fn);
2294
2295 /**************************/
2296 /*** deterministic mode ***/
2297 /**************************/
2298 static uword
2299 snat_det_in2out_node_fn (vlib_main_t * vm,
2300                          vlib_node_runtime_t * node,
2301                          vlib_frame_t * frame)
2302 {
2303   u32 n_left_from, * from, * to_next;
2304   snat_in2out_next_t next_index;
2305   u32 pkts_processed = 0;
2306   snat_main_t * sm = &snat_main;
2307   u32 now = (u32) vlib_time_now (vm);
2308   u32 thread_index = vlib_get_thread_index ();
2309
2310   from = vlib_frame_vector_args (frame);
2311   n_left_from = frame->n_vectors;
2312   next_index = node->cached_next_index;
2313
2314   while (n_left_from > 0)
2315     {
2316       u32 n_left_to_next;
2317
2318       vlib_get_next_frame (vm, node, next_index,
2319                            to_next, n_left_to_next);
2320
2321       while (n_left_from >= 4 && n_left_to_next >= 2)
2322         {
2323           u32 bi0, bi1;
2324           vlib_buffer_t * b0, * b1;
2325           u32 next0, next1;
2326           u32 sw_if_index0, sw_if_index1;
2327           ip4_header_t * ip0, * ip1;
2328           ip_csum_t sum0, sum1;
2329           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
2330           u16 old_port0, new_port0, lo_port0, i0;
2331           u16 old_port1, new_port1, lo_port1, i1;
2332           udp_header_t * udp0, * udp1;
2333           tcp_header_t * tcp0, * tcp1;
2334           u32 proto0, proto1;
2335           snat_det_out_key_t key0, key1;
2336           snat_det_map_t * dm0, * dm1;
2337           snat_det_session_t * ses0 = 0, * ses1 = 0;
2338           u32 rx_fib_index0, rx_fib_index1;
2339           icmp46_header_t * icmp0, * icmp1;
2340
2341           /* Prefetch next iteration. */
2342           {
2343             vlib_buffer_t * p2, * p3;
2344
2345             p2 = vlib_get_buffer (vm, from[2]);
2346             p3 = vlib_get_buffer (vm, from[3]);
2347
2348             vlib_prefetch_buffer_header (p2, LOAD);
2349             vlib_prefetch_buffer_header (p3, LOAD);
2350
2351             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2352             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2353           }
2354
2355           /* speculatively enqueue b0 and b1 to the current next frame */
2356           to_next[0] = bi0 = from[0];
2357           to_next[1] = bi1 = from[1];
2358           from += 2;
2359           to_next += 2;
2360           n_left_from -= 2;
2361           n_left_to_next -= 2;
2362
2363           b0 = vlib_get_buffer (vm, bi0);
2364           b1 = vlib_get_buffer (vm, bi1);
2365
2366           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2367           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
2368
2369           ip0 = vlib_buffer_get_current (b0);
2370           udp0 = ip4_next_header (ip0);
2371           tcp0 = (tcp_header_t *) udp0;
2372
2373           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2374
2375           if (PREDICT_FALSE(ip0->ttl == 1))
2376             {
2377               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2378               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2379                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2380                                            0);
2381               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2382               goto trace0;
2383             }
2384
2385           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2386
2387           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2388             {
2389               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2390               icmp0 = (icmp46_header_t *) udp0;
2391
2392               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2393                                   rx_fib_index0, node, next0, thread_index,
2394                                   &ses0, &dm0);
2395               goto trace0;
2396             }
2397
2398           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2399           if (PREDICT_FALSE(!dm0))
2400             {
2401               clib_warning("no match for internal host %U",
2402                            format_ip4_address, &ip0->src_address);
2403               next0 = SNAT_IN2OUT_NEXT_DROP;
2404               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2405               goto trace0;
2406             }
2407
2408           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2409
2410           key0.ext_host_addr = ip0->dst_address;
2411           key0.ext_host_port = tcp0->dst;
2412
2413           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2414           if (PREDICT_FALSE(!ses0))
2415             {
2416               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2417                 {
2418                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2419                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2420
2421                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2422                     continue;
2423
2424                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2425                   break;
2426                 }
2427               if (PREDICT_FALSE(!ses0))
2428                 {
2429                   /* too many sessions for user, send ICMP error packet */
2430
2431                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2432                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2433                                                ICMP4_destination_unreachable_destination_unreachable_host,
2434                                                0);
2435                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2436                   goto trace0;
2437                 }
2438             }
2439
2440           new_port0 = ses0->out.out_port;
2441
2442           old_addr0.as_u32 = ip0->src_address.as_u32;
2443           ip0->src_address.as_u32 = new_addr0.as_u32;
2444           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2445
2446           sum0 = ip0->checksum;
2447           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2448                                  ip4_header_t,
2449                                  src_address /* changed member */);
2450           ip0->checksum = ip_csum_fold (sum0);
2451
2452           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2453             {
2454               if (tcp0->flags & TCP_FLAG_SYN)
2455                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2456               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2457                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2458               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2459                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2460               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2461                 snat_det_ses_close(dm0, ses0);
2462               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2463                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2464               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2465                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2466
2467               old_port0 = tcp0->src;
2468               tcp0->src = new_port0;
2469
2470               sum0 = tcp0->checksum;
2471               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2472                                      ip4_header_t,
2473                                      dst_address /* changed member */);
2474               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2475                                      ip4_header_t /* cheat */,
2476                                      length /* changed member */);
2477               tcp0->checksum = ip_csum_fold(sum0);
2478             }
2479           else
2480             {
2481               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2482               old_port0 = udp0->src_port;
2483               udp0->src_port = new_port0;
2484               udp0->checksum = 0;
2485             }
2486
2487           switch(ses0->state)
2488             {
2489             case SNAT_SESSION_UDP_ACTIVE:
2490                 ses0->expire = now + sm->udp_timeout;
2491                 break;
2492             case SNAT_SESSION_TCP_SYN_SENT:
2493             case SNAT_SESSION_TCP_FIN_WAIT:
2494             case SNAT_SESSION_TCP_CLOSE_WAIT:
2495             case SNAT_SESSION_TCP_LAST_ACK:
2496                 ses0->expire = now + sm->tcp_transitory_timeout;
2497                 break;
2498             case SNAT_SESSION_TCP_ESTABLISHED:
2499                 ses0->expire = now + sm->tcp_established_timeout;
2500                 break;
2501             }
2502
2503         trace0:
2504           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2505                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2506             {
2507               snat_in2out_trace_t *t =
2508                  vlib_add_trace (vm, node, b0, sizeof (*t));
2509               t->is_slow_path = 0;
2510               t->sw_if_index = sw_if_index0;
2511               t->next_index = next0;
2512               t->session_index = ~0;
2513               if (ses0)
2514                 t->session_index = ses0 - dm0->sessions;
2515             }
2516
2517           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2518
2519           ip1 = vlib_buffer_get_current (b1);
2520           udp1 = ip4_next_header (ip1);
2521           tcp1 = (tcp_header_t *) udp1;
2522
2523           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2524
2525           if (PREDICT_FALSE(ip1->ttl == 1))
2526             {
2527               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2528               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2529                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2530                                            0);
2531               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2532               goto trace1;
2533             }
2534
2535           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2536
2537           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
2538             {
2539               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
2540               icmp1 = (icmp46_header_t *) udp1;
2541
2542               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
2543                                   rx_fib_index1, node, next1, thread_index,
2544                                   &ses1, &dm1);
2545               goto trace1;
2546             }
2547
2548           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
2549           if (PREDICT_FALSE(!dm1))
2550             {
2551               clib_warning("no match for internal host %U",
2552                            format_ip4_address, &ip0->src_address);
2553               next1 = SNAT_IN2OUT_NEXT_DROP;
2554               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2555               goto trace1;
2556             }
2557
2558           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
2559
2560           key1.ext_host_addr = ip1->dst_address;
2561           key1.ext_host_port = tcp1->dst;
2562
2563           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
2564           if (PREDICT_FALSE(!ses1))
2565             {
2566               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
2567                 {
2568                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
2569                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
2570
2571                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
2572                     continue;
2573
2574                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
2575                   break;
2576                 }
2577               if (PREDICT_FALSE(!ses1))
2578                 {
2579                   /* too many sessions for user, send ICMP error packet */
2580
2581                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2582                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
2583                                                ICMP4_destination_unreachable_destination_unreachable_host,
2584                                                0);
2585                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2586                   goto trace1;
2587                 }
2588             }
2589
2590           new_port1 = ses1->out.out_port;
2591
2592           old_addr1.as_u32 = ip1->src_address.as_u32;
2593           ip1->src_address.as_u32 = new_addr1.as_u32;
2594           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2595
2596           sum1 = ip1->checksum;
2597           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2598                                  ip4_header_t,
2599                                  src_address /* changed member */);
2600           ip1->checksum = ip_csum_fold (sum1);
2601
2602           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
2603             {
2604               if (tcp1->flags & TCP_FLAG_SYN)
2605                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
2606               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
2607                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2608               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
2609                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
2610               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
2611                 snat_det_ses_close(dm1, ses1);
2612               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2613                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
2614               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
2615                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2616
2617               old_port1 = tcp1->src;
2618               tcp1->src = new_port1;
2619
2620               sum1 = tcp1->checksum;
2621               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2622                                      ip4_header_t,
2623                                      dst_address /* changed member */);
2624               sum1 = ip_csum_update (sum1, old_port1, new_port1,
2625                                      ip4_header_t /* cheat */,
2626                                      length /* changed member */);
2627               tcp1->checksum = ip_csum_fold(sum1);
2628             }
2629           else
2630             {
2631               ses1->state = SNAT_SESSION_UDP_ACTIVE;
2632               old_port1 = udp1->src_port;
2633               udp1->src_port = new_port1;
2634               udp1->checksum = 0;
2635             }
2636
2637           switch(ses1->state)
2638             {
2639             case SNAT_SESSION_UDP_ACTIVE:
2640                 ses1->expire = now + sm->udp_timeout;
2641                 break;
2642             case SNAT_SESSION_TCP_SYN_SENT:
2643             case SNAT_SESSION_TCP_FIN_WAIT:
2644             case SNAT_SESSION_TCP_CLOSE_WAIT:
2645             case SNAT_SESSION_TCP_LAST_ACK:
2646                 ses1->expire = now + sm->tcp_transitory_timeout;
2647                 break;
2648             case SNAT_SESSION_TCP_ESTABLISHED:
2649                 ses1->expire = now + sm->tcp_established_timeout;
2650                 break;
2651             }
2652
2653         trace1:
2654           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2655                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2656             {
2657               snat_in2out_trace_t *t =
2658                  vlib_add_trace (vm, node, b1, sizeof (*t));
2659               t->is_slow_path = 0;
2660               t->sw_if_index = sw_if_index1;
2661               t->next_index = next1;
2662               t->session_index = ~0;
2663               if (ses1)
2664                 t->session_index = ses1 - dm1->sessions;
2665             }
2666
2667           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
2668
2669           /* verify speculative enqueues, maybe switch current next frame */
2670           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2671                                            to_next, n_left_to_next,
2672                                            bi0, bi1, next0, next1);
2673          }
2674
2675       while (n_left_from > 0 && n_left_to_next > 0)
2676         {
2677           u32 bi0;
2678           vlib_buffer_t * b0;
2679           u32 next0;
2680           u32 sw_if_index0;
2681           ip4_header_t * ip0;
2682           ip_csum_t sum0;
2683           ip4_address_t new_addr0, old_addr0;
2684           u16 old_port0, new_port0, lo_port0, i0;
2685           udp_header_t * udp0;
2686           tcp_header_t * tcp0;
2687           u32 proto0;
2688           snat_det_out_key_t key0;
2689           snat_det_map_t * dm0;
2690           snat_det_session_t * ses0 = 0;
2691           u32 rx_fib_index0;
2692           icmp46_header_t * icmp0;
2693
2694           /* speculatively enqueue b0 to the current next frame */
2695           bi0 = from[0];
2696           to_next[0] = bi0;
2697           from += 1;
2698           to_next += 1;
2699           n_left_from -= 1;
2700           n_left_to_next -= 1;
2701
2702           b0 = vlib_get_buffer (vm, bi0);
2703           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2704
2705           ip0 = vlib_buffer_get_current (b0);
2706           udp0 = ip4_next_header (ip0);
2707           tcp0 = (tcp_header_t *) udp0;
2708
2709           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2710
2711           if (PREDICT_FALSE(ip0->ttl == 1))
2712             {
2713               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2714               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2715                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2716                                            0);
2717               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2718               goto trace00;
2719             }
2720
2721           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2722
2723           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2724             {
2725               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2726               icmp0 = (icmp46_header_t *) udp0;
2727
2728               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2729                                   rx_fib_index0, node, next0, thread_index,
2730                                   &ses0, &dm0);
2731               goto trace00;
2732             }
2733
2734           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2735           if (PREDICT_FALSE(!dm0))
2736             {
2737               clib_warning("no match for internal host %U",
2738                            format_ip4_address, &ip0->src_address);
2739               next0 = SNAT_IN2OUT_NEXT_DROP;
2740               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2741               goto trace00;
2742             }
2743
2744           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2745
2746           key0.ext_host_addr = ip0->dst_address;
2747           key0.ext_host_port = tcp0->dst;
2748
2749           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2750           if (PREDICT_FALSE(!ses0))
2751             {
2752               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2753                 {
2754                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2755                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2756
2757                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2758                     continue;
2759
2760                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2761                   break;
2762                 }
2763               if (PREDICT_FALSE(!ses0))
2764                 {
2765                   /* too many sessions for user, send ICMP error packet */
2766
2767                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2768                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2769                                                ICMP4_destination_unreachable_destination_unreachable_host,
2770                                                0);
2771                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2772                   goto trace00;
2773                 }
2774             }
2775
2776           new_port0 = ses0->out.out_port;
2777
2778           old_addr0.as_u32 = ip0->src_address.as_u32;
2779           ip0->src_address.as_u32 = new_addr0.as_u32;
2780           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2781
2782           sum0 = ip0->checksum;
2783           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2784                                  ip4_header_t,
2785                                  src_address /* changed member */);
2786           ip0->checksum = ip_csum_fold (sum0);
2787
2788           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2789             {
2790               if (tcp0->flags & TCP_FLAG_SYN)
2791                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2792               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2793                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2794               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2795                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2796               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2797                 snat_det_ses_close(dm0, ses0);
2798               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2799                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2800               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2801                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2802
2803               old_port0 = tcp0->src;
2804               tcp0->src = new_port0;
2805
2806               sum0 = tcp0->checksum;
2807               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2808                                      ip4_header_t,
2809                                      dst_address /* changed member */);
2810               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2811                                      ip4_header_t /* cheat */,
2812                                      length /* changed member */);
2813               tcp0->checksum = ip_csum_fold(sum0);
2814             }
2815           else
2816             {
2817               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2818               old_port0 = udp0->src_port;
2819               udp0->src_port = new_port0;
2820               udp0->checksum = 0;
2821             }
2822
2823           switch(ses0->state)
2824             {
2825             case SNAT_SESSION_UDP_ACTIVE:
2826                 ses0->expire = now + sm->udp_timeout;
2827                 break;
2828             case SNAT_SESSION_TCP_SYN_SENT:
2829             case SNAT_SESSION_TCP_FIN_WAIT:
2830             case SNAT_SESSION_TCP_CLOSE_WAIT:
2831             case SNAT_SESSION_TCP_LAST_ACK:
2832                 ses0->expire = now + sm->tcp_transitory_timeout;
2833                 break;
2834             case SNAT_SESSION_TCP_ESTABLISHED:
2835                 ses0->expire = now + sm->tcp_established_timeout;
2836                 break;
2837             }
2838
2839         trace00:
2840           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2841                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2842             {
2843               snat_in2out_trace_t *t =
2844                  vlib_add_trace (vm, node, b0, sizeof (*t));
2845               t->is_slow_path = 0;
2846               t->sw_if_index = sw_if_index0;
2847               t->next_index = next0;
2848               t->session_index = ~0;
2849               if (ses0)
2850                 t->session_index = ses0 - dm0->sessions;
2851             }
2852
2853           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2854
2855           /* verify speculative enqueue, maybe switch current next frame */
2856           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2857                                            to_next, n_left_to_next,
2858                                            bi0, next0);
2859         }
2860
2861       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2862     }
2863
2864   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
2865                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2866                                pkts_processed);
2867   return frame->n_vectors;
2868 }
2869
2870 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
2871   .function = snat_det_in2out_node_fn,
2872   .name = "nat44-det-in2out",
2873   .vector_size = sizeof (u32),
2874   .format_trace = format_snat_in2out_trace,
2875   .type = VLIB_NODE_TYPE_INTERNAL,
2876
2877   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2878   .error_strings = snat_in2out_error_strings,
2879
2880   .runtime_data_bytes = sizeof (snat_runtime_t),
2881
2882   .n_next_nodes = 3,
2883
2884   /* edit / add dispositions here */
2885   .next_nodes = {
2886     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2887     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2888     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2889   },
2890 };
2891
2892 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
2893
2894 /**
2895  * Get address and port values to be used for ICMP packet translation
2896  * and create session if needed
2897  *
2898  * @param[in,out] sm             NAT main
2899  * @param[in,out] node           NAT node runtime
2900  * @param[in] thread_index       thread index
2901  * @param[in,out] b0             buffer containing packet to be translated
2902  * @param[out] p_proto           protocol used for matching
2903  * @param[out] p_value           address and port after NAT translation
2904  * @param[out] p_dont_translate  if packet should not be translated
2905  * @param d                      optional parameter
2906  * @param e                      optional parameter
2907  */
2908 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
2909                           u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
2910                           snat_session_key_t *p_value,
2911                           u8 *p_dont_translate, void *d, void *e)
2912 {
2913   ip4_header_t *ip0;
2914   icmp46_header_t *icmp0;
2915   u32 sw_if_index0;
2916   u32 rx_fib_index0;
2917   u8 protocol;
2918   snat_det_out_key_t key0;
2919   u8 dont_translate = 0;
2920   u32 next0 = ~0;
2921   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2922   ip4_header_t *inner_ip0;
2923   void *l4_header = 0;
2924   icmp46_header_t *inner_icmp0;
2925   snat_det_map_t * dm0 = 0;
2926   ip4_address_t new_addr0;
2927   u16 lo_port0, i0;
2928   snat_det_session_t * ses0 = 0;
2929   ip4_address_t in_addr;
2930   u16 in_port;
2931
2932   ip0 = vlib_buffer_get_current (b0);
2933   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2934   echo0 = (icmp_echo_header_t *)(icmp0+1);
2935   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2936   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
2937
2938   if (!icmp_is_error_message (icmp0))
2939     {
2940       protocol = SNAT_PROTOCOL_ICMP;
2941       in_addr = ip0->src_address;
2942       in_port = echo0->identifier;
2943     }
2944   else
2945     {
2946       inner_ip0 = (ip4_header_t *)(echo0+1);
2947       l4_header = ip4_next_header (inner_ip0);
2948       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2949       in_addr = inner_ip0->dst_address;
2950       switch (protocol)
2951         {
2952         case SNAT_PROTOCOL_ICMP:
2953           inner_icmp0 = (icmp46_header_t*)l4_header;
2954           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2955           in_port = inner_echo0->identifier;
2956           break;
2957         case SNAT_PROTOCOL_UDP:
2958         case SNAT_PROTOCOL_TCP:
2959           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2960           break;
2961         default:
2962           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
2963           next0 = SNAT_IN2OUT_NEXT_DROP;
2964           goto out;
2965         }
2966     }
2967
2968   dm0 = snat_det_map_by_user(sm, &in_addr);
2969   if (PREDICT_FALSE(!dm0))
2970     {
2971       clib_warning("no match for internal host %U",
2972                    format_ip4_address, &in_addr);
2973       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2974           IP_PROTOCOL_ICMP, rx_fib_index0)))
2975         {
2976           dont_translate = 1;
2977           goto out;
2978         }
2979       next0 = SNAT_IN2OUT_NEXT_DROP;
2980       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2981       goto out;
2982     }
2983
2984   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
2985
2986   key0.ext_host_addr = ip0->dst_address;
2987   key0.ext_host_port = 0;
2988
2989   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
2990   if (PREDICT_FALSE(!ses0))
2991     {
2992       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
2993           IP_PROTOCOL_ICMP, rx_fib_index0)))
2994         {
2995           dont_translate = 1;
2996           goto out;
2997         }
2998       if (icmp0->type != ICMP4_echo_request)
2999         {
3000           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3001           next0 = SNAT_IN2OUT_NEXT_DROP;
3002           goto out;
3003         }
3004       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
3005         {
3006           key0.out_port = clib_host_to_net_u16 (lo_port0 +
3007             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
3008
3009           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
3010             continue;
3011
3012           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
3013           break;
3014         }
3015       if (PREDICT_FALSE(!ses0))
3016         {
3017           next0 = SNAT_IN2OUT_NEXT_DROP;
3018           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
3019           goto out;
3020         }
3021     }
3022
3023   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
3024                     !icmp_is_error_message (icmp0)))
3025     {
3026       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3027       next0 = SNAT_IN2OUT_NEXT_DROP;
3028       goto out;
3029     }
3030
3031   u32 now = (u32) vlib_time_now (sm->vlib_main);
3032
3033   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
3034   ses0->expire = now + sm->icmp_timeout;
3035
3036 out:
3037   *p_proto = protocol;
3038   if (ses0)
3039     {
3040       p_value->addr = new_addr0;
3041       p_value->fib_index = sm->outside_fib_index;
3042       p_value->port = ses0->out.out_port;
3043     }
3044   *p_dont_translate = dont_translate;
3045   if (d)
3046     *(snat_det_session_t**)d = ses0;
3047   if (e)
3048     *(snat_det_map_t**)e = dm0;
3049   return next0;
3050 }
3051
3052 /**********************/
3053 /*** worker handoff ***/
3054 /**********************/
3055 static inline uword
3056 snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
3057                                       vlib_node_runtime_t * node,
3058                                       vlib_frame_t * frame,
3059                                       u8 is_output)
3060 {
3061   snat_main_t *sm = &snat_main;
3062   vlib_thread_main_t *tm = vlib_get_thread_main ();
3063   u32 n_left_from, *from, *to_next = 0;
3064   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
3065   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
3066     = 0;
3067   vlib_frame_queue_elt_t *hf = 0;
3068   vlib_frame_t *f = 0;
3069   int i;
3070   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
3071   u32 next_worker_index = 0;
3072   u32 current_worker_index = ~0;
3073   u32 thread_index = vlib_get_thread_index ();
3074   u32 fq_index;
3075   u32 to_node_index;
3076
3077   ASSERT (vec_len (sm->workers));
3078
3079   if (is_output)
3080     {
3081       fq_index = sm->fq_in2out_output_index;
3082       to_node_index = sm->in2out_output_node_index;
3083     }
3084   else
3085     {
3086       fq_index = sm->fq_in2out_index;
3087       to_node_index = sm->in2out_node_index;
3088     }
3089
3090   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
3091     {
3092       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
3093
3094       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
3095                                sm->first_worker_index + sm->num_workers - 1,
3096                                (vlib_frame_queue_t *) (~0));
3097     }
3098
3099   from = vlib_frame_vector_args (frame);
3100   n_left_from = frame->n_vectors;
3101
3102   while (n_left_from > 0)
3103     {
3104       u32 bi0;
3105       vlib_buffer_t *b0;
3106       u32 sw_if_index0;
3107       u32 rx_fib_index0;
3108       ip4_header_t * ip0;
3109       u8 do_handoff;
3110
3111       bi0 = from[0];
3112       from += 1;
3113       n_left_from -= 1;
3114
3115       b0 = vlib_get_buffer (vm, bi0);
3116
3117       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
3118       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3119
3120       ip0 = vlib_buffer_get_current (b0);
3121
3122       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
3123
3124       if (PREDICT_FALSE (next_worker_index != thread_index))
3125         {
3126           do_handoff = 1;
3127
3128           if (next_worker_index != current_worker_index)
3129             {
3130               if (hf)
3131                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3132
3133               hf = vlib_get_worker_handoff_queue_elt (fq_index,
3134                                                       next_worker_index,
3135                                                       handoff_queue_elt_by_worker_index);
3136
3137               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
3138               to_next_worker = &hf->buffer_index[hf->n_vectors];
3139               current_worker_index = next_worker_index;
3140             }
3141
3142           /* enqueue to correct worker thread */
3143           to_next_worker[0] = bi0;
3144           to_next_worker++;
3145           n_left_to_next_worker--;
3146
3147           if (n_left_to_next_worker == 0)
3148             {
3149               hf->n_vectors = VLIB_FRAME_SIZE;
3150               vlib_put_frame_queue_elt (hf);
3151               current_worker_index = ~0;
3152               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
3153               hf = 0;
3154             }
3155         }
3156       else
3157         {
3158           do_handoff = 0;
3159           /* if this is 1st frame */
3160           if (!f)
3161             {
3162               f = vlib_get_frame_to_node (vm, to_node_index);
3163               to_next = vlib_frame_vector_args (f);
3164             }
3165
3166           to_next[0] = bi0;
3167           to_next += 1;
3168           f->n_vectors++;
3169         }
3170
3171       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
3172                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3173         {
3174           snat_in2out_worker_handoff_trace_t *t =
3175             vlib_add_trace (vm, node, b0, sizeof (*t));
3176           t->next_worker_index = next_worker_index;
3177           t->do_handoff = do_handoff;
3178         }
3179     }
3180
3181   if (f)
3182     vlib_put_frame_to_node (vm, to_node_index, f);
3183
3184   if (hf)
3185     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3186
3187   /* Ship frames to the worker nodes */
3188   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
3189     {
3190       if (handoff_queue_elt_by_worker_index[i])
3191         {
3192           hf = handoff_queue_elt_by_worker_index[i];
3193           /*
3194            * It works better to let the handoff node
3195            * rate-adapt, always ship the handoff queue element.
3196            */
3197           if (1 || hf->n_vectors == hf->last_n_vectors)
3198             {
3199               vlib_put_frame_queue_elt (hf);
3200               handoff_queue_elt_by_worker_index[i] = 0;
3201             }
3202           else
3203             hf->last_n_vectors = hf->n_vectors;
3204         }
3205       congested_handoff_queue_by_worker_index[i] =
3206         (vlib_frame_queue_t *) (~0);
3207     }
3208   hf = 0;
3209   current_worker_index = ~0;
3210   return frame->n_vectors;
3211 }
3212
3213 static uword
3214 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
3215                                vlib_node_runtime_t * node,
3216                                vlib_frame_t * frame)
3217 {
3218   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
3219 }
3220
3221 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
3222   .function = snat_in2out_worker_handoff_fn,
3223   .name = "nat44-in2out-worker-handoff",
3224   .vector_size = sizeof (u32),
3225   .format_trace = format_snat_in2out_worker_handoff_trace,
3226   .type = VLIB_NODE_TYPE_INTERNAL,
3227
3228   .n_next_nodes = 1,
3229
3230   .next_nodes = {
3231     [0] = "error-drop",
3232   },
3233 };
3234
3235 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
3236                               snat_in2out_worker_handoff_fn);
3237
3238 static uword
3239 snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
3240                                       vlib_node_runtime_t * node,
3241                                       vlib_frame_t * frame)
3242 {
3243   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
3244 }
3245
3246 VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
3247   .function = snat_in2out_output_worker_handoff_fn,
3248   .name = "nat44-in2out-output-worker-handoff",
3249   .vector_size = sizeof (u32),
3250   .format_trace = format_snat_in2out_worker_handoff_trace,
3251   .type = VLIB_NODE_TYPE_INTERNAL,
3252
3253   .n_next_nodes = 1,
3254
3255   .next_nodes = {
3256     [0] = "error-drop",
3257   },
3258 };
3259
3260 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
3261                               snat_in2out_output_worker_handoff_fn);
3262
3263 static_always_inline int
3264 is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
3265 {
3266   snat_address_t * ap;
3267   clib_bihash_kv_8_8_t kv, value;
3268   snat_session_key_t m_key;
3269
3270   vec_foreach (ap, sm->addresses)
3271     {
3272       if (ap->addr.as_u32 == dst_addr->as_u32)
3273         return 1;
3274     }
3275
3276   m_key.addr.as_u32 = dst_addr->as_u32;
3277   m_key.fib_index = sm->outside_fib_index;
3278   m_key.port = 0;
3279   m_key.protocol = 0;
3280   kv.key = m_key.as_u64;
3281   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
3282     return 1;
3283
3284   return 0;
3285 }
3286
3287 static uword
3288 snat_hairpin_dst_fn (vlib_main_t * vm,
3289                      vlib_node_runtime_t * node,
3290                      vlib_frame_t * frame)
3291 {
3292   u32 n_left_from, * from, * to_next;
3293   snat_in2out_next_t next_index;
3294   u32 pkts_processed = 0;
3295   snat_main_t * sm = &snat_main;
3296
3297   from = vlib_frame_vector_args (frame);
3298   n_left_from = frame->n_vectors;
3299   next_index = node->cached_next_index;
3300
3301   while (n_left_from > 0)
3302     {
3303       u32 n_left_to_next;
3304
3305       vlib_get_next_frame (vm, node, next_index,
3306                            to_next, n_left_to_next);
3307
3308       while (n_left_from > 0 && n_left_to_next > 0)
3309         {
3310           u32 bi0;
3311           vlib_buffer_t * b0;
3312           u32 next0;
3313           ip4_header_t * ip0;
3314           u32 proto0;
3315
3316           /* speculatively enqueue b0 to the current next frame */
3317           bi0 = from[0];
3318           to_next[0] = bi0;
3319           from += 1;
3320           to_next += 1;
3321           n_left_from -= 1;
3322           n_left_to_next -= 1;
3323
3324           b0 = vlib_get_buffer (vm, bi0);
3325           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3326           ip0 = vlib_buffer_get_current (b0);
3327
3328           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3329
3330           vnet_buffer (b0)->snat.flags = 0;
3331           if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
3332             {
3333               if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
3334                 {
3335                   udp_header_t * udp0 = ip4_next_header (ip0);
3336                   tcp_header_t * tcp0 = (tcp_header_t *) udp0;
3337
3338                   snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3339                 }
3340               else if (proto0 == SNAT_PROTOCOL_ICMP)
3341                 {
3342                   icmp46_header_t * icmp0 = ip4_next_header (ip0);
3343
3344                   snat_icmp_hairpinning (sm, b0, ip0, icmp0);
3345                 }
3346               else
3347                 {
3348                   snat_hairpinning_unknown_proto (sm, b0, ip0);
3349                 }
3350
3351               vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
3352               clib_warning("is hairpinning");
3353             }
3354
3355           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3356
3357           /* verify speculative enqueue, maybe switch current next frame */
3358           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3359                                            to_next, n_left_to_next,
3360                                            bi0, next0);
3361          }
3362
3363       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3364     }
3365
3366   vlib_node_increment_counter (vm, snat_hairpin_dst_node.index,
3367                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3368                                pkts_processed);
3369   return frame->n_vectors;
3370 }
3371
3372 VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
3373   .function = snat_hairpin_dst_fn,
3374   .name = "nat44-hairpin-dst",
3375   .vector_size = sizeof (u32),
3376   .type = VLIB_NODE_TYPE_INTERNAL,
3377   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3378   .error_strings = snat_in2out_error_strings,
3379   .n_next_nodes = 2,
3380   .next_nodes = {
3381     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3382     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3383   },
3384 };
3385
3386 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node,
3387                               snat_hairpin_dst_fn);
3388
3389 static uword
3390 snat_hairpin_src_fn (vlib_main_t * vm,
3391                      vlib_node_runtime_t * node,
3392                      vlib_frame_t * frame)
3393 {
3394   u32 n_left_from, * from, * to_next;
3395   snat_in2out_next_t next_index;
3396   u32 pkts_processed = 0;
3397   snat_main_t *sm = &snat_main;
3398
3399   from = vlib_frame_vector_args (frame);
3400   n_left_from = frame->n_vectors;
3401   next_index = node->cached_next_index;
3402
3403   while (n_left_from > 0)
3404     {
3405       u32 n_left_to_next;
3406
3407       vlib_get_next_frame (vm, node, next_index,
3408                            to_next, n_left_to_next);
3409
3410       while (n_left_from > 0 && n_left_to_next > 0)
3411         {
3412           u32 bi0;
3413           vlib_buffer_t * b0;
3414           u32 next0;
3415           snat_interface_t *i;
3416           u32 sw_if_index0;
3417
3418           /* speculatively enqueue b0 to the current next frame */
3419           bi0 = from[0];
3420           to_next[0] = bi0;
3421           from += 1;
3422           to_next += 1;
3423           n_left_from -= 1;
3424           n_left_to_next -= 1;
3425
3426           b0 = vlib_get_buffer (vm, bi0);
3427           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3428           next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT;
3429
3430           pool_foreach (i, sm->output_feature_interfaces,
3431           ({
3432             /* Only packets from NAT inside interface */
3433             if ((i->is_inside == 1) && (sw_if_index0 == i->sw_if_index))
3434               {
3435                 if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) &
3436                                     SNAT_FLAG_HAIRPINNING))
3437                   {
3438                     if (PREDICT_TRUE (sm->num_workers > 1))
3439                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
3440                     else
3441                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
3442                   }
3443                 break;
3444               }
3445           }));
3446
3447           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3448
3449           /* verify speculative enqueue, maybe switch current next frame */
3450           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3451                                            to_next, n_left_to_next,
3452                                            bi0, next0);
3453          }
3454
3455       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3456     }
3457
3458   vlib_node_increment_counter (vm, snat_hairpin_src_node.index,
3459                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3460                                pkts_processed);
3461   return frame->n_vectors;
3462 }
3463
3464 VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
3465   .function = snat_hairpin_src_fn,
3466   .name = "nat44-hairpin-src",
3467   .vector_size = sizeof (u32),
3468   .type = VLIB_NODE_TYPE_INTERNAL,
3469   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3470   .error_strings = snat_in2out_error_strings,
3471   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
3472   .next_nodes = {
3473      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
3474      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-in2out-output",
3475      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
3476      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
3477   },
3478 };
3479
3480 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node,
3481                               snat_hairpin_src_fn);
3482
3483 static uword
3484 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
3485                                 vlib_node_runtime_t * node,
3486                                 vlib_frame_t * frame)
3487 {
3488   u32 n_left_from, * from, * to_next;
3489   snat_in2out_next_t next_index;
3490   u32 pkts_processed = 0;
3491   snat_main_t * sm = &snat_main;
3492   u32 stats_node_index;
3493
3494   stats_node_index = snat_in2out_fast_node.index;
3495
3496   from = vlib_frame_vector_args (frame);
3497   n_left_from = frame->n_vectors;
3498   next_index = node->cached_next_index;
3499
3500   while (n_left_from > 0)
3501     {
3502       u32 n_left_to_next;
3503
3504       vlib_get_next_frame (vm, node, next_index,
3505                            to_next, n_left_to_next);
3506
3507       while (n_left_from > 0 && n_left_to_next > 0)
3508         {
3509           u32 bi0;
3510           vlib_buffer_t * b0;
3511           u32 next0;
3512           u32 sw_if_index0;
3513           ip4_header_t * ip0;
3514           ip_csum_t sum0;
3515           u32 new_addr0, old_addr0;
3516           u16 old_port0, new_port0;
3517           udp_header_t * udp0;
3518           tcp_header_t * tcp0;
3519           icmp46_header_t * icmp0;
3520           snat_session_key_t key0, sm0;
3521           u32 proto0;
3522           u32 rx_fib_index0;
3523
3524           /* speculatively enqueue b0 to the current next frame */
3525           bi0 = from[0];
3526           to_next[0] = bi0;
3527           from += 1;
3528           to_next += 1;
3529           n_left_from -= 1;
3530           n_left_to_next -= 1;
3531
3532           b0 = vlib_get_buffer (vm, bi0);
3533           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3534
3535           ip0 = vlib_buffer_get_current (b0);
3536           udp0 = ip4_next_header (ip0);
3537           tcp0 = (tcp_header_t *) udp0;
3538           icmp0 = (icmp46_header_t *) udp0;
3539
3540           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3541           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3542
3543           if (PREDICT_FALSE(ip0->ttl == 1))
3544             {
3545               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3546               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3547                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3548                                            0);
3549               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3550               goto trace0;
3551             }
3552
3553           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3554
3555           if (PREDICT_FALSE (proto0 == ~0))
3556               goto trace0;
3557
3558           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3559             {
3560               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
3561                                   rx_fib_index0, node, next0, ~0, 0, 0);
3562               goto trace0;
3563             }
3564
3565           key0.addr = ip0->src_address;
3566           key0.protocol = proto0;
3567           key0.port = udp0->src_port;
3568           key0.fib_index = rx_fib_index0;
3569
3570           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0))
3571             {
3572               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3573               next0= SNAT_IN2OUT_NEXT_DROP;
3574               goto trace0;
3575             }
3576
3577           new_addr0 = sm0.addr.as_u32;
3578           new_port0 = sm0.port;
3579           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
3580           old_addr0 = ip0->src_address.as_u32;
3581           ip0->src_address.as_u32 = new_addr0;
3582
3583           sum0 = ip0->checksum;
3584           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3585                                  ip4_header_t,
3586                                  src_address /* changed member */);
3587           ip0->checksum = ip_csum_fold (sum0);
3588
3589           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
3590             {
3591               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3592                 {
3593                   old_port0 = tcp0->src_port;
3594                   tcp0->src_port = new_port0;
3595
3596                   sum0 = tcp0->checksum;
3597                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3598                                          ip4_header_t,
3599                                          dst_address /* changed member */);
3600                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
3601                                          ip4_header_t /* cheat */,
3602                                          length /* changed member */);
3603                   tcp0->checksum = ip_csum_fold(sum0);
3604                 }
3605               else
3606                 {
3607                   old_port0 = udp0->src_port;
3608                   udp0->src_port = new_port0;
3609                   udp0->checksum = 0;
3610                 }
3611             }
3612           else
3613             {
3614               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3615                 {
3616                   sum0 = tcp0->checksum;
3617                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3618                                          ip4_header_t,
3619                                          dst_address /* changed member */);
3620                   tcp0->checksum = ip_csum_fold(sum0);
3621                 }
3622             }
3623
3624           /* Hairpinning */
3625           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3626
3627         trace0:
3628           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3629                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3630             {
3631               snat_in2out_trace_t *t =
3632                  vlib_add_trace (vm, node, b0, sizeof (*t));
3633               t->sw_if_index = sw_if_index0;
3634               t->next_index = next0;
3635             }
3636
3637           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3638
3639           /* verify speculative enqueue, maybe switch current next frame */
3640           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3641                                            to_next, n_left_to_next,
3642                                            bi0, next0);
3643         }
3644
3645       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3646     }
3647
3648   vlib_node_increment_counter (vm, stats_node_index,
3649                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3650                                pkts_processed);
3651   return frame->n_vectors;
3652 }
3653
3654
3655 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
3656   .function = snat_in2out_fast_static_map_fn,
3657   .name = "nat44-in2out-fast",
3658   .vector_size = sizeof (u32),
3659   .format_trace = format_snat_in2out_fast_trace,
3660   .type = VLIB_NODE_TYPE_INTERNAL,
3661
3662   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3663   .error_strings = snat_in2out_error_strings,
3664
3665   .runtime_data_bytes = sizeof (snat_runtime_t),
3666
3667   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3668
3669   /* edit / add dispositions here */
3670   .next_nodes = {
3671     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3672     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3673     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
3674     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3675   },
3676 };
3677
3678 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);