NAT: DS-Lite (VPP-1040)
[vpp.git] / src / plugins / nat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <nat/nat.h>
25 #include <nat/nat_ipfix_logging.h>
26 #include <nat/nat_det.h>
27
28 #include <vppinfra/hash.h>
29 #include <vppinfra/error.h>
30 #include <vppinfra/elog.h>
31
32 typedef struct {
33   u32 sw_if_index;
34   u32 next_index;
35   u32 session_index;
36   u32 is_slow_path;
37 } snat_in2out_trace_t;
38
39 typedef struct {
40   u32 next_worker_index;
41   u8 do_handoff;
42 } snat_in2out_worker_handoff_trace_t;
43
44 /* packet trace format function */
45 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
46 {
47   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
50   char * tag;
51
52   tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
53
54   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
55               t->sw_if_index, t->next_index, t->session_index);
56
57   return s;
58 }
59
60 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
61 {
62   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
63   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
64   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
65
66   s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
67               t->sw_if_index, t->next_index);
68
69   return s;
70 }
71
72 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
73 {
74   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
75   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
76   snat_in2out_worker_handoff_trace_t * t =
77     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
78   char * m;
79
80   m = t->do_handoff ? "next worker" : "same worker";
81   s = format (s, "NAT44_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
82
83   return s;
84 }
85
86 vlib_node_registration_t snat_in2out_node;
87 vlib_node_registration_t snat_in2out_slowpath_node;
88 vlib_node_registration_t snat_in2out_fast_node;
89 vlib_node_registration_t snat_in2out_worker_handoff_node;
90 vlib_node_registration_t snat_det_in2out_node;
91 vlib_node_registration_t snat_in2out_output_node;
92 vlib_node_registration_t snat_in2out_output_slowpath_node;
93 vlib_node_registration_t snat_in2out_output_worker_handoff_node;
94 vlib_node_registration_t snat_hairpin_dst_node;
95 vlib_node_registration_t snat_hairpin_src_node;
96 vlib_node_registration_t nat44_hairpinning_node;
97
98
/*
 * X-macro listing every in2out error as (symbol, description).
 * It is expanded twice below: once into the snat_in2out_error_t enum
 * and once into the matching description table, so both always stay
 * in the same order.
 */
#define foreach_snat_in2out_error                               \
  _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")               \
  _(IN2OUT_PACKETS, "Good in2out packets processed")            \
  _(OUT_OF_PORTS, "Out of ports")                               \
  _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")                \
  _(BAD_ICMP_TYPE, "unsupported ICMP type")                     \
  _(NO_TRANSLATION, "No translation")                           \
  _(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")

/* Error codes; used in this file as indices into node->errors[]. */
typedef enum
{
#define _(name, text) SNAT_IN2OUT_ERROR_##name,
  foreach_snat_in2out_error
#undef _
    SNAT_IN2OUT_N_ERROR,
} snat_in2out_error_t;

/* Description strings, indexed by snat_in2out_error_t. */
static char *snat_in2out_error_strings[] = {
#define _(name, text) text,
  foreach_snat_in2out_error
#undef _
};
120
/* Next indices returned by the in2out helpers in this file. */
typedef enum
{
  SNAT_IN2OUT_NEXT_LOOKUP = 0,
  SNAT_IN2OUT_NEXT_DROP,
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  /* number of next indices; keep last */
  SNAT_IN2OUT_N_NEXT,
} snat_in2out_next_t;
128
/* Next indices for the hairpin source path (SNAT_HAIRPIN_SRC_*). */
typedef enum
{
  SNAT_HAIRPIN_SRC_NEXT_DROP = 0,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
  SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
  /* number of next indices; keep last */
  SNAT_HAIRPIN_SRC_N_NEXT,
} snat_hairpin_next_t;
136
137 /**
138  * @brief Check if packet should be translated
139  *
140  * Packets aimed at outside interface and external addresss with active session
141  * should be translated.
142  *
143  * @param sm            NAT main
144  * @param rt            NAT runtime data
145  * @param sw_if_index0  index of the inside interface
146  * @param ip0           IPv4 header
147  * @param proto0        NAT protocol
148  * @param rx_fib_index0 RX FIB index
149  *
150  * @returns 0 if packet should be translated otherwise 1
151  */
152 static inline int
153 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
154                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
155                          u32 rx_fib_index0)
156 {
157   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
158   fib_prefix_t pfx = {
159     .fp_proto = FIB_PROTOCOL_IP4,
160     .fp_len = 32,
161     .fp_addr = {
162         .ip4.as_u32 = ip0->dst_address.as_u32,
163     },
164   };
165
166   /* Don't NAT packet aimed at the intfc address */
167   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
168                                       ip0->dst_address.as_u32)))
169     return 1;
170
171   fei = fib_table_lookup (rx_fib_index0, &pfx);
172   if (FIB_NODE_INDEX_INVALID != fei)
173     {
174       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
175       if (sw_if_index == ~0)
176         {
177           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
178           if (FIB_NODE_INDEX_INVALID != fei)
179             sw_if_index = fib_entry_get_resolving_interface (fei);
180         }
181       snat_interface_t *i;
182       pool_foreach (i, sm->interfaces,
183       ({
184         /* NAT packet aimed at outside interface */
185         if ((nat_interface_is_outside(i)) && (sw_if_index == i->sw_if_index))
186           return 0;
187       }));
188     }
189
190   return 1;
191 }
192
193 static inline int
194 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
195                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
196                     u32 rx_fib_index0, u32 thread_index)
197 {
198   udp_header_t * udp0 = ip4_next_header (ip0);
199   snat_session_key_t key0, sm0;
200   clib_bihash_kv_8_8_t kv0, value0;
201
202   key0.addr = ip0->dst_address;
203   key0.port = udp0->dst_port;
204   key0.protocol = proto0;
205   key0.fib_index = sm->outside_fib_index;
206   kv0.key = key0.as_u64;
207
208   /* NAT packet aimed at external address if */
209   /* has active sessions */
210   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
211                               &value0))
212     {
213       /* or is static mappings */
214       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
215         return 0;
216     }
217   else
218     return 0;
219
220   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
221                                  rx_fib_index0);
222 }
223
224 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
225                       ip4_header_t * ip0,
226                       u32 rx_fib_index0,
227                       snat_session_key_t * key0,
228                       snat_session_t ** sessionp,
229                       vlib_node_runtime_t * node,
230                       u32 next0,
231                       u32 thread_index)
232 {
233   snat_user_t *u;
234   snat_user_key_t user_key;
235   snat_session_t *s;
236   clib_bihash_kv_8_8_t kv0, value0;
237   u32 oldest_per_user_translation_list_index;
238   dlist_elt_t * oldest_per_user_translation_list_elt;
239   dlist_elt_t * per_user_translation_list_elt;
240   dlist_elt_t * per_user_list_head_elt;
241   u32 session_index;
242   snat_session_key_t key1;
243   u32 address_index = ~0;
244   u32 outside_fib_index;
245   uword * p;
246
247   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
248     {
249       b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
250       return SNAT_IN2OUT_NEXT_DROP;
251     }
252
253   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
254   if (! p)
255     {
256       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
257       return SNAT_IN2OUT_NEXT_DROP;
258     }
259   outside_fib_index = p[0];
260
261   key1.protocol = key0->protocol;
262   user_key.addr = ip0->src_address;
263   user_key.fib_index = rx_fib_index0;
264   kv0.key = user_key.as_u64;
265
266   /* Ever heard of the "user" = src ip4 address before? */
267   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].user_hash,
268                               &kv0, &value0))
269     {
270       /* no, make a new one */
271       pool_get (sm->per_thread_data[thread_index].users, u);
272       memset (u, 0, sizeof (*u));
273       u->addr = ip0->src_address;
274       u->fib_index = rx_fib_index0;
275
276       pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);
277
278       u->sessions_per_user_list_head_index = per_user_list_head_elt -
279         sm->per_thread_data[thread_index].list_pool;
280
281       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
282                        u->sessions_per_user_list_head_index);
283
284       kv0.value = u - sm->per_thread_data[thread_index].users;
285
286       /* add user */
287       clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].user_hash,
288                                &kv0, 1 /* is_add */);
289     }
290   else
291     {
292       u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
293                              value0.value);
294     }
295
296   /* Over quota? Recycle the least recently used dynamic translation */
297   if (u->nsessions >= sm->max_translations_per_user)
298     {
299       /* Remove the oldest dynamic translation */
300       do {
301           oldest_per_user_translation_list_index =
302             clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
303                                     u->sessions_per_user_list_head_index);
304
305           ASSERT (oldest_per_user_translation_list_index != ~0);
306
307           /* add it back to the end of the LRU list */
308           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
309                               u->sessions_per_user_list_head_index,
310                               oldest_per_user_translation_list_index);
311           /* Get the list element */
312           oldest_per_user_translation_list_elt =
313             pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
314                                oldest_per_user_translation_list_index);
315
316           /* Get the session index from the list element */
317           session_index = oldest_per_user_translation_list_elt->value;
318
319           /* Get the session */
320           s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
321                                  session_index);
322       } while (snat_is_session_static (s));
323
324       if (snat_is_unk_proto_session (s))
325         {
326           clib_bihash_kv_16_8_t up_kv;
327           nat_ed_ses_key_t key;
328
329           /* Remove from lookup tables */
330           key.l_addr = s->in2out.addr;
331           key.r_addr = s->ext_host_addr;
332           key.fib_index = s->in2out.fib_index;
333           key.proto = s->in2out.port;
334           key.rsvd = 0;
335           key.l_port = 0;
336           up_kv.key[0] = key.as_u64[0];
337           up_kv.key[1] = key.as_u64[1];
338           if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &up_kv, 0))
339             clib_warning ("in2out key del failed");
340
341           key.l_addr = s->out2in.addr;
342           key.fib_index = s->out2in.fib_index;
343           up_kv.key[0] = key.as_u64[0];
344           up_kv.key[1] = key.as_u64[1];
345           if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &up_kv, 0))
346             clib_warning ("out2in key del failed");
347         }
348       else
349         {
350           /* Remove in2out, out2in keys */
351           kv0.key = s->in2out.as_u64;
352           if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out,
353                                        &kv0, 0 /* is_add */))
354               clib_warning ("in2out key delete failed");
355           kv0.key = s->out2in.as_u64;
356           if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in,
357                                        &kv0, 0 /* is_add */))
358               clib_warning ("out2in key delete failed");
359
360           /* log NAT event */
361           snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
362                                               s->out2in.addr.as_u32,
363                                               s->in2out.protocol,
364                                               s->in2out.port,
365                                               s->out2in.port,
366                                               s->in2out.fib_index);
367
368           snat_free_outside_address_and_port
369             (sm->addresses, thread_index, &s->out2in, s->outside_address_index);
370         }
371       s->outside_address_index = ~0;
372
373       if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index0,
374                                                thread_index, &key1,
375                                                &address_index, sm->vrf_mode,
376                                                sm->port_per_thread,
377                                                sm->per_thread_data[thread_index].snat_thread_index))
378         {
379           ASSERT(0);
380
381           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
382           return SNAT_IN2OUT_NEXT_DROP;
383         }
384       s->outside_address_index = address_index;
385     }
386   else
387     {
388       u8 static_mapping = 1;
389
390       /* First try to match static mapping by local address and port */
391       if (snat_static_mapping_match (sm, *key0, &key1, 0, 0))
392         {
393           static_mapping = 0;
394           /* Try to create dynamic translation */
395           if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index0,
396                                                    thread_index, &key1,
397                                                    &address_index, sm->vrf_mode,
398                                                    sm->port_per_thread,
399                                                    sm->per_thread_data[thread_index].snat_thread_index))
400             {
401               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
402               return SNAT_IN2OUT_NEXT_DROP;
403             }
404         }
405
406       /* Create a new session */
407       pool_get (sm->per_thread_data[thread_index].sessions, s);
408       memset (s, 0, sizeof (*s));
409
410       s->outside_address_index = address_index;
411
412       if (static_mapping)
413         {
414           u->nstaticsessions++;
415           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
416         }
417       else
418         {
419           u->nsessions++;
420         }
421
422       /* Create list elts */
423       pool_get (sm->per_thread_data[thread_index].list_pool,
424                 per_user_translation_list_elt);
425       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
426                        per_user_translation_list_elt -
427                        sm->per_thread_data[thread_index].list_pool);
428
429       per_user_translation_list_elt->value =
430         s - sm->per_thread_data[thread_index].sessions;
431       s->per_user_index = per_user_translation_list_elt -
432                           sm->per_thread_data[thread_index].list_pool;
433       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
434
435       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
436                           s->per_user_list_head_index,
437                           per_user_translation_list_elt -
438                           sm->per_thread_data[thread_index].list_pool);
439    }
440
441   s->in2out = *key0;
442   s->out2in = key1;
443   s->out2in.protocol = key0->protocol;
444   s->out2in.fib_index = outside_fib_index;
445   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
446   *sessionp = s;
447
448   /* Add to translation hashes */
449   kv0.key = s->in2out.as_u64;
450   kv0.value = s - sm->per_thread_data[thread_index].sessions;
451   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
452                                1 /* is_add */))
453       clib_warning ("in2out key add failed");
454
455   kv0.key = s->out2in.as_u64;
456   kv0.value = s - sm->per_thread_data[thread_index].sessions;
457
458   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
459                                1 /* is_add */))
460       clib_warning ("out2in key add failed");
461
462   /* log NAT event */
463   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
464                                       s->out2in.addr.as_u32,
465                                       s->in2out.protocol,
466                                       s->in2out.port,
467                                       s->out2in.port,
468                                       s->in2out.fib_index);
469   return next0;
470 }
471
472 static_always_inline
473 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
474                                  snat_session_key_t *p_key0)
475 {
476   icmp46_header_t *icmp0;
477   snat_session_key_t key0;
478   icmp_echo_header_t *echo0, *inner_echo0 = 0;
479   ip4_header_t *inner_ip0 = 0;
480   void *l4_header = 0;
481   icmp46_header_t *inner_icmp0;
482
483   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
484   echo0 = (icmp_echo_header_t *)(icmp0+1);
485
486   if (!icmp_is_error_message (icmp0))
487     {
488       key0.protocol = SNAT_PROTOCOL_ICMP;
489       key0.addr = ip0->src_address;
490       key0.port = echo0->identifier;
491     }
492   else
493     {
494       inner_ip0 = (ip4_header_t *)(echo0+1);
495       l4_header = ip4_next_header (inner_ip0);
496       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
497       key0.addr = inner_ip0->dst_address;
498       switch (key0.protocol)
499         {
500         case SNAT_PROTOCOL_ICMP:
501           inner_icmp0 = (icmp46_header_t*)l4_header;
502           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
503           key0.port = inner_echo0->identifier;
504           break;
505         case SNAT_PROTOCOL_UDP:
506         case SNAT_PROTOCOL_TCP:
507           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
508           break;
509         default:
510           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
511         }
512     }
513   *p_key0 = key0;
514   return -1; /* success */
515 }
516
517 /**
518  * Get address and port values to be used for ICMP packet translation
519  * and create session if needed
520  *
521  * @param[in,out] sm             NAT main
522  * @param[in,out] node           NAT node runtime
523  * @param[in] thread_index       thread index
524  * @param[in,out] b0             buffer containing packet to be translated
525  * @param[out] p_proto           protocol used for matching
526  * @param[out] p_value           address and port after NAT translation
527  * @param[out] p_dont_translate  if packet should not be translated
528  * @param d                      optional parameter
529  * @param e                      optional parameter
530  */
531 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
532                            u32 thread_index, vlib_buffer_t *b0,
533                            ip4_header_t *ip0, u8 *p_proto,
534                            snat_session_key_t *p_value,
535                            u8 *p_dont_translate, void *d, void *e)
536 {
537   icmp46_header_t *icmp0;
538   u32 sw_if_index0;
539   u32 rx_fib_index0;
540   snat_session_key_t key0;
541   snat_session_t *s0 = 0;
542   u8 dont_translate = 0;
543   clib_bihash_kv_8_8_t kv0, value0;
544   u32 next0 = ~0;
545   int err;
546
547   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
548   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
549   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
550
551   err = icmp_get_key (ip0, &key0);
552   if (err != -1)
553     {
554       b0->error = node->errors[err];
555       next0 = SNAT_IN2OUT_NEXT_DROP;
556       goto out;
557     }
558   key0.fib_index = rx_fib_index0;
559
560   kv0.key = key0.as_u64;
561
562   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
563                               &value0))
564     {
565       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
566           IP_PROTOCOL_ICMP, rx_fib_index0, thread_index) &&
567           vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0))
568         {
569           dont_translate = 1;
570           goto out;
571         }
572
573       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
574         {
575           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
576           next0 = SNAT_IN2OUT_NEXT_DROP;
577           goto out;
578         }
579
580       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
581                          &s0, node, next0, thread_index);
582
583       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
584         goto out;
585     }
586   else
587     {
588       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
589                         icmp0->type != ICMP4_echo_reply &&
590                         !icmp_is_error_message (icmp0)))
591         {
592           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
593           next0 = SNAT_IN2OUT_NEXT_DROP;
594           goto out;
595         }
596
597       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
598                               value0.value);
599     }
600
601 out:
602   *p_proto = key0.protocol;
603   if (s0)
604     *p_value = s0->out2in;
605   *p_dont_translate = dont_translate;
606   if (d)
607     *(snat_session_t**)d = s0;
608   return next0;
609 }
610
611 /**
612  * Get address and port values to be used for ICMP packet translation
613  *
614  * @param[in] sm                 NAT main
615  * @param[in,out] node           NAT node runtime
616  * @param[in] thread_index       thread index
617  * @param[in,out] b0             buffer containing packet to be translated
618  * @param[out] p_proto           protocol used for matching
619  * @param[out] p_value           address and port after NAT translation
620  * @param[out] p_dont_translate  if packet should not be translated
621  * @param d                      optional parameter
622  * @param e                      optional parameter
623  */
624 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
625                            u32 thread_index, vlib_buffer_t *b0,
626                            ip4_header_t *ip0, u8 *p_proto,
627                            snat_session_key_t *p_value,
628                            u8 *p_dont_translate, void *d, void *e)
629 {
630   icmp46_header_t *icmp0;
631   u32 sw_if_index0;
632   u32 rx_fib_index0;
633   snat_session_key_t key0;
634   snat_session_key_t sm0;
635   u8 dont_translate = 0;
636   u8 is_addr_only;
637   u32 next0 = ~0;
638   int err;
639
640   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
641   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
642   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
643
644   err = icmp_get_key (ip0, &key0);
645   if (err != -1)
646     {
647       b0->error = node->errors[err];
648       next0 = SNAT_IN2OUT_NEXT_DROP;
649       goto out2;
650     }
651   key0.fib_index = rx_fib_index0;
652
653   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only))
654     {
655       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
656           IP_PROTOCOL_ICMP, rx_fib_index0)))
657         {
658           dont_translate = 1;
659           goto out;
660         }
661
662       if (icmp_is_error_message (icmp0))
663         {
664           next0 = SNAT_IN2OUT_NEXT_DROP;
665           goto out;
666         }
667
668       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
669       next0 = SNAT_IN2OUT_NEXT_DROP;
670       goto out;
671     }
672
673   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
674                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
675                     !icmp_is_error_message (icmp0)))
676     {
677       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
678       next0 = SNAT_IN2OUT_NEXT_DROP;
679       goto out;
680     }
681
682 out:
683   *p_value = sm0;
684 out2:
685   *p_proto = key0.protocol;
686   *p_dont_translate = dont_translate;
687   return next0;
688 }
689
690 static inline u32 icmp_in2out (snat_main_t *sm,
691                                vlib_buffer_t * b0,
692                                ip4_header_t * ip0,
693                                icmp46_header_t * icmp0,
694                                u32 sw_if_index0,
695                                u32 rx_fib_index0,
696                                vlib_node_runtime_t * node,
697                                u32 next0,
698                                u32 thread_index,
699                                void *d,
700                                void *e)
701 {
702   snat_session_key_t sm0;
703   u8 protocol;
704   icmp_echo_header_t *echo0, *inner_echo0 = 0;
705   ip4_header_t *inner_ip0;
706   void *l4_header = 0;
707   icmp46_header_t *inner_icmp0;
708   u8 dont_translate;
709   u32 new_addr0, old_addr0;
710   u16 old_id0, new_id0;
711   ip_csum_t sum0;
712   u16 checksum0;
713   u32 next0_tmp;
714
715   echo0 = (icmp_echo_header_t *)(icmp0+1);
716
717   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0, ip0,
718                                        &protocol, &sm0, &dont_translate, d, e);
719   if (next0_tmp != ~0)
720     next0 = next0_tmp;
721   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
722     goto out;
723
724   sum0 = ip_incremental_checksum (0, icmp0,
725                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
726   checksum0 = ~ip_csum_fold (sum0);
727   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
728     {
729       next0 = SNAT_IN2OUT_NEXT_DROP;
730       goto out;
731     }
732
733   old_addr0 = ip0->src_address.as_u32;
734   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
735   if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
736     vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
737
738   sum0 = ip0->checksum;
739   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
740                          src_address /* changed member */);
741   ip0->checksum = ip_csum_fold (sum0);
742
743   if (!icmp_is_error_message (icmp0))
744     {
745       new_id0 = sm0.port;
746       if (PREDICT_FALSE(new_id0 != echo0->identifier))
747         {
748           old_id0 = echo0->identifier;
749           new_id0 = sm0.port;
750           echo0->identifier = new_id0;
751
752           sum0 = icmp0->checksum;
753           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
754                                  identifier);
755           icmp0->checksum = ip_csum_fold (sum0);
756         }
757     }
758   else
759     {
760       inner_ip0 = (ip4_header_t *)(echo0+1);
761       l4_header = ip4_next_header (inner_ip0);
762
763       if (!ip4_header_checksum_is_valid (inner_ip0))
764         {
765           next0 = SNAT_IN2OUT_NEXT_DROP;
766           goto out;
767         }
768
769       old_addr0 = inner_ip0->dst_address.as_u32;
770       inner_ip0->dst_address = sm0.addr;
771       new_addr0 = inner_ip0->dst_address.as_u32;
772
773       sum0 = icmp0->checksum;
774       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
775                              dst_address /* changed member */);
776       icmp0->checksum = ip_csum_fold (sum0);
777
778       switch (protocol)
779         {
780           case SNAT_PROTOCOL_ICMP:
781             inner_icmp0 = (icmp46_header_t*)l4_header;
782             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
783
784             old_id0 = inner_echo0->identifier;
785             new_id0 = sm0.port;
786             inner_echo0->identifier = new_id0;
787
788             sum0 = icmp0->checksum;
789             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
790                                    identifier);
791             icmp0->checksum = ip_csum_fold (sum0);
792             break;
793           case SNAT_PROTOCOL_UDP:
794           case SNAT_PROTOCOL_TCP:
795             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
796             new_id0 = sm0.port;
797             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
798
799             sum0 = icmp0->checksum;
800             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
801                                    dst_port);
802             icmp0->checksum = ip_csum_fold (sum0);
803             break;
804           default:
805             ASSERT(0);
806         }
807     }
808
809 out:
810   return next0;
811 }
812
813 /**
814  * @brief Hairpinning
815  *
816  * Hairpinning allows two endpoints on the internal side of the NAT to
817  * communicate even if they only use each other's external IP addresses
818  * and ports.
819  *
820  * @param sm     NAT main.
821  * @param b0     Vlib buffer.
822  * @param ip0    IP header.
823  * @param udp0   UDP header.
824  * @param tcp0   TCP header.
825  * @param proto0 NAT protocol.
826  */
827 static inline int
828 snat_hairpinning (snat_main_t *sm,
829                   vlib_buffer_t * b0,
830                   ip4_header_t * ip0,
831                   udp_header_t * udp0,
832                   tcp_header_t * tcp0,
833                   u32 proto0)
834 {
835   snat_session_key_t key0, sm0;
836   snat_session_t * s0;
837   clib_bihash_kv_8_8_t kv0, value0;
838   ip_csum_t sum0;
839   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
840   u16 new_dst_port0, old_dst_port0;
841
842   key0.addr = ip0->dst_address;
843   key0.port = udp0->dst_port;
844   key0.protocol = proto0;
845   key0.fib_index = sm->outside_fib_index;
846   kv0.key = key0.as_u64;
847
848   /* Check if destination is static mappings */
849   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
850     {
851       new_dst_addr0 = sm0.addr.as_u32;
852       new_dst_port0 = sm0.port;
853       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
854     }
855   /* or active session */
856   else
857     {
858       if (sm->num_workers > 1)
859         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
860       else
861         ti = sm->num_workers;
862
863       if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
864         {
865           si = value0.value;
866
867           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
868           new_dst_addr0 = s0->in2out.addr.as_u32;
869           new_dst_port0 = s0->in2out.port;
870           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
871         }
872     }
873
874   /* Destination is behind the same NAT, use internal address and port */
875   if (new_dst_addr0)
876     {
877       old_dst_addr0 = ip0->dst_address.as_u32;
878       ip0->dst_address.as_u32 = new_dst_addr0;
879       sum0 = ip0->checksum;
880       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
881                              ip4_header_t, dst_address);
882       ip0->checksum = ip_csum_fold (sum0);
883
884       old_dst_port0 = tcp0->dst;
885       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
886         {
887           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
888             {
889               tcp0->dst = new_dst_port0;
890               sum0 = tcp0->checksum;
891               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
892                                      ip4_header_t, dst_address);
893               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
894                                      ip4_header_t /* cheat */, length);
895               tcp0->checksum = ip_csum_fold(sum0);
896             }
897           else
898             {
899               udp0->dst_port = new_dst_port0;
900               udp0->checksum = 0;
901             }
902         }
903       else
904         {
905           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
906             {
907               sum0 = tcp0->checksum;
908               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
909                                      ip4_header_t, dst_address);
910               tcp0->checksum = ip_csum_fold(sum0);
911             }
912         }
913       return 1;
914     }
915   return 0;
916 }
917
918 static inline void
919 snat_icmp_hairpinning (snat_main_t *sm,
920                        vlib_buffer_t * b0,
921                        ip4_header_t * ip0,
922                        icmp46_header_t * icmp0)
923 {
924   snat_session_key_t key0, sm0;
925   clib_bihash_kv_8_8_t kv0, value0;
926   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
927   ip_csum_t sum0;
928   snat_session_t *s0;
929
930   if (!icmp_is_error_message (icmp0))
931     {
932       icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
933       u16 icmp_id0 = echo0->identifier;
934       key0.addr = ip0->dst_address;
935       key0.port = icmp_id0;
936       key0.protocol = SNAT_PROTOCOL_ICMP;
937       key0.fib_index = sm->outside_fib_index;
938       kv0.key = key0.as_u64;
939
940       if (sm->num_workers > 1)
941         ti = (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread;
942       else
943         ti = sm->num_workers;
944
945       /* Check if destination is in active sessions */
946       if (clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0,
947                                   &value0))
948         {
949           /* or static mappings */
950           if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
951             {
952               new_dst_addr0 = sm0.addr.as_u32;
953               vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
954             }
955         }
956       else
957         {
958           si = value0.value;
959
960           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
961           new_dst_addr0 = s0->in2out.addr.as_u32;
962           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
963           echo0->identifier = s0->in2out.port;
964           sum0 = icmp0->checksum;
965           sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
966                                  icmp_echo_header_t, identifier);
967           icmp0->checksum = ip_csum_fold (sum0);
968         }
969
970       /* Destination is behind the same NAT, use internal address and port */
971       if (new_dst_addr0)
972         {
973           old_dst_addr0 = ip0->dst_address.as_u32;
974           ip0->dst_address.as_u32 = new_dst_addr0;
975           sum0 = ip0->checksum;
976           sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
977                                  ip4_header_t, dst_address);
978           ip0->checksum = ip_csum_fold (sum0);
979         }
980     }
981
982 }
983
984 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
985                                          vlib_buffer_t * b0,
986                                          ip4_header_t * ip0,
987                                          icmp46_header_t * icmp0,
988                                          u32 sw_if_index0,
989                                          u32 rx_fib_index0,
990                                          vlib_node_runtime_t * node,
991                                          u32 next0,
992                                          f64 now,
993                                          u32 thread_index,
994                                          snat_session_t ** p_s0)
995 {
996   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
997                       next0, thread_index, p_s0, 0);
998   snat_session_t * s0 = *p_s0;
999   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
1000     {
1001       /* Hairpinning */
1002       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == 0)
1003         snat_icmp_hairpinning(sm, b0, ip0, icmp0);
1004       /* Accounting */
1005       s0->last_heard = now;
1006       s0->total_pkts++;
1007       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
1008       /* Per-user LRU list maintenance for dynamic translations */
1009       if (!snat_is_session_static (s0))
1010         {
1011           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1012                              s0->per_user_index);
1013           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1014                               s0->per_user_list_head_index,
1015                               s0->per_user_index);
1016         }
1017     }
1018   return next0;
1019 }
1020 static inline void
1021 snat_hairpinning_unknown_proto (snat_main_t *sm,
1022                                 vlib_buffer_t * b,
1023                                 ip4_header_t * ip)
1024 {
1025   u32 old_addr, new_addr = 0, ti = 0;
1026   clib_bihash_kv_8_8_t kv, value;
1027   clib_bihash_kv_16_8_t s_kv, s_value;
1028   nat_ed_ses_key_t key;
1029   snat_session_key_t m_key;
1030   snat_static_mapping_t *m;
1031   ip_csum_t sum;
1032   snat_session_t *s;
1033
1034   old_addr = ip->dst_address.as_u32;
1035   key.l_addr.as_u32 = ip->dst_address.as_u32;
1036   key.r_addr.as_u32 = ip->src_address.as_u32;
1037   key.fib_index = sm->outside_fib_index;
1038   key.proto = ip->protocol;
1039   key.rsvd = 0;
1040   key.l_port = 0;
1041   s_kv.key[0] = key.as_u64[0];
1042   s_kv.key[1] = key.as_u64[1];
1043   if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1044     {
1045       m_key.addr = ip->dst_address;
1046       m_key.fib_index = sm->outside_fib_index;
1047       m_key.port = 0;
1048       m_key.protocol = 0;
1049       kv.key = m_key.as_u64;
1050       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1051         return;
1052
1053       m = pool_elt_at_index (sm->static_mappings, value.value);
1054       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1055         vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
1056       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
1057     }
1058   else
1059     {
1060       if (sm->num_workers > 1)
1061         ti = sm->worker_out2in_cb (ip, sm->outside_fib_index);
1062       else
1063         ti = sm->num_workers;
1064
1065       s = pool_elt_at_index (sm->per_thread_data[ti].sessions, s_value.value);
1066       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1067         vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
1068       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
1069     }
1070   sum = ip->checksum;
1071   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
1072   ip->checksum = ip_csum_fold (sum);
1073 }
1074
1075 static snat_session_t *
1076 snat_in2out_unknown_proto (snat_main_t *sm,
1077                            vlib_buffer_t * b,
1078                            ip4_header_t * ip,
1079                            u32 rx_fib_index,
1080                            u32 thread_index,
1081                            f64 now,
1082                            vlib_main_t * vm,
1083                            vlib_node_runtime_t * node)
1084 {
1085   clib_bihash_kv_8_8_t kv, value;
1086   clib_bihash_kv_16_8_t s_kv, s_value;
1087   snat_static_mapping_t *m;
1088   snat_session_key_t m_key;
1089   u32 old_addr, new_addr = 0;
1090   ip_csum_t sum;
1091   snat_user_key_t u_key;
1092   snat_user_t *u;
1093   dlist_elt_t *head, *elt, *oldest;
1094   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1095   u32 elt_index, head_index, ses_index, oldest_index;
1096   snat_session_t * s;
1097   nat_ed_ses_key_t key;
1098   u32 address_index = ~0;
1099   int i;
1100   u8 is_sm = 0;
1101
1102   old_addr = ip->src_address.as_u32;
1103
1104   key.l_addr = ip->src_address;
1105   key.r_addr = ip->dst_address;
1106   key.fib_index = rx_fib_index;
1107   key.proto = ip->protocol;
1108   key.rsvd = 0;
1109   key.l_port = 0;
1110   s_kv.key[0] = key.as_u64[0];
1111   s_kv.key[1] = key.as_u64[1];
1112
1113   if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
1114     {
1115       s = pool_elt_at_index (tsm->sessions, s_value.value);
1116       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1117     }
1118   else
1119     {
1120       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
1121         {
1122           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
1123           return 0;
1124         }
1125
1126       u_key.addr = ip->src_address;
1127       u_key.fib_index = rx_fib_index;
1128       kv.key = u_key.as_u64;
1129
1130       /* Ever heard of the "user" = src ip4 address before? */
1131       if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1132         {
1133           /* no, make a new one */
1134           pool_get (tsm->users, u);
1135           memset (u, 0, sizeof (*u));
1136           u->addr = ip->src_address;
1137           u->fib_index = rx_fib_index;
1138
1139           pool_get (tsm->list_pool, head);
1140           u->sessions_per_user_list_head_index = head - tsm->list_pool;
1141
1142           clib_dlist_init (tsm->list_pool,
1143                            u->sessions_per_user_list_head_index);
1144
1145           kv.value = u - tsm->users;
1146
1147           /* add user */
1148           clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1);
1149         }
1150       else
1151         {
1152           u = pool_elt_at_index (tsm->users, value.value);
1153         }
1154
1155       m_key.addr = ip->src_address;
1156       m_key.port = 0;
1157       m_key.protocol = 0;
1158       m_key.fib_index = rx_fib_index;
1159       kv.key = m_key.as_u64;
1160
1161       /* Try to find static mapping first */
1162       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
1163         {
1164           m = pool_elt_at_index (sm->static_mappings, value.value);
1165           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
1166           is_sm = 1;
1167           goto create_ses;
1168         }
1169       /* Fallback to 3-tuple key */
1170       else
1171         {
1172           /* Choose same out address as for TCP/UDP session to same destination */
1173           if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1174             {
1175               head_index = u->sessions_per_user_list_head_index;
1176               head = pool_elt_at_index (tsm->list_pool, head_index);
1177               elt_index = head->next;
1178               elt = pool_elt_at_index (tsm->list_pool, elt_index);
1179               ses_index = elt->value;
1180               while (ses_index != ~0)
1181                 {
1182                   s =  pool_elt_at_index (tsm->sessions, ses_index);
1183                   elt_index = elt->next;
1184                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1185                   ses_index = elt->value;
1186
1187                   if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
1188                     {
1189                       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1190                       address_index = s->outside_address_index;
1191
1192                       key.fib_index = sm->outside_fib_index;
1193                       key.l_addr.as_u32 = new_addr;
1194                       s_kv.key[0] = key.as_u64[0];
1195                       s_kv.key[1] = key.as_u64[1];
1196                       if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1197                         break;
1198
1199                       goto create_ses;
1200                     }
1201                 }
1202             }
1203           key.fib_index = sm->outside_fib_index;
1204           for (i = 0; i < vec_len (sm->addresses); i++)
1205             {
1206               key.l_addr.as_u32 = sm->addresses[i].addr.as_u32;
1207               s_kv.key[0] = key.as_u64[0];
1208               s_kv.key[1] = key.as_u64[1];
1209               if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1210                 {
1211                   new_addr = ip->src_address.as_u32 = key.l_addr.as_u32;
1212                   address_index = i;
1213                   goto create_ses;
1214                 }
1215             }
1216           return 0;
1217         }
1218
1219 create_ses:
1220       /* Over quota? Recycle the least recently used dynamic translation */
1221       if (u->nsessions >= sm->max_translations_per_user && !is_sm)
1222         {
1223           /* Remove the oldest dynamic translation */
1224           do {
1225               oldest_index = clib_dlist_remove_head (
1226                 tsm->list_pool, u->sessions_per_user_list_head_index);
1227
1228               ASSERT (oldest_index != ~0);
1229
1230               /* add it back to the end of the LRU list */
1231               clib_dlist_addtail (tsm->list_pool,
1232                                   u->sessions_per_user_list_head_index,
1233                                   oldest_index);
1234               /* Get the list element */
1235               oldest = pool_elt_at_index (tsm->list_pool, oldest_index);
1236
1237               /* Get the session index from the list element */
1238               ses_index = oldest->value;
1239
1240               /* Get the session */
1241               s = pool_elt_at_index (tsm->sessions, ses_index);
1242           } while (snat_is_session_static (s));
1243
1244           if (snat_is_unk_proto_session (s))
1245             {
1246               /* Remove from lookup tables */
1247               key.l_addr = s->in2out.addr;
1248               key.r_addr = s->ext_host_addr;
1249               key.fib_index = s->in2out.fib_index;
1250               key.proto = s->in2out.port;
1251               s_kv.key[0] = key.as_u64[0];
1252               s_kv.key[1] = key.as_u64[1];
1253               if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 0))
1254                 clib_warning ("in2out key del failed");
1255
1256               key.l_addr = s->out2in.addr;
1257               key.fib_index = s->out2in.fib_index;
1258               s_kv.key[0] = key.as_u64[0];
1259               s_kv.key[1] = key.as_u64[1];
1260               if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 0))
1261                 clib_warning ("out2in key del failed");
1262             }
1263           else
1264             {
1265               /* log NAT event */
1266               snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
1267                                                   s->out2in.addr.as_u32,
1268                                                   s->in2out.protocol,
1269                                                   s->in2out.port,
1270                                                   s->out2in.port,
1271                                                   s->in2out.fib_index);
1272
1273               snat_free_outside_address_and_port (sm->addresses, thread_index,
1274                                                   &s->out2in,
1275                                                   s->outside_address_index);
1276
1277               /* Remove in2out, out2in keys */
1278               kv.key = s->in2out.as_u64;
1279               if (clib_bihash_add_del_8_8 (
1280                     &sm->per_thread_data[thread_index].in2out, &kv, 0))
1281                 clib_warning ("in2out key del failed");
1282               kv.key = s->out2in.as_u64;
1283               if (clib_bihash_add_del_8_8 (
1284                     &sm->per_thread_data[thread_index].out2in, &kv, 0))
1285                 clib_warning ("out2in key del failed");
1286             }
1287         }
1288       else
1289         {
1290           /* Create a new session */
1291           pool_get (tsm->sessions, s);
1292           memset (s, 0, sizeof (*s));
1293
1294           /* Create list elts */
1295           pool_get (tsm->list_pool, elt);
1296           clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
1297           elt->value = s - tsm->sessions;
1298           s->per_user_index = elt - tsm->list_pool;
1299           s->per_user_list_head_index = u->sessions_per_user_list_head_index;
1300           clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1301                               s->per_user_index);
1302         }
1303
1304       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1305       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
1306       s->outside_address_index = address_index;
1307       s->out2in.addr.as_u32 = new_addr;
1308       s->out2in.fib_index = sm->outside_fib_index;
1309       s->in2out.addr.as_u32 = old_addr;
1310       s->in2out.fib_index = rx_fib_index;
1311       s->in2out.port = s->out2in.port = ip->protocol;
1312       if (is_sm)
1313         {
1314           u->nstaticsessions++;
1315           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1316         }
1317       else
1318         {
1319           u->nsessions++;
1320         }
1321
1322       /* Add to lookup tables */
1323       key.l_addr.as_u32 = old_addr;
1324       key.r_addr = ip->dst_address;
1325       key.proto = ip->protocol;
1326       key.fib_index = rx_fib_index;
1327       s_kv.key[0] = key.as_u64[0];
1328       s_kv.key[1] = key.as_u64[1];
1329       s_kv.value = s - tsm->sessions;
1330       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1331         clib_warning ("in2out key add failed");
1332
1333       key.l_addr.as_u32 = new_addr;
1334       key.fib_index = sm->outside_fib_index;
1335       s_kv.key[0] = key.as_u64[0];
1336       s_kv.key[1] = key.as_u64[1];
1337       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1338         clib_warning ("out2in key add failed");
1339   }
1340
1341   /* Update IP checksum */
1342   sum = ip->checksum;
1343   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1344   ip->checksum = ip_csum_fold (sum);
1345
1346   /* Accounting */
1347   s->last_heard = now;
1348   s->total_pkts++;
1349   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1350   /* Per-user LRU list maintenance */
1351   clib_dlist_remove (tsm->list_pool, s->per_user_index);
1352   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1353                       s->per_user_index);
1354
1355   /* Hairpinning */
1356   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1357     snat_hairpinning_unknown_proto(sm, b, ip);
1358
1359   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1360     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1361
1362   return s;
1363 }
1364
1365 static snat_session_t *
1366 snat_in2out_lb (snat_main_t *sm,
1367                 vlib_buffer_t * b,
1368                 ip4_header_t * ip,
1369                 u32 rx_fib_index,
1370                 u32 thread_index,
1371                 f64 now,
1372                 vlib_main_t * vm,
1373                 vlib_node_runtime_t * node)
1374 {
1375   nat_ed_ses_key_t key;
1376   clib_bihash_kv_16_8_t s_kv, s_value;
1377   udp_header_t *udp = ip4_next_header (ip);
1378   tcp_header_t *tcp = (tcp_header_t *) udp;
1379   snat_session_t *s = 0;
1380   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1381   u32 old_addr, new_addr;
1382   u16 new_port, old_port;
1383   ip_csum_t sum;
1384   u32 proto = ip_proto_to_snat_proto (ip->protocol);
1385   snat_session_key_t e_key, l_key;
1386   clib_bihash_kv_8_8_t kv, value;
1387   snat_user_key_t u_key;
1388   snat_user_t *u;
1389   dlist_elt_t *head, *elt;
1390
1391   old_addr = ip->src_address.as_u32;
1392
1393   key.l_addr = ip->src_address;
1394   key.r_addr = ip->dst_address;
1395   key.fib_index = rx_fib_index;
1396   key.proto = ip->protocol;
1397   key.rsvd = 0;
1398   key.l_port = udp->src_port;
1399   s_kv.key[0] = key.as_u64[0];
1400   s_kv.key[1] = key.as_u64[1];
1401
1402   if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
1403     {
1404       s = pool_elt_at_index (tsm->sessions, s_value.value);
1405     }
1406   else
1407     {
1408       if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index)))
1409         {
1410           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
1411           return 0;
1412         }
1413
1414       l_key.addr = ip->src_address;
1415       l_key.port = udp->src_port;
1416       l_key.protocol = proto;
1417       l_key.fib_index = rx_fib_index;
1418       if (snat_static_mapping_match(sm, l_key, &e_key, 0, 0))
1419         return 0;
1420
1421       u_key.addr = ip->src_address;
1422       u_key.fib_index = rx_fib_index;
1423       kv.key = u_key.as_u64;
1424
1425       /* Ever heard of the "user" = src ip4 address before? */
1426       if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1427         {
1428           /* no, make a new one */
1429           pool_get (tsm->users, u);
1430           memset (u, 0, sizeof (*u));
1431           u->addr = ip->src_address;
1432           u->fib_index = rx_fib_index;
1433
1434           pool_get (tsm->list_pool, head);
1435           u->sessions_per_user_list_head_index = head - tsm->list_pool;
1436
1437           clib_dlist_init (tsm->list_pool,
1438                            u->sessions_per_user_list_head_index);
1439
1440           kv.value = u - tsm->users;
1441
1442           /* add user */
1443           if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1))
1444             clib_warning ("user key add failed");
1445         }
1446       else
1447         {
1448           u = pool_elt_at_index (tsm->users, value.value);
1449         }
1450
1451       /* Create a new session */
1452       pool_get (tsm->sessions, s);
1453       memset (s, 0, sizeof (*s));
1454
1455       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1456       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1457       s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
1458       s->outside_address_index = ~0;
1459       s->in2out = l_key;
1460       s->out2in = e_key;
1461       u->nstaticsessions++;
1462
1463       /* Create list elts */
1464       pool_get (tsm->list_pool, elt);
1465       clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
1466       elt->value = s - tsm->sessions;
1467       s->per_user_index = elt - tsm->list_pool;
1468       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
1469       clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1470                           s->per_user_index);
1471
1472       /* Add to lookup tables */
1473       s_kv.value = s - tsm->sessions;
1474       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1475         clib_warning ("in2out-ed key add failed");
1476
1477       key.l_addr = e_key.addr;
1478       key.fib_index = e_key.fib_index;
1479       key.l_port = e_key.port;
1480       s_kv.key[0] = key.as_u64[0];
1481       s_kv.key[1] = key.as_u64[1];
1482       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1483         clib_warning ("out2in-ed key add failed");
1484     }
1485
1486   new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1487
1488   /* Update IP checksum */
1489   sum = ip->checksum;
1490   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1491   ip->checksum = ip_csum_fold (sum);
1492
1493   if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
1494     {
1495       old_port = tcp->src_port;
1496       tcp->src_port = s->out2in.port;
1497       new_port = tcp->src_port;
1498
1499       sum = tcp->checksum;
1500       sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1501       sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
1502       tcp->checksum = ip_csum_fold(sum);
1503     }
1504   else
1505     {
1506       udp->src_port = s->out2in.port;
1507       udp->checksum = 0;
1508     }
1509
1510   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1511     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1512
1513   /* Accounting */
1514   s->last_heard = now;
1515   s->total_pkts++;
1516   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1517   return s;
1518 }
1519
1520 static inline uword
1521 snat_in2out_node_fn_inline (vlib_main_t * vm,
1522                             vlib_node_runtime_t * node,
1523                             vlib_frame_t * frame, int is_slow_path,
1524                             int is_output_feature)
1525 {
1526   u32 n_left_from, * from, * to_next;
1527   snat_in2out_next_t next_index;
1528   u32 pkts_processed = 0;
1529   snat_main_t * sm = &snat_main;
1530   f64 now = vlib_time_now (vm);
1531   u32 stats_node_index;
1532   u32 thread_index = vlib_get_thread_index ();
1533
1534   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
1535     snat_in2out_node.index;
1536
1537   from = vlib_frame_vector_args (frame);
1538   n_left_from = frame->n_vectors;
1539   next_index = node->cached_next_index;
1540
1541   while (n_left_from > 0)
1542     {
1543       u32 n_left_to_next;
1544
1545       vlib_get_next_frame (vm, node, next_index,
1546                            to_next, n_left_to_next);
1547
1548       while (n_left_from >= 4 && n_left_to_next >= 2)
1549         {
1550           u32 bi0, bi1;
1551           vlib_buffer_t * b0, * b1;
1552           u32 next0, next1;
1553           u32 sw_if_index0, sw_if_index1;
1554           ip4_header_t * ip0, * ip1;
1555           ip_csum_t sum0, sum1;
1556           u32 new_addr0, old_addr0, new_addr1, old_addr1;
1557           u16 old_port0, new_port0, old_port1, new_port1;
1558           udp_header_t * udp0, * udp1;
1559           tcp_header_t * tcp0, * tcp1;
1560           icmp46_header_t * icmp0, * icmp1;
1561           snat_session_key_t key0, key1;
1562           u32 rx_fib_index0, rx_fib_index1;
1563           u32 proto0, proto1;
1564           snat_session_t * s0 = 0, * s1 = 0;
1565           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1566           u32 iph_offset0 = 0, iph_offset1 = 0;
1567
1568           /* Prefetch next iteration. */
1569           {
1570             vlib_buffer_t * p2, * p3;
1571
1572             p2 = vlib_get_buffer (vm, from[2]);
1573             p3 = vlib_get_buffer (vm, from[3]);
1574
1575             vlib_prefetch_buffer_header (p2, LOAD);
1576             vlib_prefetch_buffer_header (p3, LOAD);
1577
1578             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1579             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1580           }
1581
1582           /* speculatively enqueue b0 and b1 to the current next frame */
1583           to_next[0] = bi0 = from[0];
1584           to_next[1] = bi1 = from[1];
1585           from += 2;
1586           to_next += 2;
1587           n_left_from -= 2;
1588           n_left_to_next -= 2;
1589
1590           b0 = vlib_get_buffer (vm, bi0);
1591           b1 = vlib_get_buffer (vm, bi1);
1592
1593           if (is_output_feature)
1594             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1595
1596           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1597                  iph_offset0);
1598
1599           udp0 = ip4_next_header (ip0);
1600           tcp0 = (tcp_header_t *) udp0;
1601           icmp0 = (icmp46_header_t *) udp0;
1602
1603           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1604           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1605                                    sw_if_index0);
1606
1607           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1608
1609           if (PREDICT_FALSE(ip0->ttl == 1))
1610             {
1611               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1612               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1613                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1614                                            0);
1615               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1616               goto trace00;
1617             }
1618
1619           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1620
1621           /* Next configured feature, probably ip4-lookup */
1622           if (is_slow_path)
1623             {
1624               if (PREDICT_FALSE (proto0 == ~0))
1625                 {
1626                   s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1627                                                   thread_index, now, vm, node);
1628                   if (!s0)
1629                     next0 = SNAT_IN2OUT_NEXT_DROP;
1630                   goto trace00;
1631                 }
1632
1633               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1634                 {
1635                   next0 = icmp_in2out_slow_path
1636                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
1637                      node, next0, now, thread_index, &s0);
1638                   goto trace00;
1639                 }
1640             }
1641           else
1642             {
1643               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1644                 {
1645                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1646                   goto trace00;
1647                 }
1648             }
1649
1650           key0.addr = ip0->src_address;
1651           key0.port = udp0->src_port;
1652           key0.protocol = proto0;
1653           key0.fib_index = rx_fib_index0;
1654
1655           kv0.key = key0.as_u64;
1656
1657           if (PREDICT_FALSE (clib_bihash_search_8_8 (
1658               &sm->per_thread_data[thread_index].in2out, &kv0, &value0) != 0))
1659             {
1660               if (is_slow_path)
1661                 {
1662                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1663                       ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature)
1664                     goto trace00;
1665
1666                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1667                                      &s0, node, next0, thread_index);
1668                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1669                     goto trace00;
1670                 }
1671               else
1672                 {
1673                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1674                   goto trace00;
1675                 }
1676             }
1677           else
1678             {
1679               if (PREDICT_FALSE (value0.value == ~0ULL))
1680                 {
1681                   if (is_slow_path)
1682                     {
1683                       s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
1684                                           thread_index, now, vm, node);
1685                       if (!s0)
1686                         next0 = SNAT_IN2OUT_NEXT_DROP;
1687                       goto trace00;
1688                     }
1689                   else
1690                     {
1691                       next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1692                       goto trace00;
1693                     }
1694                 }
1695               else
1696                 {
1697                   s0 = pool_elt_at_index (
1698                     sm->per_thread_data[thread_index].sessions,
1699                     value0.value);
1700                 }
1701             }
1702
1703           b0->flags |= VNET_BUFFER_F_IS_NATED;
1704
1705           old_addr0 = ip0->src_address.as_u32;
1706           ip0->src_address = s0->out2in.addr;
1707           new_addr0 = ip0->src_address.as_u32;
1708           if (!is_output_feature)
1709             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1710
1711           sum0 = ip0->checksum;
1712           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1713                                  ip4_header_t,
1714                                  src_address /* changed member */);
1715           ip0->checksum = ip_csum_fold (sum0);
1716
1717           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1718             {
1719               old_port0 = tcp0->src_port;
1720               tcp0->src_port = s0->out2in.port;
1721               new_port0 = tcp0->src_port;
1722
1723               sum0 = tcp0->checksum;
1724               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1725                                      ip4_header_t,
1726                                      dst_address /* changed member */);
1727               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1728                                      ip4_header_t /* cheat */,
1729                                      length /* changed member */);
1730               tcp0->checksum = ip_csum_fold(sum0);
1731             }
1732           else
1733             {
1734               old_port0 = udp0->src_port;
1735               udp0->src_port = s0->out2in.port;
1736               udp0->checksum = 0;
1737             }
1738
1739           /* Accounting */
1740           s0->last_heard = now;
1741           s0->total_pkts++;
1742           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1743           /* Per-user LRU list maintenance for dynamic translation */
1744           if (!snat_is_session_static (s0))
1745             {
1746               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1747                                  s0->per_user_index);
1748               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1749                                   s0->per_user_list_head_index,
1750                                   s0->per_user_index);
1751             }
1752         trace00:
1753
1754           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1755                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1756             {
1757               snat_in2out_trace_t *t =
1758                  vlib_add_trace (vm, node, b0, sizeof (*t));
1759               t->is_slow_path = is_slow_path;
1760               t->sw_if_index = sw_if_index0;
1761               t->next_index = next0;
1762                   t->session_index = ~0;
1763               if (s0)
1764                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1765             }
1766
1767           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1768
1769           if (is_output_feature)
1770             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1771
1772           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1773                  iph_offset1);
1774
1775           udp1 = ip4_next_header (ip1);
1776           tcp1 = (tcp_header_t *) udp1;
1777           icmp1 = (icmp46_header_t *) udp1;
1778
1779           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1780           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1781                                    sw_if_index1);
1782
1783           if (PREDICT_FALSE(ip1->ttl == 1))
1784             {
1785               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1786               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1787                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1788                                            0);
1789               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1790               goto trace01;
1791             }
1792
1793           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1794
1795           /* Next configured feature, probably ip4-lookup */
1796           if (is_slow_path)
1797             {
1798               if (PREDICT_FALSE (proto1 == ~0))
1799                 {
1800                   s1 = snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1,
1801                                                   thread_index, now, vm, node);
1802                   if (!s1)
1803                     next1 = SNAT_IN2OUT_NEXT_DROP;
1804                   goto trace01;
1805                 }
1806
1807               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1808                 {
1809                   next1 = icmp_in2out_slow_path
1810                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1811                      next1, now, thread_index, &s1);
1812                   goto trace01;
1813                 }
1814             }
1815           else
1816             {
1817               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1818                 {
1819                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1820                   goto trace01;
1821                 }
1822             }
1823
1824           b1->flags |= VNET_BUFFER_F_IS_NATED;
1825
1826           key1.addr = ip1->src_address;
1827           key1.port = udp1->src_port;
1828           key1.protocol = proto1;
1829           key1.fib_index = rx_fib_index1;
1830
1831           kv1.key = key1.as_u64;
1832
1833             if (PREDICT_FALSE(clib_bihash_search_8_8 (
1834                 &sm->per_thread_data[thread_index].in2out, &kv1, &value1) != 0))
1835             {
1836               if (is_slow_path)
1837                 {
1838                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
1839                       ip1, proto1, rx_fib_index1, thread_index)) && !is_output_feature)
1840                     goto trace01;
1841
1842                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1843                                      &s1, node, next1, thread_index);
1844                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1845                     goto trace01;
1846                 }
1847               else
1848                 {
1849                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1850                   goto trace01;
1851                 }
1852             }
1853           else
1854             {
1855               if (PREDICT_FALSE (value1.value == ~0ULL))
1856                 {
1857                   if (is_slow_path)
1858                     {
1859                       s1 = snat_in2out_lb(sm, b1, ip1, rx_fib_index1,
1860                                           thread_index, now, vm, node);
1861                       if (!s1)
1862                         next1 = SNAT_IN2OUT_NEXT_DROP;
1863                       goto trace01;
1864                     }
1865                   else
1866                     {
1867                       next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1868                       goto trace01;
1869                     }
1870                 }
1871               else
1872                 {
1873                   s1 = pool_elt_at_index (
1874                     sm->per_thread_data[thread_index].sessions,
1875                     value1.value);
1876                 }
1877             }
1878
1879           old_addr1 = ip1->src_address.as_u32;
1880           ip1->src_address = s1->out2in.addr;
1881           new_addr1 = ip1->src_address.as_u32;
1882           if (!is_output_feature)
1883             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1884
1885           sum1 = ip1->checksum;
1886           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1887                                  ip4_header_t,
1888                                  src_address /* changed member */);
1889           ip1->checksum = ip_csum_fold (sum1);
1890
1891           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1892             {
1893               old_port1 = tcp1->src_port;
1894               tcp1->src_port = s1->out2in.port;
1895               new_port1 = tcp1->src_port;
1896
1897               sum1 = tcp1->checksum;
1898               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1899                                      ip4_header_t,
1900                                      dst_address /* changed member */);
1901               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1902                                      ip4_header_t /* cheat */,
1903                                      length /* changed member */);
1904               tcp1->checksum = ip_csum_fold(sum1);
1905             }
1906           else
1907             {
1908               old_port1 = udp1->src_port;
1909               udp1->src_port = s1->out2in.port;
1910               udp1->checksum = 0;
1911             }
1912
1913           /* Accounting */
1914           s1->last_heard = now;
1915           s1->total_pkts++;
1916           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1917           /* Per-user LRU list maintenance for dynamic translation */
1918           if (!snat_is_session_static (s1))
1919             {
1920               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1921                                  s1->per_user_index);
1922               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1923                                   s1->per_user_list_head_index,
1924                                   s1->per_user_index);
1925             }
1926         trace01:
1927
1928           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1929                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1930             {
1931               snat_in2out_trace_t *t =
1932                  vlib_add_trace (vm, node, b1, sizeof (*t));
1933               t->sw_if_index = sw_if_index1;
1934               t->next_index = next1;
1935               t->session_index = ~0;
1936               if (s1)
1937                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1938             }
1939
1940           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1941
1942           /* verify speculative enqueues, maybe switch current next frame */
1943           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1944                                            to_next, n_left_to_next,
1945                                            bi0, bi1, next0, next1);
1946         }
1947
1948       while (n_left_from > 0 && n_left_to_next > 0)
1949         {
1950           u32 bi0;
1951           vlib_buffer_t * b0;
1952           u32 next0;
1953           u32 sw_if_index0;
1954           ip4_header_t * ip0;
1955           ip_csum_t sum0;
1956           u32 new_addr0, old_addr0;
1957           u16 old_port0, new_port0;
1958           udp_header_t * udp0;
1959           tcp_header_t * tcp0;
1960           icmp46_header_t * icmp0;
1961           snat_session_key_t key0;
1962           u32 rx_fib_index0;
1963           u32 proto0;
1964           snat_session_t * s0 = 0;
1965           clib_bihash_kv_8_8_t kv0, value0;
1966           u32 iph_offset0 = 0;
1967
1968           /* speculatively enqueue b0 to the current next frame */
1969           bi0 = from[0];
1970           to_next[0] = bi0;
1971           from += 1;
1972           to_next += 1;
1973           n_left_from -= 1;
1974           n_left_to_next -= 1;
1975
1976           b0 = vlib_get_buffer (vm, bi0);
1977           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1978
1979           if (is_output_feature)
1980             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1981
1982           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1983                  iph_offset0);
1984
1985           udp0 = ip4_next_header (ip0);
1986           tcp0 = (tcp_header_t *) udp0;
1987           icmp0 = (icmp46_header_t *) udp0;
1988
1989           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1990           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1991                                    sw_if_index0);
1992
1993           if (PREDICT_FALSE(ip0->ttl == 1))
1994             {
1995               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1996               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1997                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1998                                            0);
1999               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2000               goto trace0;
2001             }
2002
2003           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2004
2005           /* Next configured feature, probably ip4-lookup */
2006           if (is_slow_path)
2007             {
2008               if (PREDICT_FALSE (proto0 == ~0))
2009                 {
2010                   s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
2011                                                   thread_index, now, vm, node);
2012                   if (!s0)
2013                     next0 = SNAT_IN2OUT_NEXT_DROP;
2014                   goto trace0;
2015                 }
2016
2017               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2018                 {
2019                   next0 = icmp_in2out_slow_path
2020                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
2021                      next0, now, thread_index, &s0);
2022                   goto trace0;
2023                 }
2024             }
2025           else
2026             {
2027               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
2028                 {
2029                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2030                   goto trace0;
2031                 }
2032             }
2033
2034           key0.addr = ip0->src_address;
2035           key0.port = udp0->src_port;
2036           key0.protocol = proto0;
2037           key0.fib_index = rx_fib_index0;
2038
2039           kv0.key = key0.as_u64;
2040
2041           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out,
2042                                       &kv0, &value0))
2043             {
2044               if (is_slow_path)
2045                 {
2046                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
2047                       ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature)
2048                     goto trace0;
2049
2050                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
2051                                      &s0, node, next0, thread_index);
2052
2053                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
2054                     goto trace0;
2055                 }
2056               else
2057                 {
2058                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2059                   goto trace0;
2060                 }
2061             }
2062           else
2063             {
2064               if (PREDICT_FALSE (value0.value == ~0ULL))
2065                 {
2066                   if (is_slow_path)
2067                     {
2068                       s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
2069                                           thread_index, now, vm, node);
2070                       if (!s0)
2071                         next0 = SNAT_IN2OUT_NEXT_DROP;
2072                       goto trace0;
2073                     }
2074                   else
2075                     {
2076                       next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2077                       goto trace0;
2078                     }
2079                 }
2080               else
2081                 {
2082                   s0 = pool_elt_at_index (
2083                     sm->per_thread_data[thread_index].sessions,
2084                     value0.value);
2085                 }
2086             }
2087
2088           b0->flags |= VNET_BUFFER_F_IS_NATED;
2089
2090           old_addr0 = ip0->src_address.as_u32;
2091           ip0->src_address = s0->out2in.addr;
2092           new_addr0 = ip0->src_address.as_u32;
2093           if (!is_output_feature)
2094             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
2095
2096           sum0 = ip0->checksum;
2097           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2098                                  ip4_header_t,
2099                                  src_address /* changed member */);
2100           ip0->checksum = ip_csum_fold (sum0);
2101
2102           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2103             {
2104               old_port0 = tcp0->src_port;
2105               tcp0->src_port = s0->out2in.port;
2106               new_port0 = tcp0->src_port;
2107
2108               sum0 = tcp0->checksum;
2109               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2110                                      ip4_header_t,
2111                                      dst_address /* changed member */);
2112               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2113                                      ip4_header_t /* cheat */,
2114                                      length /* changed member */);
2115               tcp0->checksum = ip_csum_fold(sum0);
2116             }
2117           else
2118             {
2119               old_port0 = udp0->src_port;
2120               udp0->src_port = s0->out2in.port;
2121               udp0->checksum = 0;
2122             }
2123
2124           /* Accounting */
2125           s0->last_heard = now;
2126           s0->total_pkts++;
2127           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
2128           /* Per-user LRU list maintenance for dynamic translation */
2129           if (!snat_is_session_static (s0))
2130             {
2131               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
2132                                  s0->per_user_index);
2133               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
2134                                   s0->per_user_list_head_index,
2135                                   s0->per_user_index);
2136             }
2137
2138         trace0:
2139           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2140                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2141             {
2142               snat_in2out_trace_t *t =
2143                  vlib_add_trace (vm, node, b0, sizeof (*t));
2144               t->is_slow_path = is_slow_path;
2145               t->sw_if_index = sw_if_index0;
2146               t->next_index = next0;
2147                   t->session_index = ~0;
2148               if (s0)
2149                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
2150             }
2151
2152           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2153
2154           /* verify speculative enqueue, maybe switch current next frame */
2155           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2156                                            to_next, n_left_to_next,
2157                                            bi0, next0);
2158         }
2159
2160       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2161     }
2162
2163   vlib_node_increment_counter (vm, stats_node_index,
2164                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2165                                pkts_processed);
2166   return frame->n_vectors;
2167 }
2168
2169 static uword
2170 snat_in2out_fast_path_fn (vlib_main_t * vm,
2171                           vlib_node_runtime_t * node,
2172                           vlib_frame_t * frame)
2173 {
2174   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0);
2175 }
2176
2177 VLIB_REGISTER_NODE (snat_in2out_node) = {
2178   .function = snat_in2out_fast_path_fn,
2179   .name = "nat44-in2out",
2180   .vector_size = sizeof (u32),
2181   .format_trace = format_snat_in2out_trace,
2182   .type = VLIB_NODE_TYPE_INTERNAL,
2183
2184   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2185   .error_strings = snat_in2out_error_strings,
2186
2187   .runtime_data_bytes = sizeof (snat_runtime_t),
2188
2189   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2190
2191   /* edit / add dispositions here */
2192   .next_nodes = {
2193     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2194     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2195     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2196     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2197   },
2198 };
2199
2200 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
2201
2202 static uword
2203 snat_in2out_output_fast_path_fn (vlib_main_t * vm,
2204                                  vlib_node_runtime_t * node,
2205                                  vlib_frame_t * frame)
2206 {
2207   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1);
2208 }
2209
2210 VLIB_REGISTER_NODE (snat_in2out_output_node) = {
2211   .function = snat_in2out_output_fast_path_fn,
2212   .name = "nat44-in2out-output",
2213   .vector_size = sizeof (u32),
2214   .format_trace = format_snat_in2out_trace,
2215   .type = VLIB_NODE_TYPE_INTERNAL,
2216
2217   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2218   .error_strings = snat_in2out_error_strings,
2219
2220   .runtime_data_bytes = sizeof (snat_runtime_t),
2221
2222   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2223
2224   /* edit / add dispositions here */
2225   .next_nodes = {
2226     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2227     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2228     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2229     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2230   },
2231 };
2232
2233 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
2234                               snat_in2out_output_fast_path_fn);
2235
2236 static uword
2237 snat_in2out_slow_path_fn (vlib_main_t * vm,
2238                           vlib_node_runtime_t * node,
2239                           vlib_frame_t * frame)
2240 {
2241   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0);
2242 }
2243
2244 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
2245   .function = snat_in2out_slow_path_fn,
2246   .name = "nat44-in2out-slowpath",
2247   .vector_size = sizeof (u32),
2248   .format_trace = format_snat_in2out_trace,
2249   .type = VLIB_NODE_TYPE_INTERNAL,
2250
2251   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2252   .error_strings = snat_in2out_error_strings,
2253
2254   .runtime_data_bytes = sizeof (snat_runtime_t),
2255
2256   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2257
2258   /* edit / add dispositions here */
2259   .next_nodes = {
2260     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2261     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2262     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2263     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2264   },
2265 };
2266
2267 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
2268                               snat_in2out_slow_path_fn);
2269
2270 static uword
2271 snat_in2out_output_slow_path_fn (vlib_main_t * vm,
2272                                  vlib_node_runtime_t * node,
2273                                  vlib_frame_t * frame)
2274 {
2275   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1);
2276 }
2277
2278 VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
2279   .function = snat_in2out_output_slow_path_fn,
2280   .name = "nat44-in2out-output-slowpath",
2281   .vector_size = sizeof (u32),
2282   .format_trace = format_snat_in2out_trace,
2283   .type = VLIB_NODE_TYPE_INTERNAL,
2284
2285   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2286   .error_strings = snat_in2out_error_strings,
2287
2288   .runtime_data_bytes = sizeof (snat_runtime_t),
2289
2290   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2291
2292   /* edit / add dispositions here */
2293   .next_nodes = {
2294     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2295     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2296     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2297     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2298   },
2299 };
2300
2301 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
2302                               snat_in2out_output_slow_path_fn);
2303
2304 extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local;
2305
2306 static uword
2307 nat44_hairpinning_fn (vlib_main_t * vm,
2308                       vlib_node_runtime_t * node,
2309                       vlib_frame_t * frame)
2310 {
2311   u32 n_left_from, * from, * to_next;
2312   snat_in2out_next_t next_index;
2313   u32 pkts_processed = 0;
2314   snat_main_t * sm = &snat_main;
2315   vnet_feature_main_t *fm = &feature_main;
2316   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
2317   vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index];
2318
2319   from = vlib_frame_vector_args (frame);
2320   n_left_from = frame->n_vectors;
2321   next_index = node->cached_next_index;
2322
2323   while (n_left_from > 0)
2324     {
2325       u32 n_left_to_next;
2326
2327       vlib_get_next_frame (vm, node, next_index,
2328                            to_next, n_left_to_next);
2329
2330       while (n_left_from > 0 && n_left_to_next > 0)
2331         {
2332           u32 bi0;
2333           vlib_buffer_t * b0;
2334           u32 next0;
2335           ip4_header_t * ip0;
2336           u32 proto0;
2337           udp_header_t * udp0;
2338           tcp_header_t * tcp0;
2339
2340           /* speculatively enqueue b0 to the current next frame */
2341           bi0 = from[0];
2342           to_next[0] = bi0;
2343           from += 1;
2344           to_next += 1;
2345           n_left_from -= 1;
2346           n_left_to_next -= 1;
2347
2348           b0 = vlib_get_buffer (vm, bi0);
2349           ip0 = vlib_buffer_get_current (b0);
2350           udp0 = ip4_next_header (ip0);
2351           tcp0 = (tcp_header_t *) udp0;
2352
2353           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2354
2355           vnet_get_config_data (&cm->config_main, &b0->current_config_index,
2356                                 &next0, 0);
2357
2358           if (snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0))
2359             next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2360
2361           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2362
2363           /* verify speculative enqueue, maybe switch current next frame */
2364           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2365                                            to_next, n_left_to_next,
2366                                            bi0, next0);
2367          }
2368
2369       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2370     }
2371
2372   vlib_node_increment_counter (vm, nat44_hairpinning_node.index,
2373                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2374                                pkts_processed);
2375   return frame->n_vectors;
2376 }
2377
2378 VLIB_REGISTER_NODE (nat44_hairpinning_node) = {
2379   .function = nat44_hairpinning_fn,
2380   .name = "nat44-hairpinning",
2381   .vector_size = sizeof (u32),
2382   .type = VLIB_NODE_TYPE_INTERNAL,
2383   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2384   .error_strings = snat_in2out_error_strings,
2385   .n_next_nodes = 2,
2386   .next_nodes = {
2387     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2388     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2389   },
2390 };
2391
2392 VLIB_NODE_FUNCTION_MULTIARCH (nat44_hairpinning_node,
2393                               nat44_hairpinning_fn);
2394
2395 /**************************/
2396 /*** deterministic mode ***/
2397 /**************************/
2398 static uword
2399 snat_det_in2out_node_fn (vlib_main_t * vm,
2400                          vlib_node_runtime_t * node,
2401                          vlib_frame_t * frame)
2402 {
2403   u32 n_left_from, * from, * to_next;
2404   snat_in2out_next_t next_index;
2405   u32 pkts_processed = 0;
2406   snat_main_t * sm = &snat_main;
2407   u32 now = (u32) vlib_time_now (vm);
2408   u32 thread_index = vlib_get_thread_index ();
2409
2410   from = vlib_frame_vector_args (frame);
2411   n_left_from = frame->n_vectors;
2412   next_index = node->cached_next_index;
2413
2414   while (n_left_from > 0)
2415     {
2416       u32 n_left_to_next;
2417
2418       vlib_get_next_frame (vm, node, next_index,
2419                            to_next, n_left_to_next);
2420
2421       while (n_left_from >= 4 && n_left_to_next >= 2)
2422         {
2423           u32 bi0, bi1;
2424           vlib_buffer_t * b0, * b1;
2425           u32 next0, next1;
2426           u32 sw_if_index0, sw_if_index1;
2427           ip4_header_t * ip0, * ip1;
2428           ip_csum_t sum0, sum1;
2429           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
2430           u16 old_port0, new_port0, lo_port0, i0;
2431           u16 old_port1, new_port1, lo_port1, i1;
2432           udp_header_t * udp0, * udp1;
2433           tcp_header_t * tcp0, * tcp1;
2434           u32 proto0, proto1;
2435           snat_det_out_key_t key0, key1;
2436           snat_det_map_t * dm0, * dm1;
2437           snat_det_session_t * ses0 = 0, * ses1 = 0;
2438           u32 rx_fib_index0, rx_fib_index1;
2439           icmp46_header_t * icmp0, * icmp1;
2440
2441           /* Prefetch next iteration. */
2442           {
2443             vlib_buffer_t * p2, * p3;
2444
2445             p2 = vlib_get_buffer (vm, from[2]);
2446             p3 = vlib_get_buffer (vm, from[3]);
2447
2448             vlib_prefetch_buffer_header (p2, LOAD);
2449             vlib_prefetch_buffer_header (p3, LOAD);
2450
2451             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2452             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2453           }
2454
2455           /* speculatively enqueue b0 and b1 to the current next frame */
2456           to_next[0] = bi0 = from[0];
2457           to_next[1] = bi1 = from[1];
2458           from += 2;
2459           to_next += 2;
2460           n_left_from -= 2;
2461           n_left_to_next -= 2;
2462
2463           b0 = vlib_get_buffer (vm, bi0);
2464           b1 = vlib_get_buffer (vm, bi1);
2465
2466           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2467           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
2468
2469           ip0 = vlib_buffer_get_current (b0);
2470           udp0 = ip4_next_header (ip0);
2471           tcp0 = (tcp_header_t *) udp0;
2472
2473           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2474
2475           if (PREDICT_FALSE(ip0->ttl == 1))
2476             {
2477               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2478               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2479                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2480                                            0);
2481               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2482               goto trace0;
2483             }
2484
2485           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2486
2487           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2488             {
2489               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2490               icmp0 = (icmp46_header_t *) udp0;
2491
2492               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2493                                   rx_fib_index0, node, next0, thread_index,
2494                                   &ses0, &dm0);
2495               goto trace0;
2496             }
2497
2498           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2499           if (PREDICT_FALSE(!dm0))
2500             {
2501               clib_warning("no match for internal host %U",
2502                            format_ip4_address, &ip0->src_address);
2503               next0 = SNAT_IN2OUT_NEXT_DROP;
2504               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2505               goto trace0;
2506             }
2507
2508           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2509
2510           key0.ext_host_addr = ip0->dst_address;
2511           key0.ext_host_port = tcp0->dst;
2512
2513           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2514           if (PREDICT_FALSE(!ses0))
2515             {
2516               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2517                 {
2518                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2519                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2520
2521                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2522                     continue;
2523
2524                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2525                   break;
2526                 }
2527               if (PREDICT_FALSE(!ses0))
2528                 {
2529                   /* too many sessions for user, send ICMP error packet */
2530
2531                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2532                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2533                                                ICMP4_destination_unreachable_destination_unreachable_host,
2534                                                0);
2535                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2536                   goto trace0;
2537                 }
2538             }
2539
2540           new_port0 = ses0->out.out_port;
2541
2542           old_addr0.as_u32 = ip0->src_address.as_u32;
2543           ip0->src_address.as_u32 = new_addr0.as_u32;
2544           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2545
2546           sum0 = ip0->checksum;
2547           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2548                                  ip4_header_t,
2549                                  src_address /* changed member */);
2550           ip0->checksum = ip_csum_fold (sum0);
2551
2552           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2553             {
2554               if (tcp0->flags & TCP_FLAG_SYN)
2555                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2556               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2557                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2558               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2559                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2560               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2561                 snat_det_ses_close(dm0, ses0);
2562               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2563                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2564               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2565                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2566
2567               old_port0 = tcp0->src;
2568               tcp0->src = new_port0;
2569
2570               sum0 = tcp0->checksum;
2571               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2572                                      ip4_header_t,
2573                                      dst_address /* changed member */);
2574               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2575                                      ip4_header_t /* cheat */,
2576                                      length /* changed member */);
2577               tcp0->checksum = ip_csum_fold(sum0);
2578             }
2579           else
2580             {
2581               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2582               old_port0 = udp0->src_port;
2583               udp0->src_port = new_port0;
2584               udp0->checksum = 0;
2585             }
2586
2587           switch(ses0->state)
2588             {
2589             case SNAT_SESSION_UDP_ACTIVE:
2590                 ses0->expire = now + sm->udp_timeout;
2591                 break;
2592             case SNAT_SESSION_TCP_SYN_SENT:
2593             case SNAT_SESSION_TCP_FIN_WAIT:
2594             case SNAT_SESSION_TCP_CLOSE_WAIT:
2595             case SNAT_SESSION_TCP_LAST_ACK:
2596                 ses0->expire = now + sm->tcp_transitory_timeout;
2597                 break;
2598             case SNAT_SESSION_TCP_ESTABLISHED:
2599                 ses0->expire = now + sm->tcp_established_timeout;
2600                 break;
2601             }
2602
2603         trace0:
2604           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2605                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2606             {
2607               snat_in2out_trace_t *t =
2608                  vlib_add_trace (vm, node, b0, sizeof (*t));
2609               t->is_slow_path = 0;
2610               t->sw_if_index = sw_if_index0;
2611               t->next_index = next0;
2612               t->session_index = ~0;
2613               if (ses0)
2614                 t->session_index = ses0 - dm0->sessions;
2615             }
2616
2617           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2618
2619           ip1 = vlib_buffer_get_current (b1);
2620           udp1 = ip4_next_header (ip1);
2621           tcp1 = (tcp_header_t *) udp1;
2622
2623           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2624
2625           if (PREDICT_FALSE(ip1->ttl == 1))
2626             {
2627               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2628               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2629                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2630                                            0);
2631               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2632               goto trace1;
2633             }
2634
2635           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2636
2637           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
2638             {
2639               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
2640               icmp1 = (icmp46_header_t *) udp1;
2641
2642               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
2643                                   rx_fib_index1, node, next1, thread_index,
2644                                   &ses1, &dm1);
2645               goto trace1;
2646             }
2647
2648           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
2649           if (PREDICT_FALSE(!dm1))
2650             {
2651               clib_warning("no match for internal host %U",
2652                            format_ip4_address, &ip0->src_address);
2653               next1 = SNAT_IN2OUT_NEXT_DROP;
2654               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2655               goto trace1;
2656             }
2657
2658           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
2659
2660           key1.ext_host_addr = ip1->dst_address;
2661           key1.ext_host_port = tcp1->dst;
2662
2663           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
2664           if (PREDICT_FALSE(!ses1))
2665             {
2666               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
2667                 {
2668                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
2669                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
2670
2671                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
2672                     continue;
2673
2674                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
2675                   break;
2676                 }
2677               if (PREDICT_FALSE(!ses1))
2678                 {
2679                   /* too many sessions for user, send ICMP error packet */
2680
2681                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2682                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
2683                                                ICMP4_destination_unreachable_destination_unreachable_host,
2684                                                0);
2685                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2686                   goto trace1;
2687                 }
2688             }
2689
2690           new_port1 = ses1->out.out_port;
2691
2692           old_addr1.as_u32 = ip1->src_address.as_u32;
2693           ip1->src_address.as_u32 = new_addr1.as_u32;
2694           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2695
2696           sum1 = ip1->checksum;
2697           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2698                                  ip4_header_t,
2699                                  src_address /* changed member */);
2700           ip1->checksum = ip_csum_fold (sum1);
2701
2702           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
2703             {
2704               if (tcp1->flags & TCP_FLAG_SYN)
2705                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
2706               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
2707                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2708               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
2709                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
2710               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
2711                 snat_det_ses_close(dm1, ses1);
2712               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2713                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
2714               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
2715                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2716
2717               old_port1 = tcp1->src;
2718               tcp1->src = new_port1;
2719
2720               sum1 = tcp1->checksum;
2721               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2722                                      ip4_header_t,
2723                                      dst_address /* changed member */);
2724               sum1 = ip_csum_update (sum1, old_port1, new_port1,
2725                                      ip4_header_t /* cheat */,
2726                                      length /* changed member */);
2727               tcp1->checksum = ip_csum_fold(sum1);
2728             }
2729           else
2730             {
2731               ses1->state = SNAT_SESSION_UDP_ACTIVE;
2732               old_port1 = udp1->src_port;
2733               udp1->src_port = new_port1;
2734               udp1->checksum = 0;
2735             }
2736
2737           switch(ses1->state)
2738             {
2739             case SNAT_SESSION_UDP_ACTIVE:
2740                 ses1->expire = now + sm->udp_timeout;
2741                 break;
2742             case SNAT_SESSION_TCP_SYN_SENT:
2743             case SNAT_SESSION_TCP_FIN_WAIT:
2744             case SNAT_SESSION_TCP_CLOSE_WAIT:
2745             case SNAT_SESSION_TCP_LAST_ACK:
2746                 ses1->expire = now + sm->tcp_transitory_timeout;
2747                 break;
2748             case SNAT_SESSION_TCP_ESTABLISHED:
2749                 ses1->expire = now + sm->tcp_established_timeout;
2750                 break;
2751             }
2752
2753         trace1:
2754           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2755                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2756             {
2757               snat_in2out_trace_t *t =
2758                  vlib_add_trace (vm, node, b1, sizeof (*t));
2759               t->is_slow_path = 0;
2760               t->sw_if_index = sw_if_index1;
2761               t->next_index = next1;
2762               t->session_index = ~0;
2763               if (ses1)
2764                 t->session_index = ses1 - dm1->sessions;
2765             }
2766
2767           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
2768
2769           /* verify speculative enqueues, maybe switch current next frame */
2770           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2771                                            to_next, n_left_to_next,
2772                                            bi0, bi1, next0, next1);
2773          }
2774
2775       while (n_left_from > 0 && n_left_to_next > 0)
2776         {
2777           u32 bi0;
2778           vlib_buffer_t * b0;
2779           u32 next0;
2780           u32 sw_if_index0;
2781           ip4_header_t * ip0;
2782           ip_csum_t sum0;
2783           ip4_address_t new_addr0, old_addr0;
2784           u16 old_port0, new_port0, lo_port0, i0;
2785           udp_header_t * udp0;
2786           tcp_header_t * tcp0;
2787           u32 proto0;
2788           snat_det_out_key_t key0;
2789           snat_det_map_t * dm0;
2790           snat_det_session_t * ses0 = 0;
2791           u32 rx_fib_index0;
2792           icmp46_header_t * icmp0;
2793
2794           /* speculatively enqueue b0 to the current next frame */
2795           bi0 = from[0];
2796           to_next[0] = bi0;
2797           from += 1;
2798           to_next += 1;
2799           n_left_from -= 1;
2800           n_left_to_next -= 1;
2801
2802           b0 = vlib_get_buffer (vm, bi0);
2803           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2804
2805           ip0 = vlib_buffer_get_current (b0);
2806           udp0 = ip4_next_header (ip0);
2807           tcp0 = (tcp_header_t *) udp0;
2808
2809           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2810
2811           if (PREDICT_FALSE(ip0->ttl == 1))
2812             {
2813               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2814               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2815                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2816                                            0);
2817               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2818               goto trace00;
2819             }
2820
2821           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2822
2823           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2824             {
2825               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2826               icmp0 = (icmp46_header_t *) udp0;
2827
2828               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2829                                   rx_fib_index0, node, next0, thread_index,
2830                                   &ses0, &dm0);
2831               goto trace00;
2832             }
2833
2834           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2835           if (PREDICT_FALSE(!dm0))
2836             {
2837               clib_warning("no match for internal host %U",
2838                            format_ip4_address, &ip0->src_address);
2839               next0 = SNAT_IN2OUT_NEXT_DROP;
2840               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2841               goto trace00;
2842             }
2843
2844           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2845
2846           key0.ext_host_addr = ip0->dst_address;
2847           key0.ext_host_port = tcp0->dst;
2848
2849           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2850           if (PREDICT_FALSE(!ses0))
2851             {
2852               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2853                 {
2854                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2855                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2856
2857                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2858                     continue;
2859
2860                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2861                   break;
2862                 }
2863               if (PREDICT_FALSE(!ses0))
2864                 {
2865                   /* too many sessions for user, send ICMP error packet */
2866
2867                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2868                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2869                                                ICMP4_destination_unreachable_destination_unreachable_host,
2870                                                0);
2871                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2872                   goto trace00;
2873                 }
2874             }
2875
2876           new_port0 = ses0->out.out_port;
2877
2878           old_addr0.as_u32 = ip0->src_address.as_u32;
2879           ip0->src_address.as_u32 = new_addr0.as_u32;
2880           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2881
2882           sum0 = ip0->checksum;
2883           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2884                                  ip4_header_t,
2885                                  src_address /* changed member */);
2886           ip0->checksum = ip_csum_fold (sum0);
2887
2888           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2889             {
2890               if (tcp0->flags & TCP_FLAG_SYN)
2891                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2892               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2893                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2894               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2895                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2896               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2897                 snat_det_ses_close(dm0, ses0);
2898               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2899                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2900               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2901                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2902
2903               old_port0 = tcp0->src;
2904               tcp0->src = new_port0;
2905
2906               sum0 = tcp0->checksum;
2907               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2908                                      ip4_header_t,
2909                                      dst_address /* changed member */);
2910               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2911                                      ip4_header_t /* cheat */,
2912                                      length /* changed member */);
2913               tcp0->checksum = ip_csum_fold(sum0);
2914             }
2915           else
2916             {
2917               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2918               old_port0 = udp0->src_port;
2919               udp0->src_port = new_port0;
2920               udp0->checksum = 0;
2921             }
2922
2923           switch(ses0->state)
2924             {
2925             case SNAT_SESSION_UDP_ACTIVE:
2926                 ses0->expire = now + sm->udp_timeout;
2927                 break;
2928             case SNAT_SESSION_TCP_SYN_SENT:
2929             case SNAT_SESSION_TCP_FIN_WAIT:
2930             case SNAT_SESSION_TCP_CLOSE_WAIT:
2931             case SNAT_SESSION_TCP_LAST_ACK:
2932                 ses0->expire = now + sm->tcp_transitory_timeout;
2933                 break;
2934             case SNAT_SESSION_TCP_ESTABLISHED:
2935                 ses0->expire = now + sm->tcp_established_timeout;
2936                 break;
2937             }
2938
2939         trace00:
2940           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2941                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2942             {
2943               snat_in2out_trace_t *t =
2944                  vlib_add_trace (vm, node, b0, sizeof (*t));
2945               t->is_slow_path = 0;
2946               t->sw_if_index = sw_if_index0;
2947               t->next_index = next0;
2948               t->session_index = ~0;
2949               if (ses0)
2950                 t->session_index = ses0 - dm0->sessions;
2951             }
2952
2953           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2954
2955           /* verify speculative enqueue, maybe switch current next frame */
2956           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2957                                            to_next, n_left_to_next,
2958                                            bi0, next0);
2959         }
2960
2961       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2962     }
2963
2964   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
2965                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2966                                pkts_processed);
2967   return frame->n_vectors;
2968 }
2969
/*
 * Graph-node registration for "nat44-det-in2out".
 *
 * Binds snat_det_in2out_node_fn as the node's dispatch function, prints
 * trace records with format_snat_in2out_trace and reports errors through
 * the snat_in2out_error_strings table.
 */
2970 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
2971   .function = snat_det_in2out_node_fn,
2972   .name = "nat44-det-in2out",
       /* Each frame element is a u32 (the node function reads them as
        * buffer indices). */
2973   .vector_size = sizeof (u32),
2974   .format_trace = format_snat_in2out_trace,
2975   .type = VLIB_NODE_TYPE_INTERNAL,
2976
       /* Error counters are sized from, and labelled by, the same table. */
2977   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2978   .error_strings = snat_in2out_error_strings,
2979
2980   .runtime_data_bytes = sizeof (snat_runtime_t),
2981
       /* Must stay in step with the number of entries in .next_nodes below. */
2982   .n_next_nodes = 3,
2983
2984   /* Next-node dispositions, indexed by the SNAT_IN2OUT_NEXT_* values */
2985   .next_nodes = {
2986     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2987     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2988     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2989   },
2990 };
2991
/* NOTE(review): presumably emits per-CPU-architecture variants of the node
 * function; confirm against the vlib macro definition. */
2992 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
2993
2994 /**
2995  * Get address and port values to be used for ICMP packet translation
2996  * and create session if needed (deterministic NAT, in2out direction)
2997  *
      * For a non-error ICMP message the match uses the outer source address
      * and the echo identifier.  For an ICMP error message it uses the
      * destination address of the embedded (inner) IP header and either the
      * inner echo identifier (ICMP) or the inner destination port (TCP/UDP).
      *
      * A new session is created only for an echo request that has no
      * existing session and is not exempted by snat_not_translate_fast().
      * Whenever a session is found or created, p_value is filled in -
      * including on the "bad ICMP type" drop taken with an existing session.
      *
      * NOTE(review): no packet-length validation is visible in this function
      * before the outer/inner headers are dereferenced; confirm it is done
      * by the caller.
      *
2998  * @param[in,out] sm             NAT main
2999  * @param[in,out] node           NAT node runtime
3000  * @param[in] thread_index       thread index (not referenced in this function)
3001  * @param[in,out] b0             buffer containing packet to be translated
      * @param[in] ip0                outer IPv4 header of the packet in b0
3002  * @param[out] p_proto           protocol used for matching
3003  * @param[out] p_value           address and port after NAT translation
      *                               (written only when a session exists)
3004  * @param[out] p_dont_translate  if packet should not be translated
3005  * @param d                      optional; if non-NULL, receives the
      *                               snat_det_session_t pointer (may be 0)
3006  * @param e                      optional; if non-NULL, receives the
      *                               snat_det_map_t pointer (may be 0)
      *
      * @return SNAT_IN2OUT_NEXT_DROP (with b0->error set) when the packet is
      *         to be dropped, otherwise ~0
3007  */
3008 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
3009                           u32 thread_index, vlib_buffer_t *b0,
3010                           ip4_header_t *ip0, u8 *p_proto,
3011                           snat_session_key_t *p_value,
3012                           u8 *p_dont_translate, void *d, void *e)
3013 {
3014   icmp46_header_t *icmp0;
3015   u32 sw_if_index0;
3016   u32 rx_fib_index0;
3017   u8 protocol;
3018   snat_det_out_key_t key0;
3019   u8 dont_translate = 0;
3020   u32 next0 = ~0;
3021   icmp_echo_header_t *echo0, *inner_echo0 = 0;
3022   ip4_header_t *inner_ip0;
3023   void *l4_header = 0;
3024   icmp46_header_t *inner_icmp0;
3025   snat_det_map_t * dm0 = 0;
3026   ip4_address_t new_addr0;
3027   u16 lo_port0, i0;
3028   snat_det_session_t * ses0 = 0;
3029   ip4_address_t in_addr;
3030   u16 in_port;
3031
3032   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
       /* The echo header is taken to start right after the ICMP header. */
3033   echo0 = (icmp_echo_header_t *)(icmp0+1);
3034   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3035   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
3036
       /* Step 1: pick the inside address/port used to match the session. */
3037   if (!icmp_is_error_message (icmp0))
3038     {
3039       protocol = SNAT_PROTOCOL_ICMP;
3040       in_addr = ip0->src_address;
3041       in_port = echo0->identifier;
3042     }
3043   else
3044     {
           /* Error message: the embedded IP header is taken to follow the
            * echo-sized header; match on the embedded packet's destination. */
3045       inner_ip0 = (ip4_header_t *)(echo0+1);
3046       l4_header = ip4_next_header (inner_ip0);
3047       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
3048       in_addr = inner_ip0->dst_address;
3049       switch (protocol)
3050         {
3051         case SNAT_PROTOCOL_ICMP:
3052           inner_icmp0 = (icmp46_header_t*)l4_header;
3053           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
3054           in_port = inner_echo0->identifier;
3055           break;
3056         case SNAT_PROTOCOL_UDP:
3057         case SNAT_PROTOCOL_TCP:
3058           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
3059           break;
3060         default:
               /* Any other embedded protocol cannot be matched: drop. */
3061           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
3062           next0 = SNAT_IN2OUT_NEXT_DROP;
3063           goto out;
3064         }
3065     }
3066
       /* Step 2: find the deterministic mapping for the inside address. */
3067   dm0 = snat_det_map_by_user(sm, &in_addr);
3068   if (PREDICT_FALSE(!dm0))
3069     {
           /* The warning is emitted before the exemption check below, so it
            * is logged even for packets that end up passed untranslated. */
3070       clib_warning("no match for internal host %U",
3071                    format_ip4_address, &in_addr);
3072       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
3073           IP_PROTOCOL_ICMP, rx_fib_index0)))
3074         {
3075           dont_translate = 1;
3076           goto out;
3077         }
3078       next0 = SNAT_IN2OUT_NEXT_DROP;
3079       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3080       goto out;
3081     }
3082
3083   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
3084
       /* Step 3: look the session up by inside address/port and external
        * host; the external port is always 0 for this lookup. */
3085   key0.ext_host_addr = ip0->dst_address;
3086   key0.ext_host_port = 0;
3087
3088   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
3089   if (PREDICT_FALSE(!ses0))
3090     {
3091       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
3092           IP_PROTOCOL_ICMP, rx_fib_index0)))
3093         {
3094           dont_translate = 1;
3095           goto out;
3096         }
           /* Only an echo request may open a new session. */
3097       if (icmp0->type != ICMP4_echo_request)
3098         {
3099           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3100           next0 = SNAT_IN2OUT_NEXT_DROP;
3101           goto out;
3102         }
           /* Probe ports_per_host candidate outside ports, starting at offset
            * (identifier % ports_per_host) from lo_port0 and wrapping; take
            * the first one with no existing session.
            * NOTE(review): lo_port0 is presumably the low end of this host's
            * outside port range - confirm in snat_det_forward(). */
3103       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
3104         {
3105           key0.out_port = clib_host_to_net_u16 (lo_port0 +
3106             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
3107
3108           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
3109             continue;
3110
3111           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
3112           break;
3113         }
           /* Still no session: every candidate was taken or creation failed. */
3114       if (PREDICT_FALSE(!ses0))
3115         {
3116           next0 = SNAT_IN2OUT_NEXT_DROP;
3117           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
3118           goto out;
3119         }
3120     }
3121
       /* With a session in hand, accept only echo requests and ICMP error
        * messages; ses0 is non-NULL here, so p_value is still filled at out. */
3122   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
3123                     !icmp_is_error_message (icmp0)))
3124     {
3125       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3126       next0 = SNAT_IN2OUT_NEXT_DROP;
3127       goto out;
3128     }
3129
3130   u32 now = (u32) vlib_time_now (sm->vlib_main);
3131
       /* Refresh the session: mark it ICMP-active and push out its expiry. */
3132   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
3133   ses0->expire = now + sm->icmp_timeout;
3134
       /* Common exit: publish results through the out-parameters. */
3135 out:
3136   *p_proto = protocol;
3137   if (ses0)
3138     {
3139       p_value->addr = new_addr0;
3140       p_value->fib_index = sm->outside_fib_index;
3141       p_value->port = ses0->out.out_port;
3142     }
3143   *p_dont_translate = dont_translate;
3144   if (d)
3145     *(snat_det_session_t**)d = ses0;
3146   if (e)
3147     *(snat_det_map_t**)e = dm0;
3148   return next0;
3149 }
3150
3151 /**********************/
3152 /*** worker handoff ***/
3153 /**********************/
3154 static inline uword
3155 snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
3156                                       vlib_node_runtime_t * node,
3157                                       vlib_frame_t * frame,
3158                                       u8 is_output)
3159 {
3160   snat_main_t *sm = &snat_main;
3161   vlib_thread_main_t *tm = vlib_get_thread_main ();
3162   u32 n_left_from, *from, *to_next = 0;
3163   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
3164   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
3165     = 0;
3166   vlib_frame_queue_elt_t *hf = 0;
3167   vlib_frame_t *f = 0;
3168   int i;
3169   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
3170   u32 next_worker_index = 0;
3171   u32 current_worker_index = ~0;
3172   u32 thread_index = vlib_get_thread_index ();
3173   u32 fq_index;
3174   u32 to_node_index;
3175
3176   ASSERT (vec_len (sm->workers));
3177
3178   if (is_output)
3179     {
3180       fq_index = sm->fq_in2out_output_index;
3181       to_node_index = sm->in2out_output_node_index;
3182     }
3183   else
3184     {
3185       fq_index = sm->fq_in2out_index;
3186       to_node_index = sm->in2out_node_index;
3187     }
3188
3189   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
3190     {
3191       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
3192
3193       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
3194                                sm->first_worker_index + sm->num_workers - 1,
3195                                (vlib_frame_queue_t *) (~0));
3196     }
3197
3198   from = vlib_frame_vector_args (frame);
3199   n_left_from = frame->n_vectors;
3200
3201   while (n_left_from > 0)
3202     {
3203       u32 bi0;
3204       vlib_buffer_t *b0;
3205       u32 sw_if_index0;
3206       u32 rx_fib_index0;
3207       ip4_header_t * ip0;
3208       u8 do_handoff;
3209
3210       bi0 = from[0];
3211       from += 1;
3212       n_left_from -= 1;
3213
3214       b0 = vlib_get_buffer (vm, bi0);
3215
3216       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
3217       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3218
3219       ip0 = vlib_buffer_get_current (b0);
3220
3221       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
3222
3223       if (PREDICT_FALSE (next_worker_index != thread_index))
3224         {
3225           do_handoff = 1;
3226
3227           if (next_worker_index != current_worker_index)
3228             {
3229               if (hf)
3230                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3231
3232               hf = vlib_get_worker_handoff_queue_elt (fq_index,
3233                                                       next_worker_index,
3234                                                       handoff_queue_elt_by_worker_index);
3235
3236               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
3237               to_next_worker = &hf->buffer_index[hf->n_vectors];
3238               current_worker_index = next_worker_index;
3239             }
3240
3241           /* enqueue to correct worker thread */
3242           to_next_worker[0] = bi0;
3243           to_next_worker++;
3244           n_left_to_next_worker--;
3245
3246           if (n_left_to_next_worker == 0)
3247             {
3248               hf->n_vectors = VLIB_FRAME_SIZE;
3249               vlib_put_frame_queue_elt (hf);
3250               current_worker_index = ~0;
3251               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
3252               hf = 0;
3253             }
3254         }
3255       else
3256         {
3257           do_handoff = 0;
3258           /* if this is 1st frame */
3259           if (!f)
3260             {
3261               f = vlib_get_frame_to_node (vm, to_node_index);
3262               to_next = vlib_frame_vector_args (f);
3263             }
3264
3265           to_next[0] = bi0;
3266           to_next += 1;
3267           f->n_vectors++;
3268         }
3269
3270       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
3271                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3272         {
3273           snat_in2out_worker_handoff_trace_t *t =
3274             vlib_add_trace (vm, node, b0, sizeof (*t));
3275           t->next_worker_index = next_worker_index;
3276           t->do_handoff = do_handoff;
3277         }
3278     }
3279
3280   if (f)
3281     vlib_put_frame_to_node (vm, to_node_index, f);
3282
3283   if (hf)
3284     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3285
3286   /* Ship frames to the worker nodes */
3287   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
3288     {
3289       if (handoff_queue_elt_by_worker_index[i])
3290         {
3291           hf = handoff_queue_elt_by_worker_index[i];
3292           /*
3293            * It works better to let the handoff node
3294            * rate-adapt, always ship the handoff queue element.
3295            */
3296           if (1 || hf->n_vectors == hf->last_n_vectors)
3297             {
3298               vlib_put_frame_queue_elt (hf);
3299               handoff_queue_elt_by_worker_index[i] = 0;
3300             }
3301           else
3302             hf->last_n_vectors = hf->n_vectors;
3303         }
3304       congested_handoff_queue_by_worker_index[i] =
3305         (vlib_frame_queue_t *) (~0);
3306     }
3307   hf = 0;
3308   current_worker_index = ~0;
3309   return frame->n_vectors;
3310 }
3311
3312 static uword
3313 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
3314                                vlib_node_runtime_t * node,
3315                                vlib_frame_t * frame)
3316 {
3317   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
3318 }
3319
3320 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
3321   .function = snat_in2out_worker_handoff_fn,
3322   .name = "nat44-in2out-worker-handoff",
3323   .vector_size = sizeof (u32),
3324   .format_trace = format_snat_in2out_worker_handoff_trace,
3325   .type = VLIB_NODE_TYPE_INTERNAL,
3326
3327   .n_next_nodes = 1,
3328
3329   .next_nodes = {
3330     [0] = "error-drop",
3331   },
3332 };
3333
3334 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
3335                               snat_in2out_worker_handoff_fn);
3336
3337 static uword
3338 snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
3339                                       vlib_node_runtime_t * node,
3340                                       vlib_frame_t * frame)
3341 {
3342   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
3343 }
3344
3345 VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
3346   .function = snat_in2out_output_worker_handoff_fn,
3347   .name = "nat44-in2out-output-worker-handoff",
3348   .vector_size = sizeof (u32),
3349   .format_trace = format_snat_in2out_worker_handoff_trace,
3350   .type = VLIB_NODE_TYPE_INTERNAL,
3351
3352   .n_next_nodes = 1,
3353
3354   .next_nodes = {
3355     [0] = "error-drop",
3356   },
3357 };
3358
3359 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
3360                               snat_in2out_output_worker_handoff_fn);
3361
3362 static_always_inline int
3363 is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
3364 {
3365   snat_address_t * ap;
3366   clib_bihash_kv_8_8_t kv, value;
3367   snat_session_key_t m_key;
3368
3369   vec_foreach (ap, sm->addresses)
3370     {
3371       if (ap->addr.as_u32 == dst_addr->as_u32)
3372         return 1;
3373     }
3374
3375   m_key.addr.as_u32 = dst_addr->as_u32;
3376   m_key.fib_index = sm->outside_fib_index;
3377   m_key.port = 0;
3378   m_key.protocol = 0;
3379   kv.key = m_key.as_u64;
3380   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
3381     return 1;
3382
3383   return 0;
3384 }
3385
3386 static uword
3387 snat_hairpin_dst_fn (vlib_main_t * vm,
3388                      vlib_node_runtime_t * node,
3389                      vlib_frame_t * frame)
3390 {
3391   u32 n_left_from, * from, * to_next;
3392   snat_in2out_next_t next_index;
3393   u32 pkts_processed = 0;
3394   snat_main_t * sm = &snat_main;
3395
3396   from = vlib_frame_vector_args (frame);
3397   n_left_from = frame->n_vectors;
3398   next_index = node->cached_next_index;
3399
3400   while (n_left_from > 0)
3401     {
3402       u32 n_left_to_next;
3403
3404       vlib_get_next_frame (vm, node, next_index,
3405                            to_next, n_left_to_next);
3406
3407       while (n_left_from > 0 && n_left_to_next > 0)
3408         {
3409           u32 bi0;
3410           vlib_buffer_t * b0;
3411           u32 next0;
3412           ip4_header_t * ip0;
3413           u32 proto0;
3414
3415           /* speculatively enqueue b0 to the current next frame */
3416           bi0 = from[0];
3417           to_next[0] = bi0;
3418           from += 1;
3419           to_next += 1;
3420           n_left_from -= 1;
3421           n_left_to_next -= 1;
3422
3423           b0 = vlib_get_buffer (vm, bi0);
3424           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3425           ip0 = vlib_buffer_get_current (b0);
3426
3427           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3428
3429           vnet_buffer (b0)->snat.flags = 0;
3430           if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
3431             {
3432               if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
3433                 {
3434                   udp_header_t * udp0 = ip4_next_header (ip0);
3435                   tcp_header_t * tcp0 = (tcp_header_t *) udp0;
3436
3437                   snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3438                 }
3439               else if (proto0 == SNAT_PROTOCOL_ICMP)
3440                 {
3441                   icmp46_header_t * icmp0 = ip4_next_header (ip0);
3442
3443                   snat_icmp_hairpinning (sm, b0, ip0, icmp0);
3444                 }
3445               else
3446                 {
3447                   snat_hairpinning_unknown_proto (sm, b0, ip0);
3448                 }
3449
3450               vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
3451               clib_warning("is hairpinning");
3452             }
3453
3454           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3455
3456           /* verify speculative enqueue, maybe switch current next frame */
3457           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3458                                            to_next, n_left_to_next,
3459                                            bi0, next0);
3460          }
3461
3462       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3463     }
3464
3465   vlib_node_increment_counter (vm, snat_hairpin_dst_node.index,
3466                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3467                                pkts_processed);
3468   return frame->n_vectors;
3469 }
3470
3471 VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
3472   .function = snat_hairpin_dst_fn,
3473   .name = "nat44-hairpin-dst",
3474   .vector_size = sizeof (u32),
3475   .type = VLIB_NODE_TYPE_INTERNAL,
3476   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3477   .error_strings = snat_in2out_error_strings,
3478   .n_next_nodes = 2,
3479   .next_nodes = {
3480     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3481     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3482   },
3483 };
3484
3485 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node,
3486                               snat_hairpin_dst_fn);
3487
3488 static uword
3489 snat_hairpin_src_fn (vlib_main_t * vm,
3490                      vlib_node_runtime_t * node,
3491                      vlib_frame_t * frame)
3492 {
3493   u32 n_left_from, * from, * to_next;
3494   snat_in2out_next_t next_index;
3495   u32 pkts_processed = 0;
3496   snat_main_t *sm = &snat_main;
3497
3498   from = vlib_frame_vector_args (frame);
3499   n_left_from = frame->n_vectors;
3500   next_index = node->cached_next_index;
3501
3502   while (n_left_from > 0)
3503     {
3504       u32 n_left_to_next;
3505
3506       vlib_get_next_frame (vm, node, next_index,
3507                            to_next, n_left_to_next);
3508
3509       while (n_left_from > 0 && n_left_to_next > 0)
3510         {
3511           u32 bi0;
3512           vlib_buffer_t * b0;
3513           u32 next0;
3514           snat_interface_t *i;
3515           u32 sw_if_index0;
3516
3517           /* speculatively enqueue b0 to the current next frame */
3518           bi0 = from[0];
3519           to_next[0] = bi0;
3520           from += 1;
3521           to_next += 1;
3522           n_left_from -= 1;
3523           n_left_to_next -= 1;
3524
3525           b0 = vlib_get_buffer (vm, bi0);
3526           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3527           next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT;
3528
3529           pool_foreach (i, sm->output_feature_interfaces,
3530           ({
3531             /* Only packets from NAT inside interface */
3532             if ((nat_interface_is_inside(i)) && (sw_if_index0 == i->sw_if_index))
3533               {
3534                 if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) &
3535                                     SNAT_FLAG_HAIRPINNING))
3536                   {
3537                     if (PREDICT_TRUE (sm->num_workers > 1))
3538                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
3539                     else
3540                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
3541                   }
3542                 break;
3543               }
3544           }));
3545
3546           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3547
3548           /* verify speculative enqueue, maybe switch current next frame */
3549           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3550                                            to_next, n_left_to_next,
3551                                            bi0, next0);
3552          }
3553
3554       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3555     }
3556
3557   vlib_node_increment_counter (vm, snat_hairpin_src_node.index,
3558                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3559                                pkts_processed);
3560   return frame->n_vectors;
3561 }
3562
3563 VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
3564   .function = snat_hairpin_src_fn,
3565   .name = "nat44-hairpin-src",
3566   .vector_size = sizeof (u32),
3567   .type = VLIB_NODE_TYPE_INTERNAL,
3568   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3569   .error_strings = snat_in2out_error_strings,
3570   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
3571   .next_nodes = {
3572      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
3573      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-in2out-output",
3574      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
3575      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
3576   },
3577 };
3578
3579 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node,
3580                               snat_hairpin_src_fn);
3581
3582 static uword
3583 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
3584                                 vlib_node_runtime_t * node,
3585                                 vlib_frame_t * frame)
3586 {
3587   u32 n_left_from, * from, * to_next;
3588   snat_in2out_next_t next_index;
3589   u32 pkts_processed = 0;
3590   snat_main_t * sm = &snat_main;
3591   u32 stats_node_index;
3592
3593   stats_node_index = snat_in2out_fast_node.index;
3594
3595   from = vlib_frame_vector_args (frame);
3596   n_left_from = frame->n_vectors;
3597   next_index = node->cached_next_index;
3598
3599   while (n_left_from > 0)
3600     {
3601       u32 n_left_to_next;
3602
3603       vlib_get_next_frame (vm, node, next_index,
3604                            to_next, n_left_to_next);
3605
3606       while (n_left_from > 0 && n_left_to_next > 0)
3607         {
3608           u32 bi0;
3609           vlib_buffer_t * b0;
3610           u32 next0;
3611           u32 sw_if_index0;
3612           ip4_header_t * ip0;
3613           ip_csum_t sum0;
3614           u32 new_addr0, old_addr0;
3615           u16 old_port0, new_port0;
3616           udp_header_t * udp0;
3617           tcp_header_t * tcp0;
3618           icmp46_header_t * icmp0;
3619           snat_session_key_t key0, sm0;
3620           u32 proto0;
3621           u32 rx_fib_index0;
3622
3623           /* speculatively enqueue b0 to the current next frame */
3624           bi0 = from[0];
3625           to_next[0] = bi0;
3626           from += 1;
3627           to_next += 1;
3628           n_left_from -= 1;
3629           n_left_to_next -= 1;
3630
3631           b0 = vlib_get_buffer (vm, bi0);
3632           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3633
3634           ip0 = vlib_buffer_get_current (b0);
3635           udp0 = ip4_next_header (ip0);
3636           tcp0 = (tcp_header_t *) udp0;
3637           icmp0 = (icmp46_header_t *) udp0;
3638
3639           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3640           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3641
3642           if (PREDICT_FALSE(ip0->ttl == 1))
3643             {
3644               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3645               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3646                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3647                                            0);
3648               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3649               goto trace0;
3650             }
3651
3652           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3653
3654           if (PREDICT_FALSE (proto0 == ~0))
3655               goto trace0;
3656
3657           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3658             {
3659               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
3660                                   rx_fib_index0, node, next0, ~0, 0, 0);
3661               goto trace0;
3662             }
3663
3664           key0.addr = ip0->src_address;
3665           key0.protocol = proto0;
3666           key0.port = udp0->src_port;
3667           key0.fib_index = rx_fib_index0;
3668
3669           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0))
3670             {
3671               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3672               next0= SNAT_IN2OUT_NEXT_DROP;
3673               goto trace0;
3674             }
3675
3676           new_addr0 = sm0.addr.as_u32;
3677           new_port0 = sm0.port;
3678           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
3679           old_addr0 = ip0->src_address.as_u32;
3680           ip0->src_address.as_u32 = new_addr0;
3681
3682           sum0 = ip0->checksum;
3683           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3684                                  ip4_header_t,
3685                                  src_address /* changed member */);
3686           ip0->checksum = ip_csum_fold (sum0);
3687
3688           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
3689             {
3690               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3691                 {
3692                   old_port0 = tcp0->src_port;
3693                   tcp0->src_port = new_port0;
3694
3695                   sum0 = tcp0->checksum;
3696                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3697                                          ip4_header_t,
3698                                          dst_address /* changed member */);
3699                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
3700                                          ip4_header_t /* cheat */,
3701                                          length /* changed member */);
3702                   tcp0->checksum = ip_csum_fold(sum0);
3703                 }
3704               else
3705                 {
3706                   old_port0 = udp0->src_port;
3707                   udp0->src_port = new_port0;
3708                   udp0->checksum = 0;
3709                 }
3710             }
3711           else
3712             {
3713               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3714                 {
3715                   sum0 = tcp0->checksum;
3716                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3717                                          ip4_header_t,
3718                                          dst_address /* changed member */);
3719                   tcp0->checksum = ip_csum_fold(sum0);
3720                 }
3721             }
3722
3723           /* Hairpinning */
3724           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3725
3726         trace0:
3727           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3728                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3729             {
3730               snat_in2out_trace_t *t =
3731                  vlib_add_trace (vm, node, b0, sizeof (*t));
3732               t->sw_if_index = sw_if_index0;
3733               t->next_index = next0;
3734             }
3735
3736           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3737
3738           /* verify speculative enqueue, maybe switch current next frame */
3739           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3740                                            to_next, n_left_to_next,
3741                                            bi0, next0);
3742         }
3743
3744       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3745     }
3746
3747   vlib_node_increment_counter (vm, stats_node_index,
3748                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3749                                pkts_processed);
3750   return frame->n_vectors;
3751 }
3752
3753
3754 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
3755   .function = snat_in2out_fast_static_map_fn,
3756   .name = "nat44-in2out-fast",
3757   .vector_size = sizeof (u32),
3758   .format_trace = format_snat_in2out_fast_trace,
3759   .type = VLIB_NODE_TYPE_INTERNAL,
3760
3761   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3762   .error_strings = snat_in2out_error_strings,
3763
3764   .runtime_data_bytes = sizeof (snat_runtime_t),
3765
3766   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3767
3768   /* edit / add dispositions here */
3769   .next_nodes = {
3770     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3771     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3772     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
3773     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3774   },
3775 };
3776
3777 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);