NAT44: in2out output feature skip translation for already translated packets (VPP...
[vpp.git] / src / plugins / nat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <nat/nat.h>
25 #include <nat/nat_ipfix_logging.h>
26 #include <nat/nat_det.h>
27 #include <nat/nat_reass.h>
28
29 #include <vppinfra/hash.h>
30 #include <vppinfra/error.h>
31 #include <vppinfra/elog.h>
32
/* Trace record rendered by format_snat_in2out_trace() and
 * format_snat_in2out_fast_trace(). */
typedef struct {
  /* printed as "sw_if_index" */
  u32 sw_if_index;
  /* printed as "next index" */
  u32 next_index;
  /* printed as "session" (only by format_snat_in2out_trace) */
  u32 session_index;
  /* non-zero selects the SLOW_PATH tag, zero the FAST_PATH tag */
  u32 is_slow_path;
} snat_in2out_trace_t;
39
/* Trace record rendered by format_snat_in2out_worker_handoff_trace(). */
typedef struct {
  /* worker index printed at the end of the trace line */
  u32 next_worker_index;
  /* non-zero prints "next worker", zero prints "same worker" */
  u8 do_handoff;
} snat_in2out_worker_handoff_trace_t;
44
45 /* packet trace format function */
46 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
47 {
48   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
49   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
50   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
51   char * tag;
52
53   tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
54
55   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
56               t->sw_if_index, t->next_index, t->session_index);
57
58   return s;
59 }
60
61 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
62 {
63   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
64   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
65   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
66
67   s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
68               t->sw_if_index, t->next_index);
69
70   return s;
71 }
72
73 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
74 {
75   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
76   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
77   snat_in2out_worker_handoff_trace_t * t =
78     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
79   char * m;
80
81   m = t->do_handoff ? "next worker" : "same worker";
82   s = format (s, "NAT44_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
83
84   return s;
85 }
86
/* Trace record rendered by format_nat44_in2out_reass_trace(). */
typedef struct {
  /* printed as "sw_if_index" */
  u32 sw_if_index;
  /* printed as "next index" */
  u32 next_index;
  /* non-zero prints status "cached", zero prints "translated" */
  u8 cached;
} nat44_in2out_reass_trace_t;
92
93 static u8 * format_nat44_in2out_reass_trace (u8 * s, va_list * args)
94 {
95   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
96   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
97   nat44_in2out_reass_trace_t * t = va_arg (*args, nat44_in2out_reass_trace_t *);
98
99   s = format (s, "NAT44_IN2OUT_REASS: sw_if_index %d, next index %d, status %s",
100               t->sw_if_index, t->next_index,
101               t->cached ? "cached" : "translated");
102
103   return s;
104 }
105
/* Node registration objects, declared up front so that code in this file can
 * refer to them by name. Their initialisers are not part of this section. */
vlib_node_registration_t snat_in2out_node;
vlib_node_registration_t snat_in2out_slowpath_node;
vlib_node_registration_t snat_in2out_fast_node;
vlib_node_registration_t snat_in2out_worker_handoff_node;
vlib_node_registration_t snat_det_in2out_node;
vlib_node_registration_t snat_in2out_output_node;
vlib_node_registration_t snat_in2out_output_slowpath_node;
vlib_node_registration_t snat_in2out_output_worker_handoff_node;
vlib_node_registration_t snat_hairpin_dst_node;
vlib_node_registration_t snat_hairpin_src_node;
vlib_node_registration_t nat44_hairpinning_node;
vlib_node_registration_t nat44_in2out_reass_node;
118
119
/*
 * X-macro list of in2out error counters, one _(SYMBOL, "description") entry
 * per counter. It is expanded twice below: once into the
 * snat_in2out_error_t enumerators and once into snat_in2out_error_strings,
 * so both always stay in the same order.
 */
#define foreach_snat_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(IN2OUT_PACKETS, "Good in2out packets processed")      \
_(OUT_OF_PORTS, "Out of ports")                         \
_(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(NO_TRANSLATION, "No translation")                     \
_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")   \
_(DROP_FRAGMENT, "Drop fragment")                       \
_(MAX_REASS, "Maximum reassemblies exceeded")           \
_(MAX_FRAG, "Maximum fragments per reassembly exceeded")
131
/* Error codes generated from foreach_snat_in2out_error; the values are used
 * in this file as indices into node->errors[]. */
typedef enum {
#define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
  foreach_snat_in2out_error
#undef _
  /* number of error codes, not an error itself */
  SNAT_IN2OUT_N_ERROR,
} snat_in2out_error_t;
138
/* Human-readable descriptions generated from foreach_snat_in2out_error, in
 * the same order as the snat_in2out_error_t enumerators. */
static char * snat_in2out_error_strings[] = {
#define _(sym,string) string,
  foreach_snat_in2out_error
#undef _
};
144
/* Next-node indices used by the in2out helpers in this file (for example
 * slow_path() and the ICMP match callbacks return SNAT_IN2OUT_NEXT_DROP). */
typedef enum {
  SNAT_IN2OUT_NEXT_LOOKUP,
  SNAT_IN2OUT_NEXT_DROP,
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_NEXT_REASS,
  /* number of next indices, not a next node itself */
  SNAT_IN2OUT_N_NEXT,
} snat_in2out_next_t;
153
/* Next-node indices for hairpinning; presumably consumed by
 * snat_hairpin_src_node, which is outside this section -- TODO confirm. */
typedef enum {
  SNAT_HAIRPIN_SRC_NEXT_DROP,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
  SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
  /* number of next indices, not a next node itself */
  SNAT_HAIRPIN_SRC_N_NEXT,
} snat_hairpin_next_t;
161
162 /**
163  * @brief Check if packet should be translated
164  *
165  * Packets aimed at outside interface and external address with active session
166  * should be translated.
167  *
168  * @param sm            NAT main
169  * @param rt            NAT runtime data
170  * @param sw_if_index0  index of the inside interface
171  * @param ip0           IPv4 header
172  * @param proto0        NAT protocol
173  * @param rx_fib_index0 RX FIB index
174  *
175  * @returns 0 if packet should be translated otherwise 1
176  */
177 static inline int
178 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
179                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
180                          u32 rx_fib_index0)
181 {
182   if (sm->out2in_dpo)
183     return 0;
184
185   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
186   fib_prefix_t pfx = {
187     .fp_proto = FIB_PROTOCOL_IP4,
188     .fp_len = 32,
189     .fp_addr = {
190         .ip4.as_u32 = ip0->dst_address.as_u32,
191     },
192   };
193
194   /* Don't NAT packet aimed at the intfc address */
195   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
196                                       ip0->dst_address.as_u32)))
197     return 1;
198
199   fei = fib_table_lookup (rx_fib_index0, &pfx);
200   if (FIB_NODE_INDEX_INVALID != fei)
201     {
202       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
203       if (sw_if_index == ~0)
204         {
205           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
206           if (FIB_NODE_INDEX_INVALID != fei)
207             sw_if_index = fib_entry_get_resolving_interface (fei);
208         }
209       snat_interface_t *i;
210       pool_foreach (i, sm->interfaces,
211       ({
212         /* NAT packet aimed at outside interface */
213         if ((nat_interface_is_outside(i)) && (sw_if_index == i->sw_if_index))
214           return 0;
215       }));
216     }
217
218   return 1;
219 }
220
221 static inline int
222 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
223                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
224                     u32 rx_fib_index0, u32 thread_index)
225 {
226   udp_header_t * udp0 = ip4_next_header (ip0);
227   snat_session_key_t key0, sm0;
228   clib_bihash_kv_8_8_t kv0, value0;
229
230   key0.addr = ip0->dst_address;
231   key0.port = udp0->dst_port;
232   key0.protocol = proto0;
233   key0.fib_index = sm->outside_fib_index;
234   kv0.key = key0.as_u64;
235
236   /* NAT packet aimed at external address if */
237   /* has active sessions */
238   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
239                               &value0))
240     {
241       /* or is static mappings */
242       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
243         return 0;
244     }
245   else
246     return 0;
247
248   if (sm->forwarding_enabled)
249     return 1;
250
251   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
252                                  rx_fib_index0);
253 }
254
255 static inline int
256 nat_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip0,
257                                   u32 proto0, u32 thread_index)
258 {
259   udp_header_t * udp0 = ip4_next_header (ip0);
260   snat_session_key_t key0;
261   clib_bihash_kv_8_8_t kv0, value0;
262
263   key0.addr = ip0->src_address;
264   key0.port = udp0->src_port;
265   key0.protocol = proto0;
266   key0.fib_index = sm->outside_fib_index;
267   kv0.key = key0.as_u64;
268
269   if (!clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
270                               &value0))
271     return 1;
272
273   return 0;
274 }
275
/**
 * @brief Create a new NAT session for an in2out packet
 *
 * Gets or creates the NAT user for the packet's source address, picks the
 * outside address/port (static mapping first, dynamic allocation otherwise),
 * allocates or recycles a session, inserts it into the per-thread in2out and
 * out2in hash tables and logs the session creation.
 *
 * @param sm            NAT main
 * @param b0            buffer; b0->error is set on some drop paths
 * @param ip0           IPv4 header of the packet
 * @param rx_fib_index0 RX FIB index
 * @param key0          in2out key of the packet
 * @param sessionp      receives the new session on success
 * @param node          NAT node runtime (source of the error counters)
 * @param next0         next index to return on success
 * @param thread_index  thread owning the session tables
 *
 * @returns next0 on success, SNAT_IN2OUT_NEXT_DROP on failure
 */
static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
                      ip4_header_t * ip0,
                      u32 rx_fib_index0,
                      snat_session_key_t * key0,
                      snat_session_t ** sessionp,
                      vlib_node_runtime_t * node,
                      u32 next0,
                      u32 thread_index)
{
  snat_user_t *u;
  snat_session_t *s;
  clib_bihash_kv_8_8_t kv0;
  snat_session_key_t key1;
  /* stays ~0 when a static mapping is used instead of a dynamic address */
  u32 address_index = ~0;
  u32 outside_fib_index;
  uword * p;
  udp_header_t * udp0 = ip4_next_header (ip0);

  if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
    {
      b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
      nat_ipfix_logging_max_sessions(sm->max_translations);
      return SNAT_IN2OUT_NEXT_DROP;
    }

  /* Resolve the configured outside VRF id to a FIB index */
  p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
  if (! p)
    {
      b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
      return SNAT_IN2OUT_NEXT_DROP;
    }
  outside_fib_index = p[0];

  key1.protocol = key0->protocol;

  u = nat_user_get_or_create (sm, &ip0->src_address, rx_fib_index0,
                              thread_index);
  if (!u)
    {
      /* NOTE(review): no b0->error is set on this drop path */
      clib_warning ("create NAT user failed");
      return SNAT_IN2OUT_NEXT_DROP;
    }

  /* First try to match static mapping by local address and port */
  if (snat_static_mapping_match (sm, *key0, &key1, 0, 0, 0))
    {
      /* Try to create dynamic translation */
      if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index0,
                                               thread_index, &key1,
                                               &address_index,
                                               sm->port_per_thread,
                                               sm->per_thread_data[thread_index].snat_thread_index))
        {
          b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
          return SNAT_IN2OUT_NEXT_DROP;
        }
      u->nsessions++;
    }
  else
    {
      u->nstaticsessions++;
    }

  /* NOTE(review): the user counters above are bumped before the session is
   * allocated and are not rolled back if the allocation below fails --
   * confirm against nat_session_alloc_or_recycle() whether that is intended. */
  s = nat_session_alloc_or_recycle (sm, u, thread_index);
  if (!s)
    {
      clib_warning ("create NAT session failed");
      return SNAT_IN2OUT_NEXT_DROP;
    }

  if (address_index == ~0)
    s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
  s->outside_address_index = address_index;
  s->in2out = *key0;
  s->out2in = key1;
  s->out2in.protocol = key0->protocol;
  s->out2in.fib_index = outside_fib_index;
  s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
  s->ext_host_port = udp0->dst_port;
  *sessionp = s;

  /* Add to translation hashes */
  kv0.key = s->in2out.as_u64;
  /* hash value is the session's index in the per-thread session pool */
  kv0.value = s - sm->per_thread_data[thread_index].sessions;
  if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
                               1 /* is_add */))
      clib_warning ("in2out key add failed");

  kv0.key = s->out2in.as_u64;
  kv0.value = s - sm->per_thread_data[thread_index].sessions;

  if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
                               1 /* is_add */))
      clib_warning ("out2in key add failed");

  /* log NAT event */
  snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
                                      s->out2in.addr.as_u32,
                                      s->in2out.protocol,
                                      s->in2out.port,
                                      s->out2in.port,
                                      s->in2out.fib_index);
  return next0;
}
380
381 static_always_inline
382 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
383                                  snat_session_key_t *p_key0)
384 {
385   icmp46_header_t *icmp0;
386   snat_session_key_t key0;
387   icmp_echo_header_t *echo0, *inner_echo0 = 0;
388   ip4_header_t *inner_ip0 = 0;
389   void *l4_header = 0;
390   icmp46_header_t *inner_icmp0;
391
392   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
393   echo0 = (icmp_echo_header_t *)(icmp0+1);
394
395   if (!icmp_is_error_message (icmp0))
396     {
397       key0.protocol = SNAT_PROTOCOL_ICMP;
398       key0.addr = ip0->src_address;
399       key0.port = echo0->identifier;
400     }
401   else
402     {
403       inner_ip0 = (ip4_header_t *)(echo0+1);
404       l4_header = ip4_next_header (inner_ip0);
405       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
406       key0.addr = inner_ip0->dst_address;
407       switch (key0.protocol)
408         {
409         case SNAT_PROTOCOL_ICMP:
410           inner_icmp0 = (icmp46_header_t*)l4_header;
411           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
412           key0.port = inner_echo0->identifier;
413           break;
414         case SNAT_PROTOCOL_UDP:
415         case SNAT_PROTOCOL_TCP:
416           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
417           break;
418         default:
419           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
420         }
421     }
422   *p_key0 = key0;
423   return -1; /* success */
424 }
425
426 static_always_inline int
427 icmp_get_ed_key(ip4_header_t *ip0, nat_ed_ses_key_t *p_key0)
428 {
429   icmp46_header_t *icmp0;
430   nat_ed_ses_key_t key0;
431   icmp_echo_header_t *echo0, *inner_echo0 = 0;
432   ip4_header_t *inner_ip0 = 0;
433   void *l4_header = 0;
434   icmp46_header_t *inner_icmp0;
435
436   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
437   echo0 = (icmp_echo_header_t *)(icmp0+1);
438
439   if (!icmp_is_error_message (icmp0))
440     {
441       key0.proto = IP_PROTOCOL_ICMP;
442       key0.l_addr = ip0->src_address;
443       key0.r_addr = ip0->dst_address;
444       key0.l_port = key0.r_port = echo0->identifier;
445     }
446   else
447     {
448       inner_ip0 = (ip4_header_t *)(echo0+1);
449       l4_header = ip4_next_header (inner_ip0);
450       key0.proto = inner_ip0->protocol;
451       key0.r_addr = inner_ip0->src_address;
452       key0.l_addr = inner_ip0->dst_address;
453       switch (ip_proto_to_snat_proto (inner_ip0->protocol))
454         {
455         case SNAT_PROTOCOL_ICMP:
456           inner_icmp0 = (icmp46_header_t*)l4_header;
457           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
458           key0.r_port = key0.l_port = inner_echo0->identifier;
459           break;
460         case SNAT_PROTOCOL_UDP:
461         case SNAT_PROTOCOL_TCP:
462           key0.l_port = ((tcp_udp_header_t*)l4_header)->dst_port;
463           key0.r_port = ((tcp_udp_header_t*)l4_header)->src_port;
464           break;
465         default:
466           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
467         }
468     }
469   *p_key0 = key0;
470   return 0;
471 }
472
473 /**
474  * Get address and port values to be used for ICMP packet translation
475  * and create session if needed
476  *
477  * @param[in,out] sm             NAT main
478  * @param[in,out] node           NAT node runtime
479  * @param[in] thread_index       thread index
480  * @param[in,out] b0             buffer containing packet to be translated
481  * @param[out] p_proto           protocol used for matching
482  * @param[out] p_value           address and port after NAT translation
483  * @param[out] p_dont_translate  if packet should not be translated
484  * @param d                      optional parameter
485  * @param e                      optional parameter
486  */
487 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
488                            u32 thread_index, vlib_buffer_t *b0,
489                            ip4_header_t *ip0, u8 *p_proto,
490                            snat_session_key_t *p_value,
491                            u8 *p_dont_translate, void *d, void *e)
492 {
493   icmp46_header_t *icmp0;
494   u32 sw_if_index0;
495   u32 rx_fib_index0;
496   snat_session_key_t key0;
497   snat_session_t *s0 = 0;
498   u8 dont_translate = 0;
499   clib_bihash_kv_8_8_t kv0, value0;
500   u32 next0 = ~0;
501   int err;
502
503   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
504   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
505   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
506
507   err = icmp_get_key (ip0, &key0);
508   if (err != -1)
509     {
510       b0->error = node->errors[err];
511       next0 = SNAT_IN2OUT_NEXT_DROP;
512       goto out;
513     }
514   key0.fib_index = rx_fib_index0;
515
516   kv0.key = key0.as_u64;
517
518   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
519                               &value0))
520     {
521       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] != ~0)
522         {
523           if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
524               ip0, IP_PROTOCOL_ICMP, thread_index)))
525             {
526               dont_translate = 1;
527               goto out;
528             }
529         }
530       else
531         {
532           if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
533               ip0, IP_PROTOCOL_ICMP, rx_fib_index0, thread_index)))
534             {
535               dont_translate = 1;
536               goto out;
537             }
538         }
539
540       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
541         {
542           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
543           next0 = SNAT_IN2OUT_NEXT_DROP;
544           goto out;
545         }
546
547       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
548                          &s0, node, next0, thread_index);
549
550       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
551         goto out;
552     }
553   else
554     {
555       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
556                         icmp0->type != ICMP4_echo_reply &&
557                         !icmp_is_error_message (icmp0)))
558         {
559           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
560           next0 = SNAT_IN2OUT_NEXT_DROP;
561           goto out;
562         }
563
564       if (PREDICT_FALSE (value0.value == ~0ULL))
565         {
566           nat_ed_ses_key_t key;
567           clib_bihash_kv_16_8_t s_kv, s_value;
568
569           key.as_u64[0] = 0;
570           key.as_u64[1] = 0;
571           if (icmp_get_ed_key (ip0, &key))
572             {
573               b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
574               next0 = SNAT_IN2OUT_NEXT_DROP;
575               goto out;
576             }
577           key.fib_index = rx_fib_index0;
578           s_kv.key[0] = key.as_u64[0];
579           s_kv.key[1] = key.as_u64[1];
580           if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
581             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
582                                     s_value.value);
583           else
584            {
585               next0 = SNAT_IN2OUT_NEXT_DROP;
586               goto out;
587            }
588         }
589       else
590         s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
591                                 value0.value);
592     }
593
594 out:
595   *p_proto = key0.protocol;
596   if (s0)
597     *p_value = s0->out2in;
598   *p_dont_translate = dont_translate;
599   if (d)
600     *(snat_session_t**)d = s0;
601   return next0;
602 }
603
604 /**
605  * Get address and port values to be used for ICMP packet translation
606  *
607  * @param[in] sm                 NAT main
608  * @param[in,out] node           NAT node runtime
609  * @param[in] thread_index       thread index
610  * @param[in,out] b0             buffer containing packet to be translated
611  * @param[out] p_proto           protocol used for matching
612  * @param[out] p_value           address and port after NAT translation
613  * @param[out] p_dont_translate  if packet should not be translated
614  * @param d                      optional parameter
615  * @param e                      optional parameter
616  */
617 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
618                            u32 thread_index, vlib_buffer_t *b0,
619                            ip4_header_t *ip0, u8 *p_proto,
620                            snat_session_key_t *p_value,
621                            u8 *p_dont_translate, void *d, void *e)
622 {
623   icmp46_header_t *icmp0;
624   u32 sw_if_index0;
625   u32 rx_fib_index0;
626   snat_session_key_t key0;
627   snat_session_key_t sm0;
628   u8 dont_translate = 0;
629   u8 is_addr_only;
630   u32 next0 = ~0;
631   int err;
632
633   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
634   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
635   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
636
637   err = icmp_get_key (ip0, &key0);
638   if (err != -1)
639     {
640       b0->error = node->errors[err];
641       next0 = SNAT_IN2OUT_NEXT_DROP;
642       goto out2;
643     }
644   key0.fib_index = rx_fib_index0;
645
646   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only, 0))
647     {
648       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
649           IP_PROTOCOL_ICMP, rx_fib_index0)))
650         {
651           dont_translate = 1;
652           goto out;
653         }
654
655       if (icmp_is_error_message (icmp0))
656         {
657           next0 = SNAT_IN2OUT_NEXT_DROP;
658           goto out;
659         }
660
661       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
662       next0 = SNAT_IN2OUT_NEXT_DROP;
663       goto out;
664     }
665
666   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
667                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
668                     !icmp_is_error_message (icmp0)))
669     {
670       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
671       next0 = SNAT_IN2OUT_NEXT_DROP;
672       goto out;
673     }
674
675 out:
676   *p_value = sm0;
677 out2:
678   *p_proto = key0.protocol;
679   *p_dont_translate = dont_translate;
680   return next0;
681 }
682
683 static inline u32 icmp_in2out (snat_main_t *sm,
684                                vlib_buffer_t * b0,
685                                ip4_header_t * ip0,
686                                icmp46_header_t * icmp0,
687                                u32 sw_if_index0,
688                                u32 rx_fib_index0,
689                                vlib_node_runtime_t * node,
690                                u32 next0,
691                                u32 thread_index,
692                                void *d,
693                                void *e)
694 {
695   snat_session_key_t sm0;
696   u8 protocol;
697   icmp_echo_header_t *echo0, *inner_echo0 = 0;
698   ip4_header_t *inner_ip0;
699   void *l4_header = 0;
700   icmp46_header_t *inner_icmp0;
701   u8 dont_translate;
702   u32 new_addr0, old_addr0;
703   u16 old_id0, new_id0;
704   ip_csum_t sum0;
705   u16 checksum0;
706   u32 next0_tmp;
707
708   echo0 = (icmp_echo_header_t *)(icmp0+1);
709
710   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0, ip0,
711                                        &protocol, &sm0, &dont_translate, d, e);
712   if (next0_tmp != ~0)
713     next0 = next0_tmp;
714   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
715     goto out;
716
717   sum0 = ip_incremental_checksum (0, icmp0,
718                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
719   checksum0 = ~ip_csum_fold (sum0);
720   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
721     {
722       next0 = SNAT_IN2OUT_NEXT_DROP;
723       goto out;
724     }
725
726   old_addr0 = ip0->src_address.as_u32;
727   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
728   if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
729     vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
730
731   sum0 = ip0->checksum;
732   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
733                          src_address /* changed member */);
734   ip0->checksum = ip_csum_fold (sum0);
735
736   if (!icmp_is_error_message (icmp0))
737     {
738       new_id0 = sm0.port;
739       if (PREDICT_FALSE(new_id0 != echo0->identifier))
740         {
741           old_id0 = echo0->identifier;
742           new_id0 = sm0.port;
743           echo0->identifier = new_id0;
744
745           sum0 = icmp0->checksum;
746           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
747                                  identifier);
748           icmp0->checksum = ip_csum_fold (sum0);
749         }
750     }
751   else
752     {
753       inner_ip0 = (ip4_header_t *)(echo0+1);
754       l4_header = ip4_next_header (inner_ip0);
755
756       if (!ip4_header_checksum_is_valid (inner_ip0))
757         {
758           next0 = SNAT_IN2OUT_NEXT_DROP;
759           goto out;
760         }
761
762       old_addr0 = inner_ip0->dst_address.as_u32;
763       inner_ip0->dst_address = sm0.addr;
764       new_addr0 = inner_ip0->dst_address.as_u32;
765
766       sum0 = icmp0->checksum;
767       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
768                              dst_address /* changed member */);
769       icmp0->checksum = ip_csum_fold (sum0);
770
771       switch (protocol)
772         {
773           case SNAT_PROTOCOL_ICMP:
774             inner_icmp0 = (icmp46_header_t*)l4_header;
775             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
776
777             old_id0 = inner_echo0->identifier;
778             new_id0 = sm0.port;
779             inner_echo0->identifier = new_id0;
780
781             sum0 = icmp0->checksum;
782             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
783                                    identifier);
784             icmp0->checksum = ip_csum_fold (sum0);
785             break;
786           case SNAT_PROTOCOL_UDP:
787           case SNAT_PROTOCOL_TCP:
788             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
789             new_id0 = sm0.port;
790             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
791
792             sum0 = icmp0->checksum;
793             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
794                                    dst_port);
795             icmp0->checksum = ip_csum_fold (sum0);
796             break;
797           default:
798             ASSERT(0);
799         }
800     }
801
802 out:
803   return next0;
804 }
805
806 /**
807  * @brief Hairpinning
808  *
809  * Hairpinning allows two endpoints on the internal side of the NAT to
810  * communicate even if they only use each other's external IP addresses
811  * and ports.
812  *
813  * @param sm     NAT main.
814  * @param b0     Vlib buffer.
815  * @param ip0    IP header.
816  * @param udp0   UDP header.
817  * @param tcp0   TCP header.
818  * @param proto0 NAT protocol.
819  */
820 static inline int
821 snat_hairpinning (snat_main_t *sm,
822                   vlib_buffer_t * b0,
823                   ip4_header_t * ip0,
824                   udp_header_t * udp0,
825                   tcp_header_t * tcp0,
826                   u32 proto0)
827 {
828   snat_session_key_t key0, sm0;
829   snat_session_t * s0;
830   clib_bihash_kv_8_8_t kv0, value0;
831   ip_csum_t sum0;
832   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
833   u16 new_dst_port0, old_dst_port0;
834
835   key0.addr = ip0->dst_address;
836   key0.port = udp0->dst_port;
837   key0.protocol = proto0;
838   key0.fib_index = sm->outside_fib_index;
839   kv0.key = key0.as_u64;
840
841   /* Check if destination is static mappings */
842   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
843     {
844       new_dst_addr0 = sm0.addr.as_u32;
845       new_dst_port0 = sm0.port;
846       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
847     }
848   /* or active session */
849   else
850     {
851       if (sm->num_workers > 1)
852         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
853       else
854         ti = sm->num_workers;
855
856       if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
857         {
858           si = value0.value;
859
860           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
861           new_dst_addr0 = s0->in2out.addr.as_u32;
862           new_dst_port0 = s0->in2out.port;
863           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
864         }
865     }
866
867   /* Destination is behind the same NAT, use internal address and port */
868   if (new_dst_addr0)
869     {
870       old_dst_addr0 = ip0->dst_address.as_u32;
871       ip0->dst_address.as_u32 = new_dst_addr0;
872       sum0 = ip0->checksum;
873       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
874                              ip4_header_t, dst_address);
875       ip0->checksum = ip_csum_fold (sum0);
876
877       old_dst_port0 = tcp0->dst;
878       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
879         {
880           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
881             {
882               tcp0->dst = new_dst_port0;
883               sum0 = tcp0->checksum;
884               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
885                                      ip4_header_t, dst_address);
886               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
887                                      ip4_header_t /* cheat */, length);
888               tcp0->checksum = ip_csum_fold(sum0);
889             }
890           else
891             {
892               udp0->dst_port = new_dst_port0;
893               udp0->checksum = 0;
894             }
895         }
896       else
897         {
898           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
899             {
900               sum0 = tcp0->checksum;
901               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
902                                      ip4_header_t, dst_address);
903               tcp0->checksum = ip_csum_fold(sum0);
904             }
905         }
906       return 1;
907     }
908   return 0;
909 }
910
911 static inline void
912 snat_icmp_hairpinning (snat_main_t *sm,
913                        vlib_buffer_t * b0,
914                        ip4_header_t * ip0,
915                        icmp46_header_t * icmp0)
916 {
917   snat_session_key_t key0, sm0;
918   clib_bihash_kv_8_8_t kv0, value0;
919   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
920   ip_csum_t sum0;
921   snat_session_t *s0;
922
923   if (!icmp_is_error_message (icmp0))
924     {
925       icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
926       u16 icmp_id0 = echo0->identifier;
927       key0.addr = ip0->dst_address;
928       key0.port = icmp_id0;
929       key0.protocol = SNAT_PROTOCOL_ICMP;
930       key0.fib_index = sm->outside_fib_index;
931       kv0.key = key0.as_u64;
932
933       if (sm->num_workers > 1)
934         ti = (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread;
935       else
936         ti = sm->num_workers;
937
938       /* Check if destination is in active sessions */
939       if (clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0,
940                                   &value0))
941         {
942           /* or static mappings */
943           if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
944             {
945               new_dst_addr0 = sm0.addr.as_u32;
946               vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
947             }
948         }
949       else
950         {
951           si = value0.value;
952
953           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
954           new_dst_addr0 = s0->in2out.addr.as_u32;
955           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
956           echo0->identifier = s0->in2out.port;
957           sum0 = icmp0->checksum;
958           sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
959                                  icmp_echo_header_t, identifier);
960           icmp0->checksum = ip_csum_fold (sum0);
961         }
962
963       /* Destination is behind the same NAT, use internal address and port */
964       if (new_dst_addr0)
965         {
966           old_dst_addr0 = ip0->dst_address.as_u32;
967           ip0->dst_address.as_u32 = new_dst_addr0;
968           sum0 = ip0->checksum;
969           sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
970                                  ip4_header_t, dst_address);
971           ip0->checksum = ip_csum_fold (sum0);
972         }
973     }
974
975 }
976
977 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
978                                          vlib_buffer_t * b0,
979                                          ip4_header_t * ip0,
980                                          icmp46_header_t * icmp0,
981                                          u32 sw_if_index0,
982                                          u32 rx_fib_index0,
983                                          vlib_node_runtime_t * node,
984                                          u32 next0,
985                                          f64 now,
986                                          u32 thread_index,
987                                          snat_session_t ** p_s0)
988 {
989   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
990                       next0, thread_index, p_s0, 0);
991   snat_session_t * s0 = *p_s0;
992   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
993     {
994       /* Hairpinning */
995       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == 0)
996         snat_icmp_hairpinning(sm, b0, ip0, icmp0);
997       /* Accounting */
998       s0->last_heard = now;
999       s0->total_pkts++;
1000       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
1001       /* Per-user LRU list maintenance */
1002       clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1003                          s0->per_user_index);
1004       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1005                           s0->per_user_list_head_index,
1006                           s0->per_user_index);
1007     }
1008   return next0;
1009 }
1010 static inline void
1011 snat_hairpinning_unknown_proto (snat_main_t *sm,
1012                                 vlib_buffer_t * b,
1013                                 ip4_header_t * ip)
1014 {
1015   u32 old_addr, new_addr = 0, ti = 0;
1016   clib_bihash_kv_8_8_t kv, value;
1017   clib_bihash_kv_16_8_t s_kv, s_value;
1018   nat_ed_ses_key_t key;
1019   snat_session_key_t m_key;
1020   snat_static_mapping_t *m;
1021   ip_csum_t sum;
1022   snat_session_t *s;
1023
1024   old_addr = ip->dst_address.as_u32;
1025   key.l_addr.as_u32 = ip->dst_address.as_u32;
1026   key.r_addr.as_u32 = ip->src_address.as_u32;
1027   key.fib_index = sm->outside_fib_index;
1028   key.proto = ip->protocol;
1029   key.r_port = 0;
1030   key.l_port = 0;
1031   s_kv.key[0] = key.as_u64[0];
1032   s_kv.key[1] = key.as_u64[1];
1033   if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1034     {
1035       m_key.addr = ip->dst_address;
1036       m_key.fib_index = sm->outside_fib_index;
1037       m_key.port = 0;
1038       m_key.protocol = 0;
1039       kv.key = m_key.as_u64;
1040       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1041         return;
1042
1043       m = pool_elt_at_index (sm->static_mappings, value.value);
1044       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1045         vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
1046       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
1047     }
1048   else
1049     {
1050       if (sm->num_workers > 1)
1051         ti = sm->worker_out2in_cb (ip, sm->outside_fib_index);
1052       else
1053         ti = sm->num_workers;
1054
1055       s = pool_elt_at_index (sm->per_thread_data[ti].sessions, s_value.value);
1056       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1057         vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
1058       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
1059     }
1060   sum = ip->checksum;
1061   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
1062   ip->checksum = ip_csum_fold (sum);
1063 }
1064
1065 static snat_session_t *
1066 snat_in2out_unknown_proto (snat_main_t *sm,
1067                            vlib_buffer_t * b,
1068                            ip4_header_t * ip,
1069                            u32 rx_fib_index,
1070                            u32 thread_index,
1071                            f64 now,
1072                            vlib_main_t * vm,
1073                            vlib_node_runtime_t * node)
1074 {
1075   clib_bihash_kv_8_8_t kv, value;
1076   clib_bihash_kv_16_8_t s_kv, s_value;
1077   snat_static_mapping_t *m;
1078   snat_session_key_t m_key;
1079   u32 old_addr, new_addr = 0;
1080   ip_csum_t sum;
1081   snat_user_t *u;
1082   dlist_elt_t *head, *elt;
1083   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1084   u32 elt_index, head_index, ses_index;
1085   snat_session_t * s;
1086   nat_ed_ses_key_t key;
1087   u32 address_index = ~0;
1088   int i;
1089   u8 is_sm = 0;
1090
1091   old_addr = ip->src_address.as_u32;
1092
1093   key.l_addr = ip->src_address;
1094   key.r_addr = ip->dst_address;
1095   key.fib_index = rx_fib_index;
1096   key.proto = ip->protocol;
1097   key.l_port = 0;
1098   key.l_port = 0;
1099   s_kv.key[0] = key.as_u64[0];
1100   s_kv.key[1] = key.as_u64[1];
1101
1102   if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
1103     {
1104       s = pool_elt_at_index (tsm->sessions, s_value.value);
1105       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1106     }
1107   else
1108     {
1109       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
1110         {
1111           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
1112           nat_ipfix_logging_max_sessions(sm->max_translations);
1113           return 0;
1114         }
1115
1116       u = nat_user_get_or_create (sm, &ip->src_address, rx_fib_index,
1117                                   thread_index);
1118       if (!u)
1119         {
1120           clib_warning ("create NAT user failed");
1121           return 0;
1122         }
1123
1124       m_key.addr = ip->src_address;
1125       m_key.port = 0;
1126       m_key.protocol = 0;
1127       m_key.fib_index = rx_fib_index;
1128       kv.key = m_key.as_u64;
1129
1130       /* Try to find static mapping first */
1131       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
1132         {
1133           m = pool_elt_at_index (sm->static_mappings, value.value);
1134           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
1135           is_sm = 1;
1136           goto create_ses;
1137         }
1138       /* Fallback to 3-tuple key */
1139       else
1140         {
1141           /* Choose same out address as for TCP/UDP session to same destination */
1142           if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1143             {
1144               head_index = u->sessions_per_user_list_head_index;
1145               head = pool_elt_at_index (tsm->list_pool, head_index);
1146               elt_index = head->next;
1147               elt = pool_elt_at_index (tsm->list_pool, elt_index);
1148               ses_index = elt->value;
1149               while (ses_index != ~0)
1150                 {
1151                   s =  pool_elt_at_index (tsm->sessions, ses_index);
1152                   elt_index = elt->next;
1153                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1154                   ses_index = elt->value;
1155
1156                   if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
1157                     {
1158                       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1159                       address_index = s->outside_address_index;
1160
1161                       key.fib_index = sm->outside_fib_index;
1162                       key.l_addr.as_u32 = new_addr;
1163                       s_kv.key[0] = key.as_u64[0];
1164                       s_kv.key[1] = key.as_u64[1];
1165                       if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1166                         break;
1167
1168                       goto create_ses;
1169                     }
1170                 }
1171             }
1172           key.fib_index = sm->outside_fib_index;
1173           for (i = 0; i < vec_len (sm->addresses); i++)
1174             {
1175               key.l_addr.as_u32 = sm->addresses[i].addr.as_u32;
1176               s_kv.key[0] = key.as_u64[0];
1177               s_kv.key[1] = key.as_u64[1];
1178               if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1179                 {
1180                   new_addr = ip->src_address.as_u32 = key.l_addr.as_u32;
1181                   address_index = i;
1182                   goto create_ses;
1183                 }
1184             }
1185           return 0;
1186         }
1187
1188 create_ses:
1189       s = nat_session_alloc_or_recycle (sm, u, thread_index);
1190       if (!s)
1191         {
1192           clib_warning ("create NAT session failed");
1193           return 0;
1194         }
1195
1196       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1197       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
1198       s->outside_address_index = address_index;
1199       s->out2in.addr.as_u32 = new_addr;
1200       s->out2in.fib_index = sm->outside_fib_index;
1201       s->in2out.addr.as_u32 = old_addr;
1202       s->in2out.fib_index = rx_fib_index;
1203       s->in2out.port = s->out2in.port = ip->protocol;
1204       if (is_sm)
1205         {
1206           u->nstaticsessions++;
1207           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1208         }
1209       else
1210         {
1211           u->nsessions++;
1212         }
1213
1214       /* Add to lookup tables */
1215       key.l_addr.as_u32 = old_addr;
1216       key.r_addr = ip->dst_address;
1217       key.proto = ip->protocol;
1218       key.fib_index = rx_fib_index;
1219       s_kv.key[0] = key.as_u64[0];
1220       s_kv.key[1] = key.as_u64[1];
1221       s_kv.value = s - tsm->sessions;
1222       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1223         clib_warning ("in2out key add failed");
1224
1225       key.l_addr.as_u32 = new_addr;
1226       key.fib_index = sm->outside_fib_index;
1227       s_kv.key[0] = key.as_u64[0];
1228       s_kv.key[1] = key.as_u64[1];
1229       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1230         clib_warning ("out2in key add failed");
1231   }
1232
1233   /* Update IP checksum */
1234   sum = ip->checksum;
1235   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1236   ip->checksum = ip_csum_fold (sum);
1237
1238   /* Accounting */
1239   s->last_heard = now;
1240   s->total_pkts++;
1241   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1242   /* Per-user LRU list maintenance */
1243   clib_dlist_remove (tsm->list_pool, s->per_user_index);
1244   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1245                       s->per_user_index);
1246
1247   /* Hairpinning */
1248   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1249     snat_hairpinning_unknown_proto(sm, b, ip);
1250
1251   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1252     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1253
1254   return s;
1255 }
1256
1257 static snat_session_t *
1258 snat_in2out_lb (snat_main_t *sm,
1259                 vlib_buffer_t * b,
1260                 ip4_header_t * ip,
1261                 u32 rx_fib_index,
1262                 u32 thread_index,
1263                 f64 now,
1264                 vlib_main_t * vm,
1265                 vlib_node_runtime_t * node)
1266 {
1267   nat_ed_ses_key_t key;
1268   clib_bihash_kv_16_8_t s_kv, s_value;
1269   udp_header_t *udp = ip4_next_header (ip);
1270   tcp_header_t *tcp = (tcp_header_t *) udp;
1271   snat_session_t *s = 0;
1272   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1273   u32 old_addr, new_addr;
1274   u16 new_port, old_port;
1275   ip_csum_t sum;
1276   u32 proto = ip_proto_to_snat_proto (ip->protocol);
1277   snat_session_key_t e_key, l_key;
1278   snat_user_t *u;
1279
1280   old_addr = ip->src_address.as_u32;
1281
1282   key.l_addr = ip->src_address;
1283   key.r_addr = ip->dst_address;
1284   key.fib_index = rx_fib_index;
1285   key.proto = ip->protocol;
1286   key.r_port = udp->dst_port;
1287   key.l_port = udp->src_port;
1288   s_kv.key[0] = key.as_u64[0];
1289   s_kv.key[1] = key.as_u64[1];
1290
1291   if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
1292     {
1293       s = pool_elt_at_index (tsm->sessions, s_value.value);
1294     }
1295   else
1296     {
1297       if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index)))
1298         {
1299           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
1300           nat_ipfix_logging_max_sessions(sm->max_translations);
1301           return 0;
1302         }
1303
1304       l_key.addr = ip->src_address;
1305       l_key.port = udp->src_port;
1306       l_key.protocol = proto;
1307       l_key.fib_index = rx_fib_index;
1308       if (snat_static_mapping_match(sm, l_key, &e_key, 0, 0, 0))
1309         return 0;
1310
1311       u = nat_user_get_or_create (sm, &ip->src_address, rx_fib_index,
1312                                   thread_index);
1313       if (!u)
1314         {
1315           clib_warning ("create NAT user failed");
1316           return 0;
1317         }
1318
1319       s = nat_session_alloc_or_recycle (sm, u, thread_index);
1320       if (!s)
1321         {
1322           clib_warning ("create NAT session failed");
1323           return 0;
1324         }
1325
1326       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1327       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1328       s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
1329       s->outside_address_index = ~0;
1330       s->in2out = l_key;
1331       s->out2in = e_key;
1332       u->nstaticsessions++;
1333
1334       /* Add to lookup tables */
1335       s_kv.value = s - tsm->sessions;
1336       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1337         clib_warning ("in2out-ed key add failed");
1338
1339       key.l_addr = e_key.addr;
1340       key.fib_index = e_key.fib_index;
1341       key.l_port = e_key.port;
1342       s_kv.key[0] = key.as_u64[0];
1343       s_kv.key[1] = key.as_u64[1];
1344       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1345         clib_warning ("out2in-ed key add failed");
1346     }
1347
1348   new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1349
1350   /* Update IP checksum */
1351   sum = ip->checksum;
1352   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1353   if (is_twice_nat_session (s))
1354     sum = ip_csum_update (sum, ip->dst_address.as_u32,
1355                           s->ext_host_addr.as_u32, ip4_header_t, dst_address);
1356   ip->checksum = ip_csum_fold (sum);
1357
1358   if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
1359     {
1360       old_port = tcp->src_port;
1361       tcp->src_port = s->out2in.port;
1362       new_port = tcp->src_port;
1363
1364       sum = tcp->checksum;
1365       sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1366       sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
1367       if (is_twice_nat_session (s))
1368         {
1369           sum = ip_csum_update (sum, ip->dst_address.as_u32,
1370                                 s->ext_host_addr.as_u32, ip4_header_t,
1371                                 dst_address);
1372           sum = ip_csum_update (sum, tcp->dst_port, s->ext_host_port,
1373                                 ip4_header_t, length);
1374           tcp->dst_port = s->ext_host_port;
1375           ip->dst_address.as_u32 = s->ext_host_addr.as_u32;
1376         }
1377       tcp->checksum = ip_csum_fold(sum);
1378     }
1379   else
1380     {
1381       udp->src_port = s->out2in.port;
1382       if (is_twice_nat_session (s))
1383         {
1384           udp->dst_port = s->ext_host_port;
1385           ip->dst_address.as_u32 = s->ext_host_addr.as_u32;
1386         }
1387       udp->checksum = 0;
1388     }
1389
1390   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1391     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1392
1393   /* Accounting */
1394   s->last_heard = now;
1395   s->total_pkts++;
1396   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1397   /* Per-user LRU list maintenance */
1398   clib_dlist_remove (tsm->list_pool, s->per_user_index);
1399   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1400                       s->per_user_index);
1401   return s;
1402 }
1403
1404 static inline uword
1405 snat_in2out_node_fn_inline (vlib_main_t * vm,
1406                             vlib_node_runtime_t * node,
1407                             vlib_frame_t * frame, int is_slow_path,
1408                             int is_output_feature)
1409 {
1410   u32 n_left_from, * from, * to_next;
1411   snat_in2out_next_t next_index;
1412   u32 pkts_processed = 0;
1413   snat_main_t * sm = &snat_main;
1414   f64 now = vlib_time_now (vm);
1415   u32 stats_node_index;
1416   u32 thread_index = vlib_get_thread_index ();
1417
1418   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
1419     snat_in2out_node.index;
1420
1421   from = vlib_frame_vector_args (frame);
1422   n_left_from = frame->n_vectors;
1423   next_index = node->cached_next_index;
1424
1425   while (n_left_from > 0)
1426     {
1427       u32 n_left_to_next;
1428
1429       vlib_get_next_frame (vm, node, next_index,
1430                            to_next, n_left_to_next);
1431
1432       while (n_left_from >= 4 && n_left_to_next >= 2)
1433         {
1434           u32 bi0, bi1;
1435           vlib_buffer_t * b0, * b1;
1436           u32 next0, next1;
1437           u32 sw_if_index0, sw_if_index1;
1438           ip4_header_t * ip0, * ip1;
1439           ip_csum_t sum0, sum1;
1440           u32 new_addr0, old_addr0, new_addr1, old_addr1;
1441           u16 old_port0, new_port0, old_port1, new_port1;
1442           udp_header_t * udp0, * udp1;
1443           tcp_header_t * tcp0, * tcp1;
1444           icmp46_header_t * icmp0, * icmp1;
1445           snat_session_key_t key0, key1;
1446           u32 rx_fib_index0, rx_fib_index1;
1447           u32 proto0, proto1;
1448           snat_session_t * s0 = 0, * s1 = 0;
1449           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1450           u32 iph_offset0 = 0, iph_offset1 = 0;
1451
1452           /* Prefetch next iteration. */
1453           {
1454             vlib_buffer_t * p2, * p3;
1455
1456             p2 = vlib_get_buffer (vm, from[2]);
1457             p3 = vlib_get_buffer (vm, from[3]);
1458
1459             vlib_prefetch_buffer_header (p2, LOAD);
1460             vlib_prefetch_buffer_header (p3, LOAD);
1461
1462             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1463             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1464           }
1465
1466           /* speculatively enqueue b0 and b1 to the current next frame */
1467           to_next[0] = bi0 = from[0];
1468           to_next[1] = bi1 = from[1];
1469           from += 2;
1470           to_next += 2;
1471           n_left_from -= 2;
1472           n_left_to_next -= 2;
1473
1474           b0 = vlib_get_buffer (vm, bi0);
1475           b1 = vlib_get_buffer (vm, bi1);
1476
1477           if (is_output_feature)
1478             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1479
1480           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1481                  iph_offset0);
1482
1483           udp0 = ip4_next_header (ip0);
1484           tcp0 = (tcp_header_t *) udp0;
1485           icmp0 = (icmp46_header_t *) udp0;
1486
1487           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1488           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1489                                    sw_if_index0);
1490
1491           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1492
1493           if (PREDICT_FALSE(ip0->ttl == 1))
1494             {
1495               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1496               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1497                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1498                                            0);
1499               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1500               goto trace00;
1501             }
1502
1503           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1504
1505           /* Next configured feature, probably ip4-lookup */
1506           if (is_slow_path)
1507             {
1508               if (PREDICT_FALSE (proto0 == ~0))
1509                 {
1510                   s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1511                                                   thread_index, now, vm, node);
1512                   if (!s0)
1513                     next0 = SNAT_IN2OUT_NEXT_DROP;
1514                   goto trace00;
1515                 }
1516
1517               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1518                 {
1519                   next0 = icmp_in2out_slow_path
1520                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
1521                      node, next0, now, thread_index, &s0);
1522                   goto trace00;
1523                 }
1524             }
1525           else
1526             {
1527               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1528                 {
1529                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1530                   goto trace00;
1531                 }
1532
1533               if (ip4_is_fragment (ip0))
1534                 {
1535                   next0 = SNAT_IN2OUT_NEXT_REASS;
1536                   goto trace00;
1537                 }
1538             }
1539
1540           key0.addr = ip0->src_address;
1541           key0.port = udp0->src_port;
1542           key0.protocol = proto0;
1543           key0.fib_index = rx_fib_index0;
1544
1545           kv0.key = key0.as_u64;
1546
1547           if (PREDICT_FALSE (clib_bihash_search_8_8 (
1548               &sm->per_thread_data[thread_index].in2out, &kv0, &value0) != 0))
1549             {
1550               if (is_slow_path)
1551                 {
1552                   if (is_output_feature)
1553                     {
1554                       if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
1555                           ip0, proto0, thread_index)))
1556                         goto trace00;
1557                     }
1558                   else
1559                     {
1560                       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1561                           ip0, proto0, rx_fib_index0, thread_index)))
1562                         goto trace00;
1563                     }
1564
1565                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1566                                      &s0, node, next0, thread_index);
1567                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1568                     goto trace00;
1569                 }
1570               else
1571                 {
1572                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1573                   goto trace00;
1574                 }
1575             }
1576           else
1577             {
1578               if (PREDICT_FALSE (value0.value == ~0ULL))
1579                 {
1580                   if (is_slow_path)
1581                     {
1582                       s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
1583                                           thread_index, now, vm, node);
1584                       if (!s0 && !sm->forwarding_enabled)
1585                         next0 = SNAT_IN2OUT_NEXT_DROP;
1586                       goto trace00;
1587                     }
1588                   else
1589                     {
1590                       next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1591                       goto trace00;
1592                     }
1593                 }
1594               else
1595                 {
1596                   s0 = pool_elt_at_index (
1597                     sm->per_thread_data[thread_index].sessions,
1598                     value0.value);
1599                 }
1600             }
1601
1602           b0->flags |= VNET_BUFFER_F_IS_NATED;
1603
1604           old_addr0 = ip0->src_address.as_u32;
1605           ip0->src_address = s0->out2in.addr;
1606           new_addr0 = ip0->src_address.as_u32;
1607           if (!is_output_feature)
1608             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1609
1610           sum0 = ip0->checksum;
1611           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1612                                  ip4_header_t,
1613                                  src_address /* changed member */);
1614           ip0->checksum = ip_csum_fold (sum0);
1615
1616           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1617             {
1618               old_port0 = tcp0->src_port;
1619               tcp0->src_port = s0->out2in.port;
1620               new_port0 = tcp0->src_port;
1621
1622               sum0 = tcp0->checksum;
1623               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1624                                      ip4_header_t,
1625                                      dst_address /* changed member */);
1626               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1627                                      ip4_header_t /* cheat */,
1628                                      length /* changed member */);
1629               tcp0->checksum = ip_csum_fold(sum0);
1630             }
1631           else
1632             {
1633               old_port0 = udp0->src_port;
1634               udp0->src_port = s0->out2in.port;
1635               udp0->checksum = 0;
1636             }
1637
1638           /* Accounting */
1639           s0->last_heard = now;
1640           s0->total_pkts++;
1641           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1642           /* Per-user LRU list maintenance */
1643           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1644                              s0->per_user_index);
1645           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1646                               s0->per_user_list_head_index,
1647                               s0->per_user_index);
1648         trace00:
1649
1650           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1651                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1652             {
1653               snat_in2out_trace_t *t =
1654                  vlib_add_trace (vm, node, b0, sizeof (*t));
1655               t->is_slow_path = is_slow_path;
1656               t->sw_if_index = sw_if_index0;
1657               t->next_index = next0;
1658                   t->session_index = ~0;
1659               if (s0)
1660                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1661             }
1662
1663           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1664
1665           if (is_output_feature)
1666             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1667
1668           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1669                  iph_offset1);
1670
1671           udp1 = ip4_next_header (ip1);
1672           tcp1 = (tcp_header_t *) udp1;
1673           icmp1 = (icmp46_header_t *) udp1;
1674
1675           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1676           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1677                                    sw_if_index1);
1678
1679           if (PREDICT_FALSE(ip1->ttl == 1))
1680             {
1681               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1682               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1683                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1684                                            0);
1685               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1686               goto trace01;
1687             }
1688
1689           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1690
1691           /* Next configured feature, probably ip4-lookup */
1692           if (is_slow_path)
1693             {
1694               if (PREDICT_FALSE (proto1 == ~0))
1695                 {
1696                   s1 = snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1,
1697                                                   thread_index, now, vm, node);
1698                   if (!s1)
1699                     next1 = SNAT_IN2OUT_NEXT_DROP;
1700                   goto trace01;
1701                 }
1702
1703               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1704                 {
1705                   next1 = icmp_in2out_slow_path
1706                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1707                      next1, now, thread_index, &s1);
1708                   goto trace01;
1709                 }
1710             }
1711           else
1712             {
1713               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1714                 {
1715                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1716                   goto trace01;
1717                 }
1718
1719               if (ip4_is_fragment (ip1))
1720                 {
1721                   next1 = SNAT_IN2OUT_NEXT_REASS;
1722                   goto trace01;
1723                 }
1724             }
1725
1726           b1->flags |= VNET_BUFFER_F_IS_NATED;
1727
1728           key1.addr = ip1->src_address;
1729           key1.port = udp1->src_port;
1730           key1.protocol = proto1;
1731           key1.fib_index = rx_fib_index1;
1732
1733           kv1.key = key1.as_u64;
1734
1735             if (PREDICT_FALSE(clib_bihash_search_8_8 (
1736                 &sm->per_thread_data[thread_index].in2out, &kv1, &value1) != 0))
1737             {
1738               if (is_slow_path)
1739                 {
1740                   if (is_output_feature)
1741                     {
1742                       if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
1743                           ip1, proto1, thread_index)))
1744                         goto trace00;
1745                     }
1746                   else
1747                     {
1748                       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
1749                           ip1, proto1, rx_fib_index1, thread_index)))
1750                         goto trace01;
1751                     }
1752
1753                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1754                                      &s1, node, next1, thread_index);
1755                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1756                     goto trace01;
1757                 }
1758               else
1759                 {
1760                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1761                   goto trace01;
1762                 }
1763             }
1764           else
1765             {
1766               if (PREDICT_FALSE (value1.value == ~0ULL))
1767                 {
1768                   if (is_slow_path)
1769                     {
1770                       s1 = snat_in2out_lb(sm, b1, ip1, rx_fib_index1,
1771                                           thread_index, now, vm, node);
1772                       if (!s1 && !sm->forwarding_enabled)
1773                         next1 = SNAT_IN2OUT_NEXT_DROP;
1774                       goto trace01;
1775                     }
1776                   else
1777                     {
1778                       next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1779                       goto trace01;
1780                     }
1781                 }
1782               else
1783                 {
1784                   s1 = pool_elt_at_index (
1785                     sm->per_thread_data[thread_index].sessions,
1786                     value1.value);
1787                 }
1788             }
1789
1790           old_addr1 = ip1->src_address.as_u32;
1791           ip1->src_address = s1->out2in.addr;
1792           new_addr1 = ip1->src_address.as_u32;
1793           if (!is_output_feature)
1794             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1795
1796           sum1 = ip1->checksum;
1797           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1798                                  ip4_header_t,
1799                                  src_address /* changed member */);
1800           ip1->checksum = ip_csum_fold (sum1);
1801
1802           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1803             {
1804               old_port1 = tcp1->src_port;
1805               tcp1->src_port = s1->out2in.port;
1806               new_port1 = tcp1->src_port;
1807
1808               sum1 = tcp1->checksum;
1809               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1810                                      ip4_header_t,
1811                                      dst_address /* changed member */);
1812               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1813                                      ip4_header_t /* cheat */,
1814                                      length /* changed member */);
1815               tcp1->checksum = ip_csum_fold(sum1);
1816             }
1817           else
1818             {
1819               old_port1 = udp1->src_port;
1820               udp1->src_port = s1->out2in.port;
1821               udp1->checksum = 0;
1822             }
1823
1824           /* Accounting */
1825           s1->last_heard = now;
1826           s1->total_pkts++;
1827           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1828           /* Per-user LRU list maintenance */
1829           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1830                              s1->per_user_index);
1831           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1832                               s1->per_user_list_head_index,
1833                               s1->per_user_index);
1834         trace01:
1835
1836           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1837                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1838             {
1839               snat_in2out_trace_t *t =
1840                  vlib_add_trace (vm, node, b1, sizeof (*t));
1841               t->sw_if_index = sw_if_index1;
1842               t->next_index = next1;
1843               t->session_index = ~0;
1844               if (s1)
1845                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1846             }
1847
1848           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1849
1850           /* verify speculative enqueues, maybe switch current next frame */
1851           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1852                                            to_next, n_left_to_next,
1853                                            bi0, bi1, next0, next1);
1854         }
1855
1856       while (n_left_from > 0 && n_left_to_next > 0)
1857         {
1858           u32 bi0;
1859           vlib_buffer_t * b0;
1860           u32 next0;
1861           u32 sw_if_index0;
1862           ip4_header_t * ip0;
1863           ip_csum_t sum0;
1864           u32 new_addr0, old_addr0;
1865           u16 old_port0, new_port0;
1866           udp_header_t * udp0;
1867           tcp_header_t * tcp0;
1868           icmp46_header_t * icmp0;
1869           snat_session_key_t key0;
1870           u32 rx_fib_index0;
1871           u32 proto0;
1872           snat_session_t * s0 = 0;
1873           clib_bihash_kv_8_8_t kv0, value0;
1874           u32 iph_offset0 = 0;
1875
1876           /* speculatively enqueue b0 to the current next frame */
1877           bi0 = from[0];
1878           to_next[0] = bi0;
1879           from += 1;
1880           to_next += 1;
1881           n_left_from -= 1;
1882           n_left_to_next -= 1;
1883
1884           b0 = vlib_get_buffer (vm, bi0);
1885           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1886
1887           if (is_output_feature)
1888             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1889
1890           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1891                  iph_offset0);
1892
1893           udp0 = ip4_next_header (ip0);
1894           tcp0 = (tcp_header_t *) udp0;
1895           icmp0 = (icmp46_header_t *) udp0;
1896
1897           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1898           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1899                                    sw_if_index0);
1900
1901           if (PREDICT_FALSE(ip0->ttl == 1))
1902             {
1903               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1904               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1905                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1906                                            0);
1907               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1908               goto trace0;
1909             }
1910
1911           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1912
1913           /* Next configured feature, probably ip4-lookup */
1914           if (is_slow_path)
1915             {
1916               if (PREDICT_FALSE (proto0 == ~0))
1917                 {
1918                   s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1919                                                   thread_index, now, vm, node);
1920                   if (!s0)
1921                     next0 = SNAT_IN2OUT_NEXT_DROP;
1922                   goto trace0;
1923                 }
1924
1925               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1926                 {
1927                   next0 = icmp_in2out_slow_path
1928                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1929                      next0, now, thread_index, &s0);
1930                   goto trace0;
1931                 }
1932             }
1933           else
1934             {
1935               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1936                 {
1937                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1938                   goto trace0;
1939                 }
1940
1941               if (ip4_is_fragment (ip0))
1942                 {
1943                   next0 = SNAT_IN2OUT_NEXT_REASS;
1944                   goto trace0;
1945                 }
1946             }
1947
1948           key0.addr = ip0->src_address;
1949           key0.port = udp0->src_port;
1950           key0.protocol = proto0;
1951           key0.fib_index = rx_fib_index0;
1952
1953           kv0.key = key0.as_u64;
1954
1955           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out,
1956                                       &kv0, &value0))
1957             {
1958               if (is_slow_path)
1959                 {
1960                   if (is_output_feature)
1961                     {
1962                       if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
1963                           ip0, proto0, thread_index)))
1964                         goto trace0;
1965                     }
1966                   else
1967                     {
1968                       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1969                           ip0, proto0, rx_fib_index0, thread_index)))
1970                         goto trace0;
1971                     }
1972
1973                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1974                                      &s0, node, next0, thread_index);
1975
1976                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1977                     goto trace0;
1978                 }
1979               else
1980                 {
1981                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1982                   goto trace0;
1983                 }
1984             }
1985           else
1986             {
1987               if (PREDICT_FALSE (value0.value == ~0ULL))
1988                 {
1989                   if (is_slow_path)
1990                     {
1991                       s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
1992                                           thread_index, now, vm, node);
1993                       if (!s0 && !sm->forwarding_enabled)
1994                         next0 = SNAT_IN2OUT_NEXT_DROP;
1995                       goto trace0;
1996                     }
1997                   else
1998                     {
1999                       next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2000                       goto trace0;
2001                     }
2002                 }
2003               else
2004                 {
2005                   s0 = pool_elt_at_index (
2006                     sm->per_thread_data[thread_index].sessions,
2007                     value0.value);
2008                 }
2009             }
2010
2011           b0->flags |= VNET_BUFFER_F_IS_NATED;
2012
2013           old_addr0 = ip0->src_address.as_u32;
2014           ip0->src_address = s0->out2in.addr;
2015           new_addr0 = ip0->src_address.as_u32;
2016           if (!is_output_feature)
2017             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
2018
2019           sum0 = ip0->checksum;
2020           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2021                                  ip4_header_t,
2022                                  src_address /* changed member */);
2023           ip0->checksum = ip_csum_fold (sum0);
2024
2025           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2026             {
2027               old_port0 = tcp0->src_port;
2028               tcp0->src_port = s0->out2in.port;
2029               new_port0 = tcp0->src_port;
2030
2031               sum0 = tcp0->checksum;
2032               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2033                                      ip4_header_t,
2034                                      dst_address /* changed member */);
2035               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2036                                      ip4_header_t /* cheat */,
2037                                      length /* changed member */);
2038               tcp0->checksum = ip_csum_fold(sum0);
2039             }
2040           else
2041             {
2042               old_port0 = udp0->src_port;
2043               udp0->src_port = s0->out2in.port;
2044               udp0->checksum = 0;
2045             }
2046
2047           /* Accounting */
2048           s0->last_heard = now;
2049           s0->total_pkts++;
2050           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
2051           /* Per-user LRU list maintenance */
2052           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
2053                              s0->per_user_index);
2054           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
2055                               s0->per_user_list_head_index,
2056                               s0->per_user_index);
2057
2058         trace0:
2059           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2060                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2061             {
2062               snat_in2out_trace_t *t =
2063                  vlib_add_trace (vm, node, b0, sizeof (*t));
2064               t->is_slow_path = is_slow_path;
2065               t->sw_if_index = sw_if_index0;
2066               t->next_index = next0;
2067                   t->session_index = ~0;
2068               if (s0)
2069                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
2070             }
2071
2072           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2073
2074           /* verify speculative enqueue, maybe switch current next frame */
2075           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2076                                            to_next, n_left_to_next,
2077                                            bi0, next0);
2078         }
2079
2080       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2081     }
2082
2083   vlib_node_increment_counter (vm, stats_node_index,
2084                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2085                                pkts_processed);
2086   return frame->n_vectors;
2087 }
2088
2089 static uword
2090 snat_in2out_fast_path_fn (vlib_main_t * vm,
2091                           vlib_node_runtime_t * node,
2092                           vlib_frame_t * frame)
2093 {
2094   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0);
2095 }
2096
2097 VLIB_REGISTER_NODE (snat_in2out_node) = {
2098   .function = snat_in2out_fast_path_fn,
2099   .name = "nat44-in2out",
2100   .vector_size = sizeof (u32),
2101   .format_trace = format_snat_in2out_trace,
2102   .type = VLIB_NODE_TYPE_INTERNAL,
2103
2104   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2105   .error_strings = snat_in2out_error_strings,
2106
2107   .runtime_data_bytes = sizeof (snat_runtime_t),
2108
2109   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2110
2111   /* edit / add dispositions here */
2112   .next_nodes = {
2113     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2114     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2115     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2116     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2117     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2118   },
2119 };
2120
2121 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
2122
2123 static uword
2124 snat_in2out_output_fast_path_fn (vlib_main_t * vm,
2125                                  vlib_node_runtime_t * node,
2126                                  vlib_frame_t * frame)
2127 {
2128   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1);
2129 }
2130
2131 VLIB_REGISTER_NODE (snat_in2out_output_node) = {
2132   .function = snat_in2out_output_fast_path_fn,
2133   .name = "nat44-in2out-output",
2134   .vector_size = sizeof (u32),
2135   .format_trace = format_snat_in2out_trace,
2136   .type = VLIB_NODE_TYPE_INTERNAL,
2137
2138   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2139   .error_strings = snat_in2out_error_strings,
2140
2141   .runtime_data_bytes = sizeof (snat_runtime_t),
2142
2143   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2144
2145   /* edit / add dispositions here */
2146   .next_nodes = {
2147     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2148     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2149     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2150     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2151     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2152   },
2153 };
2154
2155 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
2156                               snat_in2out_output_fast_path_fn);
2157
2158 static uword
2159 snat_in2out_slow_path_fn (vlib_main_t * vm,
2160                           vlib_node_runtime_t * node,
2161                           vlib_frame_t * frame)
2162 {
2163   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0);
2164 }
2165
2166 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
2167   .function = snat_in2out_slow_path_fn,
2168   .name = "nat44-in2out-slowpath",
2169   .vector_size = sizeof (u32),
2170   .format_trace = format_snat_in2out_trace,
2171   .type = VLIB_NODE_TYPE_INTERNAL,
2172
2173   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2174   .error_strings = snat_in2out_error_strings,
2175
2176   .runtime_data_bytes = sizeof (snat_runtime_t),
2177
2178   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2179
2180   /* edit / add dispositions here */
2181   .next_nodes = {
2182     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2183     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2184     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2185     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2186     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2187   },
2188 };
2189
2190 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
2191                               snat_in2out_slow_path_fn);
2192
2193 static uword
2194 snat_in2out_output_slow_path_fn (vlib_main_t * vm,
2195                                  vlib_node_runtime_t * node,
2196                                  vlib_frame_t * frame)
2197 {
2198   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1);
2199 }
2200
2201 VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
2202   .function = snat_in2out_output_slow_path_fn,
2203   .name = "nat44-in2out-output-slowpath",
2204   .vector_size = sizeof (u32),
2205   .format_trace = format_snat_in2out_trace,
2206   .type = VLIB_NODE_TYPE_INTERNAL,
2207
2208   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2209   .error_strings = snat_in2out_error_strings,
2210
2211   .runtime_data_bytes = sizeof (snat_runtime_t),
2212
2213   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2214
2215   /* edit / add dispositions here */
2216   .next_nodes = {
2217     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2218     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2219     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2220     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2221     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2222   },
2223 };
2224
2225 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
2226                               snat_in2out_output_slow_path_fn);
2227
2228 extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local;
2229
2230 static uword
2231 nat44_hairpinning_fn (vlib_main_t * vm,
2232                       vlib_node_runtime_t * node,
2233                       vlib_frame_t * frame)
2234 {
2235   u32 n_left_from, * from, * to_next;
2236   snat_in2out_next_t next_index;
2237   u32 pkts_processed = 0;
2238   snat_main_t * sm = &snat_main;
2239   vnet_feature_main_t *fm = &feature_main;
2240   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
2241   vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index];
2242
2243   from = vlib_frame_vector_args (frame);
2244   n_left_from = frame->n_vectors;
2245   next_index = node->cached_next_index;
2246
2247   while (n_left_from > 0)
2248     {
2249       u32 n_left_to_next;
2250
2251       vlib_get_next_frame (vm, node, next_index,
2252                            to_next, n_left_to_next);
2253
2254       while (n_left_from > 0 && n_left_to_next > 0)
2255         {
2256           u32 bi0;
2257           vlib_buffer_t * b0;
2258           u32 next0;
2259           ip4_header_t * ip0;
2260           u32 proto0;
2261           udp_header_t * udp0;
2262           tcp_header_t * tcp0;
2263
2264           /* speculatively enqueue b0 to the current next frame */
2265           bi0 = from[0];
2266           to_next[0] = bi0;
2267           from += 1;
2268           to_next += 1;
2269           n_left_from -= 1;
2270           n_left_to_next -= 1;
2271
2272           b0 = vlib_get_buffer (vm, bi0);
2273           ip0 = vlib_buffer_get_current (b0);
2274           udp0 = ip4_next_header (ip0);
2275           tcp0 = (tcp_header_t *) udp0;
2276
2277           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2278
2279           vnet_get_config_data (&cm->config_main, &b0->current_config_index,
2280                                 &next0, 0);
2281
2282           if (snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0))
2283             next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2284
2285           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2286
2287           /* verify speculative enqueue, maybe switch current next frame */
2288           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2289                                            to_next, n_left_to_next,
2290                                            bi0, next0);
2291          }
2292
2293       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2294     }
2295
2296   vlib_node_increment_counter (vm, nat44_hairpinning_node.index,
2297                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2298                                pkts_processed);
2299   return frame->n_vectors;
2300 }
2301
2302 VLIB_REGISTER_NODE (nat44_hairpinning_node) = {
2303   .function = nat44_hairpinning_fn,
2304   .name = "nat44-hairpinning",
2305   .vector_size = sizeof (u32),
2306   .type = VLIB_NODE_TYPE_INTERNAL,
2307   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2308   .error_strings = snat_in2out_error_strings,
2309   .n_next_nodes = 2,
2310   .next_nodes = {
2311     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2312     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2313   },
2314 };
2315
2316 VLIB_NODE_FUNCTION_MULTIARCH (nat44_hairpinning_node,
2317                               nat44_hairpinning_fn);
2318
2319 static inline void
2320 nat44_reass_hairpinning (snat_main_t *sm,
2321                          vlib_buffer_t * b0,
2322                          ip4_header_t * ip0,
2323                          u16 sport,
2324                          u16 dport,
2325                          u32 proto0)
2326 {
2327   snat_session_key_t key0, sm0;
2328   snat_session_t * s0;
2329   clib_bihash_kv_8_8_t kv0, value0;
2330   ip_csum_t sum0;
2331   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
2332   u16 new_dst_port0, old_dst_port0;
2333   udp_header_t * udp0;
2334   tcp_header_t * tcp0;
2335
2336   key0.addr = ip0->dst_address;
2337   key0.port = dport;
2338   key0.protocol = proto0;
2339   key0.fib_index = sm->outside_fib_index;
2340   kv0.key = key0.as_u64;
2341
2342   udp0 = ip4_next_header (ip0);
2343
2344   /* Check if destination is static mappings */
2345   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
2346     {
2347       new_dst_addr0 = sm0.addr.as_u32;
2348       new_dst_port0 = sm0.port;
2349       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2350     }
2351   /* or active sessions */
2352   else
2353     {
2354       if (sm->num_workers > 1)
2355         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
2356       else
2357         ti = sm->num_workers;
2358
2359       if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
2360         {
2361           si = value0.value;
2362           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
2363           new_dst_addr0 = s0->in2out.addr.as_u32;
2364           new_dst_port0 = s0->in2out.port;
2365           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
2366         }
2367     }
2368
2369   /* Destination is behind the same NAT, use internal address and port */
2370   if (new_dst_addr0)
2371     {
2372       old_dst_addr0 = ip0->dst_address.as_u32;
2373       ip0->dst_address.as_u32 = new_dst_addr0;
2374       sum0 = ip0->checksum;
2375       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2376                              ip4_header_t, dst_address);
2377       ip0->checksum = ip_csum_fold (sum0);
2378
2379       old_dst_port0 = dport;
2380       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0 &&
2381                        ip4_is_first_fragment (ip0)))
2382         {
2383           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2384             {
2385               tcp0 = ip4_next_header (ip0);
2386               tcp0->dst = new_dst_port0;
2387               sum0 = tcp0->checksum;
2388               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2389                                      ip4_header_t, dst_address);
2390               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
2391                                      ip4_header_t /* cheat */, length);
2392               tcp0->checksum = ip_csum_fold(sum0);
2393             }
2394           else
2395             {
2396               udp0->dst_port = new_dst_port0;
2397               udp0->checksum = 0;
2398             }
2399         }
2400       else
2401         {
2402           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2403             {
2404               tcp0 = ip4_next_header (ip0);
2405               sum0 = tcp0->checksum;
2406               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2407                                      ip4_header_t, dst_address);
2408               tcp0->checksum = ip_csum_fold(sum0);
2409             }
2410         }
2411     }
2412 }
2413
2414 static uword
2415 nat44_in2out_reass_node_fn (vlib_main_t * vm,
2416                             vlib_node_runtime_t * node,
2417                             vlib_frame_t * frame)
2418 {
2419   u32 n_left_from, *from, *to_next;
2420   snat_in2out_next_t next_index;
2421   u32 pkts_processed = 0;
2422   snat_main_t *sm = &snat_main;
2423   f64 now = vlib_time_now (vm);
2424   u32 thread_index = vlib_get_thread_index ();
2425   snat_main_per_thread_data_t *per_thread_data =
2426     &sm->per_thread_data[thread_index];
2427   u32 *fragments_to_drop = 0;
2428   u32 *fragments_to_loopback = 0;
2429
2430   from = vlib_frame_vector_args (frame);
2431   n_left_from = frame->n_vectors;
2432   next_index = node->cached_next_index;
2433
2434   while (n_left_from > 0)
2435     {
2436       u32 n_left_to_next;
2437
2438       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2439
2440       while (n_left_from > 0 && n_left_to_next > 0)
2441        {
2442           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
2443           vlib_buffer_t *b0;
2444           u32 next0;
2445           u8 cached0 = 0;
2446           ip4_header_t *ip0;
2447           nat_reass_ip4_t *reass0;
2448           udp_header_t * udp0;
2449           tcp_header_t * tcp0;
2450           snat_session_key_t key0;
2451           clib_bihash_kv_8_8_t kv0, value0;
2452           snat_session_t * s0 = 0;
2453           u16 old_port0, new_port0;
2454           ip_csum_t sum0;
2455
2456           /* speculatively enqueue b0 to the current next frame */
2457           bi0 = from[0];
2458           to_next[0] = bi0;
2459           from += 1;
2460           to_next += 1;
2461           n_left_from -= 1;
2462           n_left_to_next -= 1;
2463
2464           b0 = vlib_get_buffer (vm, bi0);
2465           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2466
2467           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2468           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2469                                                                sw_if_index0);
2470
2471           if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
2472             {
2473               next0 = SNAT_IN2OUT_NEXT_DROP;
2474               b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
2475               goto trace0;
2476             }
2477
2478           ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
2479           udp0 = ip4_next_header (ip0);
2480           tcp0 = (tcp_header_t *) udp0;
2481           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2482
2483           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
2484                                                  ip0->dst_address,
2485                                                  ip0->fragment_id,
2486                                                  ip0->protocol,
2487                                                  1,
2488                                                  &fragments_to_drop);
2489
2490           if (PREDICT_FALSE (!reass0))
2491             {
2492               next0 = SNAT_IN2OUT_NEXT_DROP;
2493               b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_REASS];
2494               goto trace0;
2495             }
2496
2497           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
2498             {
2499               key0.addr = ip0->src_address;
2500               key0.port = udp0->src_port;
2501               key0.protocol = proto0;
2502               key0.fib_index = rx_fib_index0;
2503               kv0.key = key0.as_u64;
2504
2505               if (clib_bihash_search_8_8 (&per_thread_data->in2out, &kv0, &value0))
2506                 {
2507                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
2508                       ip0, proto0, rx_fib_index0, thread_index)))
2509                     goto trace0;
2510
2511                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
2512                                      &s0, node, next0, thread_index);
2513
2514                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
2515                     goto trace0;
2516
2517                   reass0->sess_index = s0 - per_thread_data->sessions;
2518                 }
2519               else
2520                 {
2521                   s0 = pool_elt_at_index (per_thread_data->sessions,
2522                                           value0.value);
2523                   reass0->sess_index = value0.value;
2524                 }
2525               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
2526             }
2527           else
2528             {
2529               if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
2530                 {
2531                   if (nat_ip4_reass_add_fragment (reass0, bi0))
2532                     {
2533                       b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_FRAG];
2534                       next0 = SNAT_IN2OUT_NEXT_DROP;
2535                       goto trace0;
2536                     }
2537                   cached0 = 1;
2538                   goto trace0;
2539                 }
2540               s0 = pool_elt_at_index (per_thread_data->sessions,
2541                                       reass0->sess_index);
2542             }
2543
2544           old_addr0 = ip0->src_address.as_u32;
2545           ip0->src_address = s0->out2in.addr;
2546           new_addr0 = ip0->src_address.as_u32;
2547           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
2548
2549           sum0 = ip0->checksum;
2550           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2551                                  ip4_header_t,
2552                                  src_address /* changed member */);
2553           ip0->checksum = ip_csum_fold (sum0);
2554
2555           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
2556             {
2557               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2558                 {
2559                   old_port0 = tcp0->src_port;
2560                   tcp0->src_port = s0->out2in.port;
2561                   new_port0 = tcp0->src_port;
2562
2563                   sum0 = tcp0->checksum;
2564                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2565                                          ip4_header_t,
2566                                          dst_address /* changed member */);
2567                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2568                                          ip4_header_t /* cheat */,
2569                                          length /* changed member */);
2570                   tcp0->checksum = ip_csum_fold(sum0);
2571                 }
2572               else
2573                 {
2574                   old_port0 = udp0->src_port;
2575                   udp0->src_port = s0->out2in.port;
2576                   udp0->checksum = 0;
2577                 }
2578             }
2579
2580           /* Hairpinning */
2581           nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port,
2582                                    s0->ext_host_port, proto0);
2583
2584           /* Accounting */
2585           s0->last_heard = now;
2586           s0->total_pkts++;
2587           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
2588           /* Per-user LRU list maintenance */
2589           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
2590                              s0->per_user_index);
2591           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
2592                               s0->per_user_list_head_index,
2593                               s0->per_user_index);
2594
2595         trace0:
2596           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2597                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2598             {
2599               nat44_in2out_reass_trace_t *t =
2600                  vlib_add_trace (vm, node, b0, sizeof (*t));
2601               t->cached = cached0;
2602               t->sw_if_index = sw_if_index0;
2603               t->next_index = next0;
2604             }
2605
2606           if (cached0)
2607             {
2608               n_left_to_next++;
2609               to_next--;
2610             }
2611           else
2612             {
2613               pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2614
2615               /* verify speculative enqueue, maybe switch current next frame */
2616               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2617                                                to_next, n_left_to_next,
2618                                                bi0, next0);
2619             }
2620
2621           if (n_left_from == 0 && vec_len (fragments_to_loopback))
2622             {
2623               from = vlib_frame_vector_args (frame);
2624               u32 len = vec_len (fragments_to_loopback);
2625               if (len <= VLIB_FRAME_SIZE)
2626                 {
2627                   clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
2628                   n_left_from = len;
2629                   vec_reset_length (fragments_to_loopback);
2630                 }
2631               else
2632                 {
2633                   clib_memcpy (from,
2634                                fragments_to_loopback + (len - VLIB_FRAME_SIZE),
2635                                sizeof (u32) * VLIB_FRAME_SIZE);
2636                   n_left_from = VLIB_FRAME_SIZE;
2637                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
2638                 }
2639             }
2640        }
2641
2642       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2643     }
2644
2645   vlib_node_increment_counter (vm, nat44_in2out_reass_node.index,
2646                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2647                                pkts_processed);
2648
2649   nat_send_all_to_node (vm, fragments_to_drop, node,
2650                         &node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT],
2651                         SNAT_IN2OUT_NEXT_DROP);
2652
2653   vec_free (fragments_to_drop);
2654   vec_free (fragments_to_loopback);
2655   return frame->n_vectors;
2656 }
2657
2658 VLIB_REGISTER_NODE (nat44_in2out_reass_node) = {
2659   .function = nat44_in2out_reass_node_fn,
2660   .name = "nat44-in2out-reass",
2661   .vector_size = sizeof (u32),
2662   .format_trace = format_nat44_in2out_reass_trace,
2663   .type = VLIB_NODE_TYPE_INTERNAL,
2664
2665   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2666   .error_strings = snat_in2out_error_strings,
2667
2668   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2669   .next_nodes = {
2670     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2671     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2672     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2673     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2674     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2675   },
2676 };
2677
2678 VLIB_NODE_FUNCTION_MULTIARCH (nat44_in2out_reass_node,
2679                               nat44_in2out_reass_node_fn);
2680
2681 /**************************/
2682 /*** deterministic mode ***/
2683 /**************************/
2684 static uword
2685 snat_det_in2out_node_fn (vlib_main_t * vm,
2686                          vlib_node_runtime_t * node,
2687                          vlib_frame_t * frame)
2688 {
2689   u32 n_left_from, * from, * to_next;
2690   snat_in2out_next_t next_index;
2691   u32 pkts_processed = 0;
2692   snat_main_t * sm = &snat_main;
2693   u32 now = (u32) vlib_time_now (vm);
2694   u32 thread_index = vlib_get_thread_index ();
2695
2696   from = vlib_frame_vector_args (frame);
2697   n_left_from = frame->n_vectors;
2698   next_index = node->cached_next_index;
2699
2700   while (n_left_from > 0)
2701     {
2702       u32 n_left_to_next;
2703
2704       vlib_get_next_frame (vm, node, next_index,
2705                            to_next, n_left_to_next);
2706
2707       while (n_left_from >= 4 && n_left_to_next >= 2)
2708         {
2709           u32 bi0, bi1;
2710           vlib_buffer_t * b0, * b1;
2711           u32 next0, next1;
2712           u32 sw_if_index0, sw_if_index1;
2713           ip4_header_t * ip0, * ip1;
2714           ip_csum_t sum0, sum1;
2715           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
2716           u16 old_port0, new_port0, lo_port0, i0;
2717           u16 old_port1, new_port1, lo_port1, i1;
2718           udp_header_t * udp0, * udp1;
2719           tcp_header_t * tcp0, * tcp1;
2720           u32 proto0, proto1;
2721           snat_det_out_key_t key0, key1;
2722           snat_det_map_t * dm0, * dm1;
2723           snat_det_session_t * ses0 = 0, * ses1 = 0;
2724           u32 rx_fib_index0, rx_fib_index1;
2725           icmp46_header_t * icmp0, * icmp1;
2726
2727           /* Prefetch next iteration. */
2728           {
2729             vlib_buffer_t * p2, * p3;
2730
2731             p2 = vlib_get_buffer (vm, from[2]);
2732             p3 = vlib_get_buffer (vm, from[3]);
2733
2734             vlib_prefetch_buffer_header (p2, LOAD);
2735             vlib_prefetch_buffer_header (p3, LOAD);
2736
2737             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2738             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2739           }
2740
2741           /* speculatively enqueue b0 and b1 to the current next frame */
2742           to_next[0] = bi0 = from[0];
2743           to_next[1] = bi1 = from[1];
2744           from += 2;
2745           to_next += 2;
2746           n_left_from -= 2;
2747           n_left_to_next -= 2;
2748
2749           b0 = vlib_get_buffer (vm, bi0);
2750           b1 = vlib_get_buffer (vm, bi1);
2751
2752           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2753           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
2754
2755           ip0 = vlib_buffer_get_current (b0);
2756           udp0 = ip4_next_header (ip0);
2757           tcp0 = (tcp_header_t *) udp0;
2758
2759           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2760
2761           if (PREDICT_FALSE(ip0->ttl == 1))
2762             {
2763               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2764               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2765                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2766                                            0);
2767               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2768               goto trace0;
2769             }
2770
2771           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2772
2773           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2774             {
2775               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2776               icmp0 = (icmp46_header_t *) udp0;
2777
2778               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2779                                   rx_fib_index0, node, next0, thread_index,
2780                                   &ses0, &dm0);
2781               goto trace0;
2782             }
2783
2784           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2785           if (PREDICT_FALSE(!dm0))
2786             {
2787               clib_warning("no match for internal host %U",
2788                            format_ip4_address, &ip0->src_address);
2789               next0 = SNAT_IN2OUT_NEXT_DROP;
2790               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2791               goto trace0;
2792             }
2793
2794           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2795
2796           key0.ext_host_addr = ip0->dst_address;
2797           key0.ext_host_port = tcp0->dst;
2798
2799           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2800           if (PREDICT_FALSE(!ses0))
2801             {
2802               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2803                 {
2804                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2805                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2806
2807                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2808                     continue;
2809
2810                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2811                   break;
2812                 }
2813               if (PREDICT_FALSE(!ses0))
2814                 {
2815                   /* too many sessions for user, send ICMP error packet */
2816
2817                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2818                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2819                                                ICMP4_destination_unreachable_destination_unreachable_host,
2820                                                0);
2821                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2822                   goto trace0;
2823                 }
2824             }
2825
2826           new_port0 = ses0->out.out_port;
2827
2828           old_addr0.as_u32 = ip0->src_address.as_u32;
2829           ip0->src_address.as_u32 = new_addr0.as_u32;
2830           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2831
2832           sum0 = ip0->checksum;
2833           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2834                                  ip4_header_t,
2835                                  src_address /* changed member */);
2836           ip0->checksum = ip_csum_fold (sum0);
2837
2838           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2839             {
2840               if (tcp0->flags & TCP_FLAG_SYN)
2841                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2842               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2843                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2844               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2845                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2846               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2847                 snat_det_ses_close(dm0, ses0);
2848               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2849                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2850               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2851                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2852
2853               old_port0 = tcp0->src;
2854               tcp0->src = new_port0;
2855
2856               sum0 = tcp0->checksum;
2857               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2858                                      ip4_header_t,
2859                                      dst_address /* changed member */);
2860               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2861                                      ip4_header_t /* cheat */,
2862                                      length /* changed member */);
2863               tcp0->checksum = ip_csum_fold(sum0);
2864             }
2865           else
2866             {
2867               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2868               old_port0 = udp0->src_port;
2869               udp0->src_port = new_port0;
2870               udp0->checksum = 0;
2871             }
2872
2873           switch(ses0->state)
2874             {
2875             case SNAT_SESSION_UDP_ACTIVE:
2876                 ses0->expire = now + sm->udp_timeout;
2877                 break;
2878             case SNAT_SESSION_TCP_SYN_SENT:
2879             case SNAT_SESSION_TCP_FIN_WAIT:
2880             case SNAT_SESSION_TCP_CLOSE_WAIT:
2881             case SNAT_SESSION_TCP_LAST_ACK:
2882                 ses0->expire = now + sm->tcp_transitory_timeout;
2883                 break;
2884             case SNAT_SESSION_TCP_ESTABLISHED:
2885                 ses0->expire = now + sm->tcp_established_timeout;
2886                 break;
2887             }
2888
2889         trace0:
2890           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2891                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2892             {
2893               snat_in2out_trace_t *t =
2894                  vlib_add_trace (vm, node, b0, sizeof (*t));
2895               t->is_slow_path = 0;
2896               t->sw_if_index = sw_if_index0;
2897               t->next_index = next0;
2898               t->session_index = ~0;
2899               if (ses0)
2900                 t->session_index = ses0 - dm0->sessions;
2901             }
2902
2903           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2904
2905           ip1 = vlib_buffer_get_current (b1);
2906           udp1 = ip4_next_header (ip1);
2907           tcp1 = (tcp_header_t *) udp1;
2908
2909           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2910
2911           if (PREDICT_FALSE(ip1->ttl == 1))
2912             {
2913               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2914               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2915                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2916                                            0);
2917               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2918               goto trace1;
2919             }
2920
2921           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2922
2923           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
2924             {
2925               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
2926               icmp1 = (icmp46_header_t *) udp1;
2927
2928               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
2929                                   rx_fib_index1, node, next1, thread_index,
2930                                   &ses1, &dm1);
2931               goto trace1;
2932             }
2933
2934           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
2935           if (PREDICT_FALSE(!dm1))
2936             {
2937               clib_warning("no match for internal host %U",
2938                            format_ip4_address, &ip0->src_address);
2939               next1 = SNAT_IN2OUT_NEXT_DROP;
2940               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2941               goto trace1;
2942             }
2943
2944           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
2945
2946           key1.ext_host_addr = ip1->dst_address;
2947           key1.ext_host_port = tcp1->dst;
2948
2949           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
2950           if (PREDICT_FALSE(!ses1))
2951             {
2952               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
2953                 {
2954                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
2955                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
2956
2957                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
2958                     continue;
2959
2960                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
2961                   break;
2962                 }
2963               if (PREDICT_FALSE(!ses1))
2964                 {
2965                   /* too many sessions for user, send ICMP error packet */
2966
2967                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2968                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
2969                                                ICMP4_destination_unreachable_destination_unreachable_host,
2970                                                0);
2971                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2972                   goto trace1;
2973                 }
2974             }
2975
2976           new_port1 = ses1->out.out_port;
2977
2978           old_addr1.as_u32 = ip1->src_address.as_u32;
2979           ip1->src_address.as_u32 = new_addr1.as_u32;
2980           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2981
2982           sum1 = ip1->checksum;
2983           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2984                                  ip4_header_t,
2985                                  src_address /* changed member */);
2986           ip1->checksum = ip_csum_fold (sum1);
2987
2988           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
2989             {
2990               if (tcp1->flags & TCP_FLAG_SYN)
2991                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
2992               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
2993                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2994               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
2995                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
2996               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
2997                 snat_det_ses_close(dm1, ses1);
2998               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2999                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
3000               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
3001                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
3002
3003               old_port1 = tcp1->src;
3004               tcp1->src = new_port1;
3005
3006               sum1 = tcp1->checksum;
3007               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
3008                                      ip4_header_t,
3009                                      dst_address /* changed member */);
3010               sum1 = ip_csum_update (sum1, old_port1, new_port1,
3011                                      ip4_header_t /* cheat */,
3012                                      length /* changed member */);
3013               tcp1->checksum = ip_csum_fold(sum1);
3014             }
3015           else
3016             {
3017               ses1->state = SNAT_SESSION_UDP_ACTIVE;
3018               old_port1 = udp1->src_port;
3019               udp1->src_port = new_port1;
3020               udp1->checksum = 0;
3021             }
3022
3023           switch(ses1->state)
3024             {
3025             case SNAT_SESSION_UDP_ACTIVE:
3026                 ses1->expire = now + sm->udp_timeout;
3027                 break;
3028             case SNAT_SESSION_TCP_SYN_SENT:
3029             case SNAT_SESSION_TCP_FIN_WAIT:
3030             case SNAT_SESSION_TCP_CLOSE_WAIT:
3031             case SNAT_SESSION_TCP_LAST_ACK:
3032                 ses1->expire = now + sm->tcp_transitory_timeout;
3033                 break;
3034             case SNAT_SESSION_TCP_ESTABLISHED:
3035                 ses1->expire = now + sm->tcp_established_timeout;
3036                 break;
3037             }
3038
3039         trace1:
3040           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3041                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
3042             {
3043               snat_in2out_trace_t *t =
3044                  vlib_add_trace (vm, node, b1, sizeof (*t));
3045               t->is_slow_path = 0;
3046               t->sw_if_index = sw_if_index1;
3047               t->next_index = next1;
3048               t->session_index = ~0;
3049               if (ses1)
3050                 t->session_index = ses1 - dm1->sessions;
3051             }
3052
3053           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
3054
3055           /* verify speculative enqueues, maybe switch current next frame */
3056           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
3057                                            to_next, n_left_to_next,
3058                                            bi0, bi1, next0, next1);
3059          }
3060
3061       while (n_left_from > 0 && n_left_to_next > 0)
3062         {
3063           u32 bi0;
3064           vlib_buffer_t * b0;
3065           u32 next0;
3066           u32 sw_if_index0;
3067           ip4_header_t * ip0;
3068           ip_csum_t sum0;
3069           ip4_address_t new_addr0, old_addr0;
3070           u16 old_port0, new_port0, lo_port0, i0;
3071           udp_header_t * udp0;
3072           tcp_header_t * tcp0;
3073           u32 proto0;
3074           snat_det_out_key_t key0;
3075           snat_det_map_t * dm0;
3076           snat_det_session_t * ses0 = 0;
3077           u32 rx_fib_index0;
3078           icmp46_header_t * icmp0;
3079
3080           /* speculatively enqueue b0 to the current next frame */
3081           bi0 = from[0];
3082           to_next[0] = bi0;
3083           from += 1;
3084           to_next += 1;
3085           n_left_from -= 1;
3086           n_left_to_next -= 1;
3087
3088           b0 = vlib_get_buffer (vm, bi0);
3089           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3090
3091           ip0 = vlib_buffer_get_current (b0);
3092           udp0 = ip4_next_header (ip0);
3093           tcp0 = (tcp_header_t *) udp0;
3094
3095           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3096
3097           if (PREDICT_FALSE(ip0->ttl == 1))
3098             {
3099               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3100               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3101                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3102                                            0);
3103               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3104               goto trace00;
3105             }
3106
3107           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3108
3109           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
3110             {
3111               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3112               icmp0 = (icmp46_header_t *) udp0;
3113
3114               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
3115                                   rx_fib_index0, node, next0, thread_index,
3116                                   &ses0, &dm0);
3117               goto trace00;
3118             }
3119
3120           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
3121           if (PREDICT_FALSE(!dm0))
3122             {
3123               clib_warning("no match for internal host %U",
3124                            format_ip4_address, &ip0->src_address);
3125               next0 = SNAT_IN2OUT_NEXT_DROP;
3126               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3127               goto trace00;
3128             }
3129
3130           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
3131
3132           key0.ext_host_addr = ip0->dst_address;
3133           key0.ext_host_port = tcp0->dst;
3134
3135           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
3136           if (PREDICT_FALSE(!ses0))
3137             {
3138               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
3139                 {
3140                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
3141                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
3142
3143                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
3144                     continue;
3145
3146                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
3147                   break;
3148                 }
3149               if (PREDICT_FALSE(!ses0))
3150                 {
3151                   /* too many sessions for user, send ICMP error packet */
3152
3153                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3154                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
3155                                                ICMP4_destination_unreachable_destination_unreachable_host,
3156                                                0);
3157                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3158                   goto trace00;
3159                 }
3160             }
3161
3162           new_port0 = ses0->out.out_port;
3163
3164           old_addr0.as_u32 = ip0->src_address.as_u32;
3165           ip0->src_address.as_u32 = new_addr0.as_u32;
3166           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
3167
3168           sum0 = ip0->checksum;
3169           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3170                                  ip4_header_t,
3171                                  src_address /* changed member */);
3172           ip0->checksum = ip_csum_fold (sum0);
3173
3174           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3175             {
3176               if (tcp0->flags & TCP_FLAG_SYN)
3177                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
3178               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
3179                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
3180               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
3181                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
3182               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
3183                 snat_det_ses_close(dm0, ses0);
3184               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
3185                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
3186               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
3187                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
3188
3189               old_port0 = tcp0->src;
3190               tcp0->src = new_port0;
3191
3192               sum0 = tcp0->checksum;
3193               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3194                                      ip4_header_t,
3195                                      dst_address /* changed member */);
3196               sum0 = ip_csum_update (sum0, old_port0, new_port0,
3197                                      ip4_header_t /* cheat */,
3198                                      length /* changed member */);
3199               tcp0->checksum = ip_csum_fold(sum0);
3200             }
3201           else
3202             {
3203               ses0->state = SNAT_SESSION_UDP_ACTIVE;
3204               old_port0 = udp0->src_port;
3205               udp0->src_port = new_port0;
3206               udp0->checksum = 0;
3207             }
3208
3209           switch(ses0->state)
3210             {
3211             case SNAT_SESSION_UDP_ACTIVE:
3212                 ses0->expire = now + sm->udp_timeout;
3213                 break;
3214             case SNAT_SESSION_TCP_SYN_SENT:
3215             case SNAT_SESSION_TCP_FIN_WAIT:
3216             case SNAT_SESSION_TCP_CLOSE_WAIT:
3217             case SNAT_SESSION_TCP_LAST_ACK:
3218                 ses0->expire = now + sm->tcp_transitory_timeout;
3219                 break;
3220             case SNAT_SESSION_TCP_ESTABLISHED:
3221                 ses0->expire = now + sm->tcp_established_timeout;
3222                 break;
3223             }
3224
3225         trace00:
3226           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3227                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3228             {
3229               snat_in2out_trace_t *t =
3230                  vlib_add_trace (vm, node, b0, sizeof (*t));
3231               t->is_slow_path = 0;
3232               t->sw_if_index = sw_if_index0;
3233               t->next_index = next0;
3234               t->session_index = ~0;
3235               if (ses0)
3236                 t->session_index = ses0 - dm0->sessions;
3237             }
3238
3239           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3240
3241           /* verify speculative enqueue, maybe switch current next frame */
3242           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3243                                            to_next, n_left_to_next,
3244                                            bi0, next0);
3245         }
3246
3247       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3248     }
3249
3250   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
3251                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3252                                pkts_processed);
3253   return frame->n_vectors;
3254 }
3255
3256 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
3257   .function = snat_det_in2out_node_fn,
3258   .name = "nat44-det-in2out",
3259   .vector_size = sizeof (u32),
3260   .format_trace = format_snat_in2out_trace,
3261   .type = VLIB_NODE_TYPE_INTERNAL,
3262
3263   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3264   .error_strings = snat_in2out_error_strings,
3265
3266   .runtime_data_bytes = sizeof (snat_runtime_t),
3267
3268   .n_next_nodes = 3,
3269
3270   /* edit / add dispositions here */
3271   .next_nodes = {
3272     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3273     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3274     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3275   },
3276 };
3277
3278 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
3279
3280 /**
3281  * Get address and port values to be used for ICMP packet translation
3282  * and create session if needed
3283  *
3284  * @param[in,out] sm             NAT main
3285  * @param[in,out] node           NAT node runtime
3286  * @param[in] thread_index       thread index
3287  * @param[in,out] b0             buffer containing packet to be translated
3288  * @param[out] p_proto           protocol used for matching
3289  * @param[out] p_value           address and port after NAT translation
3290  * @param[out] p_dont_translate  if packet should not be translated
3291  * @param d                      optional parameter
3292  * @param e                      optional parameter
3293  */
3294 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
3295                           u32 thread_index, vlib_buffer_t *b0,
3296                           ip4_header_t *ip0, u8 *p_proto,
3297                           snat_session_key_t *p_value,
3298                           u8 *p_dont_translate, void *d, void *e)
3299 {
3300   icmp46_header_t *icmp0;
3301   u32 sw_if_index0;
3302   u32 rx_fib_index0;
3303   u8 protocol;
3304   snat_det_out_key_t key0;
3305   u8 dont_translate = 0;
3306   u32 next0 = ~0;
3307   icmp_echo_header_t *echo0, *inner_echo0 = 0;
3308   ip4_header_t *inner_ip0;
3309   void *l4_header = 0;
3310   icmp46_header_t *inner_icmp0;
3311   snat_det_map_t * dm0 = 0;
3312   ip4_address_t new_addr0;
3313   u16 lo_port0, i0;
3314   snat_det_session_t * ses0 = 0;
3315   ip4_address_t in_addr;
3316   u16 in_port;
3317
3318   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
3319   echo0 = (icmp_echo_header_t *)(icmp0+1);
3320   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3321   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
3322
3323   if (!icmp_is_error_message (icmp0))
3324     {
3325       protocol = SNAT_PROTOCOL_ICMP;
3326       in_addr = ip0->src_address;
3327       in_port = echo0->identifier;
3328     }
3329   else
3330     {
3331       inner_ip0 = (ip4_header_t *)(echo0+1);
3332       l4_header = ip4_next_header (inner_ip0);
3333       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
3334       in_addr = inner_ip0->dst_address;
3335       switch (protocol)
3336         {
3337         case SNAT_PROTOCOL_ICMP:
3338           inner_icmp0 = (icmp46_header_t*)l4_header;
3339           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
3340           in_port = inner_echo0->identifier;
3341           break;
3342         case SNAT_PROTOCOL_UDP:
3343         case SNAT_PROTOCOL_TCP:
3344           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
3345           break;
3346         default:
3347           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
3348           next0 = SNAT_IN2OUT_NEXT_DROP;
3349           goto out;
3350         }
3351     }
3352
3353   dm0 = snat_det_map_by_user(sm, &in_addr);
3354   if (PREDICT_FALSE(!dm0))
3355     {
3356       clib_warning("no match for internal host %U",
3357                    format_ip4_address, &in_addr);
3358       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
3359           IP_PROTOCOL_ICMP, rx_fib_index0)))
3360         {
3361           dont_translate = 1;
3362           goto out;
3363         }
3364       next0 = SNAT_IN2OUT_NEXT_DROP;
3365       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3366       goto out;
3367     }
3368
3369   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
3370
3371   key0.ext_host_addr = ip0->dst_address;
3372   key0.ext_host_port = 0;
3373
3374   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
3375   if (PREDICT_FALSE(!ses0))
3376     {
3377       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
3378           IP_PROTOCOL_ICMP, rx_fib_index0)))
3379         {
3380           dont_translate = 1;
3381           goto out;
3382         }
3383       if (icmp0->type != ICMP4_echo_request)
3384         {
3385           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3386           next0 = SNAT_IN2OUT_NEXT_DROP;
3387           goto out;
3388         }
3389       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
3390         {
3391           key0.out_port = clib_host_to_net_u16 (lo_port0 +
3392             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
3393
3394           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
3395             continue;
3396
3397           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
3398           break;
3399         }
3400       if (PREDICT_FALSE(!ses0))
3401         {
3402           next0 = SNAT_IN2OUT_NEXT_DROP;
3403           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
3404           goto out;
3405         }
3406     }
3407
3408   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
3409                     !icmp_is_error_message (icmp0)))
3410     {
3411       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3412       next0 = SNAT_IN2OUT_NEXT_DROP;
3413       goto out;
3414     }
3415
3416   u32 now = (u32) vlib_time_now (sm->vlib_main);
3417
3418   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
3419   ses0->expire = now + sm->icmp_timeout;
3420
3421 out:
3422   *p_proto = protocol;
3423   if (ses0)
3424     {
3425       p_value->addr = new_addr0;
3426       p_value->fib_index = sm->outside_fib_index;
3427       p_value->port = ses0->out.out_port;
3428     }
3429   *p_dont_translate = dont_translate;
3430   if (d)
3431     *(snat_det_session_t**)d = ses0;
3432   if (e)
3433     *(snat_det_map_t**)e = dm0;
3434   return next0;
3435 }
3436
3437 /**********************/
3438 /*** worker handoff ***/
3439 /**********************/
3440 static inline uword
3441 snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
3442                                       vlib_node_runtime_t * node,
3443                                       vlib_frame_t * frame,
3444                                       u8 is_output)
3445 {
3446   snat_main_t *sm = &snat_main;
3447   vlib_thread_main_t *tm = vlib_get_thread_main ();
3448   u32 n_left_from, *from, *to_next = 0;
3449   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
3450   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
3451     = 0;
3452   vlib_frame_queue_elt_t *hf = 0;
3453   vlib_frame_t *f = 0;
3454   int i;
3455   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
3456   u32 next_worker_index = 0;
3457   u32 current_worker_index = ~0;
3458   u32 thread_index = vlib_get_thread_index ();
3459   u32 fq_index;
3460   u32 to_node_index;
3461
3462   ASSERT (vec_len (sm->workers));
3463
3464   if (is_output)
3465     {
3466       fq_index = sm->fq_in2out_output_index;
3467       to_node_index = sm->in2out_output_node_index;
3468     }
3469   else
3470     {
3471       fq_index = sm->fq_in2out_index;
3472       to_node_index = sm->in2out_node_index;
3473     }
3474
3475   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
3476     {
3477       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
3478
3479       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
3480                                sm->first_worker_index + sm->num_workers - 1,
3481                                (vlib_frame_queue_t *) (~0));
3482     }
3483
3484   from = vlib_frame_vector_args (frame);
3485   n_left_from = frame->n_vectors;
3486
3487   while (n_left_from > 0)
3488     {
3489       u32 bi0;
3490       vlib_buffer_t *b0;
3491       u32 sw_if_index0;
3492       u32 rx_fib_index0;
3493       ip4_header_t * ip0;
3494       u8 do_handoff;
3495
3496       bi0 = from[0];
3497       from += 1;
3498       n_left_from -= 1;
3499
3500       b0 = vlib_get_buffer (vm, bi0);
3501
3502       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
3503       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3504
3505       ip0 = vlib_buffer_get_current (b0);
3506
3507       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
3508
3509       if (PREDICT_FALSE (next_worker_index != thread_index))
3510         {
3511           do_handoff = 1;
3512
3513           if (next_worker_index != current_worker_index)
3514             {
3515               if (hf)
3516                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3517
3518               hf = vlib_get_worker_handoff_queue_elt (fq_index,
3519                                                       next_worker_index,
3520                                                       handoff_queue_elt_by_worker_index);
3521
3522               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
3523               to_next_worker = &hf->buffer_index[hf->n_vectors];
3524               current_worker_index = next_worker_index;
3525             }
3526
3527           /* enqueue to correct worker thread */
3528           to_next_worker[0] = bi0;
3529           to_next_worker++;
3530           n_left_to_next_worker--;
3531
3532           if (n_left_to_next_worker == 0)
3533             {
3534               hf->n_vectors = VLIB_FRAME_SIZE;
3535               vlib_put_frame_queue_elt (hf);
3536               current_worker_index = ~0;
3537               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
3538               hf = 0;
3539             }
3540         }
3541       else
3542         {
3543           do_handoff = 0;
3544           /* if this is 1st frame */
3545           if (!f)
3546             {
3547               f = vlib_get_frame_to_node (vm, to_node_index);
3548               to_next = vlib_frame_vector_args (f);
3549             }
3550
3551           to_next[0] = bi0;
3552           to_next += 1;
3553           f->n_vectors++;
3554         }
3555
3556       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
3557                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3558         {
3559           snat_in2out_worker_handoff_trace_t *t =
3560             vlib_add_trace (vm, node, b0, sizeof (*t));
3561           t->next_worker_index = next_worker_index;
3562           t->do_handoff = do_handoff;
3563         }
3564     }
3565
3566   if (f)
3567     vlib_put_frame_to_node (vm, to_node_index, f);
3568
3569   if (hf)
3570     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3571
3572   /* Ship frames to the worker nodes */
3573   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
3574     {
3575       if (handoff_queue_elt_by_worker_index[i])
3576         {
3577           hf = handoff_queue_elt_by_worker_index[i];
3578           /*
3579            * It works better to let the handoff node
3580            * rate-adapt, always ship the handoff queue element.
3581            */
3582           if (1 || hf->n_vectors == hf->last_n_vectors)
3583             {
3584               vlib_put_frame_queue_elt (hf);
3585               handoff_queue_elt_by_worker_index[i] = 0;
3586             }
3587           else
3588             hf->last_n_vectors = hf->n_vectors;
3589         }
3590       congested_handoff_queue_by_worker_index[i] =
3591         (vlib_frame_queue_t *) (~0);
3592     }
3593   hf = 0;
3594   current_worker_index = ~0;
3595   return frame->n_vectors;
3596 }
3597
3598 static uword
3599 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
3600                                vlib_node_runtime_t * node,
3601                                vlib_frame_t * frame)
3602 {
3603   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
3604 }
3605
3606 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
3607   .function = snat_in2out_worker_handoff_fn,
3608   .name = "nat44-in2out-worker-handoff",
3609   .vector_size = sizeof (u32),
3610   .format_trace = format_snat_in2out_worker_handoff_trace,
3611   .type = VLIB_NODE_TYPE_INTERNAL,
3612
3613   .n_next_nodes = 1,
3614
3615   .next_nodes = {
3616     [0] = "error-drop",
3617   },
3618 };
3619
3620 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
3621                               snat_in2out_worker_handoff_fn);
3622
3623 static uword
3624 snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
3625                                       vlib_node_runtime_t * node,
3626                                       vlib_frame_t * frame)
3627 {
3628   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
3629 }
3630
3631 VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
3632   .function = snat_in2out_output_worker_handoff_fn,
3633   .name = "nat44-in2out-output-worker-handoff",
3634   .vector_size = sizeof (u32),
3635   .format_trace = format_snat_in2out_worker_handoff_trace,
3636   .type = VLIB_NODE_TYPE_INTERNAL,
3637
3638   .n_next_nodes = 1,
3639
3640   .next_nodes = {
3641     [0] = "error-drop",
3642   },
3643 };
3644
3645 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
3646                               snat_in2out_output_worker_handoff_fn);
3647
3648 static_always_inline int
3649 is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
3650 {
3651   snat_address_t * ap;
3652   clib_bihash_kv_8_8_t kv, value;
3653   snat_session_key_t m_key;
3654
3655   vec_foreach (ap, sm->addresses)
3656     {
3657       if (ap->addr.as_u32 == dst_addr->as_u32)
3658         return 1;
3659     }
3660
3661   m_key.addr.as_u32 = dst_addr->as_u32;
3662   m_key.fib_index = sm->outside_fib_index;
3663   m_key.port = 0;
3664   m_key.protocol = 0;
3665   kv.key = m_key.as_u64;
3666   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
3667     return 1;
3668
3669   return 0;
3670 }
3671
3672 static uword
3673 snat_hairpin_dst_fn (vlib_main_t * vm,
3674                      vlib_node_runtime_t * node,
3675                      vlib_frame_t * frame)
3676 {
3677   u32 n_left_from, * from, * to_next;
3678   snat_in2out_next_t next_index;
3679   u32 pkts_processed = 0;
3680   snat_main_t * sm = &snat_main;
3681
3682   from = vlib_frame_vector_args (frame);
3683   n_left_from = frame->n_vectors;
3684   next_index = node->cached_next_index;
3685
3686   while (n_left_from > 0)
3687     {
3688       u32 n_left_to_next;
3689
3690       vlib_get_next_frame (vm, node, next_index,
3691                            to_next, n_left_to_next);
3692
3693       while (n_left_from > 0 && n_left_to_next > 0)
3694         {
3695           u32 bi0;
3696           vlib_buffer_t * b0;
3697           u32 next0;
3698           ip4_header_t * ip0;
3699           u32 proto0;
3700
3701           /* speculatively enqueue b0 to the current next frame */
3702           bi0 = from[0];
3703           to_next[0] = bi0;
3704           from += 1;
3705           to_next += 1;
3706           n_left_from -= 1;
3707           n_left_to_next -= 1;
3708
3709           b0 = vlib_get_buffer (vm, bi0);
3710           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3711           ip0 = vlib_buffer_get_current (b0);
3712
3713           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3714
3715           vnet_buffer (b0)->snat.flags = 0;
3716           if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
3717             {
3718               if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
3719                 {
3720                   udp_header_t * udp0 = ip4_next_header (ip0);
3721                   tcp_header_t * tcp0 = (tcp_header_t *) udp0;
3722
3723                   snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3724                 }
3725               else if (proto0 == SNAT_PROTOCOL_ICMP)
3726                 {
3727                   icmp46_header_t * icmp0 = ip4_next_header (ip0);
3728
3729                   snat_icmp_hairpinning (sm, b0, ip0, icmp0);
3730                 }
3731               else
3732                 {
3733                   snat_hairpinning_unknown_proto (sm, b0, ip0);
3734                 }
3735
3736               vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
3737             }
3738
3739           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3740
3741           /* verify speculative enqueue, maybe switch current next frame */
3742           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3743                                            to_next, n_left_to_next,
3744                                            bi0, next0);
3745          }
3746
3747       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3748     }
3749
3750   vlib_node_increment_counter (vm, snat_hairpin_dst_node.index,
3751                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3752                                pkts_processed);
3753   return frame->n_vectors;
3754 }
3755
3756 VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
3757   .function = snat_hairpin_dst_fn,
3758   .name = "nat44-hairpin-dst",
3759   .vector_size = sizeof (u32),
3760   .type = VLIB_NODE_TYPE_INTERNAL,
3761   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3762   .error_strings = snat_in2out_error_strings,
3763   .n_next_nodes = 2,
3764   .next_nodes = {
3765     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3766     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3767   },
3768 };
3769
3770 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node,
3771                               snat_hairpin_dst_fn);
3772
3773 static uword
3774 snat_hairpin_src_fn (vlib_main_t * vm,
3775                      vlib_node_runtime_t * node,
3776                      vlib_frame_t * frame)
3777 {
3778   u32 n_left_from, * from, * to_next;
3779   snat_in2out_next_t next_index;
3780   u32 pkts_processed = 0;
3781   snat_main_t *sm = &snat_main;
3782
3783   from = vlib_frame_vector_args (frame);
3784   n_left_from = frame->n_vectors;
3785   next_index = node->cached_next_index;
3786
3787   while (n_left_from > 0)
3788     {
3789       u32 n_left_to_next;
3790
3791       vlib_get_next_frame (vm, node, next_index,
3792                            to_next, n_left_to_next);
3793
3794       while (n_left_from > 0 && n_left_to_next > 0)
3795         {
3796           u32 bi0;
3797           vlib_buffer_t * b0;
3798           u32 next0;
3799           snat_interface_t *i;
3800           u32 sw_if_index0;
3801
3802           /* speculatively enqueue b0 to the current next frame */
3803           bi0 = from[0];
3804           to_next[0] = bi0;
3805           from += 1;
3806           to_next += 1;
3807           n_left_from -= 1;
3808           n_left_to_next -= 1;
3809
3810           b0 = vlib_get_buffer (vm, bi0);
3811           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3812           next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT;
3813
3814           pool_foreach (i, sm->output_feature_interfaces,
3815           ({
3816             /* Only packets from NAT inside interface */
3817             if ((nat_interface_is_inside(i)) && (sw_if_index0 == i->sw_if_index))
3818               {
3819                 if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) &
3820                                     SNAT_FLAG_HAIRPINNING))
3821                   {
3822                     if (PREDICT_TRUE (sm->num_workers > 1))
3823                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
3824                     else
3825                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
3826                   }
3827                 break;
3828               }
3829           }));
3830
3831           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3832
3833           /* verify speculative enqueue, maybe switch current next frame */
3834           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3835                                            to_next, n_left_to_next,
3836                                            bi0, next0);
3837          }
3838
3839       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3840     }
3841
3842   vlib_node_increment_counter (vm, snat_hairpin_src_node.index,
3843                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3844                                pkts_processed);
3845   return frame->n_vectors;
3846 }
3847
3848 VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
3849   .function = snat_hairpin_src_fn,
3850   .name = "nat44-hairpin-src",
3851   .vector_size = sizeof (u32),
3852   .type = VLIB_NODE_TYPE_INTERNAL,
3853   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3854   .error_strings = snat_in2out_error_strings,
3855   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
3856   .next_nodes = {
3857      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
3858      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-in2out-output",
3859      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
3860      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
3861   },
3862 };
3863
3864 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node,
3865                               snat_hairpin_src_fn);
3866
3867 static uword
3868 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
3869                                 vlib_node_runtime_t * node,
3870                                 vlib_frame_t * frame)
3871 {
3872   u32 n_left_from, * from, * to_next;
3873   snat_in2out_next_t next_index;
3874   u32 pkts_processed = 0;
3875   snat_main_t * sm = &snat_main;
3876   u32 stats_node_index;
3877
3878   stats_node_index = snat_in2out_fast_node.index;
3879
3880   from = vlib_frame_vector_args (frame);
3881   n_left_from = frame->n_vectors;
3882   next_index = node->cached_next_index;
3883
3884   while (n_left_from > 0)
3885     {
3886       u32 n_left_to_next;
3887
3888       vlib_get_next_frame (vm, node, next_index,
3889                            to_next, n_left_to_next);
3890
3891       while (n_left_from > 0 && n_left_to_next > 0)
3892         {
3893           u32 bi0;
3894           vlib_buffer_t * b0;
3895           u32 next0;
3896           u32 sw_if_index0;
3897           ip4_header_t * ip0;
3898           ip_csum_t sum0;
3899           u32 new_addr0, old_addr0;
3900           u16 old_port0, new_port0;
3901           udp_header_t * udp0;
3902           tcp_header_t * tcp0;
3903           icmp46_header_t * icmp0;
3904           snat_session_key_t key0, sm0;
3905           u32 proto0;
3906           u32 rx_fib_index0;
3907
3908           /* speculatively enqueue b0 to the current next frame */
3909           bi0 = from[0];
3910           to_next[0] = bi0;
3911           from += 1;
3912           to_next += 1;
3913           n_left_from -= 1;
3914           n_left_to_next -= 1;
3915
3916           b0 = vlib_get_buffer (vm, bi0);
3917           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3918
3919           ip0 = vlib_buffer_get_current (b0);
3920           udp0 = ip4_next_header (ip0);
3921           tcp0 = (tcp_header_t *) udp0;
3922           icmp0 = (icmp46_header_t *) udp0;
3923
3924           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3925           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3926
3927           if (PREDICT_FALSE(ip0->ttl == 1))
3928             {
3929               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3930               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3931                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3932                                            0);
3933               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3934               goto trace0;
3935             }
3936
3937           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3938
3939           if (PREDICT_FALSE (proto0 == ~0))
3940               goto trace0;
3941
3942           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3943             {
3944               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
3945                                   rx_fib_index0, node, next0, ~0, 0, 0);
3946               goto trace0;
3947             }
3948
3949           key0.addr = ip0->src_address;
3950           key0.protocol = proto0;
3951           key0.port = udp0->src_port;
3952           key0.fib_index = rx_fib_index0;
3953
3954           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0, 0))
3955             {
3956               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3957               next0= SNAT_IN2OUT_NEXT_DROP;
3958               goto trace0;
3959             }
3960
3961           new_addr0 = sm0.addr.as_u32;
3962           new_port0 = sm0.port;
3963           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
3964           old_addr0 = ip0->src_address.as_u32;
3965           ip0->src_address.as_u32 = new_addr0;
3966
3967           sum0 = ip0->checksum;
3968           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3969                                  ip4_header_t,
3970                                  src_address /* changed member */);
3971           ip0->checksum = ip_csum_fold (sum0);
3972
3973           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
3974             {
3975               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3976                 {
3977                   old_port0 = tcp0->src_port;
3978                   tcp0->src_port = new_port0;
3979
3980                   sum0 = tcp0->checksum;
3981                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3982                                          ip4_header_t,
3983                                          dst_address /* changed member */);
3984                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
3985                                          ip4_header_t /* cheat */,
3986                                          length /* changed member */);
3987                   tcp0->checksum = ip_csum_fold(sum0);
3988                 }
3989               else
3990                 {
3991                   old_port0 = udp0->src_port;
3992                   udp0->src_port = new_port0;
3993                   udp0->checksum = 0;
3994                 }
3995             }
3996           else
3997             {
3998               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3999                 {
4000                   sum0 = tcp0->checksum;
4001                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
4002                                          ip4_header_t,
4003                                          dst_address /* changed member */);
4004                   tcp0->checksum = ip_csum_fold(sum0);
4005                 }
4006             }
4007
4008           /* Hairpinning */
4009           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
4010
4011         trace0:
4012           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
4013                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
4014             {
4015               snat_in2out_trace_t *t =
4016                  vlib_add_trace (vm, node, b0, sizeof (*t));
4017               t->sw_if_index = sw_if_index0;
4018               t->next_index = next0;
4019             }
4020
4021           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
4022
4023           /* verify speculative enqueue, maybe switch current next frame */
4024           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
4025                                            to_next, n_left_to_next,
4026                                            bi0, next0);
4027         }
4028
4029       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
4030     }
4031
4032   vlib_node_increment_counter (vm, stats_node_index,
4033                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
4034                                pkts_processed);
4035   return frame->n_vectors;
4036 }
4037
4038
4039 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
4040   .function = snat_in2out_fast_static_map_fn,
4041   .name = "nat44-in2out-fast",
4042   .vector_size = sizeof (u32),
4043   .format_trace = format_snat_in2out_fast_trace,
4044   .type = VLIB_NODE_TYPE_INTERNAL,
4045
4046   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
4047   .error_strings = snat_in2out_error_strings,
4048
4049   .runtime_data_bytes = sizeof (snat_runtime_t),
4050
4051   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
4052
4053   /* edit / add dispositions here */
4054   .next_nodes = {
4055     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
4056     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
4057     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
4058     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
4059     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
4060   },
4061 };
4062
4063 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);