NAT: Twice NAT44 (VPP-969)
[vpp.git] / src / plugins / nat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <nat/nat.h>
25 #include <nat/nat_ipfix_logging.h>
26 #include <nat/nat_det.h>
27 #include <nat/nat_reass.h>
28
29 #include <vppinfra/hash.h>
30 #include <vppinfra/error.h>
31 #include <vppinfra/elog.h>
32
33 typedef struct {
34   u32 sw_if_index;
35   u32 next_index;
36   u32 session_index;
37   u32 is_slow_path;
38 } snat_in2out_trace_t;
39
40 typedef struct {
41   u32 next_worker_index;
42   u8 do_handoff;
43 } snat_in2out_worker_handoff_trace_t;
44
45 /* packet trace format function */
46 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
47 {
48   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
49   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
50   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
51   char * tag;
52
53   tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
54
55   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
56               t->sw_if_index, t->next_index, t->session_index);
57
58   return s;
59 }
60
61 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
62 {
63   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
64   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
65   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
66
67   s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
68               t->sw_if_index, t->next_index);
69
70   return s;
71 }
72
73 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
74 {
75   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
76   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
77   snat_in2out_worker_handoff_trace_t * t =
78     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
79   char * m;
80
81   m = t->do_handoff ? "next worker" : "same worker";
82   s = format (s, "NAT44_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
83
84   return s;
85 }
86
87 typedef struct {
88   u32 sw_if_index;
89   u32 next_index;
90   u8 cached;
91 } nat44_in2out_reass_trace_t;
92
93 static u8 * format_nat44_in2out_reass_trace (u8 * s, va_list * args)
94 {
95   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
96   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
97   nat44_in2out_reass_trace_t * t = va_arg (*args, nat44_in2out_reass_trace_t *);
98
99   s = format (s, "NAT44_IN2OUT_REASS: sw_if_index %d, next index %d, status %s",
100               t->sw_if_index, t->next_index,
101               t->cached ? "cached" : "translated");
102
103   return s;
104 }
105
/*
 * File-scope declarations of the graph-node registrations used by this
 * file, so that code appearing before the registrations can refer to them.
 */
vlib_node_registration_t snat_in2out_node;
vlib_node_registration_t snat_in2out_slowpath_node;
vlib_node_registration_t snat_in2out_fast_node;
vlib_node_registration_t snat_in2out_worker_handoff_node;
vlib_node_registration_t snat_det_in2out_node;
vlib_node_registration_t snat_in2out_output_node;
vlib_node_registration_t snat_in2out_output_slowpath_node;
vlib_node_registration_t snat_in2out_output_worker_handoff_node;
vlib_node_registration_t snat_hairpin_dst_node;
vlib_node_registration_t snat_hairpin_src_node;
vlib_node_registration_t nat44_hairpinning_node;
vlib_node_registration_t nat44_in2out_reass_node;
118
119
/*
 * X-macro listing every in2out error counter as _(SYMBOL, "description").
 * The enum and the string table below are both generated from this list,
 * so they always stay in the same order.
 */
#define foreach_snat_in2out_error \
  _(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \
  _(IN2OUT_PACKETS, "Good in2out packets processed") \
  _(OUT_OF_PORTS, "Out of ports") \
  _(BAD_OUTSIDE_FIB, "Outside VRF ID not found") \
  _(BAD_ICMP_TYPE, "unsupported ICMP type") \
  _(NO_TRANSLATION, "No translation") \
  _(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded") \
  _(DROP_FRAGMENT, "Drop fragment") \
  _(MAX_REASS, "Maximum reassemblies exceeded") \
  _(MAX_FRAG, "Maximum fragments per reassembly exceeded")

/* Error codes: SNAT_IN2OUT_ERROR_<SYMBOL>, followed by the entry count. */
typedef enum
{
#define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
  foreach_snat_in2out_error
#undef _
    SNAT_IN2OUT_N_ERROR,
} snat_in2out_error_t;

/* Human-readable descriptions, indexed by snat_in2out_error_t. */
static char *snat_in2out_error_strings[] = {
#define _(sym,string) string,
  foreach_snat_in2out_error
#undef _
};
144
/* Next-node indices returned by the in2out processing code in this file. */
typedef enum
{
  SNAT_IN2OUT_NEXT_LOOKUP,
  SNAT_IN2OUT_NEXT_DROP,
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_NEXT_REASS,
  SNAT_IN2OUT_N_NEXT,		/* number of entries, not a real next node */
} snat_in2out_next_t;
153
/* Next-node indices for the hairpin source node. */
typedef enum
{
  SNAT_HAIRPIN_SRC_NEXT_DROP,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
  SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
  SNAT_HAIRPIN_SRC_N_NEXT,	/* number of entries, not a real next node */
} snat_hairpin_next_t;
161
162 /**
163  * @brief Check if packet should be translated
164  *
165  * Packets aimed at outside interface and external addresss with active session
166  * should be translated.
167  *
168  * @param sm            NAT main
169  * @param rt            NAT runtime data
170  * @param sw_if_index0  index of the inside interface
171  * @param ip0           IPv4 header
172  * @param proto0        NAT protocol
173  * @param rx_fib_index0 RX FIB index
174  *
175  * @returns 0 if packet should be translated otherwise 1
176  */
177 static inline int
178 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
179                          u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
180                          u32 rx_fib_index0)
181 {
182   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
183   fib_prefix_t pfx = {
184     .fp_proto = FIB_PROTOCOL_IP4,
185     .fp_len = 32,
186     .fp_addr = {
187         .ip4.as_u32 = ip0->dst_address.as_u32,
188     },
189   };
190
191   /* Don't NAT packet aimed at the intfc address */
192   if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
193                                       ip0->dst_address.as_u32)))
194     return 1;
195
196   fei = fib_table_lookup (rx_fib_index0, &pfx);
197   if (FIB_NODE_INDEX_INVALID != fei)
198     {
199       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
200       if (sw_if_index == ~0)
201         {
202           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
203           if (FIB_NODE_INDEX_INVALID != fei)
204             sw_if_index = fib_entry_get_resolving_interface (fei);
205         }
206       snat_interface_t *i;
207       pool_foreach (i, sm->interfaces,
208       ({
209         /* NAT packet aimed at outside interface */
210         if ((nat_interface_is_outside(i)) && (sw_if_index == i->sw_if_index))
211           return 0;
212       }));
213     }
214
215   return 1;
216 }
217
218 static inline int
219 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
220                     u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
221                     u32 rx_fib_index0, u32 thread_index)
222 {
223   udp_header_t * udp0 = ip4_next_header (ip0);
224   snat_session_key_t key0, sm0;
225   clib_bihash_kv_8_8_t kv0, value0;
226
227   key0.addr = ip0->dst_address;
228   key0.port = udp0->dst_port;
229   key0.protocol = proto0;
230   key0.fib_index = sm->outside_fib_index;
231   kv0.key = key0.as_u64;
232
233   /* NAT packet aimed at external address if */
234   /* has active sessions */
235   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
236                               &value0))
237     {
238       /* or is static mappings */
239       if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
240         return 0;
241     }
242   else
243     return 0;
244
245   return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
246                                  rx_fib_index0);
247 }
248
249 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
250                       ip4_header_t * ip0,
251                       u32 rx_fib_index0,
252                       snat_session_key_t * key0,
253                       snat_session_t ** sessionp,
254                       vlib_node_runtime_t * node,
255                       u32 next0,
256                       u32 thread_index)
257 {
258   snat_user_t *u;
259   snat_session_t *s;
260   clib_bihash_kv_8_8_t kv0;
261   snat_session_key_t key1;
262   u32 address_index = ~0;
263   u32 outside_fib_index;
264   uword * p;
265   udp_header_t * udp0 = ip4_next_header (ip0);
266
267   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
268     {
269       b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
270       return SNAT_IN2OUT_NEXT_DROP;
271     }
272
273   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
274   if (! p)
275     {
276       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
277       return SNAT_IN2OUT_NEXT_DROP;
278     }
279   outside_fib_index = p[0];
280
281   key1.protocol = key0->protocol;
282
283   u = nat_user_get_or_create (sm, &ip0->src_address, rx_fib_index0,
284                               thread_index);
285   if (!u)
286     {
287       clib_warning ("create NAT user failed");
288       return SNAT_IN2OUT_NEXT_DROP;
289     }
290
291   s = nat_session_alloc_or_recycle (sm, u, thread_index);
292   if (!s)
293     {
294       clib_warning ("create NAT session failed");
295       return SNAT_IN2OUT_NEXT_DROP;
296     }
297
298   /* First try to match static mapping by local address and port */
299   if (snat_static_mapping_match (sm, *key0, &key1, 0, 0, 0))
300     {
301       /* Try to create dynamic translation */
302       if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index0,
303                                                thread_index, &key1,
304                                                &address_index,
305                                                sm->port_per_thread,
306                                                sm->per_thread_data[thread_index].snat_thread_index))
307         {
308           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
309           return SNAT_IN2OUT_NEXT_DROP;
310         }
311       u->nsessions++;
312     }
313   else
314     {
315       u->nstaticsessions++;
316       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
317     }
318
319   s->outside_address_index = address_index;
320   s->in2out = *key0;
321   s->out2in = key1;
322   s->out2in.protocol = key0->protocol;
323   s->out2in.fib_index = outside_fib_index;
324   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
325   s->ext_host_port = udp0->dst_port;
326   *sessionp = s;
327
328   /* Add to translation hashes */
329   kv0.key = s->in2out.as_u64;
330   kv0.value = s - sm->per_thread_data[thread_index].sessions;
331   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
332                                1 /* is_add */))
333       clib_warning ("in2out key add failed");
334
335   kv0.key = s->out2in.as_u64;
336   kv0.value = s - sm->per_thread_data[thread_index].sessions;
337
338   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
339                                1 /* is_add */))
340       clib_warning ("out2in key add failed");
341
342   /* log NAT event */
343   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
344                                       s->out2in.addr.as_u32,
345                                       s->in2out.protocol,
346                                       s->in2out.port,
347                                       s->out2in.port,
348                                       s->in2out.fib_index);
349   return next0;
350 }
351
352 static_always_inline
353 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
354                                  snat_session_key_t *p_key0)
355 {
356   icmp46_header_t *icmp0;
357   snat_session_key_t key0;
358   icmp_echo_header_t *echo0, *inner_echo0 = 0;
359   ip4_header_t *inner_ip0 = 0;
360   void *l4_header = 0;
361   icmp46_header_t *inner_icmp0;
362
363   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
364   echo0 = (icmp_echo_header_t *)(icmp0+1);
365
366   if (!icmp_is_error_message (icmp0))
367     {
368       key0.protocol = SNAT_PROTOCOL_ICMP;
369       key0.addr = ip0->src_address;
370       key0.port = echo0->identifier;
371     }
372   else
373     {
374       inner_ip0 = (ip4_header_t *)(echo0+1);
375       l4_header = ip4_next_header (inner_ip0);
376       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
377       key0.addr = inner_ip0->dst_address;
378       switch (key0.protocol)
379         {
380         case SNAT_PROTOCOL_ICMP:
381           inner_icmp0 = (icmp46_header_t*)l4_header;
382           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
383           key0.port = inner_echo0->identifier;
384           break;
385         case SNAT_PROTOCOL_UDP:
386         case SNAT_PROTOCOL_TCP:
387           key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
388           break;
389         default:
390           return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
391         }
392     }
393   *p_key0 = key0;
394   return -1; /* success */
395 }
396
397 /**
398  * Get address and port values to be used for ICMP packet translation
399  * and create session if needed
400  *
401  * @param[in,out] sm             NAT main
402  * @param[in,out] node           NAT node runtime
403  * @param[in] thread_index       thread index
404  * @param[in,out] b0             buffer containing packet to be translated
405  * @param[out] p_proto           protocol used for matching
406  * @param[out] p_value           address and port after NAT translation
407  * @param[out] p_dont_translate  if packet should not be translated
408  * @param d                      optional parameter
409  * @param e                      optional parameter
410  */
411 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
412                            u32 thread_index, vlib_buffer_t *b0,
413                            ip4_header_t *ip0, u8 *p_proto,
414                            snat_session_key_t *p_value,
415                            u8 *p_dont_translate, void *d, void *e)
416 {
417   icmp46_header_t *icmp0;
418   u32 sw_if_index0;
419   u32 rx_fib_index0;
420   snat_session_key_t key0;
421   snat_session_t *s0 = 0;
422   u8 dont_translate = 0;
423   clib_bihash_kv_8_8_t kv0, value0;
424   u32 next0 = ~0;
425   int err;
426
427   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
428   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
429   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
430
431   err = icmp_get_key (ip0, &key0);
432   if (err != -1)
433     {
434       b0->error = node->errors[err];
435       next0 = SNAT_IN2OUT_NEXT_DROP;
436       goto out;
437     }
438   key0.fib_index = rx_fib_index0;
439
440   kv0.key = key0.as_u64;
441
442   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
443                               &value0))
444     {
445       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
446           IP_PROTOCOL_ICMP, rx_fib_index0, thread_index) &&
447           vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0))
448         {
449           dont_translate = 1;
450           goto out;
451         }
452
453       if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
454         {
455           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
456           next0 = SNAT_IN2OUT_NEXT_DROP;
457           goto out;
458         }
459
460       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
461                          &s0, node, next0, thread_index);
462
463       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
464         goto out;
465     }
466   else
467     {
468       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
469                         icmp0->type != ICMP4_echo_reply &&
470                         !icmp_is_error_message (icmp0)))
471         {
472           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
473           next0 = SNAT_IN2OUT_NEXT_DROP;
474           goto out;
475         }
476
477       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
478                               value0.value);
479     }
480
481 out:
482   *p_proto = key0.protocol;
483   if (s0)
484     *p_value = s0->out2in;
485   *p_dont_translate = dont_translate;
486   if (d)
487     *(snat_session_t**)d = s0;
488   return next0;
489 }
490
491 /**
492  * Get address and port values to be used for ICMP packet translation
493  *
494  * @param[in] sm                 NAT main
495  * @param[in,out] node           NAT node runtime
496  * @param[in] thread_index       thread index
497  * @param[in,out] b0             buffer containing packet to be translated
498  * @param[out] p_proto           protocol used for matching
499  * @param[out] p_value           address and port after NAT translation
500  * @param[out] p_dont_translate  if packet should not be translated
501  * @param d                      optional parameter
502  * @param e                      optional parameter
503  */
504 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
505                            u32 thread_index, vlib_buffer_t *b0,
506                            ip4_header_t *ip0, u8 *p_proto,
507                            snat_session_key_t *p_value,
508                            u8 *p_dont_translate, void *d, void *e)
509 {
510   icmp46_header_t *icmp0;
511   u32 sw_if_index0;
512   u32 rx_fib_index0;
513   snat_session_key_t key0;
514   snat_session_key_t sm0;
515   u8 dont_translate = 0;
516   u8 is_addr_only;
517   u32 next0 = ~0;
518   int err;
519
520   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
521   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
522   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
523
524   err = icmp_get_key (ip0, &key0);
525   if (err != -1)
526     {
527       b0->error = node->errors[err];
528       next0 = SNAT_IN2OUT_NEXT_DROP;
529       goto out2;
530     }
531   key0.fib_index = rx_fib_index0;
532
533   if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only, 0))
534     {
535       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
536           IP_PROTOCOL_ICMP, rx_fib_index0)))
537         {
538           dont_translate = 1;
539           goto out;
540         }
541
542       if (icmp_is_error_message (icmp0))
543         {
544           next0 = SNAT_IN2OUT_NEXT_DROP;
545           goto out;
546         }
547
548       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
549       next0 = SNAT_IN2OUT_NEXT_DROP;
550       goto out;
551     }
552
553   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
554                     (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
555                     !icmp_is_error_message (icmp0)))
556     {
557       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
558       next0 = SNAT_IN2OUT_NEXT_DROP;
559       goto out;
560     }
561
562 out:
563   *p_value = sm0;
564 out2:
565   *p_proto = key0.protocol;
566   *p_dont_translate = dont_translate;
567   return next0;
568 }
569
570 static inline u32 icmp_in2out (snat_main_t *sm,
571                                vlib_buffer_t * b0,
572                                ip4_header_t * ip0,
573                                icmp46_header_t * icmp0,
574                                u32 sw_if_index0,
575                                u32 rx_fib_index0,
576                                vlib_node_runtime_t * node,
577                                u32 next0,
578                                u32 thread_index,
579                                void *d,
580                                void *e)
581 {
582   snat_session_key_t sm0;
583   u8 protocol;
584   icmp_echo_header_t *echo0, *inner_echo0 = 0;
585   ip4_header_t *inner_ip0;
586   void *l4_header = 0;
587   icmp46_header_t *inner_icmp0;
588   u8 dont_translate;
589   u32 new_addr0, old_addr0;
590   u16 old_id0, new_id0;
591   ip_csum_t sum0;
592   u16 checksum0;
593   u32 next0_tmp;
594
595   echo0 = (icmp_echo_header_t *)(icmp0+1);
596
597   next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0, ip0,
598                                        &protocol, &sm0, &dont_translate, d, e);
599   if (next0_tmp != ~0)
600     next0 = next0_tmp;
601   if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
602     goto out;
603
604   sum0 = ip_incremental_checksum (0, icmp0,
605                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
606   checksum0 = ~ip_csum_fold (sum0);
607   if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
608     {
609       next0 = SNAT_IN2OUT_NEXT_DROP;
610       goto out;
611     }
612
613   old_addr0 = ip0->src_address.as_u32;
614   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
615   if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
616     vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
617
618   sum0 = ip0->checksum;
619   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
620                          src_address /* changed member */);
621   ip0->checksum = ip_csum_fold (sum0);
622
623   if (!icmp_is_error_message (icmp0))
624     {
625       new_id0 = sm0.port;
626       if (PREDICT_FALSE(new_id0 != echo0->identifier))
627         {
628           old_id0 = echo0->identifier;
629           new_id0 = sm0.port;
630           echo0->identifier = new_id0;
631
632           sum0 = icmp0->checksum;
633           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
634                                  identifier);
635           icmp0->checksum = ip_csum_fold (sum0);
636         }
637     }
638   else
639     {
640       inner_ip0 = (ip4_header_t *)(echo0+1);
641       l4_header = ip4_next_header (inner_ip0);
642
643       if (!ip4_header_checksum_is_valid (inner_ip0))
644         {
645           next0 = SNAT_IN2OUT_NEXT_DROP;
646           goto out;
647         }
648
649       old_addr0 = inner_ip0->dst_address.as_u32;
650       inner_ip0->dst_address = sm0.addr;
651       new_addr0 = inner_ip0->dst_address.as_u32;
652
653       sum0 = icmp0->checksum;
654       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
655                              dst_address /* changed member */);
656       icmp0->checksum = ip_csum_fold (sum0);
657
658       switch (protocol)
659         {
660           case SNAT_PROTOCOL_ICMP:
661             inner_icmp0 = (icmp46_header_t*)l4_header;
662             inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
663
664             old_id0 = inner_echo0->identifier;
665             new_id0 = sm0.port;
666             inner_echo0->identifier = new_id0;
667
668             sum0 = icmp0->checksum;
669             sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
670                                    identifier);
671             icmp0->checksum = ip_csum_fold (sum0);
672             break;
673           case SNAT_PROTOCOL_UDP:
674           case SNAT_PROTOCOL_TCP:
675             old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
676             new_id0 = sm0.port;
677             ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
678
679             sum0 = icmp0->checksum;
680             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
681                                    dst_port);
682             icmp0->checksum = ip_csum_fold (sum0);
683             break;
684           default:
685             ASSERT(0);
686         }
687     }
688
689 out:
690   return next0;
691 }
692
693 /**
694  * @brief Hairpinning
695  *
696  * Hairpinning allows two endpoints on the internal side of the NAT to
697  * communicate even if they only use each other's external IP addresses
698  * and ports.
699  *
700  * @param sm     NAT main.
701  * @param b0     Vlib buffer.
702  * @param ip0    IP header.
703  * @param udp0   UDP header.
704  * @param tcp0   TCP header.
705  * @param proto0 NAT protocol.
706  */
707 static inline int
708 snat_hairpinning (snat_main_t *sm,
709                   vlib_buffer_t * b0,
710                   ip4_header_t * ip0,
711                   udp_header_t * udp0,
712                   tcp_header_t * tcp0,
713                   u32 proto0)
714 {
715   snat_session_key_t key0, sm0;
716   snat_session_t * s0;
717   clib_bihash_kv_8_8_t kv0, value0;
718   ip_csum_t sum0;
719   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
720   u16 new_dst_port0, old_dst_port0;
721
722   key0.addr = ip0->dst_address;
723   key0.port = udp0->dst_port;
724   key0.protocol = proto0;
725   key0.fib_index = sm->outside_fib_index;
726   kv0.key = key0.as_u64;
727
728   /* Check if destination is static mappings */
729   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
730     {
731       new_dst_addr0 = sm0.addr.as_u32;
732       new_dst_port0 = sm0.port;
733       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
734     }
735   /* or active session */
736   else
737     {
738       if (sm->num_workers > 1)
739         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
740       else
741         ti = sm->num_workers;
742
743       if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
744         {
745           si = value0.value;
746
747           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
748           new_dst_addr0 = s0->in2out.addr.as_u32;
749           new_dst_port0 = s0->in2out.port;
750           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
751         }
752     }
753
754   /* Destination is behind the same NAT, use internal address and port */
755   if (new_dst_addr0)
756     {
757       old_dst_addr0 = ip0->dst_address.as_u32;
758       ip0->dst_address.as_u32 = new_dst_addr0;
759       sum0 = ip0->checksum;
760       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
761                              ip4_header_t, dst_address);
762       ip0->checksum = ip_csum_fold (sum0);
763
764       old_dst_port0 = tcp0->dst;
765       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
766         {
767           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
768             {
769               tcp0->dst = new_dst_port0;
770               sum0 = tcp0->checksum;
771               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
772                                      ip4_header_t, dst_address);
773               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
774                                      ip4_header_t /* cheat */, length);
775               tcp0->checksum = ip_csum_fold(sum0);
776             }
777           else
778             {
779               udp0->dst_port = new_dst_port0;
780               udp0->checksum = 0;
781             }
782         }
783       else
784         {
785           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
786             {
787               sum0 = tcp0->checksum;
788               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
789                                      ip4_header_t, dst_address);
790               tcp0->checksum = ip_csum_fold(sum0);
791             }
792         }
793       return 1;
794     }
795   return 0;
796 }
797
798 static inline void
799 snat_icmp_hairpinning (snat_main_t *sm,
800                        vlib_buffer_t * b0,
801                        ip4_header_t * ip0,
802                        icmp46_header_t * icmp0)
803 {
804   snat_session_key_t key0, sm0;
805   clib_bihash_kv_8_8_t kv0, value0;
806   u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
807   ip_csum_t sum0;
808   snat_session_t *s0;
809
810   if (!icmp_is_error_message (icmp0))
811     {
812       icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
813       u16 icmp_id0 = echo0->identifier;
814       key0.addr = ip0->dst_address;
815       key0.port = icmp_id0;
816       key0.protocol = SNAT_PROTOCOL_ICMP;
817       key0.fib_index = sm->outside_fib_index;
818       kv0.key = key0.as_u64;
819
820       if (sm->num_workers > 1)
821         ti = (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread;
822       else
823         ti = sm->num_workers;
824
825       /* Check if destination is in active sessions */
826       if (clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0,
827                                   &value0))
828         {
829           /* or static mappings */
830           if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
831             {
832               new_dst_addr0 = sm0.addr.as_u32;
833               vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
834             }
835         }
836       else
837         {
838           si = value0.value;
839
840           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
841           new_dst_addr0 = s0->in2out.addr.as_u32;
842           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
843           echo0->identifier = s0->in2out.port;
844           sum0 = icmp0->checksum;
845           sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
846                                  icmp_echo_header_t, identifier);
847           icmp0->checksum = ip_csum_fold (sum0);
848         }
849
850       /* Destination is behind the same NAT, use internal address and port */
851       if (new_dst_addr0)
852         {
853           old_dst_addr0 = ip0->dst_address.as_u32;
854           ip0->dst_address.as_u32 = new_dst_addr0;
855           sum0 = ip0->checksum;
856           sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
857                                  ip4_header_t, dst_address);
858           ip0->checksum = ip_csum_fold (sum0);
859         }
860     }
861
862 }
863
864 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
865                                          vlib_buffer_t * b0,
866                                          ip4_header_t * ip0,
867                                          icmp46_header_t * icmp0,
868                                          u32 sw_if_index0,
869                                          u32 rx_fib_index0,
870                                          vlib_node_runtime_t * node,
871                                          u32 next0,
872                                          f64 now,
873                                          u32 thread_index,
874                                          snat_session_t ** p_s0)
875 {
876   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
877                       next0, thread_index, p_s0, 0);
878   snat_session_t * s0 = *p_s0;
879   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
880     {
881       /* Hairpinning */
882       if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == 0)
883         snat_icmp_hairpinning(sm, b0, ip0, icmp0);
884       /* Accounting */
885       s0->last_heard = now;
886       s0->total_pkts++;
887       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
888       /* Per-user LRU list maintenance */
889       clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
890                          s0->per_user_index);
891       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
892                           s0->per_user_list_head_index,
893                           s0->per_user_index);
894     }
895   return next0;
896 }
897 static inline void
898 snat_hairpinning_unknown_proto (snat_main_t *sm,
899                                 vlib_buffer_t * b,
900                                 ip4_header_t * ip)
901 {
902   u32 old_addr, new_addr = 0, ti = 0;
903   clib_bihash_kv_8_8_t kv, value;
904   clib_bihash_kv_16_8_t s_kv, s_value;
905   nat_ed_ses_key_t key;
906   snat_session_key_t m_key;
907   snat_static_mapping_t *m;
908   ip_csum_t sum;
909   snat_session_t *s;
910
911   old_addr = ip->dst_address.as_u32;
912   key.l_addr.as_u32 = ip->dst_address.as_u32;
913   key.r_addr.as_u32 = ip->src_address.as_u32;
914   key.fib_index = sm->outside_fib_index;
915   key.proto = ip->protocol;
916   key.r_port = 0;
917   key.l_port = 0;
918   s_kv.key[0] = key.as_u64[0];
919   s_kv.key[1] = key.as_u64[1];
920   if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
921     {
922       m_key.addr = ip->dst_address;
923       m_key.fib_index = sm->outside_fib_index;
924       m_key.port = 0;
925       m_key.protocol = 0;
926       kv.key = m_key.as_u64;
927       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
928         return;
929
930       m = pool_elt_at_index (sm->static_mappings, value.value);
931       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
932         vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
933       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
934     }
935   else
936     {
937       if (sm->num_workers > 1)
938         ti = sm->worker_out2in_cb (ip, sm->outside_fib_index);
939       else
940         ti = sm->num_workers;
941
942       s = pool_elt_at_index (sm->per_thread_data[ti].sessions, s_value.value);
943       if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
944         vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
945       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
946     }
947   sum = ip->checksum;
948   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
949   ip->checksum = ip_csum_fold (sum);
950 }
951
952 static snat_session_t *
953 snat_in2out_unknown_proto (snat_main_t *sm,
954                            vlib_buffer_t * b,
955                            ip4_header_t * ip,
956                            u32 rx_fib_index,
957                            u32 thread_index,
958                            f64 now,
959                            vlib_main_t * vm,
960                            vlib_node_runtime_t * node)
961 {
962   clib_bihash_kv_8_8_t kv, value;
963   clib_bihash_kv_16_8_t s_kv, s_value;
964   snat_static_mapping_t *m;
965   snat_session_key_t m_key;
966   u32 old_addr, new_addr = 0;
967   ip_csum_t sum;
968   snat_user_t *u;
969   dlist_elt_t *head, *elt;
970   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
971   u32 elt_index, head_index, ses_index;
972   snat_session_t * s;
973   nat_ed_ses_key_t key;
974   u32 address_index = ~0;
975   int i;
976   u8 is_sm = 0;
977
978   old_addr = ip->src_address.as_u32;
979
980   key.l_addr = ip->src_address;
981   key.r_addr = ip->dst_address;
982   key.fib_index = rx_fib_index;
983   key.proto = ip->protocol;
984   key.l_port = 0;
985   key.l_port = 0;
986   s_kv.key[0] = key.as_u64[0];
987   s_kv.key[1] = key.as_u64[1];
988
989   if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
990     {
991       s = pool_elt_at_index (tsm->sessions, s_value.value);
992       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
993     }
994   else
995     {
996       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
997         {
998           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
999           return 0;
1000         }
1001
1002       u = nat_user_get_or_create (sm, &ip->src_address, rx_fib_index,
1003                                   thread_index);
1004       if (!u)
1005         {
1006           clib_warning ("create NAT user failed");
1007           return 0;
1008         }
1009
1010       m_key.addr = ip->src_address;
1011       m_key.port = 0;
1012       m_key.protocol = 0;
1013       m_key.fib_index = rx_fib_index;
1014       kv.key = m_key.as_u64;
1015
1016       /* Try to find static mapping first */
1017       if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
1018         {
1019           m = pool_elt_at_index (sm->static_mappings, value.value);
1020           new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
1021           is_sm = 1;
1022           goto create_ses;
1023         }
1024       /* Fallback to 3-tuple key */
1025       else
1026         {
1027           /* Choose same out address as for TCP/UDP session to same destination */
1028           if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1029             {
1030               head_index = u->sessions_per_user_list_head_index;
1031               head = pool_elt_at_index (tsm->list_pool, head_index);
1032               elt_index = head->next;
1033               elt = pool_elt_at_index (tsm->list_pool, elt_index);
1034               ses_index = elt->value;
1035               while (ses_index != ~0)
1036                 {
1037                   s =  pool_elt_at_index (tsm->sessions, ses_index);
1038                   elt_index = elt->next;
1039                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1040                   ses_index = elt->value;
1041
1042                   if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
1043                     {
1044                       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1045                       address_index = s->outside_address_index;
1046
1047                       key.fib_index = sm->outside_fib_index;
1048                       key.l_addr.as_u32 = new_addr;
1049                       s_kv.key[0] = key.as_u64[0];
1050                       s_kv.key[1] = key.as_u64[1];
1051                       if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1052                         break;
1053
1054                       goto create_ses;
1055                     }
1056                 }
1057             }
1058           key.fib_index = sm->outside_fib_index;
1059           for (i = 0; i < vec_len (sm->addresses); i++)
1060             {
1061               key.l_addr.as_u32 = sm->addresses[i].addr.as_u32;
1062               s_kv.key[0] = key.as_u64[0];
1063               s_kv.key[1] = key.as_u64[1];
1064               if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1065                 {
1066                   new_addr = ip->src_address.as_u32 = key.l_addr.as_u32;
1067                   address_index = i;
1068                   goto create_ses;
1069                 }
1070             }
1071           return 0;
1072         }
1073
1074 create_ses:
1075       s = nat_session_alloc_or_recycle (sm, u, thread_index);
1076       if (!s)
1077         {
1078           clib_warning ("create NAT session failed");
1079           return 0;
1080         }
1081
1082       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1083       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
1084       s->outside_address_index = address_index;
1085       s->out2in.addr.as_u32 = new_addr;
1086       s->out2in.fib_index = sm->outside_fib_index;
1087       s->in2out.addr.as_u32 = old_addr;
1088       s->in2out.fib_index = rx_fib_index;
1089       s->in2out.port = s->out2in.port = ip->protocol;
1090       if (is_sm)
1091         {
1092           u->nstaticsessions++;
1093           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1094         }
1095       else
1096         {
1097           u->nsessions++;
1098         }
1099
1100       /* Add to lookup tables */
1101       key.l_addr.as_u32 = old_addr;
1102       key.r_addr = ip->dst_address;
1103       key.proto = ip->protocol;
1104       key.fib_index = rx_fib_index;
1105       s_kv.key[0] = key.as_u64[0];
1106       s_kv.key[1] = key.as_u64[1];
1107       s_kv.value = s - tsm->sessions;
1108       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1109         clib_warning ("in2out key add failed");
1110
1111       key.l_addr.as_u32 = new_addr;
1112       key.fib_index = sm->outside_fib_index;
1113       s_kv.key[0] = key.as_u64[0];
1114       s_kv.key[1] = key.as_u64[1];
1115       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1116         clib_warning ("out2in key add failed");
1117   }
1118
1119   /* Update IP checksum */
1120   sum = ip->checksum;
1121   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1122   ip->checksum = ip_csum_fold (sum);
1123
1124   /* Accounting */
1125   s->last_heard = now;
1126   s->total_pkts++;
1127   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1128   /* Per-user LRU list maintenance */
1129   clib_dlist_remove (tsm->list_pool, s->per_user_index);
1130   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1131                       s->per_user_index);
1132
1133   /* Hairpinning */
1134   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1135     snat_hairpinning_unknown_proto(sm, b, ip);
1136
1137   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1138     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1139
1140   return s;
1141 }
1142
1143 static snat_session_t *
1144 snat_in2out_lb (snat_main_t *sm,
1145                 vlib_buffer_t * b,
1146                 ip4_header_t * ip,
1147                 u32 rx_fib_index,
1148                 u32 thread_index,
1149                 f64 now,
1150                 vlib_main_t * vm,
1151                 vlib_node_runtime_t * node)
1152 {
1153   nat_ed_ses_key_t key;
1154   clib_bihash_kv_16_8_t s_kv, s_value;
1155   udp_header_t *udp = ip4_next_header (ip);
1156   tcp_header_t *tcp = (tcp_header_t *) udp;
1157   snat_session_t *s = 0;
1158   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1159   u32 old_addr, new_addr;
1160   u16 new_port, old_port;
1161   ip_csum_t sum;
1162   u32 proto = ip_proto_to_snat_proto (ip->protocol);
1163   snat_session_key_t e_key, l_key;
1164   snat_user_t *u;
1165
1166   old_addr = ip->src_address.as_u32;
1167
1168   key.l_addr = ip->src_address;
1169   key.r_addr = ip->dst_address;
1170   key.fib_index = rx_fib_index;
1171   key.proto = ip->protocol;
1172   key.r_port = udp->dst_port;
1173   key.l_port = udp->src_port;
1174   s_kv.key[0] = key.as_u64[0];
1175   s_kv.key[1] = key.as_u64[1];
1176
1177   if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
1178     {
1179       s = pool_elt_at_index (tsm->sessions, s_value.value);
1180     }
1181   else
1182     {
1183       if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index)))
1184         {
1185           b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
1186           return 0;
1187         }
1188
1189       l_key.addr = ip->src_address;
1190       l_key.port = udp->src_port;
1191       l_key.protocol = proto;
1192       l_key.fib_index = rx_fib_index;
1193       if (snat_static_mapping_match(sm, l_key, &e_key, 0, 0, 0))
1194         return 0;
1195
1196       u = nat_user_get_or_create (sm, &ip->src_address, rx_fib_index,
1197                                   thread_index);
1198       if (!u)
1199         {
1200           clib_warning ("create NAT user failed");
1201           return 0;
1202         }
1203
1204       s = nat_session_alloc_or_recycle (sm, u, thread_index);
1205       if (!s)
1206         {
1207           clib_warning ("create NAT session failed");
1208           return 0;
1209         }
1210
1211       s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1212       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1213       s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
1214       s->outside_address_index = ~0;
1215       s->in2out = l_key;
1216       s->out2in = e_key;
1217       u->nstaticsessions++;
1218
1219       /* Add to lookup tables */
1220       s_kv.value = s - tsm->sessions;
1221       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1222         clib_warning ("in2out-ed key add failed");
1223
1224       key.l_addr = e_key.addr;
1225       key.fib_index = e_key.fib_index;
1226       key.l_port = e_key.port;
1227       s_kv.key[0] = key.as_u64[0];
1228       s_kv.key[1] = key.as_u64[1];
1229       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1230         clib_warning ("out2in-ed key add failed");
1231     }
1232
1233   new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1234
1235   /* Update IP checksum */
1236   sum = ip->checksum;
1237   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1238   if (is_twice_nat_session (s))
1239     sum = ip_csum_update (sum, ip->dst_address.as_u32,
1240                           s->ext_host_addr.as_u32, ip4_header_t, dst_address);
1241   ip->checksum = ip_csum_fold (sum);
1242
1243   if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
1244     {
1245       old_port = tcp->src_port;
1246       tcp->src_port = s->out2in.port;
1247       new_port = tcp->src_port;
1248
1249       sum = tcp->checksum;
1250       sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1251       sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
1252       if (is_twice_nat_session (s))
1253         {
1254           sum = ip_csum_update (sum, ip->dst_address.as_u32,
1255                                 s->ext_host_addr.as_u32, ip4_header_t,
1256                                 dst_address);
1257           sum = ip_csum_update (sum, tcp->dst_port, s->ext_host_port,
1258                                 ip4_header_t, length);
1259           tcp->dst_port = s->ext_host_port;
1260           ip->dst_address.as_u32 = s->ext_host_addr.as_u32;
1261         }
1262       tcp->checksum = ip_csum_fold(sum);
1263     }
1264   else
1265     {
1266       udp->src_port = s->out2in.port;
1267       if (is_twice_nat_session (s))
1268         {
1269           udp->dst_port = s->ext_host_port;
1270           ip->dst_address.as_u32 = s->ext_host_addr.as_u32;
1271         }
1272       udp->checksum = 0;
1273     }
1274
1275   if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1276     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1277
1278   /* Accounting */
1279   s->last_heard = now;
1280   s->total_pkts++;
1281   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1282   /* Per-user LRU list maintenance */
1283   clib_dlist_remove (tsm->list_pool, s->per_user_index);
1284   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1285                       s->per_user_index);
1286   return s;
1287 }
1288
1289 static inline uword
1290 snat_in2out_node_fn_inline (vlib_main_t * vm,
1291                             vlib_node_runtime_t * node,
1292                             vlib_frame_t * frame, int is_slow_path,
1293                             int is_output_feature)
1294 {
1295   u32 n_left_from, * from, * to_next;
1296   snat_in2out_next_t next_index;
1297   u32 pkts_processed = 0;
1298   snat_main_t * sm = &snat_main;
1299   f64 now = vlib_time_now (vm);
1300   u32 stats_node_index;
1301   u32 thread_index = vlib_get_thread_index ();
1302
1303   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
1304     snat_in2out_node.index;
1305
1306   from = vlib_frame_vector_args (frame);
1307   n_left_from = frame->n_vectors;
1308   next_index = node->cached_next_index;
1309
1310   while (n_left_from > 0)
1311     {
1312       u32 n_left_to_next;
1313
1314       vlib_get_next_frame (vm, node, next_index,
1315                            to_next, n_left_to_next);
1316
1317       while (n_left_from >= 4 && n_left_to_next >= 2)
1318         {
1319           u32 bi0, bi1;
1320           vlib_buffer_t * b0, * b1;
1321           u32 next0, next1;
1322           u32 sw_if_index0, sw_if_index1;
1323           ip4_header_t * ip0, * ip1;
1324           ip_csum_t sum0, sum1;
1325           u32 new_addr0, old_addr0, new_addr1, old_addr1;
1326           u16 old_port0, new_port0, old_port1, new_port1;
1327           udp_header_t * udp0, * udp1;
1328           tcp_header_t * tcp0, * tcp1;
1329           icmp46_header_t * icmp0, * icmp1;
1330           snat_session_key_t key0, key1;
1331           u32 rx_fib_index0, rx_fib_index1;
1332           u32 proto0, proto1;
1333           snat_session_t * s0 = 0, * s1 = 0;
1334           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1335           u32 iph_offset0 = 0, iph_offset1 = 0;
1336
1337           /* Prefetch next iteration. */
1338           {
1339             vlib_buffer_t * p2, * p3;
1340
1341             p2 = vlib_get_buffer (vm, from[2]);
1342             p3 = vlib_get_buffer (vm, from[3]);
1343
1344             vlib_prefetch_buffer_header (p2, LOAD);
1345             vlib_prefetch_buffer_header (p3, LOAD);
1346
1347             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1348             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1349           }
1350
1351           /* speculatively enqueue b0 and b1 to the current next frame */
1352           to_next[0] = bi0 = from[0];
1353           to_next[1] = bi1 = from[1];
1354           from += 2;
1355           to_next += 2;
1356           n_left_from -= 2;
1357           n_left_to_next -= 2;
1358
1359           b0 = vlib_get_buffer (vm, bi0);
1360           b1 = vlib_get_buffer (vm, bi1);
1361
1362           if (is_output_feature)
1363             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1364
1365           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1366                  iph_offset0);
1367
1368           udp0 = ip4_next_header (ip0);
1369           tcp0 = (tcp_header_t *) udp0;
1370           icmp0 = (icmp46_header_t *) udp0;
1371
1372           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1373           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1374                                    sw_if_index0);
1375
1376           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
1377
1378           if (PREDICT_FALSE(ip0->ttl == 1))
1379             {
1380               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1381               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1382                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1383                                            0);
1384               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1385               goto trace00;
1386             }
1387
1388           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1389
1390           /* Next configured feature, probably ip4-lookup */
1391           if (is_slow_path)
1392             {
1393               if (PREDICT_FALSE (proto0 == ~0))
1394                 {
1395                   s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1396                                                   thread_index, now, vm, node);
1397                   if (!s0)
1398                     next0 = SNAT_IN2OUT_NEXT_DROP;
1399                   goto trace00;
1400                 }
1401
1402               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1403                 {
1404                   next0 = icmp_in2out_slow_path
1405                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
1406                      node, next0, now, thread_index, &s0);
1407                   goto trace00;
1408                 }
1409             }
1410           else
1411             {
1412               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1413                 {
1414                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1415                   goto trace00;
1416                 }
1417
1418               if (ip4_is_fragment (ip0))
1419                 {
1420                   next0 = SNAT_IN2OUT_NEXT_REASS;
1421                   goto trace00;
1422                 }
1423             }
1424
1425           key0.addr = ip0->src_address;
1426           key0.port = udp0->src_port;
1427           key0.protocol = proto0;
1428           key0.fib_index = rx_fib_index0;
1429
1430           kv0.key = key0.as_u64;
1431
1432           if (PREDICT_FALSE (clib_bihash_search_8_8 (
1433               &sm->per_thread_data[thread_index].in2out, &kv0, &value0) != 0))
1434             {
1435               if (is_slow_path)
1436                 {
1437                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1438                       ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature)
1439                     goto trace00;
1440
1441                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1442                                      &s0, node, next0, thread_index);
1443                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1444                     goto trace00;
1445                 }
1446               else
1447                 {
1448                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1449                   goto trace00;
1450                 }
1451             }
1452           else
1453             {
1454               if (PREDICT_FALSE (value0.value == ~0ULL))
1455                 {
1456                   if (is_slow_path)
1457                     {
1458                       s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
1459                                           thread_index, now, vm, node);
1460                       if (!s0)
1461                         next0 = SNAT_IN2OUT_NEXT_DROP;
1462                       goto trace00;
1463                     }
1464                   else
1465                     {
1466                       next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1467                       goto trace00;
1468                     }
1469                 }
1470               else
1471                 {
1472                   s0 = pool_elt_at_index (
1473                     sm->per_thread_data[thread_index].sessions,
1474                     value0.value);
1475                 }
1476             }
1477
1478           b0->flags |= VNET_BUFFER_F_IS_NATED;
1479
1480           old_addr0 = ip0->src_address.as_u32;
1481           ip0->src_address = s0->out2in.addr;
1482           new_addr0 = ip0->src_address.as_u32;
1483           if (!is_output_feature)
1484             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1485
1486           sum0 = ip0->checksum;
1487           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1488                                  ip4_header_t,
1489                                  src_address /* changed member */);
1490           ip0->checksum = ip_csum_fold (sum0);
1491
1492           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1493             {
1494               old_port0 = tcp0->src_port;
1495               tcp0->src_port = s0->out2in.port;
1496               new_port0 = tcp0->src_port;
1497
1498               sum0 = tcp0->checksum;
1499               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1500                                      ip4_header_t,
1501                                      dst_address /* changed member */);
1502               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1503                                      ip4_header_t /* cheat */,
1504                                      length /* changed member */);
1505               tcp0->checksum = ip_csum_fold(sum0);
1506             }
1507           else
1508             {
1509               old_port0 = udp0->src_port;
1510               udp0->src_port = s0->out2in.port;
1511               udp0->checksum = 0;
1512             }
1513
1514           /* Accounting */
1515           s0->last_heard = now;
1516           s0->total_pkts++;
1517           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1518           /* Per-user LRU list maintenance */
1519           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1520                              s0->per_user_index);
1521           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1522                               s0->per_user_list_head_index,
1523                               s0->per_user_index);
1524         trace00:
1525
1526           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1527                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1528             {
1529               snat_in2out_trace_t *t =
1530                  vlib_add_trace (vm, node, b0, sizeof (*t));
1531               t->is_slow_path = is_slow_path;
1532               t->sw_if_index = sw_if_index0;
1533               t->next_index = next0;
1534                   t->session_index = ~0;
1535               if (s0)
1536                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1537             }
1538
1539           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1540
1541           if (is_output_feature)
1542             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1543
1544           ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1545                  iph_offset1);
1546
1547           udp1 = ip4_next_header (ip1);
1548           tcp1 = (tcp_header_t *) udp1;
1549           icmp1 = (icmp46_header_t *) udp1;
1550
1551           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1552           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1553                                    sw_if_index1);
1554
1555           if (PREDICT_FALSE(ip1->ttl == 1))
1556             {
1557               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1558               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1559                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1560                                            0);
1561               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1562               goto trace01;
1563             }
1564
1565           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1566
1567           /* Next configured feature, probably ip4-lookup */
1568           if (is_slow_path)
1569             {
1570               if (PREDICT_FALSE (proto1 == ~0))
1571                 {
1572                   s1 = snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1,
1573                                                   thread_index, now, vm, node);
1574                   if (!s1)
1575                     next1 = SNAT_IN2OUT_NEXT_DROP;
1576                   goto trace01;
1577                 }
1578
1579               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1580                 {
1581                   next1 = icmp_in2out_slow_path
1582                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1583                      next1, now, thread_index, &s1);
1584                   goto trace01;
1585                 }
1586             }
1587           else
1588             {
1589               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1590                 {
1591                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1592                   goto trace01;
1593                 }
1594
1595               if (ip4_is_fragment (ip1))
1596                 {
1597                   next1 = SNAT_IN2OUT_NEXT_REASS;
1598                   goto trace01;
1599                 }
1600             }
1601
1602           b1->flags |= VNET_BUFFER_F_IS_NATED;
1603
1604           key1.addr = ip1->src_address;
1605           key1.port = udp1->src_port;
1606           key1.protocol = proto1;
1607           key1.fib_index = rx_fib_index1;
1608
1609           kv1.key = key1.as_u64;
1610
1611             if (PREDICT_FALSE(clib_bihash_search_8_8 (
1612                 &sm->per_thread_data[thread_index].in2out, &kv1, &value1) != 0))
1613             {
1614               if (is_slow_path)
1615                 {
1616                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
1617                       ip1, proto1, rx_fib_index1, thread_index)) && !is_output_feature)
1618                     goto trace01;
1619
1620                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1621                                      &s1, node, next1, thread_index);
1622                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1623                     goto trace01;
1624                 }
1625               else
1626                 {
1627                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1628                   goto trace01;
1629                 }
1630             }
1631           else
1632             {
1633               if (PREDICT_FALSE (value1.value == ~0ULL))
1634                 {
1635                   if (is_slow_path)
1636                     {
1637                       s1 = snat_in2out_lb(sm, b1, ip1, rx_fib_index1,
1638                                           thread_index, now, vm, node);
1639                       if (!s1)
1640                         next1 = SNAT_IN2OUT_NEXT_DROP;
1641                       goto trace01;
1642                     }
1643                   else
1644                     {
1645                       next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1646                       goto trace01;
1647                     }
1648                 }
1649               else
1650                 {
1651                   s1 = pool_elt_at_index (
1652                     sm->per_thread_data[thread_index].sessions,
1653                     value1.value);
1654                 }
1655             }
1656
1657           old_addr1 = ip1->src_address.as_u32;
1658           ip1->src_address = s1->out2in.addr;
1659           new_addr1 = ip1->src_address.as_u32;
1660           if (!is_output_feature)
1661             vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1662
1663           sum1 = ip1->checksum;
1664           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1665                                  ip4_header_t,
1666                                  src_address /* changed member */);
1667           ip1->checksum = ip_csum_fold (sum1);
1668
1669           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1670             {
1671               old_port1 = tcp1->src_port;
1672               tcp1->src_port = s1->out2in.port;
1673               new_port1 = tcp1->src_port;
1674
1675               sum1 = tcp1->checksum;
1676               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1677                                      ip4_header_t,
1678                                      dst_address /* changed member */);
1679               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1680                                      ip4_header_t /* cheat */,
1681                                      length /* changed member */);
1682               tcp1->checksum = ip_csum_fold(sum1);
1683             }
1684           else
1685             {
1686               old_port1 = udp1->src_port;
1687               udp1->src_port = s1->out2in.port;
1688               udp1->checksum = 0;
1689             }
1690
1691           /* Accounting */
1692           s1->last_heard = now;
1693           s1->total_pkts++;
1694           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1695           /* Per-user LRU list maintenance */
1696           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1697                              s1->per_user_index);
1698           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1699                               s1->per_user_list_head_index,
1700                               s1->per_user_index);
1701         trace01:
1702
1703           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1704                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1705             {
1706               snat_in2out_trace_t *t =
1707                  vlib_add_trace (vm, node, b1, sizeof (*t));
1708               t->sw_if_index = sw_if_index1;
1709               t->next_index = next1;
1710               t->session_index = ~0;
1711               if (s1)
1712                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1713             }
1714
1715           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1716
1717           /* verify speculative enqueues, maybe switch current next frame */
1718           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1719                                            to_next, n_left_to_next,
1720                                            bi0, bi1, next0, next1);
1721         }
1722
1723       while (n_left_from > 0 && n_left_to_next > 0)
1724         {
1725           u32 bi0;
1726           vlib_buffer_t * b0;
1727           u32 next0;
1728           u32 sw_if_index0;
1729           ip4_header_t * ip0;
1730           ip_csum_t sum0;
1731           u32 new_addr0, old_addr0;
1732           u16 old_port0, new_port0;
1733           udp_header_t * udp0;
1734           tcp_header_t * tcp0;
1735           icmp46_header_t * icmp0;
1736           snat_session_key_t key0;
1737           u32 rx_fib_index0;
1738           u32 proto0;
1739           snat_session_t * s0 = 0;
1740           clib_bihash_kv_8_8_t kv0, value0;
1741           u32 iph_offset0 = 0;
1742
1743           /* speculatively enqueue b0 to the current next frame */
1744           bi0 = from[0];
1745           to_next[0] = bi0;
1746           from += 1;
1747           to_next += 1;
1748           n_left_from -= 1;
1749           n_left_to_next -= 1;
1750
1751           b0 = vlib_get_buffer (vm, bi0);
1752           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1753
1754           if (is_output_feature)
1755             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1756
1757           ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1758                  iph_offset0);
1759
1760           udp0 = ip4_next_header (ip0);
1761           tcp0 = (tcp_header_t *) udp0;
1762           icmp0 = (icmp46_header_t *) udp0;
1763
1764           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1765           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1766                                    sw_if_index0);
1767
1768           if (PREDICT_FALSE(ip0->ttl == 1))
1769             {
1770               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1771               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1772                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1773                                            0);
1774               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1775               goto trace0;
1776             }
1777
1778           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1779
1780           /* Next configured feature, probably ip4-lookup */
1781           if (is_slow_path)
1782             {
1783               if (PREDICT_FALSE (proto0 == ~0))
1784                 {
1785                   s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1786                                                   thread_index, now, vm, node);
1787                   if (!s0)
1788                     next0 = SNAT_IN2OUT_NEXT_DROP;
1789                   goto trace0;
1790                 }
1791
1792               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1793                 {
1794                   next0 = icmp_in2out_slow_path
1795                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1796                      next0, now, thread_index, &s0);
1797                   goto trace0;
1798                 }
1799             }
1800           else
1801             {
1802               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1803                 {
1804                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1805                   goto trace0;
1806                 }
1807
1808               if (ip4_is_fragment (ip0))
1809                 {
1810                   next0 = SNAT_IN2OUT_NEXT_REASS;
1811                   goto trace0;
1812                 }
1813             }
1814
1815           key0.addr = ip0->src_address;
1816           key0.port = udp0->src_port;
1817           key0.protocol = proto0;
1818           key0.fib_index = rx_fib_index0;
1819
1820           kv0.key = key0.as_u64;
1821
1822           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out,
1823                                       &kv0, &value0))
1824             {
1825               if (is_slow_path)
1826                 {
1827                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1828                       ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature)
1829                     goto trace0;
1830
1831                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1832                                      &s0, node, next0, thread_index);
1833
1834                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1835                     goto trace0;
1836                 }
1837               else
1838                 {
1839                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1840                   goto trace0;
1841                 }
1842             }
1843           else
1844             {
1845               if (PREDICT_FALSE (value0.value == ~0ULL))
1846                 {
1847                   if (is_slow_path)
1848                     {
1849                       s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
1850                                           thread_index, now, vm, node);
1851                       if (!s0)
1852                         next0 = SNAT_IN2OUT_NEXT_DROP;
1853                       goto trace0;
1854                     }
1855                   else
1856                     {
1857                       next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1858                       goto trace0;
1859                     }
1860                 }
1861               else
1862                 {
1863                   s0 = pool_elt_at_index (
1864                     sm->per_thread_data[thread_index].sessions,
1865                     value0.value);
1866                 }
1867             }
1868
1869           b0->flags |= VNET_BUFFER_F_IS_NATED;
1870
1871           old_addr0 = ip0->src_address.as_u32;
1872           ip0->src_address = s0->out2in.addr;
1873           new_addr0 = ip0->src_address.as_u32;
1874           if (!is_output_feature)
1875             vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1876
1877           sum0 = ip0->checksum;
1878           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1879                                  ip4_header_t,
1880                                  src_address /* changed member */);
1881           ip0->checksum = ip_csum_fold (sum0);
1882
1883           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1884             {
1885               old_port0 = tcp0->src_port;
1886               tcp0->src_port = s0->out2in.port;
1887               new_port0 = tcp0->src_port;
1888
1889               sum0 = tcp0->checksum;
1890               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1891                                      ip4_header_t,
1892                                      dst_address /* changed member */);
1893               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1894                                      ip4_header_t /* cheat */,
1895                                      length /* changed member */);
1896               tcp0->checksum = ip_csum_fold(sum0);
1897             }
1898           else
1899             {
1900               old_port0 = udp0->src_port;
1901               udp0->src_port = s0->out2in.port;
1902               udp0->checksum = 0;
1903             }
1904
1905           /* Accounting */
1906           s0->last_heard = now;
1907           s0->total_pkts++;
1908           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1909           /* Per-user LRU list maintenance */
1910           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1911                              s0->per_user_index);
1912           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1913                               s0->per_user_list_head_index,
1914                               s0->per_user_index);
1915
1916         trace0:
1917           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1918                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1919             {
1920               snat_in2out_trace_t *t =
1921                  vlib_add_trace (vm, node, b0, sizeof (*t));
1922               t->is_slow_path = is_slow_path;
1923               t->sw_if_index = sw_if_index0;
1924               t->next_index = next0;
1925                   t->session_index = ~0;
1926               if (s0)
1927                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1928             }
1929
1930           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1931
1932           /* verify speculative enqueue, maybe switch current next frame */
1933           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1934                                            to_next, n_left_to_next,
1935                                            bi0, next0);
1936         }
1937
1938       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1939     }
1940
1941   vlib_node_increment_counter (vm, stats_node_index,
1942                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1943                                pkts_processed);
1944   return frame->n_vectors;
1945 }
1946
1947 static uword
1948 snat_in2out_fast_path_fn (vlib_main_t * vm,
1949                           vlib_node_runtime_t * node,
1950                           vlib_frame_t * frame)
1951 {
1952   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0);
1953 }
1954
1955 VLIB_REGISTER_NODE (snat_in2out_node) = {
1956   .function = snat_in2out_fast_path_fn,
1957   .name = "nat44-in2out",
1958   .vector_size = sizeof (u32),
1959   .format_trace = format_snat_in2out_trace,
1960   .type = VLIB_NODE_TYPE_INTERNAL,
1961
1962   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1963   .error_strings = snat_in2out_error_strings,
1964
1965   .runtime_data_bytes = sizeof (snat_runtime_t),
1966
1967   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1968
1969   /* edit / add dispositions here */
1970   .next_nodes = {
1971     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1972     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1973     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
1974     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1975     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
1976   },
1977 };
1978
1979 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1980
1981 static uword
1982 snat_in2out_output_fast_path_fn (vlib_main_t * vm,
1983                                  vlib_node_runtime_t * node,
1984                                  vlib_frame_t * frame)
1985 {
1986   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1);
1987 }
1988
1989 VLIB_REGISTER_NODE (snat_in2out_output_node) = {
1990   .function = snat_in2out_output_fast_path_fn,
1991   .name = "nat44-in2out-output",
1992   .vector_size = sizeof (u32),
1993   .format_trace = format_snat_in2out_trace,
1994   .type = VLIB_NODE_TYPE_INTERNAL,
1995
1996   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1997   .error_strings = snat_in2out_error_strings,
1998
1999   .runtime_data_bytes = sizeof (snat_runtime_t),
2000
2001   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2002
2003   /* edit / add dispositions here */
2004   .next_nodes = {
2005     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2006     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2007     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2008     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2009     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2010   },
2011 };
2012
2013 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
2014                               snat_in2out_output_fast_path_fn);
2015
2016 static uword
2017 snat_in2out_slow_path_fn (vlib_main_t * vm,
2018                           vlib_node_runtime_t * node,
2019                           vlib_frame_t * frame)
2020 {
2021   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0);
2022 }
2023
2024 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
2025   .function = snat_in2out_slow_path_fn,
2026   .name = "nat44-in2out-slowpath",
2027   .vector_size = sizeof (u32),
2028   .format_trace = format_snat_in2out_trace,
2029   .type = VLIB_NODE_TYPE_INTERNAL,
2030
2031   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2032   .error_strings = snat_in2out_error_strings,
2033
2034   .runtime_data_bytes = sizeof (snat_runtime_t),
2035
2036   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2037
2038   /* edit / add dispositions here */
2039   .next_nodes = {
2040     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2041     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2042     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2043     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2044     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2045   },
2046 };
2047
2048 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
2049                               snat_in2out_slow_path_fn);
2050
2051 static uword
2052 snat_in2out_output_slow_path_fn (vlib_main_t * vm,
2053                                  vlib_node_runtime_t * node,
2054                                  vlib_frame_t * frame)
2055 {
2056   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1);
2057 }
2058
2059 VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
2060   .function = snat_in2out_output_slow_path_fn,
2061   .name = "nat44-in2out-output-slowpath",
2062   .vector_size = sizeof (u32),
2063   .format_trace = format_snat_in2out_trace,
2064   .type = VLIB_NODE_TYPE_INTERNAL,
2065
2066   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2067   .error_strings = snat_in2out_error_strings,
2068
2069   .runtime_data_bytes = sizeof (snat_runtime_t),
2070
2071   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2072
2073   /* edit / add dispositions here */
2074   .next_nodes = {
2075     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2076     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2077     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2078     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2079     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2080   },
2081 };
2082
2083 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
2084                               snat_in2out_output_slow_path_fn);
2085
2086 extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local;
2087
2088 static uword
2089 nat44_hairpinning_fn (vlib_main_t * vm,
2090                       vlib_node_runtime_t * node,
2091                       vlib_frame_t * frame)
2092 {
2093   u32 n_left_from, * from, * to_next;
2094   snat_in2out_next_t next_index;
2095   u32 pkts_processed = 0;
2096   snat_main_t * sm = &snat_main;
2097   vnet_feature_main_t *fm = &feature_main;
2098   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
2099   vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index];
2100
2101   from = vlib_frame_vector_args (frame);
2102   n_left_from = frame->n_vectors;
2103   next_index = node->cached_next_index;
2104
2105   while (n_left_from > 0)
2106     {
2107       u32 n_left_to_next;
2108
2109       vlib_get_next_frame (vm, node, next_index,
2110                            to_next, n_left_to_next);
2111
2112       while (n_left_from > 0 && n_left_to_next > 0)
2113         {
2114           u32 bi0;
2115           vlib_buffer_t * b0;
2116           u32 next0;
2117           ip4_header_t * ip0;
2118           u32 proto0;
2119           udp_header_t * udp0;
2120           tcp_header_t * tcp0;
2121
2122           /* speculatively enqueue b0 to the current next frame */
2123           bi0 = from[0];
2124           to_next[0] = bi0;
2125           from += 1;
2126           to_next += 1;
2127           n_left_from -= 1;
2128           n_left_to_next -= 1;
2129
2130           b0 = vlib_get_buffer (vm, bi0);
2131           ip0 = vlib_buffer_get_current (b0);
2132           udp0 = ip4_next_header (ip0);
2133           tcp0 = (tcp_header_t *) udp0;
2134
2135           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2136
2137           vnet_get_config_data (&cm->config_main, &b0->current_config_index,
2138                                 &next0, 0);
2139
2140           if (snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0))
2141             next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2142
2143           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2144
2145           /* verify speculative enqueue, maybe switch current next frame */
2146           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2147                                            to_next, n_left_to_next,
2148                                            bi0, next0);
2149          }
2150
2151       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2152     }
2153
2154   vlib_node_increment_counter (vm, nat44_hairpinning_node.index,
2155                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2156                                pkts_processed);
2157   return frame->n_vectors;
2158 }
2159
2160 VLIB_REGISTER_NODE (nat44_hairpinning_node) = {
2161   .function = nat44_hairpinning_fn,
2162   .name = "nat44-hairpinning",
2163   .vector_size = sizeof (u32),
2164   .type = VLIB_NODE_TYPE_INTERNAL,
2165   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2166   .error_strings = snat_in2out_error_strings,
2167   .n_next_nodes = 2,
2168   .next_nodes = {
2169     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2170     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2171   },
2172 };
2173
2174 VLIB_NODE_FUNCTION_MULTIARCH (nat44_hairpinning_node,
2175                               nat44_hairpinning_fn);
2176
2177 static inline void
2178 nat44_reass_hairpinning (snat_main_t *sm,
2179                          vlib_buffer_t * b0,
2180                          ip4_header_t * ip0,
2181                          u16 sport,
2182                          u16 dport,
2183                          u32 proto0)
2184 {
2185   snat_session_key_t key0, sm0;
2186   snat_session_t * s0;
2187   clib_bihash_kv_8_8_t kv0, value0;
2188   ip_csum_t sum0;
2189   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
2190   u16 new_dst_port0, old_dst_port0;
2191   udp_header_t * udp0;
2192   tcp_header_t * tcp0;
2193
2194   key0.addr = ip0->dst_address;
2195   key0.port = dport;
2196   key0.protocol = proto0;
2197   key0.fib_index = sm->outside_fib_index;
2198   kv0.key = key0.as_u64;
2199
2200   udp0 = ip4_next_header (ip0);
2201
2202   /* Check if destination is static mappings */
2203   if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
2204     {
2205       new_dst_addr0 = sm0.addr.as_u32;
2206       new_dst_port0 = sm0.port;
2207       vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2208     }
2209   /* or active sessions */
2210   else
2211     {
2212       if (sm->num_workers > 1)
2213         ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
2214       else
2215         ti = sm->num_workers;
2216
2217       if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
2218         {
2219           si = value0.value;
2220           s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
2221           new_dst_addr0 = s0->in2out.addr.as_u32;
2222           new_dst_port0 = s0->in2out.port;
2223           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
2224         }
2225     }
2226
2227   /* Destination is behind the same NAT, use internal address and port */
2228   if (new_dst_addr0)
2229     {
2230       old_dst_addr0 = ip0->dst_address.as_u32;
2231       ip0->dst_address.as_u32 = new_dst_addr0;
2232       sum0 = ip0->checksum;
2233       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2234                              ip4_header_t, dst_address);
2235       ip0->checksum = ip_csum_fold (sum0);
2236
2237       old_dst_port0 = dport;
2238       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0 &&
2239                        ip4_is_first_fragment (ip0)))
2240         {
2241           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2242             {
2243               tcp0 = ip4_next_header (ip0);
2244               tcp0->dst = new_dst_port0;
2245               sum0 = tcp0->checksum;
2246               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2247                                      ip4_header_t, dst_address);
2248               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
2249                                      ip4_header_t /* cheat */, length);
2250               tcp0->checksum = ip_csum_fold(sum0);
2251             }
2252           else
2253             {
2254               udp0->dst_port = new_dst_port0;
2255               udp0->checksum = 0;
2256             }
2257         }
2258       else
2259         {
2260           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2261             {
2262               tcp0 = ip4_next_header (ip0);
2263               sum0 = tcp0->checksum;
2264               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2265                                      ip4_header_t, dst_address);
2266               tcp0->checksum = ip_csum_fold(sum0);
2267             }
2268         }
2269     }
2270 }
2271
/*
 * Dispatch function of the "nat44-in2out-reass" node: in2out translation
 * of IPv4 fragments.
 *
 * Per buffer:
 *  - a reassembly context is found or created from the source address,
 *    destination address, fragment id and protocol of the IP header;
 *  - on the first fragment the in2out session is looked up (or created via
 *    slow_path()), its index is stored in the reassembly context, and
 *    nat_ip4_reass_get_frags() is called with fragments_to_loopback
 *    (presumably collecting fragments cached earlier for this context);
 *  - on any other fragment, if the context has no session index yet, the
 *    buffer is handed to nat_ip4_reass_add_fragment() and taken back out of
 *    the next frame ("cached"); otherwise the recorded session is used;
 *  - the source address and IP checksum are rewritten, the L4 source port
 *    and checksum only on the first fragment, followed by hairpinning,
 *    session accounting and per-user LRU maintenance.
 *
 * When the input is exhausted and fragments_to_loopback is non-empty, its
 * entries are copied back into the frame's vector so the same loop
 * processes them too.  Buffers collected in fragments_to_drop are sent to
 * the drop next node at the end.
 *
 * Returns frame->n_vectors.
 */
2272 static uword
2273 nat44_in2out_reass_node_fn (vlib_main_t * vm,
2274                             vlib_node_runtime_t * node,
2275                             vlib_frame_t * frame)
2276 {
2277   u32 n_left_from, *from, *to_next;
2278   snat_in2out_next_t next_index;
2279   u32 pkts_processed = 0;
2280   snat_main_t *sm = &snat_main;
2281   f64 now = vlib_time_now (vm);
2282   u32 thread_index = vlib_get_thread_index ();
2283   snat_main_per_thread_data_t *per_thread_data =
2284     &sm->per_thread_data[thread_index];
/* Vectors of buffer indices filled in by the reassembly helpers; both are
 * released with vec_free() before returning. */
2285   u32 *fragments_to_drop = 0;
2286   u32 *fragments_to_loopback = 0;
2287
2288   from = vlib_frame_vector_args (frame);
2289   n_left_from = frame->n_vectors;
2290   next_index = node->cached_next_index;
2291
2292   while (n_left_from > 0)
2293     {
2294       u32 n_left_to_next;
2295
2296       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2297
2298       while (n_left_from > 0 && n_left_to_next > 0)
2299        {
2300           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
2301           vlib_buffer_t *b0;
2302           u32 next0;
/* Set when the buffer was stored in the reassembly context instead of
 * being forwarded; the speculative enqueue is undone for it below. */
2303           u8 cached0 = 0;
2304           ip4_header_t *ip0;
2305           nat_reass_ip4_t *reass0;
2306           udp_header_t * udp0;
2307           tcp_header_t * tcp0;
2308           snat_session_key_t key0;
2309           clib_bihash_kv_8_8_t kv0, value0;
2310           snat_session_t * s0 = 0;
2311           u16 old_port0, new_port0;
2312           ip_csum_t sum0;
2313
2314           /* speculatively enqueue b0 to the current next frame */
2315           bi0 = from[0];
2316           to_next[0] = bi0;
2317           from += 1;
2318           to_next += 1;
2319           n_left_from -= 1;
2320           n_left_to_next -= 1;
2321
2322           b0 = vlib_get_buffer (vm, bi0);
2323           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2324
2325           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2326           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2327                                                                sw_if_index0);
2328
/* Drop the fragment outright when nat_reass_is_drop_frag() says so. */
2329           if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
2330             {
2331               next0 = SNAT_IN2OUT_NEXT_DROP;
2332               b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
2333               goto trace0;
2334             }
2335
2336           ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
/* udp0/tcp0 alias the bytes after the IP header; they are only
 * dereferenced below when ip0 is a first fragment. */
2337           udp0 = ip4_next_header (ip0);
2338           tcp0 = (tcp_header_t *) udp0;
2339           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2340
2341           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
2342                                                  ip0->dst_address,
2343                                                  ip0->fragment_id,
2344                                                  ip0->protocol,
2345                                                  1,
2346                                                  &fragments_to_drop);
2347
/* No reassembly context available: drop and count as MAX_REASS. */
2348           if (PREDICT_FALSE (!reass0))
2349             {
2350               next0 = SNAT_IN2OUT_NEXT_DROP;
2351               b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_REASS];
2352               goto trace0;
2353             }
2354
/* First fragment: resolve the session from the in2out key and remember
 * its index in the reassembly context for the remaining fragments. */
2355           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
2356             {
2357               key0.addr = ip0->src_address;
2358               key0.port = udp0->src_port;
2359               key0.protocol = proto0;
2360               key0.fib_index = rx_fib_index0;
2361               kv0.key = key0.as_u64;
2362
/* Non-zero return means no existing session for this key. */
2363               if (clib_bihash_search_8_8 (&per_thread_data->in2out, &kv0, &value0))
2364                 {
/* Forwarded untranslated; note the context's session index is left
 * untouched on this path. */
2365                   if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
2366                       ip0, proto0, rx_fib_index0, thread_index)))
2367                     goto trace0;
2368
2369                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
2370                                      &s0, node, next0, thread_index);
2371
2372                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
2373                     goto trace0;
2374
2375                   reass0->sess_index = s0 - per_thread_data->sessions;
2376                 }
2377               else
2378                 {
2379                   s0 = pool_elt_at_index (per_thread_data->sessions,
2380                                           value0.value);
2381                   reass0->sess_index = value0.value;
2382                 }
/* NOTE(review): presumably moves fragments cached for this context into
 * fragments_to_loopback - confirm against nat_reass. */
2383               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
2384             }
2385           else
2386             {
/* Session not known yet (sess_index still ~0): hand the buffer to the
 * reassembly context, or drop it if that fails. */
2387               if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
2388                 {
2389                   if (nat_ip4_reass_add_fragment (reass0, bi0))
2390                     {
2391                       b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_FRAG];
2392                       next0 = SNAT_IN2OUT_NEXT_DROP;
2393                       goto trace0;
2394                     }
2395                   cached0 = 1;
2396                   goto trace0;
2397                 }
2398               s0 = pool_elt_at_index (per_thread_data->sessions,
2399                                       reass0->sess_index);
2400             }
2401
/* Rewrite the source address to the session's outside address and patch
 * the IP header checksum incrementally. */
2402           old_addr0 = ip0->src_address.as_u32;
2403           ip0->src_address = s0->out2in.addr;
2404           new_addr0 = ip0->src_address.as_u32;
2405           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
2406
2407           sum0 = ip0->checksum;
2408           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2409                                  ip4_header_t,
2410                                  src_address /* changed member */);
2411           ip0->checksum = ip_csum_fold (sum0);
2412
/* The L4 source port and checksum are only touched on the first
 * fragment. */
2413           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
2414             {
2415               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2416                 {
2417                   old_port0 = tcp0->src_port;
2418                   tcp0->src_port = s0->out2in.port;
2419                   new_port0 = tcp0->src_port;
2420
2421                   sum0 = tcp0->checksum;
2422                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2423                                          ip4_header_t,
2424                                          dst_address /* changed member */);
2425                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2426                                          ip4_header_t /* cheat */,
2427                                          length /* changed member */);
2428                   tcp0->checksum = ip_csum_fold(sum0);
2429                 }
2430               else
2431                 {
/* Non-TCP: the checksum field is cleared rather than recomputed. */
2432                   old_port0 = udp0->src_port;
2433                   udp0->src_port = s0->out2in.port;
2434                   udp0->checksum = 0;
2435                 }
2436             }
2437
2438           /* Hairpinning */
2439           nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port,
2440                                    s0->ext_host_port, proto0);
2441
2442           /* Accounting */
2443           s0->last_heard = now;
2444           s0->total_pkts++;
2445           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
2446           /* Per-user LRU list maintenance */
2447           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
2448                              s0->per_user_index);
2449           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
2450                               s0->per_user_list_head_index,
2451                               s0->per_user_index);
2452
2453         trace0:
2454           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2455                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2456             {
2457               nat44_in2out_reass_trace_t *t =
2458                  vlib_add_trace (vm, node, b0, sizeof (*t));
2459               t->cached = cached0;
2460               t->sw_if_index = sw_if_index0;
2461               t->next_index = next0;
2462             }
2463
/* A cached buffer is not forwarded now: take back the slot that was
 * speculatively filled in the next frame. */
2464           if (cached0)
2465             {
2466               n_left_to_next++;
2467               to_next--;
2468             }
2469           else
2470             {
2471               pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2472
2473               /* verify speculative enqueue, maybe switch current next frame */
2474               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2475                                                to_next, n_left_to_next,
2476                                                bi0, next0);
2477             }
2478
/* Input exhausted but fragments are waiting to be looped back: refill the
 * frame's own vector with up to VLIB_FRAME_SIZE of them (taken from the
 * tail of fragments_to_loopback) and keep iterating. */
2479           if (n_left_from == 0 && vec_len (fragments_to_loopback))
2480             {
2481               from = vlib_frame_vector_args (frame);
2482               u32 len = vec_len (fragments_to_loopback);
2483               if (len <= VLIB_FRAME_SIZE)
2484                 {
2485                   clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
2486                   n_left_from = len;
2487                   vec_reset_length (fragments_to_loopback);
2488                 }
2489               else
2490                 {
2491                   clib_memcpy (from,
2492                                fragments_to_loopback + (len - VLIB_FRAME_SIZE),
2493                                sizeof (u32) * VLIB_FRAME_SIZE);
2494                   n_left_from = VLIB_FRAME_SIZE;
2495                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
2496                 }
2497             }
2498        }
2499
2500       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2501     }
2502
2503   vlib_node_increment_counter (vm, nat44_in2out_reass_node.index,
2504                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2505                                pkts_processed);
2506
/* Send everything collected in fragments_to_drop to the drop next node,
 * tagged with the DROP_FRAGMENT error. */
2507   nat_send_all_to_node (vm, fragments_to_drop, node,
2508                         &node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT],
2509                         SNAT_IN2OUT_NEXT_DROP);
2510
2511   vec_free (fragments_to_drop);
2512   vec_free (fragments_to_loopback);
2513   return frame->n_vectors;
2514 }
2515
2516 VLIB_REGISTER_NODE (nat44_in2out_reass_node) = {
2517   .function = nat44_in2out_reass_node_fn,
2518   .name = "nat44-in2out-reass",
2519   .vector_size = sizeof (u32),
2520   .format_trace = format_nat44_in2out_reass_trace,
2521   .type = VLIB_NODE_TYPE_INTERNAL,
2522
2523   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2524   .error_strings = snat_in2out_error_strings,
2525
2526   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2527   .next_nodes = {
2528     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2529     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2530     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2531     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2532     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2533   },
2534 };
2535
2536 VLIB_NODE_FUNCTION_MULTIARCH (nat44_in2out_reass_node,
2537                               nat44_in2out_reass_node_fn);
2538
2539 /**************************/
2540 /*** deterministic mode ***/
2541 /**************************/
2542 static uword
2543 snat_det_in2out_node_fn (vlib_main_t * vm,
2544                          vlib_node_runtime_t * node,
2545                          vlib_frame_t * frame)
2546 {
2547   u32 n_left_from, * from, * to_next;
2548   snat_in2out_next_t next_index;
2549   u32 pkts_processed = 0;
2550   snat_main_t * sm = &snat_main;
2551   u32 now = (u32) vlib_time_now (vm);
2552   u32 thread_index = vlib_get_thread_index ();
2553
2554   from = vlib_frame_vector_args (frame);
2555   n_left_from = frame->n_vectors;
2556   next_index = node->cached_next_index;
2557
2558   while (n_left_from > 0)
2559     {
2560       u32 n_left_to_next;
2561
2562       vlib_get_next_frame (vm, node, next_index,
2563                            to_next, n_left_to_next);
2564
2565       while (n_left_from >= 4 && n_left_to_next >= 2)
2566         {
2567           u32 bi0, bi1;
2568           vlib_buffer_t * b0, * b1;
2569           u32 next0, next1;
2570           u32 sw_if_index0, sw_if_index1;
2571           ip4_header_t * ip0, * ip1;
2572           ip_csum_t sum0, sum1;
2573           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
2574           u16 old_port0, new_port0, lo_port0, i0;
2575           u16 old_port1, new_port1, lo_port1, i1;
2576           udp_header_t * udp0, * udp1;
2577           tcp_header_t * tcp0, * tcp1;
2578           u32 proto0, proto1;
2579           snat_det_out_key_t key0, key1;
2580           snat_det_map_t * dm0, * dm1;
2581           snat_det_session_t * ses0 = 0, * ses1 = 0;
2582           u32 rx_fib_index0, rx_fib_index1;
2583           icmp46_header_t * icmp0, * icmp1;
2584
2585           /* Prefetch next iteration. */
2586           {
2587             vlib_buffer_t * p2, * p3;
2588
2589             p2 = vlib_get_buffer (vm, from[2]);
2590             p3 = vlib_get_buffer (vm, from[3]);
2591
2592             vlib_prefetch_buffer_header (p2, LOAD);
2593             vlib_prefetch_buffer_header (p3, LOAD);
2594
2595             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2596             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2597           }
2598
2599           /* speculatively enqueue b0 and b1 to the current next frame */
2600           to_next[0] = bi0 = from[0];
2601           to_next[1] = bi1 = from[1];
2602           from += 2;
2603           to_next += 2;
2604           n_left_from -= 2;
2605           n_left_to_next -= 2;
2606
2607           b0 = vlib_get_buffer (vm, bi0);
2608           b1 = vlib_get_buffer (vm, bi1);
2609
2610           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2611           next1 = SNAT_IN2OUT_NEXT_LOOKUP;
2612
2613           ip0 = vlib_buffer_get_current (b0);
2614           udp0 = ip4_next_header (ip0);
2615           tcp0 = (tcp_header_t *) udp0;
2616
2617           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2618
2619           if (PREDICT_FALSE(ip0->ttl == 1))
2620             {
2621               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2622               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2623                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2624                                            0);
2625               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2626               goto trace0;
2627             }
2628
2629           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2630
2631           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2632             {
2633               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2634               icmp0 = (icmp46_header_t *) udp0;
2635
2636               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2637                                   rx_fib_index0, node, next0, thread_index,
2638                                   &ses0, &dm0);
2639               goto trace0;
2640             }
2641
2642           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2643           if (PREDICT_FALSE(!dm0))
2644             {
2645               clib_warning("no match for internal host %U",
2646                            format_ip4_address, &ip0->src_address);
2647               next0 = SNAT_IN2OUT_NEXT_DROP;
2648               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2649               goto trace0;
2650             }
2651
2652           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2653
2654           key0.ext_host_addr = ip0->dst_address;
2655           key0.ext_host_port = tcp0->dst;
2656
2657           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2658           if (PREDICT_FALSE(!ses0))
2659             {
2660               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2661                 {
2662                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2663                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2664
2665                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2666                     continue;
2667
2668                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2669                   break;
2670                 }
2671               if (PREDICT_FALSE(!ses0))
2672                 {
2673                   /* too many sessions for user, send ICMP error packet */
2674
2675                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2676                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2677                                                ICMP4_destination_unreachable_destination_unreachable_host,
2678                                                0);
2679                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2680                   goto trace0;
2681                 }
2682             }
2683
2684           new_port0 = ses0->out.out_port;
2685
2686           old_addr0.as_u32 = ip0->src_address.as_u32;
2687           ip0->src_address.as_u32 = new_addr0.as_u32;
2688           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2689
2690           sum0 = ip0->checksum;
2691           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2692                                  ip4_header_t,
2693                                  src_address /* changed member */);
2694           ip0->checksum = ip_csum_fold (sum0);
2695
2696           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2697             {
2698               if (tcp0->flags & TCP_FLAG_SYN)
2699                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2700               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2701                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2702               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2703                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2704               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2705                 snat_det_ses_close(dm0, ses0);
2706               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2707                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2708               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2709                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2710
2711               old_port0 = tcp0->src;
2712               tcp0->src = new_port0;
2713
2714               sum0 = tcp0->checksum;
2715               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2716                                      ip4_header_t,
2717                                      dst_address /* changed member */);
2718               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2719                                      ip4_header_t /* cheat */,
2720                                      length /* changed member */);
2721               tcp0->checksum = ip_csum_fold(sum0);
2722             }
2723           else
2724             {
2725               ses0->state = SNAT_SESSION_UDP_ACTIVE;
2726               old_port0 = udp0->src_port;
2727               udp0->src_port = new_port0;
2728               udp0->checksum = 0;
2729             }
2730
2731           switch(ses0->state)
2732             {
2733             case SNAT_SESSION_UDP_ACTIVE:
2734                 ses0->expire = now + sm->udp_timeout;
2735                 break;
2736             case SNAT_SESSION_TCP_SYN_SENT:
2737             case SNAT_SESSION_TCP_FIN_WAIT:
2738             case SNAT_SESSION_TCP_CLOSE_WAIT:
2739             case SNAT_SESSION_TCP_LAST_ACK:
2740                 ses0->expire = now + sm->tcp_transitory_timeout;
2741                 break;
2742             case SNAT_SESSION_TCP_ESTABLISHED:
2743                 ses0->expire = now + sm->tcp_established_timeout;
2744                 break;
2745             }
2746
2747         trace0:
2748           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2749                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2750             {
2751               snat_in2out_trace_t *t =
2752                  vlib_add_trace (vm, node, b0, sizeof (*t));
2753               t->is_slow_path = 0;
2754               t->sw_if_index = sw_if_index0;
2755               t->next_index = next0;
2756               t->session_index = ~0;
2757               if (ses0)
2758                 t->session_index = ses0 - dm0->sessions;
2759             }
2760
2761           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2762
2763           ip1 = vlib_buffer_get_current (b1);
2764           udp1 = ip4_next_header (ip1);
2765           tcp1 = (tcp_header_t *) udp1;
2766
2767           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2768
2769           if (PREDICT_FALSE(ip1->ttl == 1))
2770             {
2771               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2772               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2773                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2774                                            0);
2775               next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2776               goto trace1;
2777             }
2778
2779           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2780
2781           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
2782             {
2783               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
2784               icmp1 = (icmp46_header_t *) udp1;
2785
2786               next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
2787                                   rx_fib_index1, node, next1, thread_index,
2788                                   &ses1, &dm1);
2789               goto trace1;
2790             }
2791
2792           dm1 = snat_det_map_by_user(sm, &ip1->src_address);
2793           if (PREDICT_FALSE(!dm1))
2794             {
2795               clib_warning("no match for internal host %U",
2796                            format_ip4_address, &ip0->src_address);
2797               next1 = SNAT_IN2OUT_NEXT_DROP;
2798               b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2799               goto trace1;
2800             }
2801
2802           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
2803
2804           key1.ext_host_addr = ip1->dst_address;
2805           key1.ext_host_port = tcp1->dst;
2806
2807           ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
2808           if (PREDICT_FALSE(!ses1))
2809             {
2810               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
2811                 {
2812                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
2813                     ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
2814
2815                   if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
2816                     continue;
2817
2818                   ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
2819                   break;
2820                 }
2821               if (PREDICT_FALSE(!ses1))
2822                 {
2823                   /* too many sessions for user, send ICMP error packet */
2824
2825                   vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2826                   icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
2827                                                ICMP4_destination_unreachable_destination_unreachable_host,
2828                                                0);
2829                   next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2830                   goto trace1;
2831                 }
2832             }
2833
2834           new_port1 = ses1->out.out_port;
2835
2836           old_addr1.as_u32 = ip1->src_address.as_u32;
2837           ip1->src_address.as_u32 = new_addr1.as_u32;
2838           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2839
2840           sum1 = ip1->checksum;
2841           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2842                                  ip4_header_t,
2843                                  src_address /* changed member */);
2844           ip1->checksum = ip_csum_fold (sum1);
2845
2846           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
2847             {
2848               if (tcp1->flags & TCP_FLAG_SYN)
2849                 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
2850               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
2851                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2852               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
2853                 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
2854               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
2855                 snat_det_ses_close(dm1, ses1);
2856               else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2857                 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
2858               else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
2859                 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
2860
2861               old_port1 = tcp1->src;
2862               tcp1->src = new_port1;
2863
2864               sum1 = tcp1->checksum;
2865               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2866                                      ip4_header_t,
2867                                      dst_address /* changed member */);
2868               sum1 = ip_csum_update (sum1, old_port1, new_port1,
2869                                      ip4_header_t /* cheat */,
2870                                      length /* changed member */);
2871               tcp1->checksum = ip_csum_fold(sum1);
2872             }
2873           else
2874             {
2875               ses1->state = SNAT_SESSION_UDP_ACTIVE;
2876               old_port1 = udp1->src_port;
2877               udp1->src_port = new_port1;
2878               udp1->checksum = 0;
2879             }
2880
2881           switch(ses1->state)
2882             {
2883             case SNAT_SESSION_UDP_ACTIVE:
2884                 ses1->expire = now + sm->udp_timeout;
2885                 break;
2886             case SNAT_SESSION_TCP_SYN_SENT:
2887             case SNAT_SESSION_TCP_FIN_WAIT:
2888             case SNAT_SESSION_TCP_CLOSE_WAIT:
2889             case SNAT_SESSION_TCP_LAST_ACK:
2890                 ses1->expire = now + sm->tcp_transitory_timeout;
2891                 break;
2892             case SNAT_SESSION_TCP_ESTABLISHED:
2893                 ses1->expire = now + sm->tcp_established_timeout;
2894                 break;
2895             }
2896
2897         trace1:
2898           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2899                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2900             {
2901               snat_in2out_trace_t *t =
2902                  vlib_add_trace (vm, node, b1, sizeof (*t));
2903               t->is_slow_path = 0;
2904               t->sw_if_index = sw_if_index1;
2905               t->next_index = next1;
2906               t->session_index = ~0;
2907               if (ses1)
2908                 t->session_index = ses1 - dm1->sessions;
2909             }
2910
2911           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
2912
2913           /* verify speculative enqueues, maybe switch current next frame */
2914           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2915                                            to_next, n_left_to_next,
2916                                            bi0, bi1, next0, next1);
2917          }
2918
2919       while (n_left_from > 0 && n_left_to_next > 0)
2920         {
2921           u32 bi0;
2922           vlib_buffer_t * b0;
2923           u32 next0;
2924           u32 sw_if_index0;
2925           ip4_header_t * ip0;
2926           ip_csum_t sum0;
2927           ip4_address_t new_addr0, old_addr0;
2928           u16 old_port0, new_port0, lo_port0, i0;
2929           udp_header_t * udp0;
2930           tcp_header_t * tcp0;
2931           u32 proto0;
2932           snat_det_out_key_t key0;
2933           snat_det_map_t * dm0;
2934           snat_det_session_t * ses0 = 0;
2935           u32 rx_fib_index0;
2936           icmp46_header_t * icmp0;
2937
2938           /* speculatively enqueue b0 to the current next frame */
2939           bi0 = from[0];
2940           to_next[0] = bi0;
2941           from += 1;
2942           to_next += 1;
2943           n_left_from -= 1;
2944           n_left_to_next -= 1;
2945
2946           b0 = vlib_get_buffer (vm, bi0);
2947           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2948
2949           ip0 = vlib_buffer_get_current (b0);
2950           udp0 = ip4_next_header (ip0);
2951           tcp0 = (tcp_header_t *) udp0;
2952
2953           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2954
2955           if (PREDICT_FALSE(ip0->ttl == 1))
2956             {
2957               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2958               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2959                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2960                                            0);
2961               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2962               goto trace00;
2963             }
2964
2965           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2966
2967           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2968             {
2969               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2970               icmp0 = (icmp46_header_t *) udp0;
2971
2972               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2973                                   rx_fib_index0, node, next0, thread_index,
2974                                   &ses0, &dm0);
2975               goto trace00;
2976             }
2977
2978           dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2979           if (PREDICT_FALSE(!dm0))
2980             {
2981               clib_warning("no match for internal host %U",
2982                            format_ip4_address, &ip0->src_address);
2983               next0 = SNAT_IN2OUT_NEXT_DROP;
2984               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2985               goto trace00;
2986             }
2987
2988           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2989
2990           key0.ext_host_addr = ip0->dst_address;
2991           key0.ext_host_port = tcp0->dst;
2992
2993           ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2994           if (PREDICT_FALSE(!ses0))
2995             {
2996               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2997                 {
2998                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
2999                     ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
3000
3001                   if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
3002                     continue;
3003
3004                   ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
3005                   break;
3006                 }
3007               if (PREDICT_FALSE(!ses0))
3008                 {
3009                   /* too many sessions for user, send ICMP error packet */
3010
3011                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3012                   icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
3013                                                ICMP4_destination_unreachable_destination_unreachable_host,
3014                                                0);
3015                   next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3016                   goto trace00;
3017                 }
3018             }
3019
3020           new_port0 = ses0->out.out_port;
3021
3022           old_addr0.as_u32 = ip0->src_address.as_u32;
3023           ip0->src_address.as_u32 = new_addr0.as_u32;
3024           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
3025
3026           sum0 = ip0->checksum;
3027           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3028                                  ip4_header_t,
3029                                  src_address /* changed member */);
3030           ip0->checksum = ip_csum_fold (sum0);
3031
3032           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3033             {
3034               if (tcp0->flags & TCP_FLAG_SYN)
3035                 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
3036               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
3037                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
3038               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
3039                 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
3040               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
3041                 snat_det_ses_close(dm0, ses0);
3042               else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
3043                 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
3044               else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
3045                 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
3046
3047               old_port0 = tcp0->src;
3048               tcp0->src = new_port0;
3049
3050               sum0 = tcp0->checksum;
3051               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3052                                      ip4_header_t,
3053                                      dst_address /* changed member */);
3054               sum0 = ip_csum_update (sum0, old_port0, new_port0,
3055                                      ip4_header_t /* cheat */,
3056                                      length /* changed member */);
3057               tcp0->checksum = ip_csum_fold(sum0);
3058             }
3059           else
3060             {
3061               ses0->state = SNAT_SESSION_UDP_ACTIVE;
3062               old_port0 = udp0->src_port;
3063               udp0->src_port = new_port0;
3064               udp0->checksum = 0;
3065             }
3066
3067           switch(ses0->state)
3068             {
3069             case SNAT_SESSION_UDP_ACTIVE:
3070                 ses0->expire = now + sm->udp_timeout;
3071                 break;
3072             case SNAT_SESSION_TCP_SYN_SENT:
3073             case SNAT_SESSION_TCP_FIN_WAIT:
3074             case SNAT_SESSION_TCP_CLOSE_WAIT:
3075             case SNAT_SESSION_TCP_LAST_ACK:
3076                 ses0->expire = now + sm->tcp_transitory_timeout;
3077                 break;
3078             case SNAT_SESSION_TCP_ESTABLISHED:
3079                 ses0->expire = now + sm->tcp_established_timeout;
3080                 break;
3081             }
3082
3083         trace00:
3084           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3085                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3086             {
3087               snat_in2out_trace_t *t =
3088                  vlib_add_trace (vm, node, b0, sizeof (*t));
3089               t->is_slow_path = 0;
3090               t->sw_if_index = sw_if_index0;
3091               t->next_index = next0;
3092               t->session_index = ~0;
3093               if (ses0)
3094                 t->session_index = ses0 - dm0->sessions;
3095             }
3096
3097           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3098
3099           /* verify speculative enqueue, maybe switch current next frame */
3100           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3101                                            to_next, n_left_to_next,
3102                                            bi0, next0);
3103         }
3104
3105       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3106     }
3107
3108   vlib_node_increment_counter (vm, snat_det_in2out_node.index,
3109                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3110                                pkts_processed);
3111   return frame->n_vectors;
3112 }
3113
3114 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
3115   .function = snat_det_in2out_node_fn,
3116   .name = "nat44-det-in2out",
3117   .vector_size = sizeof (u32),
3118   .format_trace = format_snat_in2out_trace,
3119   .type = VLIB_NODE_TYPE_INTERNAL,
3120
3121   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3122   .error_strings = snat_in2out_error_strings,
3123
3124   .runtime_data_bytes = sizeof (snat_runtime_t),
3125
3126   .n_next_nodes = 3,
3127
3128   /* edit / add dispositions here */
3129   .next_nodes = {
3130     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3131     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3132     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3133   },
3134 };
3135
3136 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
3137
3138 /**
3139  * Get address and port values to be used for ICMP packet translation
3140  * and create session if needed
3141  *
3142  * @param[in,out] sm             NAT main
3143  * @param[in,out] node           NAT node runtime
3144  * @param[in] thread_index       thread index
3145  * @param[in,out] b0             buffer containing packet to be translated
3146  * @param[out] p_proto           protocol used for matching
3147  * @param[out] p_value           address and port after NAT translation
3148  * @param[out] p_dont_translate  if packet should not be translated
3149  * @param d                      optional parameter
3150  * @param e                      optional parameter
3151  */
3152 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
3153                           u32 thread_index, vlib_buffer_t *b0,
3154                           ip4_header_t *ip0, u8 *p_proto,
3155                           snat_session_key_t *p_value,
3156                           u8 *p_dont_translate, void *d, void *e)
3157 {
3158   icmp46_header_t *icmp0;
3159   u32 sw_if_index0;
3160   u32 rx_fib_index0;
3161   u8 protocol;
3162   snat_det_out_key_t key0;
3163   u8 dont_translate = 0;
3164   u32 next0 = ~0;
3165   icmp_echo_header_t *echo0, *inner_echo0 = 0;
3166   ip4_header_t *inner_ip0;
3167   void *l4_header = 0;
3168   icmp46_header_t *inner_icmp0;
3169   snat_det_map_t * dm0 = 0;
3170   ip4_address_t new_addr0;
3171   u16 lo_port0, i0;
3172   snat_det_session_t * ses0 = 0;
3173   ip4_address_t in_addr;
3174   u16 in_port;
3175
3176   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
3177   echo0 = (icmp_echo_header_t *)(icmp0+1);
3178   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3179   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
3180
3181   if (!icmp_is_error_message (icmp0))
3182     {
3183       protocol = SNAT_PROTOCOL_ICMP;
3184       in_addr = ip0->src_address;
3185       in_port = echo0->identifier;
3186     }
3187   else
3188     {
3189       inner_ip0 = (ip4_header_t *)(echo0+1);
3190       l4_header = ip4_next_header (inner_ip0);
3191       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
3192       in_addr = inner_ip0->dst_address;
3193       switch (protocol)
3194         {
3195         case SNAT_PROTOCOL_ICMP:
3196           inner_icmp0 = (icmp46_header_t*)l4_header;
3197           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
3198           in_port = inner_echo0->identifier;
3199           break;
3200         case SNAT_PROTOCOL_UDP:
3201         case SNAT_PROTOCOL_TCP:
3202           in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
3203           break;
3204         default:
3205           b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
3206           next0 = SNAT_IN2OUT_NEXT_DROP;
3207           goto out;
3208         }
3209     }
3210
3211   dm0 = snat_det_map_by_user(sm, &in_addr);
3212   if (PREDICT_FALSE(!dm0))
3213     {
3214       clib_warning("no match for internal host %U",
3215                    format_ip4_address, &in_addr);
3216       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
3217           IP_PROTOCOL_ICMP, rx_fib_index0)))
3218         {
3219           dont_translate = 1;
3220           goto out;
3221         }
3222       next0 = SNAT_IN2OUT_NEXT_DROP;
3223       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3224       goto out;
3225     }
3226
3227   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
3228
3229   key0.ext_host_addr = ip0->dst_address;
3230   key0.ext_host_port = 0;
3231
3232   ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
3233   if (PREDICT_FALSE(!ses0))
3234     {
3235       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
3236           IP_PROTOCOL_ICMP, rx_fib_index0)))
3237         {
3238           dont_translate = 1;
3239           goto out;
3240         }
3241       if (icmp0->type != ICMP4_echo_request)
3242         {
3243           b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3244           next0 = SNAT_IN2OUT_NEXT_DROP;
3245           goto out;
3246         }
3247       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
3248         {
3249           key0.out_port = clib_host_to_net_u16 (lo_port0 +
3250             ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
3251
3252           if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
3253             continue;
3254
3255           ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
3256           break;
3257         }
3258       if (PREDICT_FALSE(!ses0))
3259         {
3260           next0 = SNAT_IN2OUT_NEXT_DROP;
3261           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
3262           goto out;
3263         }
3264     }
3265
3266   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
3267                     !icmp_is_error_message (icmp0)))
3268     {
3269       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3270       next0 = SNAT_IN2OUT_NEXT_DROP;
3271       goto out;
3272     }
3273
3274   u32 now = (u32) vlib_time_now (sm->vlib_main);
3275
3276   ses0->state = SNAT_SESSION_ICMP_ACTIVE;
3277   ses0->expire = now + sm->icmp_timeout;
3278
3279 out:
3280   *p_proto = protocol;
3281   if (ses0)
3282     {
3283       p_value->addr = new_addr0;
3284       p_value->fib_index = sm->outside_fib_index;
3285       p_value->port = ses0->out.out_port;
3286     }
3287   *p_dont_translate = dont_translate;
3288   if (d)
3289     *(snat_det_session_t**)d = ses0;
3290   if (e)
3291     *(snat_det_map_t**)e = dm0;
3292   return next0;
3293 }
3294
3295 /**********************/
3296 /*** worker handoff ***/
3297 /**********************/
3298 static inline uword
3299 snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
3300                                       vlib_node_runtime_t * node,
3301                                       vlib_frame_t * frame,
3302                                       u8 is_output)
3303 {
3304   snat_main_t *sm = &snat_main;
3305   vlib_thread_main_t *tm = vlib_get_thread_main ();
3306   u32 n_left_from, *from, *to_next = 0;
3307   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
3308   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
3309     = 0;
3310   vlib_frame_queue_elt_t *hf = 0;
3311   vlib_frame_t *f = 0;
3312   int i;
3313   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
3314   u32 next_worker_index = 0;
3315   u32 current_worker_index = ~0;
3316   u32 thread_index = vlib_get_thread_index ();
3317   u32 fq_index;
3318   u32 to_node_index;
3319
3320   ASSERT (vec_len (sm->workers));
3321
3322   if (is_output)
3323     {
3324       fq_index = sm->fq_in2out_output_index;
3325       to_node_index = sm->in2out_output_node_index;
3326     }
3327   else
3328     {
3329       fq_index = sm->fq_in2out_index;
3330       to_node_index = sm->in2out_node_index;
3331     }
3332
3333   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
3334     {
3335       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
3336
3337       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
3338                                sm->first_worker_index + sm->num_workers - 1,
3339                                (vlib_frame_queue_t *) (~0));
3340     }
3341
3342   from = vlib_frame_vector_args (frame);
3343   n_left_from = frame->n_vectors;
3344
3345   while (n_left_from > 0)
3346     {
3347       u32 bi0;
3348       vlib_buffer_t *b0;
3349       u32 sw_if_index0;
3350       u32 rx_fib_index0;
3351       ip4_header_t * ip0;
3352       u8 do_handoff;
3353
3354       bi0 = from[0];
3355       from += 1;
3356       n_left_from -= 1;
3357
3358       b0 = vlib_get_buffer (vm, bi0);
3359
3360       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
3361       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3362
3363       ip0 = vlib_buffer_get_current (b0);
3364
3365       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
3366
3367       if (PREDICT_FALSE (next_worker_index != thread_index))
3368         {
3369           do_handoff = 1;
3370
3371           if (next_worker_index != current_worker_index)
3372             {
3373               if (hf)
3374                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3375
3376               hf = vlib_get_worker_handoff_queue_elt (fq_index,
3377                                                       next_worker_index,
3378                                                       handoff_queue_elt_by_worker_index);
3379
3380               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
3381               to_next_worker = &hf->buffer_index[hf->n_vectors];
3382               current_worker_index = next_worker_index;
3383             }
3384
3385           /* enqueue to correct worker thread */
3386           to_next_worker[0] = bi0;
3387           to_next_worker++;
3388           n_left_to_next_worker--;
3389
3390           if (n_left_to_next_worker == 0)
3391             {
3392               hf->n_vectors = VLIB_FRAME_SIZE;
3393               vlib_put_frame_queue_elt (hf);
3394               current_worker_index = ~0;
3395               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
3396               hf = 0;
3397             }
3398         }
3399       else
3400         {
3401           do_handoff = 0;
3402           /* if this is 1st frame */
3403           if (!f)
3404             {
3405               f = vlib_get_frame_to_node (vm, to_node_index);
3406               to_next = vlib_frame_vector_args (f);
3407             }
3408
3409           to_next[0] = bi0;
3410           to_next += 1;
3411           f->n_vectors++;
3412         }
3413
3414       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
3415                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3416         {
3417           snat_in2out_worker_handoff_trace_t *t =
3418             vlib_add_trace (vm, node, b0, sizeof (*t));
3419           t->next_worker_index = next_worker_index;
3420           t->do_handoff = do_handoff;
3421         }
3422     }
3423
3424   if (f)
3425     vlib_put_frame_to_node (vm, to_node_index, f);
3426
3427   if (hf)
3428     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3429
3430   /* Ship frames to the worker nodes */
3431   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
3432     {
3433       if (handoff_queue_elt_by_worker_index[i])
3434         {
3435           hf = handoff_queue_elt_by_worker_index[i];
3436           /*
3437            * It works better to let the handoff node
3438            * rate-adapt, always ship the handoff queue element.
3439            */
3440           if (1 || hf->n_vectors == hf->last_n_vectors)
3441             {
3442               vlib_put_frame_queue_elt (hf);
3443               handoff_queue_elt_by_worker_index[i] = 0;
3444             }
3445           else
3446             hf->last_n_vectors = hf->n_vectors;
3447         }
3448       congested_handoff_queue_by_worker_index[i] =
3449         (vlib_frame_queue_t *) (~0);
3450     }
3451   hf = 0;
3452   current_worker_index = ~0;
3453   return frame->n_vectors;
3454 }
3455
3456 static uword
3457 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
3458                                vlib_node_runtime_t * node,
3459                                vlib_frame_t * frame)
3460 {
3461   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
3462 }
3463
3464 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
3465   .function = snat_in2out_worker_handoff_fn,
3466   .name = "nat44-in2out-worker-handoff",
3467   .vector_size = sizeof (u32),
3468   .format_trace = format_snat_in2out_worker_handoff_trace,
3469   .type = VLIB_NODE_TYPE_INTERNAL,
3470
3471   .n_next_nodes = 1,
3472
3473   .next_nodes = {
3474     [0] = "error-drop",
3475   },
3476 };
3477
3478 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
3479                               snat_in2out_worker_handoff_fn);
3480
3481 static uword
3482 snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
3483                                       vlib_node_runtime_t * node,
3484                                       vlib_frame_t * frame)
3485 {
3486   return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
3487 }
3488
3489 VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
3490   .function = snat_in2out_output_worker_handoff_fn,
3491   .name = "nat44-in2out-output-worker-handoff",
3492   .vector_size = sizeof (u32),
3493   .format_trace = format_snat_in2out_worker_handoff_trace,
3494   .type = VLIB_NODE_TYPE_INTERNAL,
3495
3496   .n_next_nodes = 1,
3497
3498   .next_nodes = {
3499     [0] = "error-drop",
3500   },
3501 };
3502
3503 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
3504                               snat_in2out_output_worker_handoff_fn);
3505
3506 static_always_inline int
3507 is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
3508 {
3509   snat_address_t * ap;
3510   clib_bihash_kv_8_8_t kv, value;
3511   snat_session_key_t m_key;
3512
3513   vec_foreach (ap, sm->addresses)
3514     {
3515       if (ap->addr.as_u32 == dst_addr->as_u32)
3516         return 1;
3517     }
3518
3519   m_key.addr.as_u32 = dst_addr->as_u32;
3520   m_key.fib_index = sm->outside_fib_index;
3521   m_key.port = 0;
3522   m_key.protocol = 0;
3523   kv.key = m_key.as_u64;
3524   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
3525     return 1;
3526
3527   return 0;
3528 }
3529
3530 static uword
3531 snat_hairpin_dst_fn (vlib_main_t * vm,
3532                      vlib_node_runtime_t * node,
3533                      vlib_frame_t * frame)
3534 {
3535   u32 n_left_from, * from, * to_next;
3536   snat_in2out_next_t next_index;
3537   u32 pkts_processed = 0;
3538   snat_main_t * sm = &snat_main;
3539
3540   from = vlib_frame_vector_args (frame);
3541   n_left_from = frame->n_vectors;
3542   next_index = node->cached_next_index;
3543
3544   while (n_left_from > 0)
3545     {
3546       u32 n_left_to_next;
3547
3548       vlib_get_next_frame (vm, node, next_index,
3549                            to_next, n_left_to_next);
3550
3551       while (n_left_from > 0 && n_left_to_next > 0)
3552         {
3553           u32 bi0;
3554           vlib_buffer_t * b0;
3555           u32 next0;
3556           ip4_header_t * ip0;
3557           u32 proto0;
3558
3559           /* speculatively enqueue b0 to the current next frame */
3560           bi0 = from[0];
3561           to_next[0] = bi0;
3562           from += 1;
3563           to_next += 1;
3564           n_left_from -= 1;
3565           n_left_to_next -= 1;
3566
3567           b0 = vlib_get_buffer (vm, bi0);
3568           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3569           ip0 = vlib_buffer_get_current (b0);
3570
3571           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3572
3573           vnet_buffer (b0)->snat.flags = 0;
3574           if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
3575             {
3576               if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
3577                 {
3578                   udp_header_t * udp0 = ip4_next_header (ip0);
3579                   tcp_header_t * tcp0 = (tcp_header_t *) udp0;
3580
3581                   snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3582                 }
3583               else if (proto0 == SNAT_PROTOCOL_ICMP)
3584                 {
3585                   icmp46_header_t * icmp0 = ip4_next_header (ip0);
3586
3587                   snat_icmp_hairpinning (sm, b0, ip0, icmp0);
3588                 }
3589               else
3590                 {
3591                   snat_hairpinning_unknown_proto (sm, b0, ip0);
3592                 }
3593
3594               vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
3595             }
3596
3597           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3598
3599           /* verify speculative enqueue, maybe switch current next frame */
3600           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3601                                            to_next, n_left_to_next,
3602                                            bi0, next0);
3603          }
3604
3605       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3606     }
3607
3608   vlib_node_increment_counter (vm, snat_hairpin_dst_node.index,
3609                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3610                                pkts_processed);
3611   return frame->n_vectors;
3612 }
3613
3614 VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
3615   .function = snat_hairpin_dst_fn,
3616   .name = "nat44-hairpin-dst",
3617   .vector_size = sizeof (u32),
3618   .type = VLIB_NODE_TYPE_INTERNAL,
3619   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3620   .error_strings = snat_in2out_error_strings,
3621   .n_next_nodes = 2,
3622   .next_nodes = {
3623     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3624     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3625   },
3626 };
3627
3628 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node,
3629                               snat_hairpin_dst_fn);
3630
3631 static uword
3632 snat_hairpin_src_fn (vlib_main_t * vm,
3633                      vlib_node_runtime_t * node,
3634                      vlib_frame_t * frame)
3635 {
3636   u32 n_left_from, * from, * to_next;
3637   snat_in2out_next_t next_index;
3638   u32 pkts_processed = 0;
3639   snat_main_t *sm = &snat_main;
3640
3641   from = vlib_frame_vector_args (frame);
3642   n_left_from = frame->n_vectors;
3643   next_index = node->cached_next_index;
3644
3645   while (n_left_from > 0)
3646     {
3647       u32 n_left_to_next;
3648
3649       vlib_get_next_frame (vm, node, next_index,
3650                            to_next, n_left_to_next);
3651
3652       while (n_left_from > 0 && n_left_to_next > 0)
3653         {
3654           u32 bi0;
3655           vlib_buffer_t * b0;
3656           u32 next0;
3657           snat_interface_t *i;
3658           u32 sw_if_index0;
3659
3660           /* speculatively enqueue b0 to the current next frame */
3661           bi0 = from[0];
3662           to_next[0] = bi0;
3663           from += 1;
3664           to_next += 1;
3665           n_left_from -= 1;
3666           n_left_to_next -= 1;
3667
3668           b0 = vlib_get_buffer (vm, bi0);
3669           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3670           next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT;
3671
3672           pool_foreach (i, sm->output_feature_interfaces,
3673           ({
3674             /* Only packets from NAT inside interface */
3675             if ((nat_interface_is_inside(i)) && (sw_if_index0 == i->sw_if_index))
3676               {
3677                 if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) &
3678                                     SNAT_FLAG_HAIRPINNING))
3679                   {
3680                     if (PREDICT_TRUE (sm->num_workers > 1))
3681                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
3682                     else
3683                       next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
3684                   }
3685                 break;
3686               }
3687           }));
3688
3689           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3690
3691           /* verify speculative enqueue, maybe switch current next frame */
3692           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3693                                            to_next, n_left_to_next,
3694                                            bi0, next0);
3695          }
3696
3697       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3698     }
3699
3700   vlib_node_increment_counter (vm, snat_hairpin_src_node.index,
3701                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3702                                pkts_processed);
3703   return frame->n_vectors;
3704 }
3705
3706 VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
3707   .function = snat_hairpin_src_fn,
3708   .name = "nat44-hairpin-src",
3709   .vector_size = sizeof (u32),
3710   .type = VLIB_NODE_TYPE_INTERNAL,
3711   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3712   .error_strings = snat_in2out_error_strings,
3713   .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
3714   .next_nodes = {
3715      [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
3716      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-in2out-output",
3717      [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
3718      [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
3719   },
3720 };
3721
3722 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node,
3723                               snat_hairpin_src_fn);
3724
3725 static uword
3726 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
3727                                 vlib_node_runtime_t * node,
3728                                 vlib_frame_t * frame)
3729 {
3730   u32 n_left_from, * from, * to_next;
3731   snat_in2out_next_t next_index;
3732   u32 pkts_processed = 0;
3733   snat_main_t * sm = &snat_main;
3734   u32 stats_node_index;
3735
3736   stats_node_index = snat_in2out_fast_node.index;
3737
3738   from = vlib_frame_vector_args (frame);
3739   n_left_from = frame->n_vectors;
3740   next_index = node->cached_next_index;
3741
3742   while (n_left_from > 0)
3743     {
3744       u32 n_left_to_next;
3745
3746       vlib_get_next_frame (vm, node, next_index,
3747                            to_next, n_left_to_next);
3748
3749       while (n_left_from > 0 && n_left_to_next > 0)
3750         {
3751           u32 bi0;
3752           vlib_buffer_t * b0;
3753           u32 next0;
3754           u32 sw_if_index0;
3755           ip4_header_t * ip0;
3756           ip_csum_t sum0;
3757           u32 new_addr0, old_addr0;
3758           u16 old_port0, new_port0;
3759           udp_header_t * udp0;
3760           tcp_header_t * tcp0;
3761           icmp46_header_t * icmp0;
3762           snat_session_key_t key0, sm0;
3763           u32 proto0;
3764           u32 rx_fib_index0;
3765
3766           /* speculatively enqueue b0 to the current next frame */
3767           bi0 = from[0];
3768           to_next[0] = bi0;
3769           from += 1;
3770           to_next += 1;
3771           n_left_from -= 1;
3772           n_left_to_next -= 1;
3773
3774           b0 = vlib_get_buffer (vm, bi0);
3775           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3776
3777           ip0 = vlib_buffer_get_current (b0);
3778           udp0 = ip4_next_header (ip0);
3779           tcp0 = (tcp_header_t *) udp0;
3780           icmp0 = (icmp46_header_t *) udp0;
3781
3782           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3783           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3784
3785           if (PREDICT_FALSE(ip0->ttl == 1))
3786             {
3787               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3788               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3789                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3790                                            0);
3791               next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3792               goto trace0;
3793             }
3794
3795           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3796
3797           if (PREDICT_FALSE (proto0 == ~0))
3798               goto trace0;
3799
3800           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3801             {
3802               next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
3803                                   rx_fib_index0, node, next0, ~0, 0, 0);
3804               goto trace0;
3805             }
3806
3807           key0.addr = ip0->src_address;
3808           key0.protocol = proto0;
3809           key0.port = udp0->src_port;
3810           key0.fib_index = rx_fib_index0;
3811
3812           if (snat_static_mapping_match(sm, key0, &sm0, 0, 0, 0))
3813             {
3814               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3815               next0= SNAT_IN2OUT_NEXT_DROP;
3816               goto trace0;
3817             }
3818
3819           new_addr0 = sm0.addr.as_u32;
3820           new_port0 = sm0.port;
3821           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
3822           old_addr0 = ip0->src_address.as_u32;
3823           ip0->src_address.as_u32 = new_addr0;
3824
3825           sum0 = ip0->checksum;
3826           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3827                                  ip4_header_t,
3828                                  src_address /* changed member */);
3829           ip0->checksum = ip_csum_fold (sum0);
3830
3831           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
3832             {
3833               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3834                 {
3835                   old_port0 = tcp0->src_port;
3836                   tcp0->src_port = new_port0;
3837
3838                   sum0 = tcp0->checksum;
3839                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3840                                          ip4_header_t,
3841                                          dst_address /* changed member */);
3842                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
3843                                          ip4_header_t /* cheat */,
3844                                          length /* changed member */);
3845                   tcp0->checksum = ip_csum_fold(sum0);
3846                 }
3847               else
3848                 {
3849                   old_port0 = udp0->src_port;
3850                   udp0->src_port = new_port0;
3851                   udp0->checksum = 0;
3852                 }
3853             }
3854           else
3855             {
3856               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3857                 {
3858                   sum0 = tcp0->checksum;
3859                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3860                                          ip4_header_t,
3861                                          dst_address /* changed member */);
3862                   tcp0->checksum = ip_csum_fold(sum0);
3863                 }
3864             }
3865
3866           /* Hairpinning */
3867           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3868
3869         trace0:
3870           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3871                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3872             {
3873               snat_in2out_trace_t *t =
3874                  vlib_add_trace (vm, node, b0, sizeof (*t));
3875               t->sw_if_index = sw_if_index0;
3876               t->next_index = next0;
3877             }
3878
3879           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3880
3881           /* verify speculative enqueue, maybe switch current next frame */
3882           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3883                                            to_next, n_left_to_next,
3884                                            bi0, next0);
3885         }
3886
3887       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3888     }
3889
3890   vlib_node_increment_counter (vm, stats_node_index,
3891                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3892                                pkts_processed);
3893   return frame->n_vectors;
3894 }
3895
3896
3897 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
3898   .function = snat_in2out_fast_static_map_fn,
3899   .name = "nat44-in2out-fast",
3900   .vector_size = sizeof (u32),
3901   .format_trace = format_snat_in2out_fast_trace,
3902   .type = VLIB_NODE_TYPE_INTERNAL,
3903
3904   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3905   .error_strings = snat_in2out_error_strings,
3906
3907   .runtime_data_bytes = sizeof (snat_runtime_t),
3908
3909   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
3910
3911   /* edit / add dispositions here */
3912   .next_nodes = {
3913     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3914     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3915     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
3916     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3917     [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
3918   },
3919 };
3920
3921 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);