NAT44: interface output feature and service host direct access (VPP-1176)
[vpp.git] / src / plugins / nat / out2in.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/udp/udp.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <nat/nat.h>
26 #include <nat/nat_ipfix_logging.h>
27 #include <nat/nat_det.h>
28 #include <nat/nat_reass.h>
29
30 #include <vppinfra/hash.h>
31 #include <vppinfra/error.h>
32 #include <vppinfra/elog.h>
33
34 typedef struct {
35   u32 sw_if_index;
36   u32 next_index;
37   u32 session_index;
38 } snat_out2in_trace_t;
39
40 typedef struct {
41   u32 next_worker_index;
42   u8 do_handoff;
43 } snat_out2in_worker_handoff_trace_t;
44
45 /* packet trace format function */
46 static u8 * format_snat_out2in_trace (u8 * s, va_list * args)
47 {
48   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
49   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
50   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
51
52   s = format (s, "NAT44_OUT2IN: sw_if_index %d, next index %d, session index %d",
53               t->sw_if_index, t->next_index, t->session_index);
54   return s;
55 }
56
57 static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args)
58 {
59   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
60   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
61   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
62
63   s = format (s, "NAT44_OUT2IN_FAST: sw_if_index %d, next index %d",
64               t->sw_if_index, t->next_index);
65   return s;
66 }
67
68 static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args)
69 {
70   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
71   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
72   snat_out2in_worker_handoff_trace_t * t =
73     va_arg (*args, snat_out2in_worker_handoff_trace_t *);
74   char * m;
75
76   m = t->do_handoff ? "next worker" : "same worker";
77   s = format (s, "NAT44_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
78
79   return s;
80 }
81
82 typedef struct {
83   u32 sw_if_index;
84   u32 next_index;
85   u8 cached;
86 } nat44_out2in_reass_trace_t;
87
88 static u8 * format_nat44_out2in_reass_trace (u8 * s, va_list * args)
89 {
90   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
91   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
92   nat44_out2in_reass_trace_t * t = va_arg (*args, nat44_out2in_reass_trace_t *);
93
94   s = format (s, "NAT44_OUT2IN_REASS: sw_if_index %d, next index %d, status %s",
95               t->sw_if_index, t->next_index,
96               t->cached ? "cached" : "translated");
97
98   return s;
99 }
100
101 vlib_node_registration_t snat_out2in_node;
102 vlib_node_registration_t snat_out2in_fast_node;
103 vlib_node_registration_t snat_out2in_worker_handoff_node;
104 vlib_node_registration_t snat_det_out2in_node;
105 vlib_node_registration_t nat44_out2in_reass_node;
106
/*
 * X-macro listing every out2in error as _(SYMBOL, "description").
 * It is expanded twice below: once into the error enum and once into the
 * matching string table, so the two always stay in the same order.
 */
#define foreach_snat_out2in_error                               \
  _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")               \
  _(OUT2IN_PACKETS, "Good out2in packets processed")            \
  _(OUT_OF_PORTS, "Out of ports")                               \
  _(BAD_ICMP_TYPE, "unsupported ICMP type")                     \
  _(NO_TRANSLATION, "No translation")                           \
  _(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")         \
  _(DROP_FRAGMENT, "Drop fragment")                             \
  _(MAX_REASS, "Maximum reassemblies exceeded")                 \
  _(MAX_FRAG, "Maximum fragments per reassembly exceeded")

/* Error codes: SNAT_OUT2IN_ERROR_<SYMBOL>, plus a trailing count. */
typedef enum
{
#define _(sym,str) SNAT_OUT2IN_ERROR_##sym,
  foreach_snat_out2in_error
#undef _
    SNAT_OUT2IN_N_ERROR,
} snat_out2in_error_t;

/* Human-readable descriptions, indexed by snat_out2in_error_t. */
static char *snat_out2in_error_strings[] = {
#define _(sym,string) string,
  foreach_snat_out2in_error
#undef _
};
130
/* Next-node indices used by the out2in code; the last entry is the count. */
typedef enum
{
  SNAT_OUT2IN_NEXT_DROP,
  SNAT_OUT2IN_NEXT_LOOKUP,
  SNAT_OUT2IN_NEXT_ICMP_ERROR,
  SNAT_OUT2IN_NEXT_REASS,
  SNAT_OUT2IN_N_NEXT,
} snat_out2in_next_t;
138
139 /**
140  * @brief Create session for static mapping.
141  *
142  * Create NAT session initiated by host from external network with static
143  * mapping.
144  *
145  * @param sm     NAT main.
146  * @param b0     Vlib buffer.
147  * @param in2out In2out NAT44 session key.
148  * @param out2in Out2in NAT44 session key.
149  * @param node   Vlib node.
150  *
151  * @returns SNAT session if successfully created otherwise 0.
152  */
153 static inline snat_session_t *
154 create_session_for_static_mapping (snat_main_t *sm,
155                                    vlib_buffer_t *b0,
156                                    snat_session_key_t in2out,
157                                    snat_session_key_t out2in,
158                                    vlib_node_runtime_t * node,
159                                    u32 thread_index)
160 {
161   snat_user_t *u;
162   snat_session_t *s;
163   clib_bihash_kv_8_8_t kv0;
164   ip4_header_t *ip0;
165   udp_header_t *udp0;
166
167   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
168     {
169       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
170       return 0;
171     }
172
173   ip0 = vlib_buffer_get_current (b0);
174   udp0 = ip4_next_header (ip0);
175
176   u = nat_user_get_or_create (sm, &in2out.addr, in2out.fib_index, thread_index);
177   if (!u)
178     {
179       clib_warning ("create NAT user failed");
180       return 0;
181     }
182
183   s = nat_session_alloc_or_recycle (sm, u, thread_index);
184   if (!s)
185     {
186       clib_warning ("create NAT session failed");
187       return 0;
188     }
189
190   s->outside_address_index = ~0;
191   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
192   s->ext_host_addr.as_u32 = ip0->src_address.as_u32;
193   s->ext_host_port = udp0->src_port;
194   u->nstaticsessions++;
195   s->in2out = in2out;
196   s->out2in = out2in;
197   s->in2out.protocol = out2in.protocol;
198
199   /* Add to translation hashes */
200   kv0.key = s->in2out.as_u64;
201   kv0.value = s - sm->per_thread_data[thread_index].sessions;
202   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
203                                1 /* is_add */))
204       clib_warning ("in2out key add failed");
205
206   kv0.key = s->out2in.as_u64;
207
208   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
209                                1 /* is_add */))
210       clib_warning ("out2in key add failed");
211
212   /* log NAT event */
213   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
214                                       s->out2in.addr.as_u32,
215                                       s->in2out.protocol,
216                                       s->in2out.port,
217                                       s->out2in.port,
218                                       s->in2out.fib_index);
219    return s;
220 }
221
222 static_always_inline
223 snat_out2in_error_t icmp_get_key(ip4_header_t *ip0,
224                                  snat_session_key_t *p_key0)
225 {
226   icmp46_header_t *icmp0;
227   snat_session_key_t key0;
228   icmp_echo_header_t *echo0, *inner_echo0 = 0;
229   ip4_header_t *inner_ip0;
230   void *l4_header = 0;
231   icmp46_header_t *inner_icmp0;
232
233   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
234   echo0 = (icmp_echo_header_t *)(icmp0+1);
235
236   if (!icmp_is_error_message (icmp0))
237     {
238       key0.protocol = SNAT_PROTOCOL_ICMP;
239       key0.addr = ip0->dst_address;
240       key0.port = echo0->identifier;
241     }
242   else
243     {
244       inner_ip0 = (ip4_header_t *)(echo0+1);
245       l4_header = ip4_next_header (inner_ip0);
246       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
247       key0.addr = inner_ip0->src_address;
248       switch (key0.protocol)
249         {
250         case SNAT_PROTOCOL_ICMP:
251           inner_icmp0 = (icmp46_header_t*)l4_header;
252           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
253           key0.port = inner_echo0->identifier;
254           break;
255         case SNAT_PROTOCOL_UDP:
256         case SNAT_PROTOCOL_TCP:
257           key0.port = ((tcp_udp_header_t*)l4_header)->src_port;
258           break;
259         default:
260           return SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL;
261         }
262     }
263   *p_key0 = key0;
264   return -1; /* success */
265 }
266
267 static_always_inline int
268 icmp_get_ed_key(ip4_header_t *ip0, nat_ed_ses_key_t *p_key0)
269 {
270   icmp46_header_t *icmp0;
271   nat_ed_ses_key_t key0;
272   icmp_echo_header_t *echo0, *inner_echo0 = 0;
273   ip4_header_t *inner_ip0;
274   void *l4_header = 0;
275   icmp46_header_t *inner_icmp0;
276
277   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
278   echo0 = (icmp_echo_header_t *)(icmp0+1);
279
280   if (!icmp_is_error_message (icmp0))
281     {
282       key0.proto = IP_PROTOCOL_ICMP;
283       key0.l_addr = ip0->dst_address;
284       key0.r_addr = ip0->src_address;
285       key0.l_port = key0.r_port = echo0->identifier;
286     }
287   else
288     {
289       inner_ip0 = (ip4_header_t *)(echo0+1);
290       l4_header = ip4_next_header (inner_ip0);
291       key0.proto = inner_ip0->protocol;
292       key0.l_addr = inner_ip0->src_address;
293       key0.r_addr = inner_ip0->dst_address;
294       switch (ip_proto_to_snat_proto (inner_ip0->protocol))
295         {
296         case SNAT_PROTOCOL_ICMP:
297           inner_icmp0 = (icmp46_header_t*)l4_header;
298           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
299           key0.l_port = key0.r_port = inner_echo0->identifier;
300           break;
301         case SNAT_PROTOCOL_UDP:
302         case SNAT_PROTOCOL_TCP:
303           key0.l_port = ((tcp_udp_header_t*)l4_header)->src_port;
304           key0.r_port = ((tcp_udp_header_t*)l4_header)->dst_port;
305           break;
306         default:
307           return -1;
308         }
309     }
310   *p_key0 = key0;
311   return 0;
312 }
313
314 static void
315 create_bypass_for_fwd(snat_main_t * sm, ip4_header_t * ip)
316 {
317   nat_ed_ses_key_t key;
318   clib_bihash_kv_16_8_t kv;
319   udp_header_t *udp;
320
321   if (ip->protocol == IP_PROTOCOL_ICMP)
322     {
323       if (icmp_get_ed_key (ip, &key))
324         return;
325     }
326   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
327     {
328       udp = ip4_next_header(ip);
329       key.r_addr = ip->src_address;
330       key.l_addr = ip->dst_address;
331       key.proto = ip->protocol;
332       key.l_port = udp->dst_port;
333       key.r_port = udp->src_port;
334     }
335   else
336     {
337       key.r_addr = ip->src_address;
338       key.l_addr = ip->dst_address;
339       key.proto = ip->protocol;
340       key.l_port = key.r_port = 0;
341     }
342   key.fib_index = 0;
343   kv.key[0] = key.as_u64[0];
344   kv.key[1] = key.as_u64[1];
345   kv.value = ~0ULL;
346
347   if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &kv, 1))
348     clib_warning ("in2out_ed key add failed");
349 }
350
351 /**
352  * Get address and port values to be used for ICMP packet translation
353  * and create session if needed
354  *
355  * @param[in,out] sm             NAT main
356  * @param[in,out] node           NAT node runtime
357  * @param[in] thread_index       thread index
358  * @param[in,out] b0             buffer containing packet to be translated
359  * @param[out] p_proto           protocol used for matching
360  * @param[out] p_value           address and port after NAT translation
361  * @param[out] p_dont_translate  if packet should not be translated
362  * @param d                      optional parameter
363  * @param e                      optional parameter
364  */
365 u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
366                            u32 thread_index, vlib_buffer_t *b0,
367                            ip4_header_t *ip0, u8 *p_proto,
368                            snat_session_key_t *p_value,
369                            u8 *p_dont_translate, void *d, void *e)
370 {
371   icmp46_header_t *icmp0;
372   u32 sw_if_index0;
373   u32 rx_fib_index0;
374   snat_session_key_t key0;
375   snat_session_key_t sm0;
376   snat_session_t *s0 = 0;
377   u8 dont_translate = 0;
378   clib_bihash_kv_8_8_t kv0, value0;
379   u8 is_addr_only;
380   u32 next0 = ~0;
381   int err;
382
383   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
384   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
385   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
386
387   key0.protocol = 0;
388
389   err = icmp_get_key (ip0, &key0);
390   if (err != -1)
391     {
392       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
393       next0 = SNAT_OUT2IN_NEXT_DROP;
394       goto out;
395     }
396   key0.fib_index = rx_fib_index0;
397
398   kv0.key = key0.as_u64;
399
400   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
401                               &value0))
402     {
403       /* Try to match static mapping by external address and port,
404          destination address and port in packet */
405       if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0))
406         {
407           if (!sm->forwarding_enabled)
408             {
409               /* Don't NAT packet aimed at the intfc address */
410               if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
411                                                   ip0->dst_address.as_u32)))
412                 {
413                   dont_translate = 1;
414                   goto out;
415                 }
416               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
417               next0 = SNAT_OUT2IN_NEXT_DROP;
418               goto out;
419             }
420           else
421             {
422               create_bypass_for_fwd(sm, ip0);
423               dont_translate = 1;
424               goto out;
425             }
426         }
427
428       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
429                         (icmp0->type != ICMP4_echo_request || !is_addr_only)))
430         {
431           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
432           next0 = SNAT_OUT2IN_NEXT_DROP;
433           goto out;
434         }
435
436       /* Create session initiated by host from external network */
437       s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
438                                              node, thread_index);
439
440       if (!s0)
441         {
442           next0 = SNAT_OUT2IN_NEXT_DROP;
443           goto out;
444         }
445     }
446   else
447     {
448       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
449                         icmp0->type != ICMP4_echo_request &&
450                         !icmp_is_error_message (icmp0)))
451         {
452           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
453           next0 = SNAT_OUT2IN_NEXT_DROP;
454           goto out;
455         }
456
457       if (PREDICT_FALSE (value0.value == ~0ULL))
458         {
459           nat_ed_ses_key_t key;
460           clib_bihash_kv_16_8_t s_kv, s_value;
461
462           key.as_u64[0] = 0;
463           key.as_u64[1] = 0;
464           if (icmp_get_ed_key (ip0, &key))
465             {
466               b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
467               next0 = SNAT_OUT2IN_NEXT_DROP;
468               goto out;
469             }
470           key.fib_index = rx_fib_index0;
471           s_kv.key[0] = key.as_u64[0];
472           s_kv.key[1] = key.as_u64[1];
473           if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
474             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
475                                     s_value.value);
476           else
477            {
478               next0 = SNAT_OUT2IN_NEXT_DROP;
479               goto out;
480            }
481         }
482       else
483         s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
484                                 value0.value);
485     }
486
487 out:
488   *p_proto = key0.protocol;
489   if (s0)
490     *p_value = s0->in2out;
491   *p_dont_translate = dont_translate;
492   if (d)
493     *(snat_session_t**)d = s0;
494   return next0;
495 }
496
497 /**
498  * Get address and port values to be used for ICMP packet translation
499  *
500  * @param[in] sm                 NAT main
501  * @param[in,out] node           NAT node runtime
502  * @param[in] thread_index       thread index
503  * @param[in,out] b0             buffer containing packet to be translated
504  * @param[out] p_proto           protocol used for matching
505  * @param[out] p_value           address and port after NAT translation
506  * @param[out] p_dont_translate  if packet should not be translated
507  * @param d                      optional parameter
508  * @param e                      optional parameter
509  */
510 u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
511                            u32 thread_index, vlib_buffer_t *b0,
512                            ip4_header_t *ip0, u8 *p_proto,
513                            snat_session_key_t *p_value,
514                            u8 *p_dont_translate, void *d, void *e)
515 {
516   icmp46_header_t *icmp0;
517   u32 sw_if_index0;
518   u32 rx_fib_index0;
519   snat_session_key_t key0;
520   snat_session_key_t sm0;
521   u8 dont_translate = 0;
522   u8 is_addr_only;
523   u32 next0 = ~0;
524   int err;
525
526   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
527   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
528   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
529
530   err = icmp_get_key (ip0, &key0);
531   if (err != -1)
532     {
533       b0->error = node->errors[err];
534       next0 = SNAT_OUT2IN_NEXT_DROP;
535       goto out2;
536     }
537   key0.fib_index = rx_fib_index0;
538
539   if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0))
540     {
541       /* Don't NAT packet aimed at the intfc address */
542       if (is_interface_addr(sm, node, sw_if_index0, ip0->dst_address.as_u32))
543         {
544           dont_translate = 1;
545           goto out;
546         }
547       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
548       next0 = SNAT_OUT2IN_NEXT_DROP;
549       goto out;
550     }
551
552   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
553                     (icmp0->type != ICMP4_echo_request || !is_addr_only) &&
554                     !icmp_is_error_message (icmp0)))
555     {
556       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
557       next0 = SNAT_OUT2IN_NEXT_DROP;
558       goto out;
559     }
560
561 out:
562   *p_value = sm0;
563 out2:
564   *p_proto = key0.protocol;
565   *p_dont_translate = dont_translate;
566   return next0;
567 }
568
569 static inline u32 icmp_out2in (snat_main_t *sm,
570                                vlib_buffer_t * b0,
571                                ip4_header_t * ip0,
572                                icmp46_header_t * icmp0,
573                                u32 sw_if_index0,
574                                u32 rx_fib_index0,
575                                vlib_node_runtime_t * node,
576                                u32 next0,
577                                u32 thread_index,
578                                void *d,
579                                void *e)
580 {
581   snat_session_key_t sm0;
582   u8 protocol;
583   icmp_echo_header_t *echo0, *inner_echo0 = 0;
584   ip4_header_t *inner_ip0 = 0;
585   void *l4_header = 0;
586   icmp46_header_t *inner_icmp0;
587   u8 dont_translate;
588   u32 new_addr0, old_addr0;
589   u16 old_id0, new_id0;
590   ip_csum_t sum0;
591   u16 checksum0;
592   u32 next0_tmp;
593
594   echo0 = (icmp_echo_header_t *)(icmp0+1);
595
596   next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0, ip0,
597                                        &protocol, &sm0, &dont_translate, d, e);
598   if (next0_tmp != ~0)
599     next0 = next0_tmp;
600   if (next0 == SNAT_OUT2IN_NEXT_DROP || dont_translate)
601     goto out;
602
603   sum0 = ip_incremental_checksum (0, icmp0,
604                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
605   checksum0 = ~ip_csum_fold (sum0);
606   if (checksum0 != 0 && checksum0 != 0xffff)
607     {
608       next0 = SNAT_OUT2IN_NEXT_DROP;
609       goto out;
610     }
611
612   old_addr0 = ip0->dst_address.as_u32;
613   new_addr0 = ip0->dst_address.as_u32 = sm0.addr.as_u32;
614   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
615
616   sum0 = ip0->checksum;
617   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
618                          dst_address /* changed member */);
619   ip0->checksum = ip_csum_fold (sum0);
620
621   if (!icmp_is_error_message (icmp0))
622     {
623       new_id0 = sm0.port;
624       if (PREDICT_FALSE(new_id0 != echo0->identifier))
625         {
626           old_id0 = echo0->identifier;
627           new_id0 = sm0.port;
628           echo0->identifier = new_id0;
629
630           sum0 = icmp0->checksum;
631           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
632                                  identifier /* changed member */);
633           icmp0->checksum = ip_csum_fold (sum0);
634         }
635     }
636   else
637     {
638       inner_ip0 = (ip4_header_t *)(echo0+1);
639       l4_header = ip4_next_header (inner_ip0);
640
641       if (!ip4_header_checksum_is_valid (inner_ip0))
642         {
643           next0 = SNAT_OUT2IN_NEXT_DROP;
644           goto out;
645         }
646
647       old_addr0 = inner_ip0->src_address.as_u32;
648       inner_ip0->src_address = sm0.addr;
649       new_addr0 = inner_ip0->src_address.as_u32;
650
651       sum0 = icmp0->checksum;
652       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
653                              src_address /* changed member */);
654       icmp0->checksum = ip_csum_fold (sum0);
655
656       switch (protocol)
657         {
658         case SNAT_PROTOCOL_ICMP:
659           inner_icmp0 = (icmp46_header_t*)l4_header;
660           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
661
662           old_id0 = inner_echo0->identifier;
663           new_id0 = sm0.port;
664           inner_echo0->identifier = new_id0;
665
666           sum0 = icmp0->checksum;
667           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
668                                  identifier);
669           icmp0->checksum = ip_csum_fold (sum0);
670           break;
671         case SNAT_PROTOCOL_UDP:
672         case SNAT_PROTOCOL_TCP:
673           old_id0 = ((tcp_udp_header_t*)l4_header)->src_port;
674           new_id0 = sm0.port;
675           ((tcp_udp_header_t*)l4_header)->src_port = new_id0;
676
677           sum0 = icmp0->checksum;
678           sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
679                                  src_port);
680           icmp0->checksum = ip_csum_fold (sum0);
681           break;
682         default:
683           ASSERT(0);
684         }
685     }
686
687 out:
688   return next0;
689 }
690
691
692 static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
693                                          vlib_buffer_t * b0,
694                                          ip4_header_t * ip0,
695                                          icmp46_header_t * icmp0,
696                                          u32 sw_if_index0,
697                                          u32 rx_fib_index0,
698                                          vlib_node_runtime_t * node,
699                                          u32 next0, f64 now,
700                                          u32 thread_index,
701                                          snat_session_t ** p_s0)
702 {
703   next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
704                       next0, thread_index, p_s0, 0);
705   snat_session_t * s0 = *p_s0;
706   if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
707     {
708       /* Accounting */
709       s0->last_heard = now;
710       s0->total_pkts++;
711       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
712       /* Per-user LRU list maintenance */
713       clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
714                          s0->per_user_index);
715       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
716                           s0->per_user_list_head_index,
717                           s0->per_user_index);
718     }
719   return next0;
720 }
721
722 static snat_session_t *
723 snat_out2in_unknown_proto (snat_main_t *sm,
724                            vlib_buffer_t * b,
725                            ip4_header_t * ip,
726                            u32 rx_fib_index,
727                            u32 thread_index,
728                            f64 now,
729                            vlib_main_t * vm,
730                            vlib_node_runtime_t * node)
731 {
732   clib_bihash_kv_8_8_t kv, value;
733   clib_bihash_kv_16_8_t s_kv, s_value;
734   snat_static_mapping_t *m;
735   snat_session_key_t m_key;
736   u32 old_addr, new_addr;
737   ip_csum_t sum;
738   nat_ed_ses_key_t key;
739   snat_session_t * s;
740   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
741   snat_user_t *u;
742
743   old_addr = ip->dst_address.as_u32;
744
745   key.l_addr = ip->dst_address;
746   key.r_addr = ip->src_address;
747   key.fib_index = rx_fib_index;
748   key.proto = ip->protocol;
749   key.r_port = 0;
750   key.l_port = 0;
751   s_kv.key[0] = key.as_u64[0];
752   s_kv.key[1] = key.as_u64[1];
753
754   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
755     {
756       s = pool_elt_at_index (tsm->sessions, s_value.value);
757       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
758     }
759   else
760     {
761       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
762         {
763           b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
764           return 0;
765         }
766
767       m_key.addr = ip->dst_address;
768       m_key.port = 0;
769       m_key.protocol = 0;
770       m_key.fib_index = rx_fib_index;
771       kv.key = m_key.as_u64;
772       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
773         {
774           b->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
775           return 0;
776         }
777
778       m = pool_elt_at_index (sm->static_mappings, value.value);
779
780       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
781
782       u = nat_user_get_or_create (sm, &ip->src_address, m->fib_index,
783                                   thread_index);
784       if (!u)
785         {
786           clib_warning ("create NAT user failed");
787           return 0;
788         }
789
790       /* Create a new session */
791       s = nat_session_alloc_or_recycle (sm, u, thread_index);
792       if (!s)
793         {
794           clib_warning ("create NAT session failed");
795           return 0;
796         }
797
798       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
799       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
800       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
801       s->outside_address_index = ~0;
802       s->out2in.addr.as_u32 = old_addr;
803       s->out2in.fib_index = rx_fib_index;
804       s->in2out.addr.as_u32 = new_addr;
805       s->in2out.fib_index = m->fib_index;
806       s->in2out.port = s->out2in.port = ip->protocol;
807       u->nstaticsessions++;
808
809       /* Add to lookup tables */
810       s_kv.value = s - tsm->sessions;
811       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
812         clib_warning ("out2in key add failed");
813
814       key.l_addr = ip->dst_address;
815       key.fib_index = m->fib_index;
816       s_kv.key[0] = key.as_u64[0];
817       s_kv.key[1] = key.as_u64[1];
818       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
819         clib_warning ("in2out key add failed");
820    }
821
822   /* Update IP checksum */
823   sum = ip->checksum;
824   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
825   ip->checksum = ip_csum_fold (sum);
826
827   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
828
829   /* Accounting */
830   s->last_heard = now;
831   s->total_pkts++;
832   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
833   /* Per-user LRU list maintenance */
834   clib_dlist_remove (tsm->list_pool, s->per_user_index);
835   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
836                       s->per_user_index);
837
838   return s;
839 }
840
841 static snat_session_t *
842 snat_out2in_lb (snat_main_t *sm,
843                 vlib_buffer_t * b,
844                 ip4_header_t * ip,
845                 u32 rx_fib_index,
846                 u32 thread_index,
847                 f64 now,
848                 vlib_main_t * vm,
849                 vlib_node_runtime_t * node)
850 {
851   nat_ed_ses_key_t key;
852   clib_bihash_kv_16_8_t s_kv, s_value;
853   udp_header_t *udp = ip4_next_header (ip);
854   tcp_header_t *tcp = (tcp_header_t *) udp;
855   snat_session_t *s = 0;
856   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
857   snat_session_key_t e_key, l_key;
858   u32 old_addr, new_addr;
859   u32 proto = ip_proto_to_snat_proto (ip->protocol);
860   u16 new_port, old_port;
861   ip_csum_t sum;
862   snat_user_t *u;
863   u32 address_index;
864   snat_session_key_t eh_key;
865   u8 twice_nat;
866
867   old_addr = ip->dst_address.as_u32;
868
869   key.l_addr = ip->dst_address;
870   key.r_addr = ip->src_address;
871   key.fib_index = rx_fib_index;
872   key.proto = ip->protocol;
873   key.r_port = udp->src_port;
874   key.l_port = udp->dst_port;
875   s_kv.key[0] = key.as_u64[0];
876   s_kv.key[1] = key.as_u64[1];
877
878   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
879     {
880       s = pool_elt_at_index (tsm->sessions, s_value.value);
881     }
882   else
883     {
884       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
885         {
886           b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
887           return 0;
888         }
889
890       e_key.addr = ip->dst_address;
891       e_key.port = udp->dst_port;
892       e_key.protocol = proto;
893       e_key.fib_index = rx_fib_index;
894       if (snat_static_mapping_match(sm, e_key, &l_key, 1, 0, &twice_nat))
895         return 0;
896
897       u = nat_user_get_or_create (sm, &l_key.addr, l_key.fib_index,
898                                   thread_index);
899       if (!u)
900       {
901         clib_warning ("create NAT user failed");
902         return 0;
903       }
904
905       s = nat_session_alloc_or_recycle (sm, u, thread_index);
906       if (!s)
907         {
908           clib_warning ("create NAT session failed");
909           return 0;
910         }
911
912       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
913       s->ext_host_port = udp->src_port;
914       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
915       s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
916       s->outside_address_index = ~0;
917       s->out2in = e_key;
918       s->in2out = l_key;
919       u->nstaticsessions++;
920
921       /* Add to lookup tables */
922       s_kv.value = s - tsm->sessions;
923       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
924         clib_warning ("out2in-ed key add failed");
925
926       if (twice_nat)
927         {
928           eh_key.protocol = proto;
929           if (snat_alloc_outside_address_and_port (sm->twice_nat_addresses, 0,
930                                                    thread_index, &eh_key,
931                                                    &address_index,
932                                                    sm->port_per_thread,
933                                                    sm->per_thread_data[thread_index].snat_thread_index))
934             {
935               b->error = node->errors[SNAT_OUT2IN_ERROR_OUT_OF_PORTS];
936               return 0;
937             }
938           key.r_addr.as_u32 = s->ext_host_nat_addr.as_u32 = eh_key.addr.as_u32;
939           key.r_port = s->ext_host_nat_port = eh_key.port;
940           s->flags |= SNAT_SESSION_FLAG_TWICE_NAT;
941         }
942       key.l_addr = l_key.addr;
943       key.fib_index = l_key.fib_index;
944       key.l_port = l_key.port;
945       s_kv.key[0] = key.as_u64[0];
946       s_kv.key[1] = key.as_u64[1];
947       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
948         clib_warning ("in2out-ed key add failed");
949     }
950
951   new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
952
953   /* Update IP checksum */
954   sum = ip->checksum;
955   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
956   if (is_twice_nat_session (s))
957     sum = ip_csum_update (sum, ip->src_address.as_u32,
958                           s->ext_host_nat_addr.as_u32, ip4_header_t,
959                           src_address);
960   ip->checksum = ip_csum_fold (sum);
961
962   if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
963     {
964       old_port = tcp->dst_port;
965       tcp->dst_port = s->in2out.port;
966       new_port = tcp->dst_port;
967
968       sum = tcp->checksum;
969       sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
970       sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
971       if (is_twice_nat_session (s))
972         {
973           sum = ip_csum_update (sum, ip->src_address.as_u32,
974                                 s->ext_host_nat_addr.as_u32, ip4_header_t,
975                                 dst_address);
976           sum = ip_csum_update (sum, tcp->src_port, s->ext_host_nat_port,
977                                 ip4_header_t, length);
978           tcp->src_port = s->ext_host_nat_port;
979           ip->src_address.as_u32 = s->ext_host_nat_addr.as_u32;
980         }
981       tcp->checksum = ip_csum_fold(sum);
982     }
983   else
984     {
985       udp->dst_port = s->in2out.port;
986       if (is_twice_nat_session (s))
987         {
988           udp->src_port = s->ext_host_nat_port;
989           ip->src_address.as_u32 = s->ext_host_nat_addr.as_u32;
990         }
991       udp->checksum = 0;
992     }
993
994   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
995
996   /* Accounting */
997   s->last_heard = now;
998   s->total_pkts++;
999   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1000   /* Per-user LRU list maintenance */
1001   clib_dlist_remove (tsm->list_pool, s->per_user_index);
1002   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1003                       s->per_user_index);
1004
1005   return s;
1006 }
1007
1008 static uword
1009 snat_out2in_node_fn (vlib_main_t * vm,
1010                   vlib_node_runtime_t * node,
1011                   vlib_frame_t * frame)
1012 {
1013   u32 n_left_from, * from, * to_next;
1014   snat_out2in_next_t next_index;
1015   u32 pkts_processed = 0;
1016   snat_main_t * sm = &snat_main;
1017   f64 now = vlib_time_now (vm);
1018   u32 thread_index = vlib_get_thread_index ();
1019
1020   from = vlib_frame_vector_args (frame);
1021   n_left_from = frame->n_vectors;
1022   next_index = node->cached_next_index;
1023
1024   while (n_left_from > 0)
1025     {
1026       u32 n_left_to_next;
1027
1028       vlib_get_next_frame (vm, node, next_index,
1029                            to_next, n_left_to_next);
1030
1031       while (n_left_from >= 4 && n_left_to_next >= 2)
1032         {
1033           u32 bi0, bi1;
1034           vlib_buffer_t * b0, * b1;
1035           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1036           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
1037           u32 sw_if_index0, sw_if_index1;
1038           ip4_header_t * ip0, *ip1;
1039           ip_csum_t sum0, sum1;
1040           u32 new_addr0, old_addr0;
1041           u16 new_port0, old_port0;
1042           u32 new_addr1, old_addr1;
1043           u16 new_port1, old_port1;
1044           udp_header_t * udp0, * udp1;
1045           tcp_header_t * tcp0, * tcp1;
1046           icmp46_header_t * icmp0, * icmp1;
1047           snat_session_key_t key0, key1, sm0, sm1;
1048           u32 rx_fib_index0, rx_fib_index1;
1049           u32 proto0, proto1;
1050           snat_session_t * s0 = 0, * s1 = 0;
1051           clib_bihash_kv_8_8_t kv0, kv1, value0, value1;
1052
1053           /* Prefetch next iteration. */
1054           {
1055             vlib_buffer_t * p2, * p3;
1056
1057             p2 = vlib_get_buffer (vm, from[2]);
1058             p3 = vlib_get_buffer (vm, from[3]);
1059
1060             vlib_prefetch_buffer_header (p2, LOAD);
1061             vlib_prefetch_buffer_header (p3, LOAD);
1062
1063             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1064             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1065           }
1066
1067           /* speculatively enqueue b0 and b1 to the current next frame */
1068           to_next[0] = bi0 = from[0];
1069           to_next[1] = bi1 = from[1];
1070           from += 2;
1071           to_next += 2;
1072           n_left_from -= 2;
1073           n_left_to_next -= 2;
1074
1075           b0 = vlib_get_buffer (vm, bi0);
1076           b1 = vlib_get_buffer (vm, bi1);
1077
1078           vnet_buffer (b0)->snat.flags = 0;
1079           vnet_buffer (b1)->snat.flags = 0;
1080
1081           ip0 = vlib_buffer_get_current (b0);
1082           udp0 = ip4_next_header (ip0);
1083           tcp0 = (tcp_header_t *) udp0;
1084           icmp0 = (icmp46_header_t *) udp0;
1085
1086           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1087           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1088                                    sw_if_index0);
1089
1090           if (PREDICT_FALSE(ip0->ttl == 1))
1091             {
1092               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1093               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1094                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1095                                            0);
1096               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1097               goto trace0;
1098             }
1099
1100           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1101
1102           if (PREDICT_FALSE (proto0 == ~0))
1103             {
1104               s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
1105                                              thread_index, now, vm, node);
1106               if (!s0)
1107                 next0 = SNAT_OUT2IN_NEXT_DROP;
1108               goto trace0;
1109             }
1110
1111           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1112             {
1113               next0 = icmp_out2in_slow_path
1114                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1115                  next0, now, thread_index, &s0);
1116               goto trace0;
1117             }
1118
1119           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
1120             {
1121               next0 = SNAT_OUT2IN_NEXT_REASS;
1122               goto trace0;
1123             }
1124
1125           key0.addr = ip0->dst_address;
1126           key0.port = udp0->dst_port;
1127           key0.protocol = proto0;
1128           key0.fib_index = rx_fib_index0;
1129
1130           kv0.key = key0.as_u64;
1131
1132           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1133                                       &kv0, &value0))
1134             {
1135               /* Try to match static mapping by external address and port,
1136                  destination address and port in packet */
1137               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
1138                 {
1139                   if (!sm->forwarding_enabled)
1140                     {
1141                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1142                       /*
1143                        * Send DHCP packets to the ipv4 stack, or we won't
1144                        * be able to use dhcp client on the outside interface
1145                        */
1146                       if (PREDICT_TRUE (proto0 != SNAT_PROTOCOL_UDP
1147                           || (udp0->dst_port
1148                               != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
1149                         next0 = SNAT_OUT2IN_NEXT_DROP;
1150                       else
1151                         vnet_feature_next
1152                           (vnet_buffer (b0)->sw_if_index[VLIB_RX],
1153                            &next0, b0);
1154                       goto trace0;
1155                     }
1156                   else
1157                     {
1158                       create_bypass_for_fwd(sm, ip0);
1159                       goto trace0;
1160                     }
1161                 }
1162
1163               /* Create session initiated by host from external network */
1164               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1165                                                      thread_index);
1166               if (!s0)
1167                 {
1168                   next0 = SNAT_OUT2IN_NEXT_DROP;
1169                   goto trace0;
1170                 }
1171             }
1172           else
1173             {
1174               if (PREDICT_FALSE (value0.value == ~0ULL))
1175                 {
1176                   s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index,
1177                                       now, vm, node);
1178                   if (!s0)
1179                     next0 = SNAT_OUT2IN_NEXT_DROP;
1180                   goto trace0;
1181                 }
1182               else
1183                 {
1184                   s0 = pool_elt_at_index (
1185                     sm->per_thread_data[thread_index].sessions,
1186                     value0.value);
1187                 }
1188             }
1189
1190           old_addr0 = ip0->dst_address.as_u32;
1191           ip0->dst_address = s0->in2out.addr;
1192           new_addr0 = ip0->dst_address.as_u32;
1193           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1194
1195           sum0 = ip0->checksum;
1196           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1197                                  ip4_header_t,
1198                                  dst_address /* changed member */);
1199           ip0->checksum = ip_csum_fold (sum0);
1200
1201           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1202             {
1203               old_port0 = tcp0->dst_port;
1204               tcp0->dst_port = s0->in2out.port;
1205               new_port0 = tcp0->dst_port;
1206
1207               sum0 = tcp0->checksum;
1208               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1209                                      ip4_header_t,
1210                                      dst_address /* changed member */);
1211
1212               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1213                                      ip4_header_t /* cheat */,
1214                                      length /* changed member */);
1215               tcp0->checksum = ip_csum_fold(sum0);
1216             }
1217           else
1218             {
1219               old_port0 = udp0->dst_port;
1220               udp0->dst_port = s0->in2out.port;
1221               udp0->checksum = 0;
1222             }
1223
1224           /* Accounting */
1225           s0->last_heard = now;
1226           s0->total_pkts++;
1227           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1228           /* Per-user LRU list maintenance */
1229           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1230                              s0->per_user_index);
1231           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1232                               s0->per_user_list_head_index,
1233                               s0->per_user_index);
1234         trace0:
1235
1236           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1237                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1238             {
1239               snat_out2in_trace_t *t =
1240                  vlib_add_trace (vm, node, b0, sizeof (*t));
1241               t->sw_if_index = sw_if_index0;
1242               t->next_index = next0;
1243               t->session_index = ~0;
1244               if (s0)
1245                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1246             }
1247
1248           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1249
1250
1251           ip1 = vlib_buffer_get_current (b1);
1252           udp1 = ip4_next_header (ip1);
1253           tcp1 = (tcp_header_t *) udp1;
1254           icmp1 = (icmp46_header_t *) udp1;
1255
1256           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1257           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1258                                    sw_if_index1);
1259
1260           if (PREDICT_FALSE(ip1->ttl == 1))
1261             {
1262               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1263               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1264                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1265                                            0);
1266               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1267               goto trace1;
1268             }
1269
1270           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1271
1272           if (PREDICT_FALSE (proto1 == ~0))
1273             {
1274               s1 = snat_out2in_unknown_proto(sm, b1, ip1, rx_fib_index1,
1275                                              thread_index, now, vm, node);
1276               if (!s1)
1277                 next1 = SNAT_OUT2IN_NEXT_DROP;
1278               goto trace1;
1279             }
1280
1281           if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1282             {
1283               next1 = icmp_out2in_slow_path
1284                 (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1285                  next1, now, thread_index, &s1);
1286               goto trace1;
1287             }
1288
1289           if (PREDICT_FALSE (ip4_is_fragment (ip1)))
1290             {
1291               next1 = SNAT_OUT2IN_NEXT_REASS;
1292               goto trace1;
1293             }
1294
1295           key1.addr = ip1->dst_address;
1296           key1.port = udp1->dst_port;
1297           key1.protocol = proto1;
1298           key1.fib_index = rx_fib_index1;
1299
1300           kv1.key = key1.as_u64;
1301
1302           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1303                                       &kv1, &value1))
1304             {
1305               /* Try to match static mapping by external address and port,
1306                  destination address and port in packet */
1307               if (snat_static_mapping_match(sm, key1, &sm1, 1, 0, 0))
1308                 {
1309                   if (!sm->forwarding_enabled)
1310                     {
1311                       b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1312                       /*
1313                        * Send DHCP packets to the ipv4 stack, or we won't
1314                        * be able to use dhcp client on the outside interface
1315                        */
1316                       if (PREDICT_TRUE (proto1 != SNAT_PROTOCOL_UDP
1317                           || (udp1->dst_port
1318                               != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
1319                         next1 = SNAT_OUT2IN_NEXT_DROP;
1320                       else
1321                         vnet_feature_next
1322                           (vnet_buffer (b1)->sw_if_index[VLIB_RX],
1323                            &next1, b1);
1324                       goto trace1;
1325                     }
1326                   else
1327                     {
1328                       create_bypass_for_fwd(sm, ip1);
1329                       goto trace1;
1330                     }
1331                 }
1332
1333               /* Create session initiated by host from external network */
1334               s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
1335                                                      thread_index);
1336               if (!s1)
1337                 {
1338                   next1 = SNAT_OUT2IN_NEXT_DROP;
1339                   goto trace1;
1340                 }
1341             }
1342           else
1343             {
1344               if (PREDICT_FALSE (value1.value == ~0ULL))
1345                 {
1346                   s1 = snat_out2in_lb(sm, b1, ip1, rx_fib_index1, thread_index,
1347                                       now, vm, node);
1348                   if (!s1)
1349                     next1 = SNAT_OUT2IN_NEXT_DROP;
1350                   goto trace1;
1351                 }
1352               else
1353                 {
1354                   s1 = pool_elt_at_index (
1355                     sm->per_thread_data[thread_index].sessions,
1356                     value1.value);
1357                 }
1358             }
1359
1360           old_addr1 = ip1->dst_address.as_u32;
1361           ip1->dst_address = s1->in2out.addr;
1362           new_addr1 = ip1->dst_address.as_u32;
1363           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index;
1364
1365           sum1 = ip1->checksum;
1366           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1367                                  ip4_header_t,
1368                                  dst_address /* changed member */);
1369           ip1->checksum = ip_csum_fold (sum1);
1370
1371           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1372             {
1373               old_port1 = tcp1->dst_port;
1374               tcp1->dst_port = s1->in2out.port;
1375               new_port1 = tcp1->dst_port;
1376
1377               sum1 = tcp1->checksum;
1378               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1379                                      ip4_header_t,
1380                                      dst_address /* changed member */);
1381
1382               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1383                                      ip4_header_t /* cheat */,
1384                                      length /* changed member */);
1385               tcp1->checksum = ip_csum_fold(sum1);
1386             }
1387           else
1388             {
1389               old_port1 = udp1->dst_port;
1390               udp1->dst_port = s1->in2out.port;
1391               udp1->checksum = 0;
1392             }
1393
1394           /* Accounting */
1395           s1->last_heard = now;
1396           s1->total_pkts++;
1397           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1398           /* Per-user LRU list maintenance */
1399           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1400                              s1->per_user_index);
1401           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1402                               s1->per_user_list_head_index,
1403                               s1->per_user_index);
1404         trace1:
1405
1406           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1407                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1408             {
1409               snat_out2in_trace_t *t =
1410                  vlib_add_trace (vm, node, b1, sizeof (*t));
1411               t->sw_if_index = sw_if_index1;
1412               t->next_index = next1;
1413               t->session_index = ~0;
1414               if (s1)
1415                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1416             }
1417
1418           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
1419
1420           /* verify speculative enqueues, maybe switch current next frame */
1421           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1422                                            to_next, n_left_to_next,
1423                                            bi0, bi1, next0, next1);
1424         }
1425
1426       while (n_left_from > 0 && n_left_to_next > 0)
1427         {
1428           u32 bi0;
1429           vlib_buffer_t * b0;
1430           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1431           u32 sw_if_index0;
1432           ip4_header_t * ip0;
1433           ip_csum_t sum0;
1434           u32 new_addr0, old_addr0;
1435           u16 new_port0, old_port0;
1436           udp_header_t * udp0;
1437           tcp_header_t * tcp0;
1438           icmp46_header_t * icmp0;
1439           snat_session_key_t key0, sm0;
1440           u32 rx_fib_index0;
1441           u32 proto0;
1442           snat_session_t * s0 = 0;
1443           clib_bihash_kv_8_8_t kv0, value0;
1444
1445           /* speculatively enqueue b0 to the current next frame */
1446           bi0 = from[0];
1447           to_next[0] = bi0;
1448           from += 1;
1449           to_next += 1;
1450           n_left_from -= 1;
1451           n_left_to_next -= 1;
1452
1453           b0 = vlib_get_buffer (vm, bi0);
1454
1455           vnet_buffer (b0)->snat.flags = 0;
1456
1457           ip0 = vlib_buffer_get_current (b0);
1458           udp0 = ip4_next_header (ip0);
1459           tcp0 = (tcp_header_t *) udp0;
1460           icmp0 = (icmp46_header_t *) udp0;
1461
1462           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1463           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1464                                    sw_if_index0);
1465
1466           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1467
1468           if (PREDICT_FALSE (proto0 == ~0))
1469             {
1470               s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
1471                                              thread_index, now, vm, node);
1472               if (!s0)
1473                 next0 = SNAT_OUT2IN_NEXT_DROP;
1474               goto trace00;
1475             }
1476
1477           if (PREDICT_FALSE(ip0->ttl == 1))
1478             {
1479               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1480               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1481                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1482                                            0);
1483               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1484               goto trace00;
1485             }
1486
1487           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1488             {
1489               next0 = icmp_out2in_slow_path
1490                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1491                  next0, now, thread_index, &s0);
1492               goto trace00;
1493             }
1494
1495           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
1496             {
1497               next0 = SNAT_OUT2IN_NEXT_REASS;
1498               goto trace00;
1499             }
1500
1501           key0.addr = ip0->dst_address;
1502           key0.port = udp0->dst_port;
1503           key0.protocol = proto0;
1504           key0.fib_index = rx_fib_index0;
1505
1506           kv0.key = key0.as_u64;
1507
1508           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1509                                       &kv0, &value0))
1510             {
1511               /* Try to match static mapping by external address and port,
1512                  destination address and port in packet */
1513               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
1514                 {
1515                   if (!sm->forwarding_enabled)
1516                     {
1517                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1518                       /*
1519                        * Send DHCP packets to the ipv4 stack, or we won't
1520                        * be able to use dhcp client on the outside interface
1521                        */
1522                       if (PREDICT_TRUE (proto0 != SNAT_PROTOCOL_UDP
1523                           || (udp0->dst_port
1524                               != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
1525                         next0 = SNAT_OUT2IN_NEXT_DROP;
1526                       else
1527                         vnet_feature_next
1528                           (vnet_buffer (b0)->sw_if_index[VLIB_RX],
1529                            &next0, b0);
1530                       goto trace00;
1531                     }
1532                   else
1533                     {
1534                       create_bypass_for_fwd(sm, ip0);
1535                       goto trace00;
1536                     }
1537                 }
1538
1539               /* Create session initiated by host from external network */
1540               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1541                                                      thread_index);
1542               if (!s0)
1543                 {
1544                   next0 = SNAT_OUT2IN_NEXT_DROP;
1545                   goto trace00;
1546                 }
1547             }
1548           else
1549             {
1550               if (PREDICT_FALSE (value0.value == ~0ULL))
1551                 {
1552                   s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index,
1553                                       now, vm, node);
1554                   if (!s0)
1555                     next0 = SNAT_OUT2IN_NEXT_DROP;
1556                   goto trace00;
1557                 }
1558               else
1559                 {
1560                   s0 = pool_elt_at_index (
1561                     sm->per_thread_data[thread_index].sessions,
1562                     value0.value);
1563                 }
1564             }
1565
1566           old_addr0 = ip0->dst_address.as_u32;
1567           ip0->dst_address = s0->in2out.addr;
1568           new_addr0 = ip0->dst_address.as_u32;
1569           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1570
1571           sum0 = ip0->checksum;
1572           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1573                                  ip4_header_t,
1574                                  dst_address /* changed member */);
1575           ip0->checksum = ip_csum_fold (sum0);
1576
1577           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1578             {
1579               old_port0 = tcp0->dst_port;
1580               tcp0->dst_port = s0->in2out.port;
1581               new_port0 = tcp0->dst_port;
1582
1583               sum0 = tcp0->checksum;
1584               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1585                                      ip4_header_t,
1586                                      dst_address /* changed member */);
1587
1588               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1589                                      ip4_header_t /* cheat */,
1590                                      length /* changed member */);
1591               tcp0->checksum = ip_csum_fold(sum0);
1592             }
1593           else
1594             {
1595               old_port0 = udp0->dst_port;
1596               udp0->dst_port = s0->in2out.port;
1597               udp0->checksum = 0;
1598             }
1599
1600           /* Accounting */
1601           s0->last_heard = now;
1602           s0->total_pkts++;
1603           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1604           /* Per-user LRU list maintenance */
1605           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1606                              s0->per_user_index);
1607           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1608                               s0->per_user_list_head_index,
1609                               s0->per_user_index);
1610         trace00:
1611
1612           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1613                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1614             {
1615               snat_out2in_trace_t *t =
1616                  vlib_add_trace (vm, node, b0, sizeof (*t));
1617               t->sw_if_index = sw_if_index0;
1618               t->next_index = next0;
1619               t->session_index = ~0;
1620               if (s0)
1621                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1622             }
1623
1624           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1625
1626           /* verify speculative enqueue, maybe switch current next frame */
1627           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1628                                            to_next, n_left_to_next,
1629                                            bi0, next0);
1630         }
1631
1632       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1633     }
1634
1635   vlib_node_increment_counter (vm, snat_out2in_node.index,
1636                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1637                                pkts_processed);
1638   return frame->n_vectors;
1639 }
1640
1641 VLIB_REGISTER_NODE (snat_out2in_node) = {
1642   .function = snat_out2in_node_fn,
1643   .name = "nat44-out2in",
1644   .vector_size = sizeof (u32),
1645   .format_trace = format_snat_out2in_trace,
1646   .type = VLIB_NODE_TYPE_INTERNAL,
1647
1648   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1649   .error_strings = snat_out2in_error_strings,
1650
1651   .runtime_data_bytes = sizeof (snat_runtime_t),
1652
1653   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1654
1655   /* edit / add dispositions here */
1656   .next_nodes = {
1657     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1658     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1659     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1660     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1661   },
1662 };
1663 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
1664
/*
 * Node function for "nat44-out2in-reass": outside-to-inside translation of
 * IPv4 fragments.
 *
 * For every buffer a reassembly context is looked up (or created) from the
 * source/destination address, fragment id and protocol.  A first fragment
 * resolves the NAT session (out2in hash lookup, falling back to a static
 * mapping) and stores the session index in the reassembly context; later
 * fragments reuse that index.  A non-first fragment that arrives while the
 * context has no session yet is handed to nat_ip4_reass_add_fragment() and
 * withheld from the next frame; fragments returned by
 * nat_ip4_reass_get_frags() are fed back into this loop once the input
 * frame has been consumed.
 *
 * @param vm     vlib main
 * @param node   runtime of this node (next indices, error counters, flags)
 * @param frame  frame holding the buffer indices to process; its vector
 *               storage is reused for looped-back fragments
 * @return       frame->n_vectors
 */
1665 static uword
1666 nat44_out2in_reass_node_fn (vlib_main_t * vm,
1667                             vlib_node_runtime_t * node,
1668                             vlib_frame_t * frame)
1669 {
1670   u32 n_left_from, *from, *to_next;
1671   snat_out2in_next_t next_index;
1672   u32 pkts_processed = 0;
1673   snat_main_t *sm = &snat_main;
1674   f64 now = vlib_time_now (vm);
1675   u32 thread_index = vlib_get_thread_index ();
1676   snat_main_per_thread_data_t *per_thread_data =
1677     &sm->per_thread_data[thread_index];
       /* Vectors of buffer indices filled by the reassembly helpers; both are
          freed before returning. */
1678   u32 *fragments_to_drop = 0;
1679   u32 *fragments_to_loopback = 0;
1680
1681   from = vlib_frame_vector_args (frame);
1682   n_left_from = frame->n_vectors;
1683   next_index = node->cached_next_index;
1684
1685   while (n_left_from > 0)
1686     {
1687       u32 n_left_to_next;
1688
1689       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1690
1691       while (n_left_from > 0 && n_left_to_next > 0)
1692        {
1693           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
1694           vlib_buffer_t *b0;
1695           u32 next0;
               /* Set when the buffer is kept by the reassembly context instead
                  of being forwarded. */
1696           u8 cached0 = 0;
1697           ip4_header_t *ip0;
1698           nat_reass_ip4_t *reass0;
1699           udp_header_t * udp0;
1700           tcp_header_t * tcp0;
1701           snat_session_key_t key0, sm0;
1702           clib_bihash_kv_8_8_t kv0, value0;
1703           snat_session_t * s0 = 0;
1704           u16 old_port0, new_port0;
1705           ip_csum_t sum0;
1706
1707           /* speculatively enqueue b0 to the current next frame */
1708           bi0 = from[0];
1709           to_next[0] = bi0;
1710           from += 1;
1711           to_next += 1;
1712           n_left_from -= 1;
1713           n_left_to_next -= 1;
1714
1715           b0 = vlib_get_buffer (vm, bi0);
               /* Default disposition; only the failure paths below change it. */
1716           next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1717
1718           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1719           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1720                                                                sw_if_index0);
1721
               /* NOTE(review): presumably true when the NAT is configured to
                  drop IPv4 fragments; confirm in nat_reass. */
1722           if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
1723             {
1724               next0 = SNAT_OUT2IN_NEXT_DROP;
1725               b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
1726               goto trace0;
1727             }
1728
1729           ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
               /* tcp0 aliases the same L4 header as udp0. */
1730           udp0 = ip4_next_header (ip0);
1731           tcp0 = (tcp_header_t *) udp0;
1732           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1733
1734           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
1735                                                  ip0->dst_address,
1736                                                  ip0->fragment_id,
1737                                                  ip0->protocol,
1738                                                  1,
1739                                                  &fragments_to_drop);
1740
               /* No reassembly context available: drop and count MAX_REASS. */
1741           if (PREDICT_FALSE (!reass0))
1742             {
1743               next0 = SNAT_OUT2IN_NEXT_DROP;
1744               b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_REASS];
1745               goto trace0;
1746             }
1747
1748           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1749             {
                   /* First fragment: build the outside key from the
                      destination address/port and resolve the session. */
1750               key0.addr = ip0->dst_address;
1751               key0.port = udp0->dst_port;
1752               key0.protocol = proto0;
1753               key0.fib_index = rx_fib_index0;
1754               kv0.key = key0.as_u64;
1755
                   /* A non-zero result is the "not found" case. */
1756               if (clib_bihash_search_8_8 (&per_thread_data->out2in, &kv0, &value0))
1757                 {
1758                   /* Try to match static mapping by external address and port,
1759                      destination address and port in packet */
                       /* A non-zero result means no static mapping matched. */
1760                   if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
1761                     {
1762                       if (!sm->forwarding_enabled)
1763                         {
1764                           b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1765                           /*
1766                            * Send DHCP packets to the ipv4 stack, or we won't
1767                            * be able to use dhcp client on the outside interface
1768                            */
1769                           if (PREDICT_TRUE (proto0 != SNAT_PROTOCOL_UDP
1770                               || (udp0->dst_port
1771                                   != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
1772                             next0 = SNAT_OUT2IN_NEXT_DROP;
1773                           else
1774                             vnet_feature_next
1775                               (vnet_buffer (b0)->sw_if_index[VLIB_RX],
1776                                &next0, b0);
1777                           goto trace0;
1778                         }
1779                       else
1780                         {
                               /* Forwarding enabled: the packet leaves
                                  untranslated with next0 still set to
                                  SNAT_OUT2IN_NEXT_LOOKUP. */
1781                           create_bypass_for_fwd(sm, ip0);
1782                           goto trace0;
1783                         }
1784                     }
1785
1786                   /* Create session initiated by host from external network */
1787                   s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1788                                                          thread_index);
1789                   if (!s0)
1790                     {
1791                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1792                       next0 = SNAT_OUT2IN_NEXT_DROP;
1793                       goto trace0;
1794                     }
                       /* Remember the session so later fragments can reuse it.
                          NOTE(review): thread_index is recorded only on this
                          branch, not when an existing session is found below;
                          confirm that is intended. */
1795                   reass0->sess_index = s0 - per_thread_data->sessions;
1796                   reass0->thread_index = thread_index;
1797                 }
1798               else
1799                 {
                       /* Existing session: the hash value is the pool index. */
1800                   s0 = pool_elt_at_index (per_thread_data->sessions,
1801                                           value0.value);
1802                   reass0->sess_index = value0.value;
1803                 }
                   /* Collect fragments held by this context so they can be
                      fed back through the loop (see the refill below). */
1804               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
1805             }
1806           else
1807             {
                   /* Non-first fragment and no session recorded yet: hand
                      the buffer to the reassembly context instead of
                      forwarding it. */
1808               if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
1809                 {
1810                   if (nat_ip4_reass_add_fragment (reass0, bi0))
1811                     {
1812                       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_FRAG];
1813                       next0 = SNAT_OUT2IN_NEXT_DROP;
1814                       goto trace0;
1815                     }
1816                   cached0 = 1;
1817                   goto trace0;
1818                 }
1819               s0 = pool_elt_at_index (per_thread_data->sessions,
1820                                       reass0->sess_index);
1821             }
1822
               /* Rewrite the destination address to the session's inside
                  address and patch the IPv4 header checksum incrementally. */
1823           old_addr0 = ip0->dst_address.as_u32;
1824           ip0->dst_address = s0->in2out.addr;
1825           new_addr0 = ip0->dst_address.as_u32;
1826           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1827
1828           sum0 = ip0->checksum;
1829           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1830                                  ip4_header_t,
1831                                  dst_address /* changed member */);
1832           ip0->checksum = ip_csum_fold (sum0);
1833
               /* Ports are rewritten on the first fragment only. */
1834           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1835             {
1836               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1837                 {
1838                   old_port0 = tcp0->dst_port;
1839                   tcp0->dst_port = s0->in2out.port;
1840                   new_port0 = tcp0->dst_port;
1841
                       /* The TCP checksum is patched for both the address
                          and the port change. */
1842                   sum0 = tcp0->checksum;
1843                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1844                                          ip4_header_t,
1845                                          dst_address /* changed member */);
1846
1847                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
1848                                          ip4_header_t /* cheat */,
1849                                          length /* changed member */);
1850                   tcp0->checksum = ip_csum_fold(sum0);
1851                 }
1852               else
1853                 {
                       /* The checksum is cleared rather than updated.
                          old_port0 is written here but not read afterwards.
                          NOTE(review): every non-TCP protocol takes this
                          branch; confirm only UDP can reach it. */
1854                   old_port0 = udp0->dst_port;
1855                   udp0->dst_port = s0->in2out.port;
1856                   udp0->checksum = 0;
1857                 }
1858             }
1859
1860           /* Accounting */
1861           s0->last_heard = now;
1862           s0->total_pkts++;
1863           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1864           /* Per-user LRU list maintenance */
1865           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1866                              s0->per_user_index);
1867           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1868                               s0->per_user_list_head_index,
1869                               s0->per_user_index);
1870
1871         trace0:
1872           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1873                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1874             {
1875               nat44_out2in_reass_trace_t *t =
1876                  vlib_add_trace (vm, node, b0, sizeof (*t));
1877               t->cached = cached0;
1878               t->sw_if_index = sw_if_index0;
1879               t->next_index = next0;
1880             }
1881
1882           if (cached0)
1883             {
                   /* Undo the speculative enqueue: the buffer stays with
                      the reassembly context. */
1884               n_left_to_next++;
1885               to_next--;
1886             }
1887           else
1888             {
                   /* Every buffer that is not dropped counts as processed. */
1889               pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1890
1891               /* verify speculative enqueue, maybe switch current next frame */
1892               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1893                                                to_next, n_left_to_next,
1894                                                bi0, next0);
1895             }
1896
               /* Input exhausted but fragments are waiting: refill the
                  frame's own vector storage with up to VLIB_FRAME_SIZE
                  indices taken from the tail of fragments_to_loopback and
                  keep looping. */
1897           if (n_left_from == 0 && vec_len (fragments_to_loopback))
1898             {
1899               from = vlib_frame_vector_args (frame);
1900               u32 len = vec_len (fragments_to_loopback);
1901               if (len <= VLIB_FRAME_SIZE)
1902                 {
1903                   clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
1904                   n_left_from = len;
1905                   vec_reset_length (fragments_to_loopback);
1906                 }
1907               else
1908                 {
1909                   clib_memcpy (from,
1910                                fragments_to_loopback + (len - VLIB_FRAME_SIZE),
1911                                sizeof (u32) * VLIB_FRAME_SIZE);
1912                   n_left_from = VLIB_FRAME_SIZE;
1913                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
1914                 }
1915             }
1916        }
1917
1918       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1919     }
1920
1921   vlib_node_increment_counter (vm, nat44_out2in_reass_node.index,
1922                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1923                                pkts_processed);
1924
       /* Fragments collected in fragments_to_drop are sent to the drop
          disposition with the DROP_FRAGMENT error. */
1925   nat_send_all_to_node (vm, fragments_to_drop, node,
1926                         &node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT],
1927                         SNAT_OUT2IN_NEXT_DROP);
1928
1929   vec_free (fragments_to_drop);
1930   vec_free (fragments_to_loopback);
1931   return frame->n_vectors;
1932 }
1933
/*
 * Graph node registration for "nat44-out2in-reass".
 *
 * Packets are processed by nat44_out2in_reass_node_fn and traced with
 * format_nat44_out2in_reass_trace.  The error strings and next-node table
 * are the snat_out2in ones; no runtime_data_bytes are requested here.
 */
1934 VLIB_REGISTER_NODE (nat44_out2in_reass_node) = {
1935   .function = nat44_out2in_reass_node_fn,
1936   .name = "nat44-out2in-reass",
1937   .vector_size = sizeof (u32),
1938   .format_trace = format_nat44_out2in_reass_trace,
1939   .type = VLIB_NODE_TYPE_INTERNAL,
1940
1941   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1942   .error_strings = snat_out2in_error_strings,
1943
1944   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1945
1946   /* edit / add dispositions here */
       /* Each SNAT_OUT2IN_NEXT_* disposition is bound to the name of the
          graph node that receives packets given that disposition; the
          REASS entry names this node itself. */
1947   .next_nodes = {
1948     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1949     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1950     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1951     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1952   },
1953 };
     /* NOTE(review): presumably emits CPU-specific variants of the node
        function; confirm against the macro definition. */
1954 VLIB_NODE_FUNCTION_MULTIARCH (nat44_out2in_reass_node,
1955                               nat44_out2in_reass_node_fn);
1956
1957 /**************************/
1958 /*** deterministic mode ***/
1959 /**************************/
/*
 * Node function for "nat44-det-out2in": outside-to-inside translation in
 * deterministic mode.
 *
 * Per packet:
 *  - TTL == 1: an ICMP time-exceeded error is prepared and the packet goes
 *    to SNAT_OUT2IN_NEXT_ICMP_ERROR;
 *  - ICMP: handled by icmp_out2in();
 *  - otherwise: the deterministic map is found from the outside destination
 *    address, the inside address is derived with snat_det_reverse(), the
 *    session is looked up by (external host address, external host port,
 *    outside port), and destination address/port plus checksums are
 *    rewritten.  Packets without a map or session are dropped with
 *    NO_TRANSLATION.
 *
 * The first inner loop handles two buffers per iteration, the second one
 * handles the remainder one at a time; both apply the same steps.
 *
 * @param vm     vlib main
 * @param node   runtime of this node (next indices, error counters, flags)
 * @param frame  frame holding the buffer indices to process
 * @return       frame->n_vectors
 */
1960 static uword
1961 snat_det_out2in_node_fn (vlib_main_t * vm,
1962                          vlib_node_runtime_t * node,
1963                          vlib_frame_t * frame)
1964 {
1965   u32 n_left_from, * from, * to_next;
1966   snat_out2in_next_t next_index;
1967   u32 pkts_processed = 0;
1968   snat_main_t * sm = &snat_main;
1969   u32 thread_index = vlib_get_thread_index ();
1970
1971   from = vlib_frame_vector_args (frame);
1972   n_left_from = frame->n_vectors;
1973   next_index = node->cached_next_index;
1974
1975   while (n_left_from > 0)
1976     {
1977       u32 n_left_to_next;
1978
1979       vlib_get_next_frame (vm, node, next_index,
1980                            to_next, n_left_to_next);
1981
           /* Dual loop: requiring at least 4 buffers keeps the prefetch
              reads of from[2] and from[3] below within the input. */
1982       while (n_left_from >= 4 && n_left_to_next >= 2)
1983         {
1984           u32 bi0, bi1;
1985           vlib_buffer_t * b0, * b1;
1986           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1987           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
1988           u32 sw_if_index0, sw_if_index1;
1989           ip4_header_t * ip0, * ip1;
1990           ip_csum_t sum0, sum1;
1991           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
1992           u16 new_port0, old_port0, old_port1, new_port1;
1993           udp_header_t * udp0, * udp1;
1994           tcp_header_t * tcp0, * tcp1;
1995           u32 proto0, proto1;
1996           snat_det_out_key_t key0, key1;
1997           snat_det_map_t * dm0, * dm1;
               /* Sessions start as NULL so the trace code can tell whether
                  one was found. */
1998           snat_det_session_t * ses0 = 0, * ses1 = 0;
1999           u32 rx_fib_index0, rx_fib_index1;
2000           icmp46_header_t * icmp0, * icmp1;
2001
2002           /* Prefetch next iteration. */
2003           {
2004             vlib_buffer_t * p2, * p3;
2005
2006             p2 = vlib_get_buffer (vm, from[2]);
2007             p3 = vlib_get_buffer (vm, from[3]);
2008
2009             vlib_prefetch_buffer_header (p2, LOAD);
2010             vlib_prefetch_buffer_header (p3, LOAD);
2011
2012             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2013             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2014           }
2015
2016           /* speculatively enqueue b0 and b1 to the current next frame */
2017           to_next[0] = bi0 = from[0];
2018           to_next[1] = bi1 = from[1];
2019           from += 2;
2020           to_next += 2;
2021           n_left_from -= 2;
2022           n_left_to_next -= 2;
2023
2024           b0 = vlib_get_buffer (vm, bi0);
2025           b1 = vlib_get_buffer (vm, bi1);
2026
               /* ---- packet 0 ---- */
2027           ip0 = vlib_buffer_get_current (b0);
               /* tcp0 aliases the same L4 header as udp0. */
2028           udp0 = ip4_next_header (ip0);
2029           tcp0 = (tcp_header_t *) udp0;
2030
2031           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2032
               /* TTL of 1: answer with ICMP time exceeded, no translation. */
2033           if (PREDICT_FALSE(ip0->ttl == 1))
2034             {
2035               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2036               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2037                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2038                                            0);
2039               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2040               goto trace0;
2041             }
2042
2043           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2044
               /* ICMP is delegated to icmp_out2in(), which returns the
                  disposition and receives &ses0 / &dm0.
                  NOTE(review): the trace code reads dm0 only when ses0 is
                  non-NULL; confirm icmp_out2in() sets both together. */
2045           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2046             {
2047               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2048               icmp0 = (icmp46_header_t *) udp0;
2049
2050               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2051                                   rx_fib_index0, node, next0, thread_index,
2052                                   &ses0, &dm0);
2053               goto trace0;
2054             }
2055
               /* Session key: external host address and port plus the
                  outside port, as found in the packet. */
2056           key0.ext_host_addr = ip0->src_address;
2057           key0.ext_host_port = tcp0->src;
2058           key0.out_port = tcp0->dst;
2059
2060           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
2061           if (PREDICT_FALSE(!dm0))
2062             {
                   /* NOTE(review): clib_warning is called for each such
                      packet on the per-packet path; consider whether the
                      error counter alone is enough. */
2063               clib_warning("unknown dst address:  %U",
2064                            format_ip4_address, &ip0->dst_address);
2065               next0 = SNAT_OUT2IN_NEXT_DROP;
2066               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2067               goto trace0;
2068             }
2069
               /* Derive the inside address into new_addr0 from the outside
                  address and host-order outside port. */
2070           snat_det_reverse(dm0, &ip0->dst_address,
2071                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
2072
2073           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
2074           if (PREDICT_FALSE(!ses0))
2075             {
2076               clib_warning("no match src %U:%d dst %U:%d for user %U",
2077                            format_ip4_address, &ip0->src_address,
2078                            clib_net_to_host_u16 (tcp0->src),
2079                            format_ip4_address, &ip0->dst_address,
2080                            clib_net_to_host_u16 (tcp0->dst),
2081                            format_ip4_address, &new_addr0);
2082               next0 = SNAT_OUT2IN_NEXT_DROP;
2083               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2084               goto trace0;
2085             }
2086           new_port0 = ses0->in_port;
2087
               /* Rewrite the destination address, set VLIB_TX to the inside
                  FIB index and patch the IPv4 header checksum. */
2088           old_addr0 = ip0->dst_address;
2089           ip0->dst_address = new_addr0;
2090           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2091
2092           sum0 = ip0->checksum;
2093           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2094                                  ip4_header_t,
2095                                  dst_address /* changed member */);
2096           ip0->checksum = ip_csum_fold (sum0);
2097
2098           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2099             {
                   /* Session state tracking: FIN while ESTABLISHED moves to
                      CLOSE_WAIT; ACK while LAST_ACK closes the session. */
2100               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2101                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2102               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
2103                 snat_det_ses_close(dm0, ses0);
2104
2105               old_port0 = tcp0->dst;
2106               tcp0->dst = new_port0;
2107
                   /* The TCP checksum is patched for both the address and
                      the port change. */
2108               sum0 = tcp0->checksum;
2109               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2110                                      ip4_header_t,
2111                                      dst_address /* changed member */);
2112
2113               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2114                                      ip4_header_t /* cheat */,
2115                                      length /* changed member */);
2116               tcp0->checksum = ip_csum_fold(sum0);
2117             }
2118           else
2119             {
                   /* The checksum is cleared rather than updated.
                      old_port0 is written here but not read afterwards. */
2120               old_port0 = udp0->dst_port;
2121               udp0->dst_port = new_port0;
2122               udp0->checksum = 0;
2123             }
2124
2125         trace0:
2126
2127           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2128                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2129             {
2130               snat_out2in_trace_t *t =
2131                  vlib_add_trace (vm, node, b0, sizeof (*t));
2132               t->sw_if_index = sw_if_index0;
2133               t->next_index = next0;
                   /* ~0 marks "no session"; otherwise the index within the
                      map's session array. */
2134               t->session_index = ~0;
2135               if (ses0)
2136                 t->session_index = ses0 - dm0->sessions;
2137             }
2138
2139           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2140
               /* ---- packet 1: same steps as packet 0 ----
                  NOTE(review): b1 was already fetched right after the
                  speculative enqueue; this second fetch looks redundant. */
2141           b1 = vlib_get_buffer (vm, bi1);
2142
2143           ip1 = vlib_buffer_get_current (b1);
2144           udp1 = ip4_next_header (ip1);
2145           tcp1 = (tcp_header_t *) udp1;
2146
2147           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2148
2149           if (PREDICT_FALSE(ip1->ttl == 1))
2150             {
2151               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2152               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2153                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2154                                            0);
2155               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2156               goto trace1;
2157             }
2158
2159           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2160
2161           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
2162             {
2163               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
2164               icmp1 = (icmp46_header_t *) udp1;
2165
2166               next1 = icmp_out2in(sm, b1, ip1, icmp1, sw_if_index1,
2167                                   rx_fib_index1, node, next1, thread_index,
2168                                   &ses1, &dm1);
2169               goto trace1;
2170             }
2171
2172           key1.ext_host_addr = ip1->src_address;
2173           key1.ext_host_port = tcp1->src;
2174           key1.out_port = tcp1->dst;
2175
2176           dm1 = snat_det_map_by_out(sm, &ip1->dst_address);
2177           if (PREDICT_FALSE(!dm1))
2178             {
2179               clib_warning("unknown dst address:  %U",
2180                            format_ip4_address, &ip1->dst_address);
2181               next1 = SNAT_OUT2IN_NEXT_DROP;
2182               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2183               goto trace1;
2184             }
2185
2186           snat_det_reverse(dm1, &ip1->dst_address,
2187                            clib_net_to_host_u16(tcp1->dst), &new_addr1);
2188
2189           ses1 = snat_det_get_ses_by_out (dm1, &new_addr1, key1.as_u64);
2190           if (PREDICT_FALSE(!ses1))
2191             {
2192               clib_warning("no match src %U:%d dst %U:%d for user %U",
2193                            format_ip4_address, &ip1->src_address,
2194                            clib_net_to_host_u16 (tcp1->src),
2195                            format_ip4_address, &ip1->dst_address,
2196                            clib_net_to_host_u16 (tcp1->dst),
2197                            format_ip4_address, &new_addr1);
2198               next1 = SNAT_OUT2IN_NEXT_DROP;
2199               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2200               goto trace1;
2201             }
2202           new_port1 = ses1->in_port;
2203
2204           old_addr1 = ip1->dst_address;
2205           ip1->dst_address = new_addr1;
2206           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2207
2208           sum1 = ip1->checksum;
2209           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2210                                  ip4_header_t,
2211                                  dst_address /* changed member */);
2212           ip1->checksum = ip_csum_fold (sum1);
2213
2214           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
2215             {
2216               if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
2217                 ses1->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2218               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_LAST_ACK)
2219                 snat_det_ses_close(dm1, ses1);
2220
2221               old_port1 = tcp1->dst;
2222               tcp1->dst = new_port1;
2223
2224               sum1 = tcp1->checksum;
2225               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2226                                      ip4_header_t,
2227                                      dst_address /* changed member */);
2228
2229               sum1 = ip_csum_update (sum1, old_port1, new_port1,
2230                                      ip4_header_t /* cheat */,
2231                                      length /* changed member */);
2232               tcp1->checksum = ip_csum_fold(sum1);
2233             }
2234           else
2235             {
                   /* The checksum is cleared rather than updated.
                      old_port1 is written here but not read afterwards. */
2236               old_port1 = udp1->dst_port;
2237               udp1->dst_port = new_port1;
2238               udp1->checksum = 0;
2239             }
2240
2241         trace1:
2242
2243           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2244                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2245             {
2246               snat_out2in_trace_t *t =
2247                  vlib_add_trace (vm, node, b1, sizeof (*t));
2248               t->sw_if_index = sw_if_index1;
2249               t->next_index = next1;
2250               t->session_index = ~0;
2251               if (ses1)
2252                 t->session_index = ses1 - dm1->sessions;
2253             }
2254
2255           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
2256
2257           /* verify speculative enqueues, maybe switch current next frame */
2258           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2259                                            to_next, n_left_to_next,
2260                                            bi0, bi1, next0, next1);
2261          }
2262
           /* Single loop: handles whatever the dual loop left over, with
              the same per-packet steps. */
2263       while (n_left_from > 0 && n_left_to_next > 0)
2264         {
2265           u32 bi0;
2266           vlib_buffer_t * b0;
2267           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
2268           u32 sw_if_index0;
2269           ip4_header_t * ip0;
2270           ip_csum_t sum0;
2271           ip4_address_t new_addr0, old_addr0;
2272           u16 new_port0, old_port0;
2273           udp_header_t * udp0;
2274           tcp_header_t * tcp0;
2275           u32 proto0;
2276           snat_det_out_key_t key0;
2277           snat_det_map_t * dm0;
2278           snat_det_session_t * ses0 = 0;
2279           u32 rx_fib_index0;
2280           icmp46_header_t * icmp0;
2281
2282           /* speculatively enqueue b0 to the current next frame */
2283           bi0 = from[0];
2284           to_next[0] = bi0;
2285           from += 1;
2286           to_next += 1;
2287           n_left_from -= 1;
2288           n_left_to_next -= 1;
2289
2290           b0 = vlib_get_buffer (vm, bi0);
2291
2292           ip0 = vlib_buffer_get_current (b0);
2293           udp0 = ip4_next_header (ip0);
2294           tcp0 = (tcp_header_t *) udp0;
2295
2296           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2297
               /* TTL of 1: answer with ICMP time exceeded, no translation. */
2298           if (PREDICT_FALSE(ip0->ttl == 1))
2299             {
2300               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2301               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2302                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2303                                            0);
2304               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2305               goto trace00;
2306             }
2307
2308           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2309
               /* ICMP is delegated to icmp_out2in(). */
2310           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2311             {
2312               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2313               icmp0 = (icmp46_header_t *) udp0;
2314
2315               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2316                                   rx_fib_index0, node, next0, thread_index,
2317                                   &ses0, &dm0);
2318               goto trace00;
2319             }
2320
2321           key0.ext_host_addr = ip0->src_address;
2322           key0.ext_host_port = tcp0->src;
2323           key0.out_port = tcp0->dst;
2324
               /* No deterministic map for this outside address: drop. */
2325           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
2326           if (PREDICT_FALSE(!dm0))
2327             {
2328               clib_warning("unknown dst address:  %U",
2329                            format_ip4_address, &ip0->dst_address);
2330               next0 = SNAT_OUT2IN_NEXT_DROP;
2331               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2332               goto trace00;
2333             }
2334
2335           snat_det_reverse(dm0, &ip0->dst_address,
2336                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
2337
               /* No session for this key: drop. */
2338           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
2339           if (PREDICT_FALSE(!ses0))
2340             {
2341               clib_warning("no match src %U:%d dst %U:%d for user %U",
2342                            format_ip4_address, &ip0->src_address,
2343                            clib_net_to_host_u16 (tcp0->src),
2344                            format_ip4_address, &ip0->dst_address,
2345                            clib_net_to_host_u16 (tcp0->dst),
2346                            format_ip4_address, &new_addr0);
2347               next0 = SNAT_OUT2IN_NEXT_DROP;
2348               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2349               goto trace00;
2350             }
2351           new_port0 = ses0->in_port;
2352
2353           old_addr0 = ip0->dst_address;
2354           ip0->dst_address = new_addr0;
2355           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2356
2357           sum0 = ip0->checksum;
2358           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2359                                  ip4_header_t,
2360                                  dst_address /* changed member */);
2361           ip0->checksum = ip_csum_fold (sum0);
2362
2363           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2364             {
2365               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2366                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2367               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
2368                 snat_det_ses_close(dm0, ses0);
2369
2370               old_port0 = tcp0->dst;
2371               tcp0->dst = new_port0;
2372
2373               sum0 = tcp0->checksum;
2374               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2375                                      ip4_header_t,
2376                                      dst_address /* changed member */);
2377
2378               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2379                                      ip4_header_t /* cheat */,
2380                                      length /* changed member */);
2381               tcp0->checksum = ip_csum_fold(sum0);
2382             }
2383           else
2384             {
                   /* The checksum is cleared rather than updated.
                      old_port0 is written here but not read afterwards. */
2385               old_port0 = udp0->dst_port;
2386               udp0->dst_port = new_port0;
2387               udp0->checksum = 0;
2388             }
2389
2390         trace00:
2391
2392           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2393                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2394             {
2395               snat_out2in_trace_t *t =
2396                  vlib_add_trace (vm, node, b0, sizeof (*t));
2397               t->sw_if_index = sw_if_index0;
2398               t->next_index = next0;
2399               t->session_index = ~0;
2400               if (ses0)
2401                 t->session_index = ses0 - dm0->sessions;
2402             }
2403
               /* Every buffer that is not dropped counts as processed. */
2404           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2405
2406           /* verify speculative enqueue, maybe switch current next frame */
2407           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2408                                            to_next, n_left_to_next,
2409                                            bi0, next0);
2410         }
2411
2412       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2413     }
2414
2415   vlib_node_increment_counter (vm, snat_det_out2in_node.index,
2416                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2417                                pkts_processed);
2418   return frame->n_vectors;
2419 }
2420
2421 VLIB_REGISTER_NODE (snat_det_out2in_node) = {
2422   .function = snat_det_out2in_node_fn,
2423   .name = "nat44-det-out2in",
2424   .vector_size = sizeof (u32),
2425   .format_trace = format_snat_out2in_trace,
2426   .type = VLIB_NODE_TYPE_INTERNAL,
2427
2428   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2429   .error_strings = snat_out2in_error_strings,
2430
2431   .runtime_data_bytes = sizeof (snat_runtime_t),
2432
2433   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
2434
2435   /* edit / add dispositions here */
2436   .next_nodes = {
2437     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
2438     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2439     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2440     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
2441   },
2442 };
2443 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn);
2444
2445 /**
2446  * Get address and port values to be used for ICMP packet translation
2447  * and create session if needed
2448  *
2449  * @param[in,out] sm             NAT main
2450  * @param[in,out] node           NAT node runtime
2451  * @param[in] thread_index       thread index
2452  * @param[in,out] b0             buffer containing packet to be translated
2453  * @param[out] p_proto           protocol used for matching
2454  * @param[out] p_value           address and port after NAT translation
2455  * @param[out] p_dont_translate  if packet should not be translated
2456  * @param d                      optional parameter
2457  * @param e                      optional parameter
2458  */
2459 u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node,
2460                           u32 thread_index, vlib_buffer_t *b0,
2461                           ip4_header_t *ip0, u8 *p_proto,
2462                           snat_session_key_t *p_value,
2463                           u8 *p_dont_translate, void *d, void *e)
2464 {
2465   icmp46_header_t *icmp0;
2466   u32 sw_if_index0;
2467   u8 protocol;
2468   snat_det_out_key_t key0;
2469   u8 dont_translate = 0;
2470   u32 next0 = ~0;
2471   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2472   ip4_header_t *inner_ip0;
2473   void *l4_header = 0;
2474   icmp46_header_t *inner_icmp0;
2475   snat_det_map_t * dm0 = 0;
2476   ip4_address_t new_addr0 = {{0}};
2477   snat_det_session_t * ses0 = 0;
2478   ip4_address_t out_addr;
2479
2480   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2481   echo0 = (icmp_echo_header_t *)(icmp0+1);
2482   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2483
2484   if (!icmp_is_error_message (icmp0))
2485     {
2486       protocol = SNAT_PROTOCOL_ICMP;
2487       key0.ext_host_addr = ip0->src_address;
2488       key0.ext_host_port = 0;
2489       key0.out_port = echo0->identifier;
2490       out_addr = ip0->dst_address;
2491     }
2492   else
2493     {
2494       inner_ip0 = (ip4_header_t *)(echo0+1);
2495       l4_header = ip4_next_header (inner_ip0);
2496       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2497       key0.ext_host_addr = inner_ip0->dst_address;
2498       out_addr = inner_ip0->src_address;
2499       switch (protocol)
2500         {
2501         case SNAT_PROTOCOL_ICMP:
2502           inner_icmp0 = (icmp46_header_t*)l4_header;
2503           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2504           key0.ext_host_port = 0;
2505           key0.out_port = inner_echo0->identifier;
2506           break;
2507         case SNAT_PROTOCOL_UDP:
2508         case SNAT_PROTOCOL_TCP:
2509           key0.ext_host_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2510           key0.out_port = ((tcp_udp_header_t*)l4_header)->src_port;
2511           break;
2512         default:
2513           b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
2514           next0 = SNAT_OUT2IN_NEXT_DROP;
2515           goto out;
2516         }
2517     }
2518
2519   dm0 = snat_det_map_by_out(sm, &out_addr);
2520   if (PREDICT_FALSE(!dm0))
2521     {
2522       /* Don't NAT packet aimed at the intfc address */
2523       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
2524                                           ip0->dst_address.as_u32)))
2525         {
2526           dont_translate = 1;
2527           goto out;
2528         }
2529       clib_warning("unknown dst address:  %U",
2530                    format_ip4_address, &ip0->dst_address);
2531       goto out;
2532     }
2533
2534   snat_det_reverse(dm0, &ip0->dst_address,
2535                    clib_net_to_host_u16(key0.out_port), &new_addr0);
2536
2537   ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
2538   if (PREDICT_FALSE(!ses0))
2539     {
2540       /* Don't NAT packet aimed at the intfc address */
2541       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
2542                                           ip0->dst_address.as_u32)))
2543         {
2544           dont_translate = 1;
2545           goto out;
2546         }
2547       clib_warning("no match src %U:%d dst %U:%d for user %U",
2548                    format_ip4_address, &key0.ext_host_addr,
2549                    clib_net_to_host_u16 (key0.ext_host_port),
2550                    format_ip4_address, &out_addr,
2551                    clib_net_to_host_u16 (key0.out_port),
2552                    format_ip4_address, &new_addr0);
2553       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2554       next0 = SNAT_OUT2IN_NEXT_DROP;
2555       goto out;
2556     }
2557
2558   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
2559                     !icmp_is_error_message (icmp0)))
2560     {
2561       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
2562       next0 = SNAT_OUT2IN_NEXT_DROP;
2563       goto out;
2564     }
2565
2566   goto out;
2567
2568 out:
2569   *p_proto = protocol;
2570   if (ses0)
2571     {
2572       p_value->addr = new_addr0;
2573       p_value->fib_index = sm->inside_fib_index;
2574       p_value->port = ses0->in_port;
2575     }
2576   *p_dont_translate = dont_translate;
2577   if (d)
2578     *(snat_det_session_t**)d = ses0;
2579   if (e)
2580     *(snat_det_map_t**)e = dm0;
2581   return next0;
2582 }
2583
2584 /**********************/
2585 /*** worker handoff ***/
2586 /**********************/
2587 static uword
2588 snat_out2in_worker_handoff_fn (vlib_main_t * vm,
2589                                vlib_node_runtime_t * node,
2590                                vlib_frame_t * frame)
2591 {
2592   snat_main_t *sm = &snat_main;
2593   vlib_thread_main_t *tm = vlib_get_thread_main ();
2594   u32 n_left_from, *from, *to_next = 0;
2595   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
2596   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
2597     = 0;
2598   vlib_frame_queue_elt_t *hf = 0;
2599   vlib_frame_t *f = 0;
2600   int i;
2601   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
2602   u32 next_worker_index = 0;
2603   u32 current_worker_index = ~0;
2604   u32 thread_index = vlib_get_thread_index ();
2605
2606   ASSERT (vec_len (sm->workers));
2607
2608   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
2609     {
2610       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
2611
2612       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
2613                                sm->first_worker_index + sm->num_workers - 1,
2614                                (vlib_frame_queue_t *) (~0));
2615     }
2616
2617   from = vlib_frame_vector_args (frame);
2618   n_left_from = frame->n_vectors;
2619
2620   while (n_left_from > 0)
2621     {
2622       u32 bi0;
2623       vlib_buffer_t *b0;
2624       u32 sw_if_index0;
2625       u32 rx_fib_index0;
2626       ip4_header_t * ip0;
2627       u8 do_handoff;
2628
2629       bi0 = from[0];
2630       from += 1;
2631       n_left_from -= 1;
2632
2633       b0 = vlib_get_buffer (vm, bi0);
2634
2635       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
2636       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2637
2638       ip0 = vlib_buffer_get_current (b0);
2639
2640       next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0);
2641
2642       if (PREDICT_FALSE (next_worker_index != thread_index))
2643         {
2644           do_handoff = 1;
2645
2646           if (next_worker_index != current_worker_index)
2647             {
2648               if (hf)
2649                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2650
2651               hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index,
2652                                                       next_worker_index,
2653                                                       handoff_queue_elt_by_worker_index);
2654
2655               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2656               to_next_worker = &hf->buffer_index[hf->n_vectors];
2657               current_worker_index = next_worker_index;
2658             }
2659
2660           /* enqueue to correct worker thread */
2661           to_next_worker[0] = bi0;
2662           to_next_worker++;
2663           n_left_to_next_worker--;
2664
2665           if (n_left_to_next_worker == 0)
2666             {
2667               hf->n_vectors = VLIB_FRAME_SIZE;
2668               vlib_put_frame_queue_elt (hf);
2669               current_worker_index = ~0;
2670               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2671               hf = 0;
2672             }
2673         }
2674       else
2675         {
2676           do_handoff = 0;
2677           /* if this is 1st frame */
2678           if (!f)
2679             {
2680               f = vlib_get_frame_to_node (vm, sm->out2in_node_index);
2681               to_next = vlib_frame_vector_args (f);
2682             }
2683
2684           to_next[0] = bi0;
2685           to_next += 1;
2686           f->n_vectors++;
2687         }
2688
2689       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2690                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2691         {
2692           snat_out2in_worker_handoff_trace_t *t =
2693             vlib_add_trace (vm, node, b0, sizeof (*t));
2694           t->next_worker_index = next_worker_index;
2695           t->do_handoff = do_handoff;
2696         }
2697     }
2698
2699   if (f)
2700     vlib_put_frame_to_node (vm, sm->out2in_node_index, f);
2701
2702   if (hf)
2703     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2704
2705   /* Ship frames to the worker nodes */
2706   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2707     {
2708       if (handoff_queue_elt_by_worker_index[i])
2709         {
2710           hf = handoff_queue_elt_by_worker_index[i];
2711           /*
2712            * It works better to let the handoff node
2713            * rate-adapt, always ship the handoff queue element.
2714            */
2715           if (1 || hf->n_vectors == hf->last_n_vectors)
2716             {
2717               vlib_put_frame_queue_elt (hf);
2718               handoff_queue_elt_by_worker_index[i] = 0;
2719             }
2720           else
2721             hf->last_n_vectors = hf->n_vectors;
2722         }
2723       congested_handoff_queue_by_worker_index[i] =
2724         (vlib_frame_queue_t *) (~0);
2725     }
2726   hf = 0;
2727   current_worker_index = ~0;
2728   return frame->n_vectors;
2729 }
2730
2731 VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = {
2732   .function = snat_out2in_worker_handoff_fn,
2733   .name = "nat44-out2in-worker-handoff",
2734   .vector_size = sizeof (u32),
2735   .format_trace = format_snat_out2in_worker_handoff_trace,
2736   .type = VLIB_NODE_TYPE_INTERNAL,
2737
2738   .n_next_nodes = 1,
2739
2740   .next_nodes = {
2741     [0] = "error-drop",
2742   },
2743 };
2744
2745 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn);
2746
2747 static uword
2748 snat_out2in_fast_node_fn (vlib_main_t * vm,
2749                           vlib_node_runtime_t * node,
2750                           vlib_frame_t * frame)
2751 {
2752   u32 n_left_from, * from, * to_next;
2753   snat_out2in_next_t next_index;
2754   u32 pkts_processed = 0;
2755   snat_main_t * sm = &snat_main;
2756
2757   from = vlib_frame_vector_args (frame);
2758   n_left_from = frame->n_vectors;
2759   next_index = node->cached_next_index;
2760
2761   while (n_left_from > 0)
2762     {
2763       u32 n_left_to_next;
2764
2765       vlib_get_next_frame (vm, node, next_index,
2766                            to_next, n_left_to_next);
2767
2768       while (n_left_from > 0 && n_left_to_next > 0)
2769         {
2770           u32 bi0;
2771           vlib_buffer_t * b0;
2772           u32 next0 = SNAT_OUT2IN_NEXT_DROP;
2773           u32 sw_if_index0;
2774           ip4_header_t * ip0;
2775           ip_csum_t sum0;
2776           u32 new_addr0, old_addr0;
2777           u16 new_port0, old_port0;
2778           udp_header_t * udp0;
2779           tcp_header_t * tcp0;
2780           icmp46_header_t * icmp0;
2781           snat_session_key_t key0, sm0;
2782           u32 proto0;
2783           u32 rx_fib_index0;
2784
2785           /* speculatively enqueue b0 to the current next frame */
2786           bi0 = from[0];
2787           to_next[0] = bi0;
2788           from += 1;
2789           to_next += 1;
2790           n_left_from -= 1;
2791           n_left_to_next -= 1;
2792
2793           b0 = vlib_get_buffer (vm, bi0);
2794
2795           ip0 = vlib_buffer_get_current (b0);
2796           udp0 = ip4_next_header (ip0);
2797           tcp0 = (tcp_header_t *) udp0;
2798           icmp0 = (icmp46_header_t *) udp0;
2799
2800           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2801           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2802
2803           vnet_feature_next (sw_if_index0, &next0, b0);
2804
2805           if (PREDICT_FALSE(ip0->ttl == 1))
2806             {
2807               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2808               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2809                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2810                                            0);
2811               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2812               goto trace00;
2813             }
2814
2815           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2816
2817           if (PREDICT_FALSE (proto0 == ~0))
2818               goto trace00;
2819
2820           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2821             {
2822               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2823                                   rx_fib_index0, node, next0, ~0, 0, 0);
2824               goto trace00;
2825             }
2826
2827           key0.addr = ip0->dst_address;
2828           key0.port = udp0->dst_port;
2829           key0.fib_index = rx_fib_index0;
2830
2831           if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
2832             {
2833               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2834               goto trace00;
2835             }
2836
2837           new_addr0 = sm0.addr.as_u32;
2838           new_port0 = sm0.port;
2839           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2840           old_addr0 = ip0->dst_address.as_u32;
2841           ip0->dst_address.as_u32 = new_addr0;
2842
2843           sum0 = ip0->checksum;
2844           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2845                                  ip4_header_t,
2846                                  dst_address /* changed member */);
2847           ip0->checksum = ip_csum_fold (sum0);
2848
2849           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
2850             {
2851                if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2852                 {
2853                   old_port0 = tcp0->dst_port;
2854                   tcp0->dst_port = new_port0;
2855
2856                   sum0 = tcp0->checksum;
2857                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2858                                          ip4_header_t,
2859                                          dst_address /* changed member */);
2860
2861                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2862                                          ip4_header_t /* cheat */,
2863                                          length /* changed member */);
2864                   tcp0->checksum = ip_csum_fold(sum0);
2865                 }
2866               else
2867                 {
2868                   old_port0 = udp0->dst_port;
2869                   udp0->dst_port = new_port0;
2870                   udp0->checksum = 0;
2871                 }
2872             }
2873           else
2874             {
2875               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2876                 {
2877                   sum0 = tcp0->checksum;
2878                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2879                                          ip4_header_t,
2880                                          dst_address /* changed member */);
2881
2882                   tcp0->checksum = ip_csum_fold(sum0);
2883                 }
2884             }
2885
2886         trace00:
2887
2888           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2889                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2890             {
2891               snat_out2in_trace_t *t =
2892                  vlib_add_trace (vm, node, b0, sizeof (*t));
2893               t->sw_if_index = sw_if_index0;
2894               t->next_index = next0;
2895             }
2896
2897           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2898
2899           /* verify speculative enqueue, maybe switch current next frame */
2900           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2901                                            to_next, n_left_to_next,
2902                                            bi0, next0);
2903         }
2904
2905       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2906     }
2907
2908   vlib_node_increment_counter (vm, snat_out2in_fast_node.index,
2909                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2910                                pkts_processed);
2911   return frame->n_vectors;
2912 }
2913
2914 VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
2915   .function = snat_out2in_fast_node_fn,
2916   .name = "nat44-out2in-fast",
2917   .vector_size = sizeof (u32),
2918   .format_trace = format_snat_out2in_fast_trace,
2919   .type = VLIB_NODE_TYPE_INTERNAL,
2920
2921   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2922   .error_strings = snat_out2in_error_strings,
2923
2924   .runtime_data_bytes = sizeof (snat_runtime_t),
2925
2926   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
2927
2928   /* edit / add dispositions here */
2929   .next_nodes = {
2930     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2931     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
2932     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2933     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
2934   },
2935 };
2936 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn);