NAT44 - unknown protocols work with forwarding
[vpp.git] / src / plugins / nat / out2in.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/udp/udp.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <nat/nat.h>
26 #include <nat/nat_ipfix_logging.h>
27 #include <nat/nat_det.h>
28 #include <nat/nat_reass.h>
29
30 #include <vppinfra/hash.h>
31 #include <vppinfra/error.h>
32 #include <vppinfra/elog.h>
33
34 typedef struct {
35   u32 sw_if_index;
36   u32 next_index;
37   u32 session_index;
38 } snat_out2in_trace_t;
39
40 typedef struct {
41   u32 next_worker_index;
42   u8 do_handoff;
43 } snat_out2in_worker_handoff_trace_t;
44
45 /* packet trace format function */
46 static u8 * format_snat_out2in_trace (u8 * s, va_list * args)
47 {
48   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
49   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
50   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
51
52   s = format (s, "NAT44_OUT2IN: sw_if_index %d, next index %d, session index %d",
53               t->sw_if_index, t->next_index, t->session_index);
54   return s;
55 }
56
57 static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args)
58 {
59   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
60   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
61   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
62
63   s = format (s, "NAT44_OUT2IN_FAST: sw_if_index %d, next index %d",
64               t->sw_if_index, t->next_index);
65   return s;
66 }
67
68 static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args)
69 {
70   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
71   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
72   snat_out2in_worker_handoff_trace_t * t =
73     va_arg (*args, snat_out2in_worker_handoff_trace_t *);
74   char * m;
75
76   m = t->do_handoff ? "next worker" : "same worker";
77   s = format (s, "NAT44_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
78
79   return s;
80 }
81
82 typedef struct {
83   u32 sw_if_index;
84   u32 next_index;
85   u8 cached;
86 } nat44_out2in_reass_trace_t;
87
88 static u8 * format_nat44_out2in_reass_trace (u8 * s, va_list * args)
89 {
90   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
91   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
92   nat44_out2in_reass_trace_t * t = va_arg (*args, nat44_out2in_reass_trace_t *);
93
94   s = format (s, "NAT44_OUT2IN_REASS: sw_if_index %d, next index %d, status %s",
95               t->sw_if_index, t->next_index,
96               t->cached ? "cached" : "translated");
97
98   return s;
99 }
100
101 vlib_node_registration_t snat_out2in_node;
102 vlib_node_registration_t snat_out2in_fast_node;
103 vlib_node_registration_t snat_out2in_worker_handoff_node;
104 vlib_node_registration_t snat_det_out2in_node;
105 vlib_node_registration_t nat44_out2in_reass_node;
106
/* X-macro list of out2in errors: each entry is _(SYMBOL, "description").
   The expanding site defines _() to pick the part it needs. */
#define foreach_snat_out2in_error                               \
  _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")               \
  _(OUT2IN_PACKETS, "Good out2in packets processed")            \
  _(OUT_OF_PORTS, "Out of ports")                               \
  _(BAD_ICMP_TYPE, "unsupported ICMP type")                     \
  _(NO_TRANSLATION, "No translation")                           \
  _(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")         \
  _(DROP_FRAGMENT, "Drop fragment")                             \
  _(MAX_REASS, "Maximum reassemblies exceeded")                 \
  _(MAX_FRAG, "Maximum fragments per reassembly exceeded")
117
118 typedef enum {
119 #define _(sym,str) SNAT_OUT2IN_ERROR_##sym,
120   foreach_snat_out2in_error
121 #undef _
122   SNAT_OUT2IN_N_ERROR,
123 } snat_out2in_error_t;
124
125 static char * snat_out2in_error_strings[] = {
126 #define _(sym,string) string,
127   foreach_snat_out2in_error
128 #undef _
129 };
130
/* Next-node indices used by the out2in code; SNAT_OUT2IN_N_NEXT is the
   number of entries. */
typedef enum
{
  SNAT_OUT2IN_NEXT_DROP,
  SNAT_OUT2IN_NEXT_LOOKUP,
  SNAT_OUT2IN_NEXT_ICMP_ERROR,
  SNAT_OUT2IN_NEXT_REASS,
  SNAT_OUT2IN_N_NEXT,
} snat_out2in_next_t;
138
139 /**
140  * @brief Create session for static mapping.
141  *
142  * Create NAT session initiated by host from external network with static
143  * mapping.
144  *
145  * @param sm     NAT main.
146  * @param b0     Vlib buffer.
147  * @param in2out In2out NAT44 session key.
148  * @param out2in Out2in NAT44 session key.
149  * @param node   Vlib node.
150  *
151  * @returns SNAT session if successfully created otherwise 0.
152  */
153 static inline snat_session_t *
154 create_session_for_static_mapping (snat_main_t *sm,
155                                    vlib_buffer_t *b0,
156                                    snat_session_key_t in2out,
157                                    snat_session_key_t out2in,
158                                    vlib_node_runtime_t * node,
159                                    u32 thread_index)
160 {
161   snat_user_t *u;
162   snat_session_t *s;
163   clib_bihash_kv_8_8_t kv0;
164   ip4_header_t *ip0;
165   udp_header_t *udp0;
166
167   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
168     {
169       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
170       return 0;
171     }
172
173   ip0 = vlib_buffer_get_current (b0);
174   udp0 = ip4_next_header (ip0);
175
176   u = nat_user_get_or_create (sm, &in2out.addr, in2out.fib_index, thread_index);
177   if (!u)
178     {
179       clib_warning ("create NAT user failed");
180       return 0;
181     }
182
183   s = nat_session_alloc_or_recycle (sm, u, thread_index);
184   if (!s)
185     {
186       clib_warning ("create NAT session failed");
187       return 0;
188     }
189
190   s->outside_address_index = ~0;
191   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
192   s->ext_host_addr.as_u32 = ip0->src_address.as_u32;
193   s->ext_host_port = udp0->src_port;
194   u->nstaticsessions++;
195   s->in2out = in2out;
196   s->out2in = out2in;
197   s->in2out.protocol = out2in.protocol;
198
199   /* Add to translation hashes */
200   kv0.key = s->in2out.as_u64;
201   kv0.value = s - sm->per_thread_data[thread_index].sessions;
202   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
203                                1 /* is_add */))
204       clib_warning ("in2out key add failed");
205
206   kv0.key = s->out2in.as_u64;
207
208   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
209                                1 /* is_add */))
210       clib_warning ("out2in key add failed");
211
212   /* log NAT event */
213   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
214                                       s->out2in.addr.as_u32,
215                                       s->in2out.protocol,
216                                       s->in2out.port,
217                                       s->out2in.port,
218                                       s->in2out.fib_index);
219    return s;
220 }
221
222 static_always_inline
223 snat_out2in_error_t icmp_get_key(ip4_header_t *ip0,
224                                  snat_session_key_t *p_key0)
225 {
226   icmp46_header_t *icmp0;
227   snat_session_key_t key0;
228   icmp_echo_header_t *echo0, *inner_echo0 = 0;
229   ip4_header_t *inner_ip0;
230   void *l4_header = 0;
231   icmp46_header_t *inner_icmp0;
232
233   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
234   echo0 = (icmp_echo_header_t *)(icmp0+1);
235
236   if (!icmp_is_error_message (icmp0))
237     {
238       key0.protocol = SNAT_PROTOCOL_ICMP;
239       key0.addr = ip0->dst_address;
240       key0.port = echo0->identifier;
241     }
242   else
243     {
244       inner_ip0 = (ip4_header_t *)(echo0+1);
245       l4_header = ip4_next_header (inner_ip0);
246       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
247       key0.addr = inner_ip0->src_address;
248       switch (key0.protocol)
249         {
250         case SNAT_PROTOCOL_ICMP:
251           inner_icmp0 = (icmp46_header_t*)l4_header;
252           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
253           key0.port = inner_echo0->identifier;
254           break;
255         case SNAT_PROTOCOL_UDP:
256         case SNAT_PROTOCOL_TCP:
257           key0.port = ((tcp_udp_header_t*)l4_header)->src_port;
258           break;
259         default:
260           return SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL;
261         }
262     }
263   *p_key0 = key0;
264   return -1; /* success */
265 }
266
267 static_always_inline int
268 icmp_get_ed_key(ip4_header_t *ip0, nat_ed_ses_key_t *p_key0)
269 {
270   icmp46_header_t *icmp0;
271   nat_ed_ses_key_t key0;
272   icmp_echo_header_t *echo0, *inner_echo0 = 0;
273   ip4_header_t *inner_ip0;
274   void *l4_header = 0;
275   icmp46_header_t *inner_icmp0;
276
277   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
278   echo0 = (icmp_echo_header_t *)(icmp0+1);
279
280   if (!icmp_is_error_message (icmp0))
281     {
282       key0.proto = IP_PROTOCOL_ICMP;
283       key0.l_addr = ip0->dst_address;
284       key0.r_addr = ip0->src_address;
285       key0.l_port = key0.r_port = echo0->identifier;
286     }
287   else
288     {
289       inner_ip0 = (ip4_header_t *)(echo0+1);
290       l4_header = ip4_next_header (inner_ip0);
291       key0.proto = inner_ip0->protocol;
292       key0.l_addr = inner_ip0->src_address;
293       key0.r_addr = inner_ip0->dst_address;
294       switch (ip_proto_to_snat_proto (inner_ip0->protocol))
295         {
296         case SNAT_PROTOCOL_ICMP:
297           inner_icmp0 = (icmp46_header_t*)l4_header;
298           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
299           key0.l_port = key0.r_port = inner_echo0->identifier;
300           break;
301         case SNAT_PROTOCOL_UDP:
302         case SNAT_PROTOCOL_TCP:
303           key0.l_port = ((tcp_udp_header_t*)l4_header)->src_port;
304           key0.r_port = ((tcp_udp_header_t*)l4_header)->dst_port;
305           break;
306         default:
307           return -1;
308         }
309     }
310   *p_key0 = key0;
311   return 0;
312 }
313
314 static void
315 create_bypass_for_fwd(snat_main_t * sm, ip4_header_t * ip)
316 {
317   nat_ed_ses_key_t key;
318   clib_bihash_kv_16_8_t kv;
319   udp_header_t *udp;
320
321   if (ip->protocol == IP_PROTOCOL_ICMP)
322     {
323       if (icmp_get_ed_key (ip, &key))
324         return;
325     }
326   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
327     {
328       udp = ip4_next_header(ip);
329       key.r_addr = ip->src_address;
330       key.l_addr = ip->dst_address;
331       key.proto = ip->protocol;
332       key.l_port = udp->dst_port;
333       key.r_port = udp->src_port;
334     }
335   else
336     {
337       key.r_addr = ip->src_address;
338       key.l_addr = ip->dst_address;
339       key.proto = ip->protocol;
340       key.l_port = key.r_port = 0;
341     }
342   key.fib_index = 0;
343   kv.key[0] = key.as_u64[0];
344   kv.key[1] = key.as_u64[1];
345   kv.value = ~0ULL;
346
347   if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &kv, 1))
348     clib_warning ("in2out_ed key add failed");
349 }
350
351 /**
352  * Get address and port values to be used for ICMP packet translation
353  * and create session if needed
354  *
355  * @param[in,out] sm             NAT main
356  * @param[in,out] node           NAT node runtime
357  * @param[in] thread_index       thread index
358  * @param[in,out] b0             buffer containing packet to be translated
359  * @param[out] p_proto           protocol used for matching
360  * @param[out] p_value           address and port after NAT translation
361  * @param[out] p_dont_translate  if packet should not be translated
362  * @param d                      optional parameter
363  * @param e                      optional parameter
364  */
365 u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
366                            u32 thread_index, vlib_buffer_t *b0,
367                            ip4_header_t *ip0, u8 *p_proto,
368                            snat_session_key_t *p_value,
369                            u8 *p_dont_translate, void *d, void *e)
370 {
371   icmp46_header_t *icmp0;
372   u32 sw_if_index0;
373   u32 rx_fib_index0;
374   snat_session_key_t key0;
375   snat_session_key_t sm0;
376   snat_session_t *s0 = 0;
377   u8 dont_translate = 0;
378   clib_bihash_kv_8_8_t kv0, value0;
379   u8 is_addr_only;
380   u32 next0 = ~0;
381   int err;
382
383   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
384   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
385   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
386
387   key0.protocol = 0;
388
389   err = icmp_get_key (ip0, &key0);
390   if (err != -1)
391     {
392       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
393       next0 = SNAT_OUT2IN_NEXT_DROP;
394       goto out;
395     }
396   key0.fib_index = rx_fib_index0;
397
398   kv0.key = key0.as_u64;
399
400   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
401                               &value0))
402     {
403       /* Try to match static mapping by external address and port,
404          destination address and port in packet */
405       if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0))
406         {
407           if (!sm->forwarding_enabled)
408             {
409               /* Don't NAT packet aimed at the intfc address */
410               if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
411                                                   ip0->dst_address.as_u32)))
412                 {
413                   dont_translate = 1;
414                   goto out;
415                 }
416               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
417               next0 = SNAT_OUT2IN_NEXT_DROP;
418               goto out;
419             }
420           else
421             {
422               create_bypass_for_fwd(sm, ip0);
423               dont_translate = 1;
424               goto out;
425             }
426         }
427
428       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
429                         (icmp0->type != ICMP4_echo_request || !is_addr_only)))
430         {
431           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
432           next0 = SNAT_OUT2IN_NEXT_DROP;
433           goto out;
434         }
435
436       /* Create session initiated by host from external network */
437       s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
438                                              node, thread_index);
439
440       if (!s0)
441         {
442           next0 = SNAT_OUT2IN_NEXT_DROP;
443           goto out;
444         }
445     }
446   else
447     {
448       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
449                         icmp0->type != ICMP4_echo_request &&
450                         !icmp_is_error_message (icmp0)))
451         {
452           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
453           next0 = SNAT_OUT2IN_NEXT_DROP;
454           goto out;
455         }
456
457       if (PREDICT_FALSE (value0.value == ~0ULL))
458         {
459           nat_ed_ses_key_t key;
460           clib_bihash_kv_16_8_t s_kv, s_value;
461
462           key.as_u64[0] = 0;
463           key.as_u64[1] = 0;
464           if (icmp_get_ed_key (ip0, &key))
465             {
466               b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
467               next0 = SNAT_OUT2IN_NEXT_DROP;
468               goto out;
469             }
470           key.fib_index = rx_fib_index0;
471           s_kv.key[0] = key.as_u64[0];
472           s_kv.key[1] = key.as_u64[1];
473           if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
474             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
475                                     s_value.value);
476           else
477            {
478               next0 = SNAT_OUT2IN_NEXT_DROP;
479               goto out;
480            }
481         }
482       else
483         s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
484                                 value0.value);
485     }
486
487 out:
488   *p_proto = key0.protocol;
489   if (s0)
490     *p_value = s0->in2out;
491   *p_dont_translate = dont_translate;
492   if (d)
493     *(snat_session_t**)d = s0;
494   return next0;
495 }
496
497 /**
498  * Get address and port values to be used for ICMP packet translation
499  *
500  * @param[in] sm                 NAT main
501  * @param[in,out] node           NAT node runtime
502  * @param[in] thread_index       thread index
503  * @param[in,out] b0             buffer containing packet to be translated
504  * @param[out] p_proto           protocol used for matching
505  * @param[out] p_value           address and port after NAT translation
506  * @param[out] p_dont_translate  if packet should not be translated
507  * @param d                      optional parameter
508  * @param e                      optional parameter
509  */
510 u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
511                            u32 thread_index, vlib_buffer_t *b0,
512                            ip4_header_t *ip0, u8 *p_proto,
513                            snat_session_key_t *p_value,
514                            u8 *p_dont_translate, void *d, void *e)
515 {
516   icmp46_header_t *icmp0;
517   u32 sw_if_index0;
518   u32 rx_fib_index0;
519   snat_session_key_t key0;
520   snat_session_key_t sm0;
521   u8 dont_translate = 0;
522   u8 is_addr_only;
523   u32 next0 = ~0;
524   int err;
525
526   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
527   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
528   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
529
530   err = icmp_get_key (ip0, &key0);
531   if (err != -1)
532     {
533       b0->error = node->errors[err];
534       next0 = SNAT_OUT2IN_NEXT_DROP;
535       goto out2;
536     }
537   key0.fib_index = rx_fib_index0;
538
539   if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0))
540     {
541       /* Don't NAT packet aimed at the intfc address */
542       if (is_interface_addr(sm, node, sw_if_index0, ip0->dst_address.as_u32))
543         {
544           dont_translate = 1;
545           goto out;
546         }
547       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
548       next0 = SNAT_OUT2IN_NEXT_DROP;
549       goto out;
550     }
551
552   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
553                     (icmp0->type != ICMP4_echo_request || !is_addr_only) &&
554                     !icmp_is_error_message (icmp0)))
555     {
556       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
557       next0 = SNAT_OUT2IN_NEXT_DROP;
558       goto out;
559     }
560
561 out:
562   *p_value = sm0;
563 out2:
564   *p_proto = key0.protocol;
565   *p_dont_translate = dont_translate;
566   return next0;
567 }
568
569 static inline u32 icmp_out2in (snat_main_t *sm,
570                                vlib_buffer_t * b0,
571                                ip4_header_t * ip0,
572                                icmp46_header_t * icmp0,
573                                u32 sw_if_index0,
574                                u32 rx_fib_index0,
575                                vlib_node_runtime_t * node,
576                                u32 next0,
577                                u32 thread_index,
578                                void *d,
579                                void *e)
580 {
581   snat_session_key_t sm0;
582   u8 protocol;
583   icmp_echo_header_t *echo0, *inner_echo0 = 0;
584   ip4_header_t *inner_ip0 = 0;
585   void *l4_header = 0;
586   icmp46_header_t *inner_icmp0;
587   u8 dont_translate;
588   u32 new_addr0, old_addr0;
589   u16 old_id0, new_id0;
590   ip_csum_t sum0;
591   u16 checksum0;
592   u32 next0_tmp;
593
594   echo0 = (icmp_echo_header_t *)(icmp0+1);
595
596   next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0, ip0,
597                                        &protocol, &sm0, &dont_translate, d, e);
598   if (next0_tmp != ~0)
599     next0 = next0_tmp;
600   if (next0 == SNAT_OUT2IN_NEXT_DROP || dont_translate)
601     goto out;
602
603   sum0 = ip_incremental_checksum (0, icmp0,
604                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
605   checksum0 = ~ip_csum_fold (sum0);
606   if (checksum0 != 0 && checksum0 != 0xffff)
607     {
608       next0 = SNAT_OUT2IN_NEXT_DROP;
609       goto out;
610     }
611
612   old_addr0 = ip0->dst_address.as_u32;
613   new_addr0 = ip0->dst_address.as_u32 = sm0.addr.as_u32;
614   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
615
616   sum0 = ip0->checksum;
617   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
618                          dst_address /* changed member */);
619   ip0->checksum = ip_csum_fold (sum0);
620
621   if (!icmp_is_error_message (icmp0))
622     {
623       new_id0 = sm0.port;
624       if (PREDICT_FALSE(new_id0 != echo0->identifier))
625         {
626           old_id0 = echo0->identifier;
627           new_id0 = sm0.port;
628           echo0->identifier = new_id0;
629
630           sum0 = icmp0->checksum;
631           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
632                                  identifier /* changed member */);
633           icmp0->checksum = ip_csum_fold (sum0);
634         }
635     }
636   else
637     {
638       inner_ip0 = (ip4_header_t *)(echo0+1);
639       l4_header = ip4_next_header (inner_ip0);
640
641       if (!ip4_header_checksum_is_valid (inner_ip0))
642         {
643           next0 = SNAT_OUT2IN_NEXT_DROP;
644           goto out;
645         }
646
647       old_addr0 = inner_ip0->src_address.as_u32;
648       inner_ip0->src_address = sm0.addr;
649       new_addr0 = inner_ip0->src_address.as_u32;
650
651       sum0 = icmp0->checksum;
652       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
653                              src_address /* changed member */);
654       icmp0->checksum = ip_csum_fold (sum0);
655
656       switch (protocol)
657         {
658         case SNAT_PROTOCOL_ICMP:
659           inner_icmp0 = (icmp46_header_t*)l4_header;
660           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
661
662           old_id0 = inner_echo0->identifier;
663           new_id0 = sm0.port;
664           inner_echo0->identifier = new_id0;
665
666           sum0 = icmp0->checksum;
667           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
668                                  identifier);
669           icmp0->checksum = ip_csum_fold (sum0);
670           break;
671         case SNAT_PROTOCOL_UDP:
672         case SNAT_PROTOCOL_TCP:
673           old_id0 = ((tcp_udp_header_t*)l4_header)->src_port;
674           new_id0 = sm0.port;
675           ((tcp_udp_header_t*)l4_header)->src_port = new_id0;
676
677           sum0 = icmp0->checksum;
678           sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
679                                  src_port);
680           icmp0->checksum = ip_csum_fold (sum0);
681           break;
682         default:
683           ASSERT(0);
684         }
685     }
686
687 out:
688   return next0;
689 }
690
691
692 static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
693                                          vlib_buffer_t * b0,
694                                          ip4_header_t * ip0,
695                                          icmp46_header_t * icmp0,
696                                          u32 sw_if_index0,
697                                          u32 rx_fib_index0,
698                                          vlib_node_runtime_t * node,
699                                          u32 next0, f64 now,
700                                          u32 thread_index,
701                                          snat_session_t ** p_s0)
702 {
703   next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
704                       next0, thread_index, p_s0, 0);
705   snat_session_t * s0 = *p_s0;
706   if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
707     {
708       /* Accounting */
709       s0->last_heard = now;
710       s0->total_pkts++;
711       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
712       /* Per-user LRU list maintenance */
713       clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
714                          s0->per_user_index);
715       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
716                           s0->per_user_list_head_index,
717                           s0->per_user_index);
718     }
719   return next0;
720 }
721
722 static snat_session_t *
723 snat_out2in_unknown_proto (snat_main_t *sm,
724                            vlib_buffer_t * b,
725                            ip4_header_t * ip,
726                            u32 rx_fib_index,
727                            u32 thread_index,
728                            f64 now,
729                            vlib_main_t * vm,
730                            vlib_node_runtime_t * node)
731 {
732   clib_bihash_kv_8_8_t kv, value;
733   clib_bihash_kv_16_8_t s_kv, s_value;
734   snat_static_mapping_t *m;
735   snat_session_key_t m_key;
736   u32 old_addr, new_addr;
737   ip_csum_t sum;
738   nat_ed_ses_key_t key;
739   snat_session_t * s;
740   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
741   snat_user_t *u;
742
743   old_addr = ip->dst_address.as_u32;
744
745   key.l_addr = ip->dst_address;
746   key.r_addr = ip->src_address;
747   key.fib_index = rx_fib_index;
748   key.proto = ip->protocol;
749   key.r_port = 0;
750   key.l_port = 0;
751   s_kv.key[0] = key.as_u64[0];
752   s_kv.key[1] = key.as_u64[1];
753
754   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
755     {
756       s = pool_elt_at_index (tsm->sessions, s_value.value);
757       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
758     }
759   else
760     {
761       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
762         {
763           b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
764           return 0;
765         }
766
767       m_key.addr = ip->dst_address;
768       m_key.port = 0;
769       m_key.protocol = 0;
770       m_key.fib_index = rx_fib_index;
771       kv.key = m_key.as_u64;
772       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
773         {
774           b->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
775           return 0;
776         }
777
778       m = pool_elt_at_index (sm->static_mappings, value.value);
779
780       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
781
782       u = nat_user_get_or_create (sm, &ip->src_address, m->fib_index,
783                                   thread_index);
784       if (!u)
785         {
786           clib_warning ("create NAT user failed");
787           return 0;
788         }
789
790       /* Create a new session */
791       s = nat_session_alloc_or_recycle (sm, u, thread_index);
792       if (!s)
793         {
794           clib_warning ("create NAT session failed");
795           return 0;
796         }
797
798       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
799       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
800       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
801       s->outside_address_index = ~0;
802       s->out2in.addr.as_u32 = old_addr;
803       s->out2in.fib_index = rx_fib_index;
804       s->in2out.addr.as_u32 = new_addr;
805       s->in2out.fib_index = m->fib_index;
806       s->in2out.port = s->out2in.port = ip->protocol;
807       u->nstaticsessions++;
808
809       /* Add to lookup tables */
810       s_kv.value = s - tsm->sessions;
811       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
812         clib_warning ("out2in key add failed");
813
814       key.l_addr = ip->dst_address;
815       key.fib_index = m->fib_index;
816       s_kv.key[0] = key.as_u64[0];
817       s_kv.key[1] = key.as_u64[1];
818       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
819         clib_warning ("in2out key add failed");
820    }
821
822   /* Update IP checksum */
823   sum = ip->checksum;
824   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
825   ip->checksum = ip_csum_fold (sum);
826
827   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
828
829   /* Accounting */
830   s->last_heard = now;
831   s->total_pkts++;
832   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
833   /* Per-user LRU list maintenance */
834   clib_dlist_remove (tsm->list_pool, s->per_user_index);
835   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
836                       s->per_user_index);
837
838   return s;
839 }
840
841 static snat_session_t *
842 snat_out2in_lb (snat_main_t *sm,
843                 vlib_buffer_t * b,
844                 ip4_header_t * ip,
845                 u32 rx_fib_index,
846                 u32 thread_index,
847                 f64 now,
848                 vlib_main_t * vm,
849                 vlib_node_runtime_t * node)
850 {
851   nat_ed_ses_key_t key;
852   clib_bihash_kv_16_8_t s_kv, s_value;
853   udp_header_t *udp = ip4_next_header (ip);
854   tcp_header_t *tcp = (tcp_header_t *) udp;
855   snat_session_t *s = 0;
856   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
857   snat_session_key_t e_key, l_key;
858   u32 old_addr, new_addr;
859   u32 proto = ip_proto_to_snat_proto (ip->protocol);
860   u16 new_port, old_port;
861   ip_csum_t sum;
862   snat_user_t *u;
863   u32 address_index;
864   snat_session_key_t eh_key;
865   u8 twice_nat;
866
867   old_addr = ip->dst_address.as_u32;
868
869   key.l_addr = ip->dst_address;
870   key.r_addr = ip->src_address;
871   key.fib_index = rx_fib_index;
872   key.proto = ip->protocol;
873   key.r_port = udp->src_port;
874   key.l_port = udp->dst_port;
875   s_kv.key[0] = key.as_u64[0];
876   s_kv.key[1] = key.as_u64[1];
877
878   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
879     {
880       s = pool_elt_at_index (tsm->sessions, s_value.value);
881     }
882   else
883     {
884       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
885         {
886           b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
887           return 0;
888         }
889
890       e_key.addr = ip->dst_address;
891       e_key.port = udp->dst_port;
892       e_key.protocol = proto;
893       e_key.fib_index = rx_fib_index;
894       if (snat_static_mapping_match(sm, e_key, &l_key, 1, 0, &twice_nat))
895         return 0;
896
897       u = nat_user_get_or_create (sm, &l_key.addr, l_key.fib_index,
898                                   thread_index);
899       if (!u)
900       {
901         clib_warning ("create NAT user failed");
902         return 0;
903       }
904
905       s = nat_session_alloc_or_recycle (sm, u, thread_index);
906       if (!s)
907         {
908           clib_warning ("create NAT session failed");
909           return 0;
910         }
911
912       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
913       s->ext_host_port = udp->src_port;
914       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
915       s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
916       s->outside_address_index = ~0;
917       s->out2in = e_key;
918       s->in2out = l_key;
919       u->nstaticsessions++;
920
921       /* Add to lookup tables */
922       s_kv.value = s - tsm->sessions;
923       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
924         clib_warning ("out2in-ed key add failed");
925
926       if (twice_nat)
927         {
928           eh_key.protocol = proto;
929           if (snat_alloc_outside_address_and_port (sm->twice_nat_addresses, 0,
930                                                    thread_index, &eh_key,
931                                                    &address_index,
932                                                    sm->port_per_thread,
933                                                    sm->per_thread_data[thread_index].snat_thread_index))
934             {
935               b->error = node->errors[SNAT_OUT2IN_ERROR_OUT_OF_PORTS];
936               return 0;
937             }
938           key.r_addr.as_u32 = s->ext_host_nat_addr.as_u32 = eh_key.addr.as_u32;
939           key.r_port = s->ext_host_nat_port = eh_key.port;
940           s->flags |= SNAT_SESSION_FLAG_TWICE_NAT;
941         }
942       key.l_addr = l_key.addr;
943       key.fib_index = l_key.fib_index;
944       key.l_port = l_key.port;
945       s_kv.key[0] = key.as_u64[0];
946       s_kv.key[1] = key.as_u64[1];
947       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
948         clib_warning ("in2out-ed key add failed");
949     }
950
951   new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
952
953   /* Update IP checksum */
954   sum = ip->checksum;
955   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
956   if (is_twice_nat_session (s))
957     sum = ip_csum_update (sum, ip->src_address.as_u32,
958                           s->ext_host_nat_addr.as_u32, ip4_header_t,
959                           src_address);
960   ip->checksum = ip_csum_fold (sum);
961
962   if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
963     {
964       old_port = tcp->dst_port;
965       tcp->dst_port = s->in2out.port;
966       new_port = tcp->dst_port;
967
968       sum = tcp->checksum;
969       sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
970       sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
971       if (is_twice_nat_session (s))
972         {
973           sum = ip_csum_update (sum, ip->src_address.as_u32,
974                                 s->ext_host_nat_addr.as_u32, ip4_header_t,
975                                 dst_address);
976           sum = ip_csum_update (sum, tcp->src_port, s->ext_host_nat_port,
977                                 ip4_header_t, length);
978           tcp->src_port = s->ext_host_nat_port;
979           ip->src_address.as_u32 = s->ext_host_nat_addr.as_u32;
980         }
981       tcp->checksum = ip_csum_fold(sum);
982     }
983   else
984     {
985       udp->dst_port = s->in2out.port;
986       if (is_twice_nat_session (s))
987         {
988           udp->src_port = s->ext_host_nat_port;
989           ip->src_address.as_u32 = s->ext_host_nat_addr.as_u32;
990         }
991       udp->checksum = 0;
992     }
993
994   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
995
996   /* Accounting */
997   s->last_heard = now;
998   s->total_pkts++;
999   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1000   /* Per-user LRU list maintenance */
1001   clib_dlist_remove (tsm->list_pool, s->per_user_index);
1002   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
1003                       s->per_user_index);
1004
1005   return s;
1006 }
1007
/*
 * Node function for the NAT44 out2in path.
 *
 * Walks the incoming frame (two buffers at a time while at least four remain,
 * then one at a time) and, for each IPv4 packet, decides the next node and
 * rewrites the destination address/port to the session's in2out values:
 *
 *  - ip->ttl == 1: an ICMP time-exceeded error is set on the buffer and the
 *    packet goes to SNAT_OUT2IN_NEXT_ICMP_ERROR.
 *  - ip_proto_to_snat_proto() yields ~0: handled by
 *    snat_out2in_unknown_proto(); the packet is dropped only when no session
 *    is returned AND sm->forwarding_enabled is not set.
 *  - ICMP: next node is chosen by icmp_out2in_slow_path().
 *  - IPv4 fragment: sent to SNAT_OUT2IN_NEXT_REASS.
 *  - otherwise (TCP/UDP): the per-thread out2in hash is searched by
 *    (dst address, dst port, protocol, rx fib index). On a miss a static
 *    mapping is tried; with no mapping the packet is dropped (DHCP-to-client
 *    UDP is passed on via vnet_feature_next instead) unless forwarding is
 *    enabled, in which case create_bypass_for_fwd() is called and the packet
 *    continues to SNAT_OUT2IN_NEXT_LOOKUP untranslated.
 *
 * For translated packets the IP checksum and TCP checksum are updated
 * incrementally, the UDP checksum is set to 0, and the session's accounting
 * fields and per-user LRU position are refreshed.
 *
 * vm    - vlib main, used for buffer access, time and tracing.
 * node  - node runtime; supplies the cached next index, error counters and
 *         the trace flag.
 * frame - frame holding the buffer indices to process.
 *
 * Returns frame->n_vectors. Also adds the number of packets whose next node
 * is not SNAT_OUT2IN_NEXT_DROP to the SNAT_OUT2IN_ERROR_OUT2IN_PACKETS
 * counter.
 */
1008 static uword
1009 snat_out2in_node_fn (vlib_main_t * vm,
1010                   vlib_node_runtime_t * node,
1011                   vlib_frame_t * frame)
1012 {
1013   u32 n_left_from, * from, * to_next;
1014   snat_out2in_next_t next_index;
1015   u32 pkts_processed = 0;
1016   snat_main_t * sm = &snat_main;
       /* Time is sampled once per frame and reused as last_heard for every
          session touched below. */
1017   f64 now = vlib_time_now (vm);
1018   u32 thread_index = vlib_get_thread_index ();
1019
1020   from = vlib_frame_vector_args (frame);
1021   n_left_from = frame->n_vectors;
1022   next_index = node->cached_next_index;
1023
1024   while (n_left_from > 0)
1025     {
1026       u32 n_left_to_next;
1027
1028       vlib_get_next_frame (vm, node, next_index,
1029                            to_next, n_left_to_next);
1030
           /* Dual loop: handles from[0] and from[1]; requires four buffers
              left because from[2] and from[3] are prefetched. */
1031       while (n_left_from >= 4 && n_left_to_next >= 2)
1032         {
1033           u32 bi0, bi1;
1034           vlib_buffer_t * b0, * b1;
               /* Default disposition: continue to ip4-lookup. */
1035           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1036           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
1037           u32 sw_if_index0, sw_if_index1;
1038           ip4_header_t * ip0, *ip1;
1039           ip_csum_t sum0, sum1;
1040           u32 new_addr0, old_addr0;
1041           u16 new_port0, old_port0;
1042           u32 new_addr1, old_addr1;
1043           u16 new_port1, old_port1;
1044           udp_header_t * udp0, * udp1;
1045           tcp_header_t * tcp0, * tcp1;
1046           icmp46_header_t * icmp0, * icmp1;
1047           snat_session_key_t key0, key1, sm0, sm1;
1048           u32 rx_fib_index0, rx_fib_index1;
1049           u32 proto0, proto1;
               /* s0/s1 stay NULL on every path that does not find or create a
                  session; the trace code relies on that. */
1050           snat_session_t * s0 = 0, * s1 = 0;
1051           clib_bihash_kv_8_8_t kv0, kv1, value0, value1;
1052
1053           /* Prefetch next iteration. */
1054           {
1055             vlib_buffer_t * p2, * p3;
1056
1057             p2 = vlib_get_buffer (vm, from[2]);
1058             p3 = vlib_get_buffer (vm, from[3]);
1059
1060             vlib_prefetch_buffer_header (p2, LOAD);
1061             vlib_prefetch_buffer_header (p3, LOAD);
1062
1063             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1064             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1065           }
1066
1067           /* speculatively enqueue b0 and b1 to the current next frame */
1068           to_next[0] = bi0 = from[0];
1069           to_next[1] = bi1 = from[1];
1070           from += 2;
1071           to_next += 2;
1072           n_left_from -= 2;
1073           n_left_to_next -= 2;
1074
1075           b0 = vlib_get_buffer (vm, bi0);
1076           b1 = vlib_get_buffer (vm, bi1);
1077
1078           vnet_buffer (b0)->snat.flags = 0;
1079           vnet_buffer (b1)->snat.flags = 0;
1080
               /* ---- first packet (b0) ---- */
               /* udp0, tcp0 and icmp0 all alias the start of the L4 header;
                  which one is used depends on proto0. */
1081           ip0 = vlib_buffer_get_current (b0);
1082           udp0 = ip4_next_header (ip0);
1083           tcp0 = (tcp_header_t *) udp0;
1084           icmp0 = (icmp46_header_t *) udp0;
1085
1086           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1087           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1088                                    sw_if_index0);
1089
               /* TTL of 1: hand the packet to the ICMP error node with a
                  time-exceeded error recorded on the buffer. */
1090           if (PREDICT_FALSE(ip0->ttl == 1))
1091             {
1092               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1093               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1094                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1095                                            0);
1096               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1097               goto trace0;
1098             }
1099
1100           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1101
               /* Protocol without a NAT protocol mapping (~0): a missing
                  session only causes a drop when forwarding is disabled. */
1102           if (PREDICT_FALSE (proto0 == ~0))
1103             {
1104               s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
1105                                              thread_index, now, vm, node);
1106               if (!sm->forwarding_enabled)
1107                 if (!s0)
1108                   next0 = SNAT_OUT2IN_NEXT_DROP;
1109               goto trace0;
1110             }
1111
               /* ICMP is fully handled (including choice of next node) by the
                  slow-path helper. */
1112           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1113             {
1114               next0 = icmp_out2in_slow_path
1115                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1116                  next0, now, thread_index, &s0);
1117               goto trace0;
1118             }
1119
               /* Fragments are not translated here; they go to the
                  reassembly node. */
1120           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
1121             {
1122               next0 = SNAT_OUT2IN_NEXT_REASS;
1123               goto trace0;
1124             }
1125
               /* Session lookup key: destination address/port as seen on the
                  wire, NAT protocol and RX fib index. */
1126           key0.addr = ip0->dst_address;
1127           key0.port = udp0->dst_port;
1128           key0.protocol = proto0;
1129           key0.fib_index = rx_fib_index0;
1130
1131           kv0.key = key0.as_u64;
1132
               /* Non-zero return from the search means no session exists. */
1133           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1134                                       &kv0, &value0))
1135             {
1136               /* Try to match static mapping by external address and port,
1137                  destination address and port in packet */
                   /* Non-zero return here means no static mapping matched. */
1138               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
1139                 {
1140                   if (!sm->forwarding_enabled)
1141                     {
1142                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1143                       /*
1144                        * Send DHCP packets to the ipv4 stack, or we won't
1145                        * be able to use dhcp client on the outside interface
1146                        */
1147                       if (PREDICT_TRUE (proto0 != SNAT_PROTOCOL_UDP
1148                           || (udp0->dst_port
1149                               != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
1150                         next0 = SNAT_OUT2IN_NEXT_DROP;
1151                       else
1152                         vnet_feature_next
1153                           (vnet_buffer (b0)->sw_if_index[VLIB_RX],
1154                            &next0, b0);
1155                       goto trace0;
1156                     }
1157                   else
1158                     {
                           /* Forwarding enabled: packet continues untranslated
                              with next0 unchanged (SNAT_OUT2IN_NEXT_LOOKUP). */
1159                       create_bypass_for_fwd(sm, ip0);
1160                       goto trace0;
1161                     }
1162                 }
1163
1164               /* Create session initiated by host from external network */
1165               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1166                                                      thread_index);
1167               if (!s0)
1168                 {
1169                   next0 = SNAT_OUT2IN_NEXT_DROP;
1170                   goto trace0;
1171                 }
1172             }
1173           else
1174             {
                   /* A stored value of ~0ULL routes the packet to
                      snat_out2in_lb(), which performs its own rewrite;
                      presumably this marks a load-balancing static
                      mapping - TODO confirm where the value is inserted. */
1175               if (PREDICT_FALSE (value0.value == ~0ULL))
1176                 {
1177                   s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index,
1178                                       now, vm, node);
1179                   if (!s0)
1180                     next0 = SNAT_OUT2IN_NEXT_DROP;
1181                   goto trace0;
1182                 }
1183               else
1184                 {
                       /* Otherwise the value is the session's index in the
                          per-thread session pool. */
1185                   s0 = pool_elt_at_index (
1186                     sm->per_thread_data[thread_index].sessions,
1187                     value0.value);
1188                 }
1189             }
1190
               /* Rewrite destination address to the inside address and steer
                  the packet to the session's in2out fib. */
1191           old_addr0 = ip0->dst_address.as_u32;
1192           ip0->dst_address = s0->in2out.addr;
1193           new_addr0 = ip0->dst_address.as_u32;
1194           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1195
               /* Incremental IP header checksum update for the changed
                  destination address. */
1196           sum0 = ip0->checksum;
1197           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1198                                  ip4_header_t,
1199                                  dst_address /* changed member */);
1200           ip0->checksum = ip_csum_fold (sum0);
1201
1202           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1203             {
1204               old_port0 = tcp0->dst_port;
1205               tcp0->dst_port = s0->in2out.port;
1206               new_port0 = tcp0->dst_port;
1207
                   /* TCP checksum covers the pseudo-header address and the
                      port, so both changes are folded in. */
1208               sum0 = tcp0->checksum;
1209               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1210                                      ip4_header_t,
1211                                      dst_address /* changed member */);
1212
                   /* "cheat": ip4_header_t.length is named only to tell the
                      macro that a 16-bit field changed. */
1213               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1214                                      ip4_header_t /* cheat */,
1215                                      length /* changed member */);
1216               tcp0->checksum = ip_csum_fold(sum0);
1217             }
1218           else
1219             {
                   /* UDP: the checksum is cleared rather than updated.
                      NOTE(review): old_port0 is assigned here but not read
                      afterwards in this branch. */
1220               old_port0 = udp0->dst_port;
1221               udp0->dst_port = s0->in2out.port;
1222               udp0->checksum = 0;
1223             }
1224
1225           /* Accounting */
1226           s0->last_heard = now;
1227           s0->total_pkts++;
1228           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1229           /* Per-user LRU list maintenance */
               /* Move the session to the tail of its user's list. */
1230           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1231                              s0->per_user_index);
1232           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1233                               s0->per_user_list_head_index,
1234                               s0->per_user_index);
1235         trace0:
1236
               /* Record a trace entry when both the node and the buffer are
                  traced; session_index is ~0 when no session is associated. */
1237           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1238                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1239             {
1240               snat_out2in_trace_t *t =
1241                  vlib_add_trace (vm, node, b0, sizeof (*t));
1242               t->sw_if_index = sw_if_index0;
1243               t->next_index = next0;
1244               t->session_index = ~0;
1245               if (s0)
1246                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1247             }
1248
               /* Every packet not sent to the drop node counts as processed. */
1249           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1250
1251
               /* ---- second packet (b1): same steps as for b0 ---- */
1252           ip1 = vlib_buffer_get_current (b1);
1253           udp1 = ip4_next_header (ip1);
1254           tcp1 = (tcp_header_t *) udp1;
1255           icmp1 = (icmp46_header_t *) udp1;
1256
1257           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1258           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1259                                    sw_if_index1);
1260
1261           if (PREDICT_FALSE(ip1->ttl == 1))
1262             {
1263               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1264               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1265                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1266                                            0);
1267               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1268               goto trace1;
1269             }
1270
1271           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1272
1273           if (PREDICT_FALSE (proto1 == ~0))
1274             {
1275               s1 = snat_out2in_unknown_proto(sm, b1, ip1, rx_fib_index1,
1276                                              thread_index, now, vm, node);
1277               if (!sm->forwarding_enabled)
1278                 if (!s1)
1279                   next1 = SNAT_OUT2IN_NEXT_DROP;
1280               goto trace1;
1281             }
1282
1283           if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1284             {
1285               next1 = icmp_out2in_slow_path
1286                 (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1287                  next1, now, thread_index, &s1);
1288               goto trace1;
1289             }
1290
1291           if (PREDICT_FALSE (ip4_is_fragment (ip1)))
1292             {
1293               next1 = SNAT_OUT2IN_NEXT_REASS;
1294               goto trace1;
1295             }
1296
1297           key1.addr = ip1->dst_address;
1298           key1.port = udp1->dst_port;
1299           key1.protocol = proto1;
1300           key1.fib_index = rx_fib_index1;
1301
1302           kv1.key = key1.as_u64;
1303
1304           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1305                                       &kv1, &value1))
1306             {
1307               /* Try to match static mapping by external address and port,
1308                  destination address and port in packet */
1309               if (snat_static_mapping_match(sm, key1, &sm1, 1, 0, 0))
1310                 {
1311                   if (!sm->forwarding_enabled)
1312                     {
1313                       b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1314                       /*
1315                        * Send DHCP packets to the ipv4 stack, or we won't
1316                        * be able to use dhcp client on the outside interface
1317                        */
1318                       if (PREDICT_TRUE (proto1 != SNAT_PROTOCOL_UDP
1319                           || (udp1->dst_port
1320                               != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
1321                         next1 = SNAT_OUT2IN_NEXT_DROP;
1322                       else
1323                         vnet_feature_next
1324                           (vnet_buffer (b1)->sw_if_index[VLIB_RX],
1325                            &next1, b1);
1326                       goto trace1;
1327                     }
1328                   else
1329                     {
1330                       create_bypass_for_fwd(sm, ip1);
1331                       goto trace1;
1332                     }
1333                 }
1334
1335               /* Create session initiated by host from external network */
1336               s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
1337                                                      thread_index);
1338               if (!s1)
1339                 {
1340                   next1 = SNAT_OUT2IN_NEXT_DROP;
1341                   goto trace1;
1342                 }
1343             }
1344           else
1345             {
1346               if (PREDICT_FALSE (value1.value == ~0ULL))
1347                 {
1348                   s1 = snat_out2in_lb(sm, b1, ip1, rx_fib_index1, thread_index,
1349                                       now, vm, node);
1350                   if (!s1)
1351                     next1 = SNAT_OUT2IN_NEXT_DROP;
1352                   goto trace1;
1353                 }
1354               else
1355                 {
1356                   s1 = pool_elt_at_index (
1357                     sm->per_thread_data[thread_index].sessions,
1358                     value1.value);
1359                 }
1360             }
1361
1362           old_addr1 = ip1->dst_address.as_u32;
1363           ip1->dst_address = s1->in2out.addr;
1364           new_addr1 = ip1->dst_address.as_u32;
1365           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index;
1366
1367           sum1 = ip1->checksum;
1368           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1369                                  ip4_header_t,
1370                                  dst_address /* changed member */);
1371           ip1->checksum = ip_csum_fold (sum1);
1372
1373           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1374             {
1375               old_port1 = tcp1->dst_port;
1376               tcp1->dst_port = s1->in2out.port;
1377               new_port1 = tcp1->dst_port;
1378
1379               sum1 = tcp1->checksum;
1380               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1381                                      ip4_header_t,
1382                                      dst_address /* changed member */);
1383
1384               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1385                                      ip4_header_t /* cheat */,
1386                                      length /* changed member */);
1387               tcp1->checksum = ip_csum_fold(sum1);
1388             }
1389           else
1390             {
                   /* NOTE(review): as for b0, old_port1 is assigned but not
                      read afterwards in this branch. */
1391               old_port1 = udp1->dst_port;
1392               udp1->dst_port = s1->in2out.port;
1393               udp1->checksum = 0;
1394             }
1395
1396           /* Accounting */
1397           s1->last_heard = now;
1398           s1->total_pkts++;
1399           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1400           /* Per-user LRU list maintenance */
1401           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1402                              s1->per_user_index);
1403           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1404                               s1->per_user_list_head_index,
1405                               s1->per_user_index);
1406         trace1:
1407
1408           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1409                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1410             {
1411               snat_out2in_trace_t *t =
1412                  vlib_add_trace (vm, node, b1, sizeof (*t));
1413               t->sw_if_index = sw_if_index1;
1414               t->next_index = next1;
1415               t->session_index = ~0;
1416               if (s1)
1417                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1418             }
1419
1420           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
1421
1422           /* verify speculative enqueues, maybe switch current next frame */
1423           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1424                                            to_next, n_left_to_next,
1425                                            bi0, bi1, next0, next1);
1426         }
1427
           /* Single loop: handles the remaining buffers one at a time. */
1428       while (n_left_from > 0 && n_left_to_next > 0)
1429         {
1430           u32 bi0;
1431           vlib_buffer_t * b0;
1432           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1433           u32 sw_if_index0;
1434           ip4_header_t * ip0;
1435           ip_csum_t sum0;
1436           u32 new_addr0, old_addr0;
1437           u16 new_port0, old_port0;
1438           udp_header_t * udp0;
1439           tcp_header_t * tcp0;
1440           icmp46_header_t * icmp0;
1441           snat_session_key_t key0, sm0;
1442           u32 rx_fib_index0;
1443           u32 proto0;
1444           snat_session_t * s0 = 0;
1445           clib_bihash_kv_8_8_t kv0, value0;
1446
1447           /* speculatively enqueue b0 to the current next frame */
1448           bi0 = from[0];
1449           to_next[0] = bi0;
1450           from += 1;
1451           to_next += 1;
1452           n_left_from -= 1;
1453           n_left_to_next -= 1;
1454
1455           b0 = vlib_get_buffer (vm, bi0);
1456
1457           vnet_buffer (b0)->snat.flags = 0;
1458
1459           ip0 = vlib_buffer_get_current (b0);
1460           udp0 = ip4_next_header (ip0);
1461           tcp0 = (tcp_header_t *) udp0;
1462           icmp0 = (icmp46_header_t *) udp0;
1463
1464           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1465           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1466                                    sw_if_index0);
1467
1468           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1469
               /* NOTE(review): here the unknown-protocol check runs BEFORE the
                  TTL check, whereas the dual loop above checks TTL first. A
                  packet with TTL 1 and an unmapped protocol is therefore
                  handled differently depending on which loop sees it -
                  confirm whether this ordering difference is intended. */
1470           if (PREDICT_FALSE (proto0 == ~0))
1471             {
1472               s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
1473                                              thread_index, now, vm, node);
1474               if (!sm->forwarding_enabled)
1475                 if (!s0)
1476                   next0 = SNAT_OUT2IN_NEXT_DROP;
1477               goto trace00;
1478             }
1479
1480           if (PREDICT_FALSE(ip0->ttl == 1))
1481             {
1482               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1483               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1484                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1485                                            0);
1486               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1487               goto trace00;
1488             }
1489
1490           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1491             {
1492               next0 = icmp_out2in_slow_path
1493                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1494                  next0, now, thread_index, &s0);
1495               goto trace00;
1496             }
1497
1498           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
1499             {
1500               next0 = SNAT_OUT2IN_NEXT_REASS;
1501               goto trace00;
1502             }
1503
1504           key0.addr = ip0->dst_address;
1505           key0.port = udp0->dst_port;
1506           key0.protocol = proto0;
1507           key0.fib_index = rx_fib_index0;
1508
1509           kv0.key = key0.as_u64;
1510
1511           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1512                                       &kv0, &value0))
1513             {
1514               /* Try to match static mapping by external address and port,
1515                  destination address and port in packet */
1516               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
1517                 {
1518                   if (!sm->forwarding_enabled)
1519                     {
1520                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1521                       /*
1522                        * Send DHCP packets to the ipv4 stack, or we won't
1523                        * be able to use dhcp client on the outside interface
1524                        */
1525                       if (PREDICT_TRUE (proto0 != SNAT_PROTOCOL_UDP
1526                           || (udp0->dst_port
1527                               != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
1528                         next0 = SNAT_OUT2IN_NEXT_DROP;
1529                       else
1530                         vnet_feature_next
1531                           (vnet_buffer (b0)->sw_if_index[VLIB_RX],
1532                            &next0, b0);
1533                       goto trace00;
1534                     }
1535                   else
1536                     {
1537                       create_bypass_for_fwd(sm, ip0);
1538                       goto trace00;
1539                     }
1540                 }
1541
1542               /* Create session initiated by host from external network */
1543               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1544                                                      thread_index);
1545               if (!s0)
1546                 {
1547                   next0 = SNAT_OUT2IN_NEXT_DROP;
1548                   goto trace00;
1549                 }
1550             }
1551           else
1552             {
1553               if (PREDICT_FALSE (value0.value == ~0ULL))
1554                 {
1555                   s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index,
1556                                       now, vm, node);
1557                   if (!s0)
1558                     next0 = SNAT_OUT2IN_NEXT_DROP;
1559                   goto trace00;
1560                 }
1561               else
1562                 {
1563                   s0 = pool_elt_at_index (
1564                     sm->per_thread_data[thread_index].sessions,
1565                     value0.value);
1566                 }
1567             }
1568
1569           old_addr0 = ip0->dst_address.as_u32;
1570           ip0->dst_address = s0->in2out.addr;
1571           new_addr0 = ip0->dst_address.as_u32;
1572           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1573
1574           sum0 = ip0->checksum;
1575           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1576                                  ip4_header_t,
1577                                  dst_address /* changed member */);
1578           ip0->checksum = ip_csum_fold (sum0);
1579
1580           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1581             {
1582               old_port0 = tcp0->dst_port;
1583               tcp0->dst_port = s0->in2out.port;
1584               new_port0 = tcp0->dst_port;
1585
1586               sum0 = tcp0->checksum;
1587               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1588                                      ip4_header_t,
1589                                      dst_address /* changed member */);
1590
1591               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1592                                      ip4_header_t /* cheat */,
1593                                      length /* changed member */);
1594               tcp0->checksum = ip_csum_fold(sum0);
1595             }
1596           else
1597             {
                   /* NOTE(review): old_port0 is assigned but not read
                      afterwards in this branch; UDP checksum is cleared. */
1598               old_port0 = udp0->dst_port;
1599               udp0->dst_port = s0->in2out.port;
1600               udp0->checksum = 0;
1601             }
1602
1603           /* Accounting */
1604           s0->last_heard = now;
1605           s0->total_pkts++;
1606           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1607           /* Per-user LRU list maintenance */
1608           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1609                              s0->per_user_index);
1610           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1611                               s0->per_user_list_head_index,
1612                               s0->per_user_index);
1613         trace00:
1614
1615           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1616                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1617             {
1618               snat_out2in_trace_t *t =
1619                  vlib_add_trace (vm, node, b0, sizeof (*t));
1620               t->sw_if_index = sw_if_index0;
1621               t->next_index = next0;
1622               t->session_index = ~0;
1623               if (s0)
1624                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1625             }
1626
1627           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1628
1629           /* verify speculative enqueue, maybe switch current next frame */
1630           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1631                                            to_next, n_left_to_next,
1632                                            bi0, next0);
1633         }
1634
1635       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1636     }
1637
       /* Publish the per-frame count of non-dropped packets. */
1638   vlib_node_increment_counter (vm, snat_out2in_node.index,
1639                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1640                                pkts_processed);
1641   return frame->n_vectors;
1642 }
1643
/*
 * Registration of the "nat44-out2in" graph node.
 *
 * Binds snat_out2in_node_fn as the node function, format_snat_out2in_trace
 * as the trace formatter and snat_out2in_error_strings as the error counter
 * names, and maps each SNAT_OUT2IN_NEXT_* disposition used by the node
 * function to the name of the graph node that receives the packet.
 */
1644 VLIB_REGISTER_NODE (snat_out2in_node) = {
1645   .function = snat_out2in_node_fn,
1646   .name = "nat44-out2in",
       /* Frame elements are u32 buffer indices. */
1647   .vector_size = sizeof (u32),
1648   .format_trace = format_snat_out2in_trace,
1649   .type = VLIB_NODE_TYPE_INTERNAL,
1650
1651   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1652   .error_strings = snat_out2in_error_strings,
1653
1654   .runtime_data_bytes = sizeof (snat_runtime_t),
1655
1656   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1657
1658   /* edit / add dispositions here */
1659   .next_nodes = {
1660     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1661     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1662     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1663     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1664   },
1665 };
/* NOTE(review): presumably emits CPU-architecture-specific variants of the
   node function - confirm against the vlib macro definition. */
1666 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
1667
/**
 * Node function for "nat44-out2in-reass": outside-to-inside NAT44
 * translation of IPv4 fragments.
 *
 * For every buffer a reassembly record is found or created from the source
 * address, destination address, fragment id and protocol. Only the first
 * fragment has its L4 destination port read: it is used to look up the
 * session in the per-thread out2in table, or to create a session from a
 * static mapping, and the resulting session index is stored on the
 * reassembly record. A non-first fragment that arrives while the record has
 * no session index yet is handed to nat_ip4_reass_add_fragment() and is not
 * enqueued to any next node. When the first fragment resolves the session,
 * nat_ip4_reass_get_frags() fills fragments_to_loopback, and those buffers
 * are fed back through this same loop once the input is exhausted.
 *
 * Translated packets get their destination address rewritten to the
 * session's in2out address; the first fragment also gets its L4 destination
 * port rewritten.
 *
 * @param vm     vlib main
 * @param node   node runtime
 * @param frame  frame holding the buffer indices to process
 * @return       frame->n_vectors
 */
1668 static uword
1669 nat44_out2in_reass_node_fn (vlib_main_t * vm,
1670                             vlib_node_runtime_t * node,
1671                             vlib_frame_t * frame)
1672 {
1673   u32 n_left_from, *from, *to_next;
1674   snat_out2in_next_t next_index;
1675   u32 pkts_processed = 0;
1676   snat_main_t *sm = &snat_main;
1677   f64 now = vlib_time_now (vm);
1678   u32 thread_index = vlib_get_thread_index ();
1679   snat_main_per_thread_data_t *per_thread_data =
1680     &sm->per_thread_data[thread_index];
     /* vectors of buffer indices: fragments to drop at the end, and
        fragments to run through the loop again */
1681   u32 *fragments_to_drop = 0;
1682   u32 *fragments_to_loopback = 0;
1683
1684   from = vlib_frame_vector_args (frame);
1685   n_left_from = frame->n_vectors;
1686   next_index = node->cached_next_index;
1687
1688   while (n_left_from > 0)
1689     {
1690       u32 n_left_to_next;
1691
1692       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1693
1694       while (n_left_from > 0 && n_left_to_next > 0)
1695        {
1696           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
1697           vlib_buffer_t *b0;
1698           u32 next0;
             /* set to 1 when the buffer is kept by the reassembly record
                instead of being forwarded */
1699           u8 cached0 = 0;
1700           ip4_header_t *ip0;
1701           nat_reass_ip4_t *reass0;
1702           udp_header_t * udp0;
1703           tcp_header_t * tcp0;
1704           snat_session_key_t key0, sm0;
1705           clib_bihash_kv_8_8_t kv0, value0;
1706           snat_session_t * s0 = 0;
1707           u16 old_port0, new_port0;
1708           ip_csum_t sum0;
1709
1710           /* speculatively enqueue b0 to the current next frame */
1711           bi0 = from[0];
1712           to_next[0] = bi0;
1713           from += 1;
1714           to_next += 1;
1715           n_left_from -= 1;
1716           n_left_to_next -= 1;
1717
1718           b0 = vlib_get_buffer (vm, bi0);
1719           next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1720
1721           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1722           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1723                                                                sw_if_index0);
1724
             /* NOTE(review): presumably a global "drop all fragments"
                setting, with 0 selecting IPv4 -- confirm in nat_reass */
1725           if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
1726             {
1727               next0 = SNAT_OUT2IN_NEXT_DROP;
1728               b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
1729               goto trace0;
1730             }
1731
             /* udp0 and tcp0 both point at the header following the IP
                header */
1732           ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
1733           udp0 = ip4_next_header (ip0);
1734           tcp0 = (tcp_header_t *) udp0;
1735           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1736
1737           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
1738                                                  ip0->dst_address,
1739                                                  ip0->fragment_id,
1740                                                  ip0->protocol,
1741                                                  1,
1742                                                  &fragments_to_drop);
1743
             /* no reassembly record available: drop with MAX_REASS */
1744           if (PREDICT_FALSE (!reass0))
1745             {
1746               next0 = SNAT_OUT2IN_NEXT_DROP;
1747               b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_REASS];
1748               goto trace0;
1749             }
1750
1751           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1752             {
                 /* session key: destination address/port, protocol and
                    RX FIB index */
1753               key0.addr = ip0->dst_address;
1754               key0.port = udp0->dst_port;
1755               key0.protocol = proto0;
1756               key0.fib_index = rx_fib_index0;
1757               kv0.key = key0.as_u64;
1758
                 /* non-zero result is handled as "no existing session" */
1759               if (clib_bihash_search_8_8 (&per_thread_data->out2in, &kv0, &value0))
1760                 {
1761                   /* Try to match static mapping by external address and port,
1762                      destination address and port in packet */
                     /* non-zero return is treated as "no static mapping" */
1763                   if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
1764                     {
1765                       if (!sm->forwarding_enabled)
1766                         {
1767                           b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1768                           /*
1769                            * Send DHCP packets to the ipv4 stack, or we won't
1770                            * be able to use dhcp client on the outside interface
1771                            */
1772                           if (PREDICT_TRUE (proto0 != SNAT_PROTOCOL_UDP
1773                               || (udp0->dst_port
1774                                   != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
1775                             next0 = SNAT_OUT2IN_NEXT_DROP;
1776                           else
1777                             vnet_feature_next
1778                               (vnet_buffer (b0)->sw_if_index[VLIB_RX],
1779                                &next0, b0);
1780                           goto trace0;
1781                         }
1782                       else
1783                         {
                             /* forwarding enabled: next0 stays
                                SNAT_OUT2IN_NEXT_LOOKUP and the packet is
                                left untranslated */
1784                           create_bypass_for_fwd(sm, ip0);
1785                           goto trace0;
1786                         }
1787                     }
1788
1789                   /* Create session initiated by host from external network */
1790                   s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1791                                                          thread_index);
1792                   if (!s0)
1793                     {
1794                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1795                       next0 = SNAT_OUT2IN_NEXT_DROP;
1796                       goto trace0;
1797                     }
                     /* remember the session (as pool index) and owning
                        thread on the reassembly record */
1798                   reass0->sess_index = s0 - per_thread_data->sessions;
1799                   reass0->thread_index = thread_index;
1800                 }
1801               else
1802                 {
                     /* existing session: value0.value is its index in the
                        per-thread session pool */
1803                   s0 = pool_elt_at_index (per_thread_data->sessions,
1804                                           value0.value);
1805                   reass0->sess_index = value0.value;
1806                 }
                 /* NOTE(review): presumably appends fragments stored on the
                    record to fragments_to_loopback -- confirm in nat_reass */
1807               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
1808             }
1809           else
1810             {
                 /* non-first fragment: if the record has no session index
                    yet, hand the buffer to the reassembly record */
1811               if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
1812                 {
1813                   if (nat_ip4_reass_add_fragment (reass0, bi0))
1814                     {
1815                       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_FRAG];
1816                       next0 = SNAT_OUT2IN_NEXT_DROP;
1817                       goto trace0;
1818                     }
1819                   cached0 = 1;
1820                   goto trace0;
1821                 }
1822               s0 = pool_elt_at_index (per_thread_data->sessions,
1823                                       reass0->sess_index);
1824             }
1825
             /* rewrite the destination address to the session's in2out
                address and set sw_if_index[VLIB_TX] to its fib_index */
1826           old_addr0 = ip0->dst_address.as_u32;
1827           ip0->dst_address = s0->in2out.addr;
1828           new_addr0 = ip0->dst_address.as_u32;
1829           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1830
             /* update the IP header checksum from the old and new address */
1831           sum0 = ip0->checksum;
1832           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1833                                  ip4_header_t,
1834                                  dst_address /* changed member */);
1835           ip0->checksum = ip_csum_fold (sum0);
1836
             /* the L4 destination port is rewritten on the first fragment
                only */
1837           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1838             {
1839               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1840                 {
1841                   old_port0 = tcp0->dst_port;
1842                   tcp0->dst_port = s0->in2out.port;
1843                   new_port0 = tcp0->dst_port;
1844
                     /* TCP checksum is updated for both the address change
                        and the port change */
1845                   sum0 = tcp0->checksum;
1846                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1847                                          ip4_header_t,
1848                                          dst_address /* changed member */);
1849
                     /* NOTE(review): ip4_header_t.length is only a stand-in
                        member for the port, presumably because it has the
                        same width -- confirm against ip_csum_update */
1850                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
1851                                          ip4_header_t /* cheat */,
1852                                          length /* changed member */);
1853                   tcp0->checksum = ip_csum_fold(sum0);
1854                 }
1855               else
1856                 {
                     /* every non-TCP protocol takes this branch: the port
                        is rewritten and the checksum field is zeroed */
1857                   old_port0 = udp0->dst_port;
1858                   udp0->dst_port = s0->in2out.port;
1859                   udp0->checksum = 0;
1860                 }
1861             }
1862
1863           /* Accounting */
1864           s0->last_heard = now;
1865           s0->total_pkts++;
1866           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1867           /* Per-user LRU list maintenance */
1868           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1869                              s0->per_user_index);
1870           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1871                               s0->per_user_list_head_index,
1872                               s0->per_user_index);
1873
1874         trace0:
1875           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1876                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1877             {
1878               nat44_out2in_reass_trace_t *t =
1879                  vlib_add_trace (vm, node, b0, sizeof (*t));
1880               t->cached = cached0;
1881               t->sw_if_index = sw_if_index0;
1882               t->next_index = next0;
1883             }
1884
             /* a cached buffer is not forwarded now: undo the speculative
                enqueue done at the top of the loop */
1885           if (cached0)
1886             {
1887               n_left_to_next++;
1888               to_next--;
1889             }
1890           else
1891             {
1892               pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1893
1894               /* verify speculative enqueue, maybe switch current next frame */
1895               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1896                                                to_next, n_left_to_next,
1897                                                bi0, next0);
1898             }
1899
             /* input exhausted but fragments are waiting for another pass:
                overwrite the frame's vector with up to VLIB_FRAME_SIZE of
                them (taken from the tail when there are more) so the
                enclosing loops keep running */
1900           if (n_left_from == 0 && vec_len (fragments_to_loopback))
1901             {
1902               from = vlib_frame_vector_args (frame);
1903               u32 len = vec_len (fragments_to_loopback);
1904               if (len <= VLIB_FRAME_SIZE)
1905                 {
1906                   clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
1907                   n_left_from = len;
1908                   vec_reset_length (fragments_to_loopback);
1909                 }
1910               else
1911                 {
1912                   clib_memcpy (from,
1913                                fragments_to_loopback + (len - VLIB_FRAME_SIZE),
1914                                sizeof (u32) * VLIB_FRAME_SIZE);
1915                   n_left_from = VLIB_FRAME_SIZE;
1916                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
1917                 }
1918             }
1919        }
1920
1921       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1922     }
1923
     /* pkts_processed counts every enqueued buffer whose next was not DROP */
1924   vlib_node_increment_counter (vm, nat44_out2in_reass_node.index,
1925                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1926                                pkts_processed);
1927
     /* NOTE(review): presumably sends everything collected in
        fragments_to_drop to the drop next node with the DROP_FRAGMENT
        error -- confirm in nat_send_all_to_node */
1928   nat_send_all_to_node (vm, fragments_to_drop, node,
1929                         &node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT],
1930                         SNAT_OUT2IN_NEXT_DROP);
1931
1932   vec_free (fragments_to_drop);
1933   vec_free (fragments_to_loopback);
1934   return frame->n_vectors;
1935 }
1936
/*
 * Graph-node registration for "nat44-out2in-reass".
 *
 * Binds nat44_out2in_reass_node_fn as the node function and
 * format_nat44_out2in_reass_trace as the packet-trace formatter. Error
 * strings and the next-node table are the same ones used by the other
 * out2in nodes in this file (snat_out2in_error_strings, SNAT_OUT2IN_NEXT_*).
 * Unlike those registrations, no runtime_data_bytes is requested here.
 */
1937 VLIB_REGISTER_NODE (nat44_out2in_reass_node) = {
1938   .function = nat44_out2in_reass_node_fn,
1939   .name = "nat44-out2in-reass",
1940   .vector_size = sizeof (u32),
1941   .format_trace = format_nat44_out2in_reass_trace,
1942   .type = VLIB_NODE_TYPE_INTERNAL,
1943
1944   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1945   .error_strings = snat_out2in_error_strings,
1946
1947   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1948
1949   /* edit / add dispositions here */
1950   .next_nodes = {
1951     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1952     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1953     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
       /* this node lists itself as a possible next node */
1954     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1955   },
1956 };
/* NOTE(review): presumably emits per-CPU-architecture variants of the node
 * function -- confirm against the macro definition. */
1957 VLIB_NODE_FUNCTION_MULTIARCH (nat44_out2in_reass_node,
1958                               nat44_out2in_reass_node_fn);
1959
1960 /**************************/
1961 /*** deterministic mode ***/
1962 /**************************/
/**
 * Node function for "nat44-det-out2in": outside-to-inside translation in
 * deterministic NAT mode.
 *
 * Buffers are processed two at a time while at least four remain in the
 * input (the following two are prefetched), then one at a time. Each packet
 * goes through the same steps:
 *   - TTL equal to 1: an ICMP time-exceeded error is recorded on the buffer
 *     and it is sent to SNAT_OUT2IN_NEXT_ICMP_ERROR;
 *   - ICMP: handled entirely by icmp_out2in(), which also returns the next
 *     index;
 *   - anything else: the deterministic map is looked up from the destination
 *     address, snat_det_reverse() computes the new (inside) address, and the
 *     session is looked up using the external host address/port and the
 *     outside port. A missing map or session logs a warning and drops the
 *     packet with SNAT_OUT2IN_ERROR_NO_TRANSLATION. Otherwise the
 *     destination address and port are rewritten and checksums adjusted.
 *
 * @param vm     vlib main
 * @param node   node runtime
 * @param frame  frame holding the buffer indices to process
 * @return       frame->n_vectors
 */
1963 static uword
1964 snat_det_out2in_node_fn (vlib_main_t * vm,
1965                          vlib_node_runtime_t * node,
1966                          vlib_frame_t * frame)
1967 {
1968   u32 n_left_from, * from, * to_next;
1969   snat_out2in_next_t next_index;
1970   u32 pkts_processed = 0;
1971   snat_main_t * sm = &snat_main;
1972   u32 thread_index = vlib_get_thread_index ();
1973
1974   from = vlib_frame_vector_args (frame);
1975   n_left_from = frame->n_vectors;
1976   next_index = node->cached_next_index;
1977
1978   while (n_left_from > 0)
1979     {
1980       u32 n_left_to_next;
1981
1982       vlib_get_next_frame (vm, node, next_index,
1983                            to_next, n_left_to_next);
1984
         /* dual loop: needs four input buffers because from[2] and from[3]
            are prefetched while from[0] and from[1] are processed */
1985       while (n_left_from >= 4 && n_left_to_next >= 2)
1986         {
1987           u32 bi0, bi1;
1988           vlib_buffer_t * b0, * b1;
1989           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1990           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
1991           u32 sw_if_index0, sw_if_index1;
1992           ip4_header_t * ip0, * ip1;
1993           ip_csum_t sum0, sum1;
1994           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
1995           u16 new_port0, old_port0, old_port1, new_port1;
1996           udp_header_t * udp0, * udp1;
1997           tcp_header_t * tcp0, * tcp1;
1998           u32 proto0, proto1;
1999           snat_det_out_key_t key0, key1;
2000           snat_det_map_t * dm0, * dm1;
             /* sessions start as NULL so the trace code can tell whether
                one was found */
2001           snat_det_session_t * ses0 = 0, * ses1 = 0;
2002           u32 rx_fib_index0, rx_fib_index1;
2003           icmp46_header_t * icmp0, * icmp1;
2004
2005           /* Prefetch next iteration. */
2006           {
2007             vlib_buffer_t * p2, * p3;
2008
2009             p2 = vlib_get_buffer (vm, from[2]);
2010             p3 = vlib_get_buffer (vm, from[3]);
2011
2012             vlib_prefetch_buffer_header (p2, LOAD);
2013             vlib_prefetch_buffer_header (p3, LOAD);
2014
2015             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2016             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2017           }
2018
2019           /* speculatively enqueue b0 and b1 to the current next frame */
2020           to_next[0] = bi0 = from[0];
2021           to_next[1] = bi1 = from[1];
2022           from += 2;
2023           to_next += 2;
2024           n_left_from -= 2;
2025           n_left_to_next -= 2;
2026
2027           b0 = vlib_get_buffer (vm, bi0);
2028           b1 = vlib_get_buffer (vm, bi1);
2029
             /* udp0 and tcp0 both point at the header following the IP
                header; ports are read through tcp0 whatever the protocol */
2030           ip0 = vlib_buffer_get_current (b0);
2031           udp0 = ip4_next_header (ip0);
2032           tcp0 = (tcp_header_t *) udp0;
2033
2034           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2035
             /* TTL of 1: record a time-exceeded ICMP error on the buffer
                and send it to the ICMP error node */
2036           if (PREDICT_FALSE(ip0->ttl == 1))
2037             {
2038               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2039               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2040                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2041                                            0);
2042               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2043               goto trace0;
2044             }
2045
2046           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2047
             /* ICMP is handled entirely by icmp_out2in(), which returns the
                next index and receives &ses0 / &dm0 */
2048           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2049             {
2050               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2051               icmp0 = (icmp46_header_t *) udp0;
2052
2053               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2054                                   rx_fib_index0, node, next0, thread_index,
2055                                   &ses0, &dm0);
2056               goto trace0;
2057             }
2058
             /* session key: external host address/port plus outside port */
2059           key0.ext_host_addr = ip0->src_address;
2060           key0.ext_host_port = tcp0->src;
2061           key0.out_port = tcp0->dst;
2062
2063           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
2064           if (PREDICT_FALSE(!dm0))
2065             {
2066               clib_warning("unknown dst address:  %U",
2067                            format_ip4_address, &ip0->dst_address);
2068               next0 = SNAT_OUT2IN_NEXT_DROP;
2069               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2070               goto trace0;
2071             }
2072
             /* compute the new (inside) address from the outside address
                and host-order outside port */
2073           snat_det_reverse(dm0, &ip0->dst_address,
2074                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
2075
2076           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
2077           if (PREDICT_FALSE(!ses0))
2078             {
2079               clib_warning("no match src %U:%d dst %U:%d for user %U",
2080                            format_ip4_address, &ip0->src_address,
2081                            clib_net_to_host_u16 (tcp0->src),
2082                            format_ip4_address, &ip0->dst_address,
2083                            clib_net_to_host_u16 (tcp0->dst),
2084                            format_ip4_address, &new_addr0);
2085               next0 = SNAT_OUT2IN_NEXT_DROP;
2086               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2087               goto trace0;
2088             }
             /* in_port is read here, before any session close below */
2089           new_port0 = ses0->in_port;
2090
2091           old_addr0 = ip0->dst_address;
2092           ip0->dst_address = new_addr0;
2093           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2094
             /* update the IP header checksum from the old and new address */
2095           sum0 = ip0->checksum;
2096           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2097                                  ip4_header_t,
2098                                  dst_address /* changed member */);
2099           ip0->checksum = ip_csum_fold (sum0);
2100
2101           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2102             {
                 /* FIN while ESTABLISHED moves the session to CLOSE_WAIT;
                    otherwise ACK while LAST_ACK closes the session */
2103               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2104                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2105               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
2106                 snat_det_ses_close(dm0, ses0);
2107
2108               old_port0 = tcp0->dst;
2109               tcp0->dst = new_port0;
2110
                 /* TCP checksum is updated for both the address change and
                    the port change */
2111               sum0 = tcp0->checksum;
2112               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2113                                      ip4_header_t,
2114                                      dst_address /* changed member */);
2115
                 /* NOTE(review): ip4_header_t.length is only a stand-in
                    member for the port, presumably because it has the same
                    width -- confirm against ip_csum_update */
2116               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2117                                      ip4_header_t /* cheat */,
2118                                      length /* changed member */);
2119               tcp0->checksum = ip_csum_fold(sum0);
2120             }
2121           else
2122             {
                 /* every non-TCP, non-ICMP protocol takes this branch: the
                    port is rewritten and the checksum field is zeroed */
2123               old_port0 = udp0->dst_port;
2124               udp0->dst_port = new_port0;
2125               udp0->checksum = 0;
2126             }
2127
2128         trace0:
2129
2130           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2131                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2132             {
2133               snat_out2in_trace_t *t =
2134                  vlib_add_trace (vm, node, b0, sizeof (*t));
2135               t->sw_if_index = sw_if_index0;
2136               t->next_index = next0;
2137               t->session_index = ~0;
                 /* dm0 is dereferenced only when a session was found */
2138               if (ses0)
2139                 t->session_index = ses0 - dm0->sessions;
2140             }
2141
2142           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2143
             /* second buffer: same steps as for b0 above; this repeats the
                vlib_get_buffer call already made for bi1 */
2144           b1 = vlib_get_buffer (vm, bi1);
2145
2146           ip1 = vlib_buffer_get_current (b1);
2147           udp1 = ip4_next_header (ip1);
2148           tcp1 = (tcp_header_t *) udp1;
2149
2150           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2151
2152           if (PREDICT_FALSE(ip1->ttl == 1))
2153             {
2154               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2155               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2156                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2157                                            0);
2158               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2159               goto trace1;
2160             }
2161
2162           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2163
2164           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
2165             {
2166               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
2167               icmp1 = (icmp46_header_t *) udp1;
2168
2169               next1 = icmp_out2in(sm, b1, ip1, icmp1, sw_if_index1,
2170                                   rx_fib_index1, node, next1, thread_index,
2171                                   &ses1, &dm1);
2172               goto trace1;
2173             }
2174
2175           key1.ext_host_addr = ip1->src_address;
2176           key1.ext_host_port = tcp1->src;
2177           key1.out_port = tcp1->dst;
2178
2179           dm1 = snat_det_map_by_out(sm, &ip1->dst_address);
2180           if (PREDICT_FALSE(!dm1))
2181             {
2182               clib_warning("unknown dst address:  %U",
2183                            format_ip4_address, &ip1->dst_address);
2184               next1 = SNAT_OUT2IN_NEXT_DROP;
2185               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2186               goto trace1;
2187             }
2188
2189           snat_det_reverse(dm1, &ip1->dst_address,
2190                            clib_net_to_host_u16(tcp1->dst), &new_addr1);
2191
2192           ses1 = snat_det_get_ses_by_out (dm1, &new_addr1, key1.as_u64);
2193           if (PREDICT_FALSE(!ses1))
2194             {
2195               clib_warning("no match src %U:%d dst %U:%d for user %U",
2196                            format_ip4_address, &ip1->src_address,
2197                            clib_net_to_host_u16 (tcp1->src),
2198                            format_ip4_address, &ip1->dst_address,
2199                            clib_net_to_host_u16 (tcp1->dst),
2200                            format_ip4_address, &new_addr1);
2201               next1 = SNAT_OUT2IN_NEXT_DROP;
2202               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2203               goto trace1;
2204             }
2205           new_port1 = ses1->in_port;
2206
2207           old_addr1 = ip1->dst_address;
2208           ip1->dst_address = new_addr1;
2209           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2210
2211           sum1 = ip1->checksum;
2212           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2213                                  ip4_header_t,
2214                                  dst_address /* changed member */);
2215           ip1->checksum = ip_csum_fold (sum1);
2216
2217           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
2218             {
2219               if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
2220                 ses1->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2221               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_LAST_ACK)
2222                 snat_det_ses_close(dm1, ses1);
2223
2224               old_port1 = tcp1->dst;
2225               tcp1->dst = new_port1;
2226
2227               sum1 = tcp1->checksum;
2228               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2229                                      ip4_header_t,
2230                                      dst_address /* changed member */);
2231
2232               sum1 = ip_csum_update (sum1, old_port1, new_port1,
2233                                      ip4_header_t /* cheat */,
2234                                      length /* changed member */);
2235               tcp1->checksum = ip_csum_fold(sum1);
2236             }
2237           else
2238             {
2239               old_port1 = udp1->dst_port;
2240               udp1->dst_port = new_port1;
2241               udp1->checksum = 0;
2242             }
2243
2244         trace1:
2245
2246           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2247                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2248             {
2249               snat_out2in_trace_t *t =
2250                  vlib_add_trace (vm, node, b1, sizeof (*t));
2251               t->sw_if_index = sw_if_index1;
2252               t->next_index = next1;
2253               t->session_index = ~0;
2254               if (ses1)
2255                 t->session_index = ses1 - dm1->sessions;
2256             }
2257
2258           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
2259
2260           /* verify speculative enqueues, maybe switch current next frame */
2261           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2262                                            to_next, n_left_to_next,
2263                                            bi0, bi1, next0, next1);
2264          }
2265
         /* single loop: handles the remaining buffers one at a time with
            the same per-packet steps as the dual loop */
2266       while (n_left_from > 0 && n_left_to_next > 0)
2267         {
2268           u32 bi0;
2269           vlib_buffer_t * b0;
2270           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
2271           u32 sw_if_index0;
2272           ip4_header_t * ip0;
2273           ip_csum_t sum0;
2274           ip4_address_t new_addr0, old_addr0;
2275           u16 new_port0, old_port0;
2276           udp_header_t * udp0;
2277           tcp_header_t * tcp0;
2278           u32 proto0;
2279           snat_det_out_key_t key0;
2280           snat_det_map_t * dm0;
2281           snat_det_session_t * ses0 = 0;
2282           u32 rx_fib_index0;
2283           icmp46_header_t * icmp0;
2284
2285           /* speculatively enqueue b0 to the current next frame */
2286           bi0 = from[0];
2287           to_next[0] = bi0;
2288           from += 1;
2289           to_next += 1;
2290           n_left_from -= 1;
2291           n_left_to_next -= 1;
2292
2293           b0 = vlib_get_buffer (vm, bi0);
2294
2295           ip0 = vlib_buffer_get_current (b0);
2296           udp0 = ip4_next_header (ip0);
2297           tcp0 = (tcp_header_t *) udp0;
2298
2299           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2300
             /* TTL of 1: record a time-exceeded ICMP error on the buffer
                and send it to the ICMP error node */
2301           if (PREDICT_FALSE(ip0->ttl == 1))
2302             {
2303               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2304               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2305                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2306                                            0);
2307               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2308               goto trace00;
2309             }
2310
2311           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2312
2313           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2314             {
2315               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2316               icmp0 = (icmp46_header_t *) udp0;
2317
2318               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2319                                   rx_fib_index0, node, next0, thread_index,
2320                                   &ses0, &dm0);
2321               goto trace00;
2322             }
2323
             /* session key: external host address/port plus outside port */
2324           key0.ext_host_addr = ip0->src_address;
2325           key0.ext_host_port = tcp0->src;
2326           key0.out_port = tcp0->dst;
2327
2328           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
2329           if (PREDICT_FALSE(!dm0))
2330             {
2331               clib_warning("unknown dst address:  %U",
2332                            format_ip4_address, &ip0->dst_address);
2333               next0 = SNAT_OUT2IN_NEXT_DROP;
2334               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2335               goto trace00;
2336             }
2337
2338           snat_det_reverse(dm0, &ip0->dst_address,
2339                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
2340
2341           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
2342           if (PREDICT_FALSE(!ses0))
2343             {
2344               clib_warning("no match src %U:%d dst %U:%d for user %U",
2345                            format_ip4_address, &ip0->src_address,
2346                            clib_net_to_host_u16 (tcp0->src),
2347                            format_ip4_address, &ip0->dst_address,
2348                            clib_net_to_host_u16 (tcp0->dst),
2349                            format_ip4_address, &new_addr0);
2350               next0 = SNAT_OUT2IN_NEXT_DROP;
2351               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2352               goto trace00;
2353             }
2354           new_port0 = ses0->in_port;
2355
2356           old_addr0 = ip0->dst_address;
2357           ip0->dst_address = new_addr0;
2358           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2359
2360           sum0 = ip0->checksum;
2361           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2362                                  ip4_header_t,
2363                                  dst_address /* changed member */);
2364           ip0->checksum = ip_csum_fold (sum0);
2365
2366           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2367             {
                 /* FIN while ESTABLISHED moves the session to CLOSE_WAIT;
                    otherwise ACK while LAST_ACK closes the session */
2368               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2369                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2370               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
2371                 snat_det_ses_close(dm0, ses0);
2372
2373               old_port0 = tcp0->dst;
2374               tcp0->dst = new_port0;
2375
2376               sum0 = tcp0->checksum;
2377               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2378                                      ip4_header_t,
2379                                      dst_address /* changed member */);
2380
2381               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2382                                      ip4_header_t /* cheat */,
2383                                      length /* changed member */);
2384               tcp0->checksum = ip_csum_fold(sum0);
2385             }
2386           else
2387             {
                 /* non-TCP branch: port rewritten, checksum field zeroed */
2388               old_port0 = udp0->dst_port;
2389               udp0->dst_port = new_port0;
2390               udp0->checksum = 0;
2391             }
2392
2393         trace00:
2394
2395           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2396                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2397             {
2398               snat_out2in_trace_t *t =
2399                  vlib_add_trace (vm, node, b0, sizeof (*t));
2400               t->sw_if_index = sw_if_index0;
2401               t->next_index = next0;
2402               t->session_index = ~0;
                 /* dm0 is dereferenced only when a session was found */
2403               if (ses0)
2404                 t->session_index = ses0 - dm0->sessions;
2405             }
2406
2407           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2408
2409           /* verify speculative enqueue, maybe switch current next frame */
2410           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2411                                            to_next, n_left_to_next,
2412                                            bi0, next0);
2413         }
2414
2415       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2416     }
2417
     /* pkts_processed counts every buffer whose next index was not DROP */
2418   vlib_node_increment_counter (vm, snat_det_out2in_node.index,
2419                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2420                                pkts_processed);
2421   return frame->n_vectors;
2422 }
2423
2424 VLIB_REGISTER_NODE (snat_det_out2in_node) = {
2425   .function = snat_det_out2in_node_fn,
2426   .name = "nat44-det-out2in",
2427   .vector_size = sizeof (u32),
2428   .format_trace = format_snat_out2in_trace,
2429   .type = VLIB_NODE_TYPE_INTERNAL,
2430
2431   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2432   .error_strings = snat_out2in_error_strings,
2433
2434   .runtime_data_bytes = sizeof (snat_runtime_t),
2435
2436   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
2437
2438   /* edit / add dispositions here */
2439   .next_nodes = {
2440     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
2441     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2442     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2443     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
2444   },
2445 };
2446 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn);
2447
2448 /**
2449  * Get address and port values to be used for ICMP packet translation
2450  * and create session if needed
2451  *
2452  * @param[in,out] sm             NAT main
2453  * @param[in,out] node           NAT node runtime
2454  * @param[in] thread_index       thread index
2455  * @param[in,out] b0             buffer containing packet to be translated
2456  * @param[out] p_proto           protocol used for matching
2457  * @param[out] p_value           address and port after NAT translation
2458  * @param[out] p_dont_translate  if packet should not be translated
2459  * @param d                      optional parameter
2460  * @param e                      optional parameter
2461  */
2462 u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node,
2463                           u32 thread_index, vlib_buffer_t *b0,
2464                           ip4_header_t *ip0, u8 *p_proto,
2465                           snat_session_key_t *p_value,
2466                           u8 *p_dont_translate, void *d, void *e)
2467 {
2468   icmp46_header_t *icmp0;
2469   u32 sw_if_index0;
2470   u8 protocol;
2471   snat_det_out_key_t key0;
2472   u8 dont_translate = 0;
2473   u32 next0 = ~0;
2474   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2475   ip4_header_t *inner_ip0;
2476   void *l4_header = 0;
2477   icmp46_header_t *inner_icmp0;
2478   snat_det_map_t * dm0 = 0;
2479   ip4_address_t new_addr0 = {{0}};
2480   snat_det_session_t * ses0 = 0;
2481   ip4_address_t out_addr;
2482
2483   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2484   echo0 = (icmp_echo_header_t *)(icmp0+1);
2485   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2486
2487   if (!icmp_is_error_message (icmp0))
2488     {
2489       protocol = SNAT_PROTOCOL_ICMP;
2490       key0.ext_host_addr = ip0->src_address;
2491       key0.ext_host_port = 0;
2492       key0.out_port = echo0->identifier;
2493       out_addr = ip0->dst_address;
2494     }
2495   else
2496     {
2497       inner_ip0 = (ip4_header_t *)(echo0+1);
2498       l4_header = ip4_next_header (inner_ip0);
2499       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2500       key0.ext_host_addr = inner_ip0->dst_address;
2501       out_addr = inner_ip0->src_address;
2502       switch (protocol)
2503         {
2504         case SNAT_PROTOCOL_ICMP:
2505           inner_icmp0 = (icmp46_header_t*)l4_header;
2506           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2507           key0.ext_host_port = 0;
2508           key0.out_port = inner_echo0->identifier;
2509           break;
2510         case SNAT_PROTOCOL_UDP:
2511         case SNAT_PROTOCOL_TCP:
2512           key0.ext_host_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2513           key0.out_port = ((tcp_udp_header_t*)l4_header)->src_port;
2514           break;
2515         default:
2516           b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
2517           next0 = SNAT_OUT2IN_NEXT_DROP;
2518           goto out;
2519         }
2520     }
2521
2522   dm0 = snat_det_map_by_out(sm, &out_addr);
2523   if (PREDICT_FALSE(!dm0))
2524     {
2525       /* Don't NAT packet aimed at the intfc address */
2526       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
2527                                           ip0->dst_address.as_u32)))
2528         {
2529           dont_translate = 1;
2530           goto out;
2531         }
2532       clib_warning("unknown dst address:  %U",
2533                    format_ip4_address, &ip0->dst_address);
2534       goto out;
2535     }
2536
2537   snat_det_reverse(dm0, &ip0->dst_address,
2538                    clib_net_to_host_u16(key0.out_port), &new_addr0);
2539
2540   ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
2541   if (PREDICT_FALSE(!ses0))
2542     {
2543       /* Don't NAT packet aimed at the intfc address */
2544       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
2545                                           ip0->dst_address.as_u32)))
2546         {
2547           dont_translate = 1;
2548           goto out;
2549         }
2550       clib_warning("no match src %U:%d dst %U:%d for user %U",
2551                    format_ip4_address, &key0.ext_host_addr,
2552                    clib_net_to_host_u16 (key0.ext_host_port),
2553                    format_ip4_address, &out_addr,
2554                    clib_net_to_host_u16 (key0.out_port),
2555                    format_ip4_address, &new_addr0);
2556       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2557       next0 = SNAT_OUT2IN_NEXT_DROP;
2558       goto out;
2559     }
2560
2561   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
2562                     !icmp_is_error_message (icmp0)))
2563     {
2564       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
2565       next0 = SNAT_OUT2IN_NEXT_DROP;
2566       goto out;
2567     }
2568
2569   goto out;
2570
2571 out:
2572   *p_proto = protocol;
2573   if (ses0)
2574     {
2575       p_value->addr = new_addr0;
2576       p_value->fib_index = sm->inside_fib_index;
2577       p_value->port = ses0->in_port;
2578     }
2579   *p_dont_translate = dont_translate;
2580   if (d)
2581     *(snat_det_session_t**)d = ses0;
2582   if (e)
2583     *(snat_det_map_t**)e = dm0;
2584   return next0;
2585 }
2586
2587 /**********************/
2588 /*** worker handoff ***/
2589 /**********************/
2590 static uword
2591 snat_out2in_worker_handoff_fn (vlib_main_t * vm,
2592                                vlib_node_runtime_t * node,
2593                                vlib_frame_t * frame)
2594 {
2595   snat_main_t *sm = &snat_main;
2596   vlib_thread_main_t *tm = vlib_get_thread_main ();
2597   u32 n_left_from, *from, *to_next = 0;
2598   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
2599   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
2600     = 0;
2601   vlib_frame_queue_elt_t *hf = 0;
2602   vlib_frame_t *f = 0;
2603   int i;
2604   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
2605   u32 next_worker_index = 0;
2606   u32 current_worker_index = ~0;
2607   u32 thread_index = vlib_get_thread_index ();
2608
2609   ASSERT (vec_len (sm->workers));
2610
2611   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
2612     {
2613       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
2614
2615       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
2616                                sm->first_worker_index + sm->num_workers - 1,
2617                                (vlib_frame_queue_t *) (~0));
2618     }
2619
2620   from = vlib_frame_vector_args (frame);
2621   n_left_from = frame->n_vectors;
2622
2623   while (n_left_from > 0)
2624     {
2625       u32 bi0;
2626       vlib_buffer_t *b0;
2627       u32 sw_if_index0;
2628       u32 rx_fib_index0;
2629       ip4_header_t * ip0;
2630       u8 do_handoff;
2631
2632       bi0 = from[0];
2633       from += 1;
2634       n_left_from -= 1;
2635
2636       b0 = vlib_get_buffer (vm, bi0);
2637
2638       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
2639       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2640
2641       ip0 = vlib_buffer_get_current (b0);
2642
2643       next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0);
2644
2645       if (PREDICT_FALSE (next_worker_index != thread_index))
2646         {
2647           do_handoff = 1;
2648
2649           if (next_worker_index != current_worker_index)
2650             {
2651               if (hf)
2652                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2653
2654               hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index,
2655                                                       next_worker_index,
2656                                                       handoff_queue_elt_by_worker_index);
2657
2658               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2659               to_next_worker = &hf->buffer_index[hf->n_vectors];
2660               current_worker_index = next_worker_index;
2661             }
2662
2663           /* enqueue to correct worker thread */
2664           to_next_worker[0] = bi0;
2665           to_next_worker++;
2666           n_left_to_next_worker--;
2667
2668           if (n_left_to_next_worker == 0)
2669             {
2670               hf->n_vectors = VLIB_FRAME_SIZE;
2671               vlib_put_frame_queue_elt (hf);
2672               current_worker_index = ~0;
2673               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2674               hf = 0;
2675             }
2676         }
2677       else
2678         {
2679           do_handoff = 0;
2680           /* if this is 1st frame */
2681           if (!f)
2682             {
2683               f = vlib_get_frame_to_node (vm, sm->out2in_node_index);
2684               to_next = vlib_frame_vector_args (f);
2685             }
2686
2687           to_next[0] = bi0;
2688           to_next += 1;
2689           f->n_vectors++;
2690         }
2691
2692       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2693                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2694         {
2695           snat_out2in_worker_handoff_trace_t *t =
2696             vlib_add_trace (vm, node, b0, sizeof (*t));
2697           t->next_worker_index = next_worker_index;
2698           t->do_handoff = do_handoff;
2699         }
2700     }
2701
2702   if (f)
2703     vlib_put_frame_to_node (vm, sm->out2in_node_index, f);
2704
2705   if (hf)
2706     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2707
2708   /* Ship frames to the worker nodes */
2709   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2710     {
2711       if (handoff_queue_elt_by_worker_index[i])
2712         {
2713           hf = handoff_queue_elt_by_worker_index[i];
2714           /*
2715            * It works better to let the handoff node
2716            * rate-adapt, always ship the handoff queue element.
2717            */
2718           if (1 || hf->n_vectors == hf->last_n_vectors)
2719             {
2720               vlib_put_frame_queue_elt (hf);
2721               handoff_queue_elt_by_worker_index[i] = 0;
2722             }
2723           else
2724             hf->last_n_vectors = hf->n_vectors;
2725         }
2726       congested_handoff_queue_by_worker_index[i] =
2727         (vlib_frame_queue_t *) (~0);
2728     }
2729   hf = 0;
2730   current_worker_index = ~0;
2731   return frame->n_vectors;
2732 }
2733
2734 VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = {
2735   .function = snat_out2in_worker_handoff_fn,
2736   .name = "nat44-out2in-worker-handoff",
2737   .vector_size = sizeof (u32),
2738   .format_trace = format_snat_out2in_worker_handoff_trace,
2739   .type = VLIB_NODE_TYPE_INTERNAL,
2740
2741   .n_next_nodes = 1,
2742
2743   .next_nodes = {
2744     [0] = "error-drop",
2745   },
2746 };
2747
2748 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn);
2749
2750 static uword
2751 snat_out2in_fast_node_fn (vlib_main_t * vm,
2752                           vlib_node_runtime_t * node,
2753                           vlib_frame_t * frame)
2754 {
2755   u32 n_left_from, * from, * to_next;
2756   snat_out2in_next_t next_index;
2757   u32 pkts_processed = 0;
2758   snat_main_t * sm = &snat_main;
2759
2760   from = vlib_frame_vector_args (frame);
2761   n_left_from = frame->n_vectors;
2762   next_index = node->cached_next_index;
2763
2764   while (n_left_from > 0)
2765     {
2766       u32 n_left_to_next;
2767
2768       vlib_get_next_frame (vm, node, next_index,
2769                            to_next, n_left_to_next);
2770
2771       while (n_left_from > 0 && n_left_to_next > 0)
2772         {
2773           u32 bi0;
2774           vlib_buffer_t * b0;
2775           u32 next0 = SNAT_OUT2IN_NEXT_DROP;
2776           u32 sw_if_index0;
2777           ip4_header_t * ip0;
2778           ip_csum_t sum0;
2779           u32 new_addr0, old_addr0;
2780           u16 new_port0, old_port0;
2781           udp_header_t * udp0;
2782           tcp_header_t * tcp0;
2783           icmp46_header_t * icmp0;
2784           snat_session_key_t key0, sm0;
2785           u32 proto0;
2786           u32 rx_fib_index0;
2787
2788           /* speculatively enqueue b0 to the current next frame */
2789           bi0 = from[0];
2790           to_next[0] = bi0;
2791           from += 1;
2792           to_next += 1;
2793           n_left_from -= 1;
2794           n_left_to_next -= 1;
2795
2796           b0 = vlib_get_buffer (vm, bi0);
2797
2798           ip0 = vlib_buffer_get_current (b0);
2799           udp0 = ip4_next_header (ip0);
2800           tcp0 = (tcp_header_t *) udp0;
2801           icmp0 = (icmp46_header_t *) udp0;
2802
2803           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2804           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2805
2806           vnet_feature_next (sw_if_index0, &next0, b0);
2807
2808           if (PREDICT_FALSE(ip0->ttl == 1))
2809             {
2810               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2811               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2812                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2813                                            0);
2814               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2815               goto trace00;
2816             }
2817
2818           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2819
2820           if (PREDICT_FALSE (proto0 == ~0))
2821               goto trace00;
2822
2823           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2824             {
2825               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2826                                   rx_fib_index0, node, next0, ~0, 0, 0);
2827               goto trace00;
2828             }
2829
2830           key0.addr = ip0->dst_address;
2831           key0.port = udp0->dst_port;
2832           key0.fib_index = rx_fib_index0;
2833
2834           if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
2835             {
2836               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2837               goto trace00;
2838             }
2839
2840           new_addr0 = sm0.addr.as_u32;
2841           new_port0 = sm0.port;
2842           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2843           old_addr0 = ip0->dst_address.as_u32;
2844           ip0->dst_address.as_u32 = new_addr0;
2845
2846           sum0 = ip0->checksum;
2847           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2848                                  ip4_header_t,
2849                                  dst_address /* changed member */);
2850           ip0->checksum = ip_csum_fold (sum0);
2851
2852           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
2853             {
2854                if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2855                 {
2856                   old_port0 = tcp0->dst_port;
2857                   tcp0->dst_port = new_port0;
2858
2859                   sum0 = tcp0->checksum;
2860                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2861                                          ip4_header_t,
2862                                          dst_address /* changed member */);
2863
2864                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2865                                          ip4_header_t /* cheat */,
2866                                          length /* changed member */);
2867                   tcp0->checksum = ip_csum_fold(sum0);
2868                 }
2869               else
2870                 {
2871                   old_port0 = udp0->dst_port;
2872                   udp0->dst_port = new_port0;
2873                   udp0->checksum = 0;
2874                 }
2875             }
2876           else
2877             {
2878               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2879                 {
2880                   sum0 = tcp0->checksum;
2881                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2882                                          ip4_header_t,
2883                                          dst_address /* changed member */);
2884
2885                   tcp0->checksum = ip_csum_fold(sum0);
2886                 }
2887             }
2888
2889         trace00:
2890
2891           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2892                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2893             {
2894               snat_out2in_trace_t *t =
2895                  vlib_add_trace (vm, node, b0, sizeof (*t));
2896               t->sw_if_index = sw_if_index0;
2897               t->next_index = next0;
2898             }
2899
2900           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2901
2902           /* verify speculative enqueue, maybe switch current next frame */
2903           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2904                                            to_next, n_left_to_next,
2905                                            bi0, next0);
2906         }
2907
2908       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2909     }
2910
2911   vlib_node_increment_counter (vm, snat_out2in_fast_node.index,
2912                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2913                                pkts_processed);
2914   return frame->n_vectors;
2915 }
2916
2917 VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
2918   .function = snat_out2in_fast_node_fn,
2919   .name = "nat44-out2in-fast",
2920   .vector_size = sizeof (u32),
2921   .format_trace = format_snat_out2in_fast_trace,
2922   .type = VLIB_NODE_TYPE_INTERNAL,
2923
2924   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2925   .error_strings = snat_out2in_error_strings,
2926
2927   .runtime_data_bytes = sizeof (snat_runtime_t),
2928
2929   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
2930
2931   /* edit / add dispositions here */
2932   .next_nodes = {
2933     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2934     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
2935     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2936     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
2937   },
2938 };
2939 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn);