NAT44 virtual fragmentation reassembly for endpoint-dependent mode (VPP-1325)
[vpp.git] / src / plugins / nat / out2in.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/udp/udp.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <nat/nat.h>
26 #include <nat/nat_ipfix_logging.h>
27 #include <nat/nat_det.h>
28 #include <nat/nat_reass.h>
29 #include <nat/nat_inlines.h>
30
31 #include <vppinfra/hash.h>
32 #include <vppinfra/error.h>
33 #include <vppinfra/elog.h>
34
35 typedef struct {
36   u32 sw_if_index;
37   u32 next_index;
38   u32 session_index;
39 } snat_out2in_trace_t;
40
41 typedef struct {
42   u32 next_worker_index;
43   u8 do_handoff;
44 } snat_out2in_worker_handoff_trace_t;
45
46 /* packet trace format function */
47 static u8 * format_snat_out2in_trace (u8 * s, va_list * args)
48 {
49   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
50   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
51   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
52
53   s = format (s, "NAT44_OUT2IN: sw_if_index %d, next index %d, session index %d",
54               t->sw_if_index, t->next_index, t->session_index);
55   return s;
56 }
57
58 static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args)
59 {
60   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
61   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
62   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
63
64   s = format (s, "NAT44_OUT2IN_FAST: sw_if_index %d, next index %d",
65               t->sw_if_index, t->next_index);
66   return s;
67 }
68
69 static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args)
70 {
71   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
72   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
73   snat_out2in_worker_handoff_trace_t * t =
74     va_arg (*args, snat_out2in_worker_handoff_trace_t *);
75   char * m;
76
77   m = t->do_handoff ? "next worker" : "same worker";
78   s = format (s, "NAT44_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
79
80   return s;
81 }
82
83 typedef struct {
84   u32 sw_if_index;
85   u32 next_index;
86   u8 cached;
87 } nat44_out2in_reass_trace_t;
88
89 static u8 * format_nat44_out2in_reass_trace (u8 * s, va_list * args)
90 {
91   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
92   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
93   nat44_out2in_reass_trace_t * t = va_arg (*args, nat44_out2in_reass_trace_t *);
94
95   s = format (s, "NAT44_OUT2IN_REASS: sw_if_index %d, next index %d, status %s",
96               t->sw_if_index, t->next_index,
97               t->cached ? "cached" : "translated");
98
99   return s;
100 }
101
/*
 * Declarations of the node registration objects for the out2in node
 * family, so they can be named before their registration appears.
 * NOTE(review): the registrations themselves are not visible in this part
 * of the file - presumably done with VLIB_REGISTER_NODE further down.
 */
vlib_node_registration_t snat_out2in_node;
vlib_node_registration_t snat_out2in_fast_node;
vlib_node_registration_t snat_out2in_worker_handoff_node;
vlib_node_registration_t snat_det_out2in_node;
vlib_node_registration_t nat44_out2in_reass_node;
vlib_node_registration_t nat44_ed_out2in_node;
vlib_node_registration_t nat44_ed_out2in_slowpath_node;
vlib_node_registration_t nat44_ed_out2in_reass_node;
110
/*
 * X-macro listing the out2in error counters as _(SYMBOL, "description").
 * The user defines _() before expanding the list; in this file it is
 * expanded once into snat_out2in_error_t and once into
 * snat_out2in_error_strings[], which keeps both in the same order.
 */
#define foreach_snat_out2in_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(OUT2IN_PACKETS, "Good out2in packets processed")      \
_(OUT_OF_PORTS, "Out of ports")                         \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(NO_TRANSLATION, "No translation")                     \
_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")   \
_(DROP_FRAGMENT, "Drop fragment")                       \
_(MAX_REASS, "Maximum reassemblies exceeded")           \
_(MAX_FRAG, "Maximum fragments per reassembly exceeded")\
_(FQ_CONGESTED, "Handoff frame queue congested")
122
123 typedef enum {
124 #define _(sym,str) SNAT_OUT2IN_ERROR_##sym,
125   foreach_snat_out2in_error
126 #undef _
127   SNAT_OUT2IN_N_ERROR,
128 } snat_out2in_error_t;
129
130 static char * snat_out2in_error_strings[] = {
131 #define _(sym,string) string,
132   foreach_snat_out2in_error
133 #undef _
134 };
135
/* Next-node indices used by the out2in node. */
typedef enum
{
  /* drop the packet */
  SNAT_OUT2IN_NEXT_DROP,
  /* default disposition of a packet entering the node */
  SNAT_OUT2IN_NEXT_LOOKUP,
  /* chosen for TTL == 1 packets after the ICMP time-exceeded data is set */
  SNAT_OUT2IN_NEXT_ICMP_ERROR,
  /* chosen for IPv4 fragments */
  SNAT_OUT2IN_NEXT_REASS,
  /* number of next indices */
  SNAT_OUT2IN_N_NEXT,
} snat_out2in_next_t;
143
144 int
145 nat44_o2i_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void * arg)
146 {
147   snat_main_t *sm = &snat_main;
148   nat44_is_idle_session_ctx_t *ctx = arg;
149   snat_session_t *s;
150   u64 sess_timeout_time;
151   snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
152                                                        ctx->thread_index);
153   clib_bihash_kv_8_8_t s_kv;
154
155   s = pool_elt_at_index (tsm->sessions, kv->value);
156   sess_timeout_time = s->last_heard + (f64)nat44_session_get_timeout(sm, s);
157   if (ctx->now >= sess_timeout_time)
158     {
159       s_kv.key = s->in2out.as_u64;
160       if (clib_bihash_add_del_8_8 (&tsm->in2out, &s_kv, 0))
161         nat_log_warn ("out2in key del failed");
162
163       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
164                                           s->out2in.addr.as_u32,
165                                           s->in2out.protocol,
166                                           s->in2out.port,
167                                           s->out2in.port,
168                                           s->in2out.fib_index);
169
170       if (!snat_is_session_static (s))
171         snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
172                                             &s->out2in);
173
174       nat44_delete_session (sm, s, ctx->thread_index);
175       return 1;
176     }
177
178   return 0;
179 }
180
181 /**
182  * @brief Create session for static mapping.
183  *
184  * Create NAT session initiated by host from external network with static
185  * mapping.
186  *
187  * @param sm     NAT main.
188  * @param b0     Vlib buffer.
189  * @param in2out In2out NAT44 session key.
190  * @param out2in Out2in NAT44 session key.
191  * @param node   Vlib node.
192  *
193  * @returns SNAT session if successfully created otherwise 0.
194  */
195 static inline snat_session_t *
196 create_session_for_static_mapping (snat_main_t *sm,
197                                    vlib_buffer_t *b0,
198                                    snat_session_key_t in2out,
199                                    snat_session_key_t out2in,
200                                    vlib_node_runtime_t * node,
201                                    u32 thread_index,
202                                    f64 now)
203 {
204   snat_user_t *u;
205   snat_session_t *s;
206   clib_bihash_kv_8_8_t kv0;
207   ip4_header_t *ip0;
208   udp_header_t *udp0;
209   nat44_is_idle_session_ctx_t ctx0;
210
211   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
212     {
213       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
214       nat_log_notice ("maximum sessions exceeded");
215       return 0;
216     }
217
218   ip0 = vlib_buffer_get_current (b0);
219   udp0 = ip4_next_header (ip0);
220
221   u = nat_user_get_or_create (sm, &in2out.addr, in2out.fib_index, thread_index);
222   if (!u)
223     {
224       nat_log_warn ("create NAT user failed");
225       return 0;
226     }
227
228   s = nat_session_alloc_or_recycle (sm, u, thread_index);
229   if (!s)
230     {
231       nat44_delete_user_with_no_session (sm, u, thread_index);
232       nat_log_warn ("create NAT session failed");
233       return 0;
234     }
235
236   s->outside_address_index = ~0;
237   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
238   s->ext_host_addr.as_u32 = ip0->src_address.as_u32;
239   s->ext_host_port = udp0->src_port;
240   user_session_increment (sm, u, 1 /* static */);
241   s->in2out = in2out;
242   s->out2in = out2in;
243   s->in2out.protocol = out2in.protocol;
244
245   /* Add to translation hashes */
246   ctx0.now = now;
247   ctx0.thread_index = thread_index;
248   kv0.key = s->in2out.as_u64;
249   kv0.value = s - sm->per_thread_data[thread_index].sessions;
250   if (clib_bihash_add_or_overwrite_stale_8_8 (
251        &sm->per_thread_data[thread_index].in2out, &kv0,
252        nat44_i2o_is_idle_session_cb, &ctx0))
253       nat_log_notice ("in2out key add failed");
254
255   kv0.key = s->out2in.as_u64;
256
257   if (clib_bihash_add_or_overwrite_stale_8_8 (
258         &sm->per_thread_data[thread_index].out2in, &kv0,
259         nat44_o2i_is_idle_session_cb, &ctx0))
260       nat_log_notice ("out2in key add failed");
261
262   /* log NAT event */
263   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
264                                       s->out2in.addr.as_u32,
265                                       s->in2out.protocol,
266                                       s->in2out.port,
267                                       s->out2in.port,
268                                       s->in2out.fib_index);
269   return s;
270 }
271
272 static_always_inline
273 snat_out2in_error_t icmp_get_key(ip4_header_t *ip0,
274                                  snat_session_key_t *p_key0)
275 {
276   icmp46_header_t *icmp0;
277   snat_session_key_t key0;
278   icmp_echo_header_t *echo0, *inner_echo0 = 0;
279   ip4_header_t *inner_ip0;
280   void *l4_header = 0;
281   icmp46_header_t *inner_icmp0;
282
283   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
284   echo0 = (icmp_echo_header_t *)(icmp0+1);
285
286   if (!icmp_is_error_message (icmp0))
287     {
288       key0.protocol = SNAT_PROTOCOL_ICMP;
289       key0.addr = ip0->dst_address;
290       key0.port = echo0->identifier;
291     }
292   else
293     {
294       inner_ip0 = (ip4_header_t *)(echo0+1);
295       l4_header = ip4_next_header (inner_ip0);
296       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
297       key0.addr = inner_ip0->src_address;
298       switch (key0.protocol)
299         {
300         case SNAT_PROTOCOL_ICMP:
301           inner_icmp0 = (icmp46_header_t*)l4_header;
302           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
303           key0.port = inner_echo0->identifier;
304           break;
305         case SNAT_PROTOCOL_UDP:
306         case SNAT_PROTOCOL_TCP:
307           key0.port = ((tcp_udp_header_t*)l4_header)->src_port;
308           break;
309         default:
310           return SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL;
311         }
312     }
313   *p_key0 = key0;
314   return -1; /* success */
315 }
316
317 /**
318  * Get address and port values to be used for ICMP packet translation
319  * and create session if needed
320  *
321  * @param[in,out] sm             NAT main
322  * @param[in,out] node           NAT node runtime
323  * @param[in] thread_index       thread index
324  * @param[in,out] b0             buffer containing packet to be translated
325  * @param[out] p_proto           protocol used for matching
326  * @param[out] p_value           address and port after NAT translation
327  * @param[out] p_dont_translate  if packet should not be translated
328  * @param d                      optional parameter
329  * @param e                      optional parameter
330  */
331 u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
332                            u32 thread_index, vlib_buffer_t *b0,
333                            ip4_header_t *ip0, u8 *p_proto,
334                            snat_session_key_t *p_value,
335                            u8 *p_dont_translate, void *d, void *e)
336 {
337   icmp46_header_t *icmp0;
338   u32 sw_if_index0;
339   u32 rx_fib_index0;
340   snat_session_key_t key0;
341   snat_session_key_t sm0;
342   snat_session_t *s0 = 0;
343   u8 dont_translate = 0;
344   clib_bihash_kv_8_8_t kv0, value0;
345   u8 is_addr_only;
346   u32 next0 = ~0;
347   int err;
348
349   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
350   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
351   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
352
353   key0.protocol = 0;
354
355   err = icmp_get_key (ip0, &key0);
356   if (err != -1)
357     {
358       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
359       next0 = SNAT_OUT2IN_NEXT_DROP;
360       goto out;
361     }
362   key0.fib_index = rx_fib_index0;
363
364   kv0.key = key0.as_u64;
365
366   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
367                               &value0))
368     {
369       /* Try to match static mapping by external address and port,
370          destination address and port in packet */
371       if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0, 0, 0))
372         {
373           if (!sm->forwarding_enabled)
374             {
375               /* Don't NAT packet aimed at the intfc address */
376               if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
377                                                   ip0->dst_address.as_u32)))
378                 {
379                   dont_translate = 1;
380                   goto out;
381                 }
382               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
383               next0 = SNAT_OUT2IN_NEXT_DROP;
384               goto out;
385             }
386           else
387             {
388               dont_translate = 1;
389               goto out;
390             }
391         }
392
393       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
394                         (icmp0->type != ICMP4_echo_request || !is_addr_only)))
395         {
396           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
397           next0 = SNAT_OUT2IN_NEXT_DROP;
398           goto out;
399         }
400
401       /* Create session initiated by host from external network */
402       s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
403                                              node, thread_index,
404                                              vlib_time_now (sm->vlib_main));
405
406       if (!s0)
407         {
408           next0 = SNAT_OUT2IN_NEXT_DROP;
409           goto out;
410         }
411     }
412   else
413     {
414       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
415                         icmp0->type != ICMP4_echo_request &&
416                         !icmp_is_error_message (icmp0)))
417         {
418           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
419           next0 = SNAT_OUT2IN_NEXT_DROP;
420           goto out;
421         }
422
423       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
424                               value0.value);
425     }
426
427 out:
428   *p_proto = key0.protocol;
429   if (s0)
430     *p_value = s0->in2out;
431   *p_dont_translate = dont_translate;
432   if (d)
433     *(snat_session_t**)d = s0;
434   return next0;
435 }
436
437 /**
438  * Get address and port values to be used for ICMP packet translation
439  *
440  * @param[in] sm                 NAT main
441  * @param[in,out] node           NAT node runtime
442  * @param[in] thread_index       thread index
443  * @param[in,out] b0             buffer containing packet to be translated
444  * @param[out] p_proto           protocol used for matching
445  * @param[out] p_value           address and port after NAT translation
446  * @param[out] p_dont_translate  if packet should not be translated
447  * @param d                      optional parameter
448  * @param e                      optional parameter
449  */
450 u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
451                            u32 thread_index, vlib_buffer_t *b0,
452                            ip4_header_t *ip0, u8 *p_proto,
453                            snat_session_key_t *p_value,
454                            u8 *p_dont_translate, void *d, void *e)
455 {
456   icmp46_header_t *icmp0;
457   u32 sw_if_index0;
458   u32 rx_fib_index0;
459   snat_session_key_t key0;
460   snat_session_key_t sm0;
461   u8 dont_translate = 0;
462   u8 is_addr_only;
463   u32 next0 = ~0;
464   int err;
465
466   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
467   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
468   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
469
470   err = icmp_get_key (ip0, &key0);
471   if (err != -1)
472     {
473       b0->error = node->errors[err];
474       next0 = SNAT_OUT2IN_NEXT_DROP;
475       goto out2;
476     }
477   key0.fib_index = rx_fib_index0;
478
479   if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0, 0, 0))
480     {
481       /* Don't NAT packet aimed at the intfc address */
482       if (is_interface_addr(sm, node, sw_if_index0, ip0->dst_address.as_u32))
483         {
484           dont_translate = 1;
485           goto out;
486         }
487       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
488       next0 = SNAT_OUT2IN_NEXT_DROP;
489       goto out;
490     }
491
492   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
493                     (icmp0->type != ICMP4_echo_request || !is_addr_only) &&
494                     !icmp_is_error_message (icmp0)))
495     {
496       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
497       next0 = SNAT_OUT2IN_NEXT_DROP;
498       goto out;
499     }
500
501 out:
502   *p_value = sm0;
503 out2:
504   *p_proto = key0.protocol;
505   *p_dont_translate = dont_translate;
506   return next0;
507 }
508
509 static inline u32 icmp_out2in (snat_main_t *sm,
510                                vlib_buffer_t * b0,
511                                ip4_header_t * ip0,
512                                icmp46_header_t * icmp0,
513                                u32 sw_if_index0,
514                                u32 rx_fib_index0,
515                                vlib_node_runtime_t * node,
516                                u32 next0,
517                                u32 thread_index,
518                                void *d,
519                                void *e)
520 {
521   snat_session_key_t sm0;
522   u8 protocol;
523   icmp_echo_header_t *echo0, *inner_echo0 = 0;
524   ip4_header_t *inner_ip0 = 0;
525   void *l4_header = 0;
526   icmp46_header_t *inner_icmp0;
527   u8 dont_translate;
528   u32 new_addr0, old_addr0;
529   u16 old_id0, new_id0;
530   ip_csum_t sum0;
531   u16 checksum0;
532   u32 next0_tmp;
533
534   echo0 = (icmp_echo_header_t *)(icmp0+1);
535
536   next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0, ip0,
537                                        &protocol, &sm0, &dont_translate, d, e);
538   if (next0_tmp != ~0)
539     next0 = next0_tmp;
540   if (next0 == SNAT_OUT2IN_NEXT_DROP || dont_translate)
541     goto out;
542
543   if (PREDICT_TRUE (!ip4_is_fragment (ip0)))
544     {
545       sum0 = ip_incremental_checksum (0, icmp0,
546                                       ntohs(ip0->length) - ip4_header_bytes (ip0));
547       checksum0 = ~ip_csum_fold (sum0);
548       if (checksum0 != 0 && checksum0 != 0xffff)
549         {
550           next0 = SNAT_OUT2IN_NEXT_DROP;
551           goto out;
552         }
553     }
554
555   old_addr0 = ip0->dst_address.as_u32;
556   new_addr0 = ip0->dst_address.as_u32 = sm0.addr.as_u32;
557   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
558
559   sum0 = ip0->checksum;
560   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
561                          dst_address /* changed member */);
562   ip0->checksum = ip_csum_fold (sum0);
563
564   if (icmp0->checksum == 0)
565     icmp0->checksum = 0xffff;
566
567   if (!icmp_is_error_message (icmp0))
568     {
569       new_id0 = sm0.port;
570       if (PREDICT_FALSE(new_id0 != echo0->identifier))
571         {
572           old_id0 = echo0->identifier;
573           new_id0 = sm0.port;
574           echo0->identifier = new_id0;
575
576           sum0 = icmp0->checksum;
577           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
578                                  identifier /* changed member */);
579           icmp0->checksum = ip_csum_fold (sum0);
580         }
581     }
582   else
583     {
584       inner_ip0 = (ip4_header_t *)(echo0+1);
585       l4_header = ip4_next_header (inner_ip0);
586
587       if (!ip4_header_checksum_is_valid (inner_ip0))
588         {
589           next0 = SNAT_OUT2IN_NEXT_DROP;
590           goto out;
591         }
592
593       old_addr0 = inner_ip0->src_address.as_u32;
594       inner_ip0->src_address = sm0.addr;
595       new_addr0 = inner_ip0->src_address.as_u32;
596
597       sum0 = icmp0->checksum;
598       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
599                              src_address /* changed member */);
600       icmp0->checksum = ip_csum_fold (sum0);
601
602       switch (protocol)
603         {
604         case SNAT_PROTOCOL_ICMP:
605           inner_icmp0 = (icmp46_header_t*)l4_header;
606           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
607
608           old_id0 = inner_echo0->identifier;
609           new_id0 = sm0.port;
610           inner_echo0->identifier = new_id0;
611
612           sum0 = icmp0->checksum;
613           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
614                                  identifier);
615           icmp0->checksum = ip_csum_fold (sum0);
616           break;
617         case SNAT_PROTOCOL_UDP:
618         case SNAT_PROTOCOL_TCP:
619           old_id0 = ((tcp_udp_header_t*)l4_header)->src_port;
620           new_id0 = sm0.port;
621           ((tcp_udp_header_t*)l4_header)->src_port = new_id0;
622
623           sum0 = icmp0->checksum;
624           sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
625                                  src_port);
626           icmp0->checksum = ip_csum_fold (sum0);
627           break;
628         default:
629           ASSERT(0);
630         }
631     }
632
633 out:
634   return next0;
635 }
636
637
638 static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
639                                          vlib_buffer_t * b0,
640                                          ip4_header_t * ip0,
641                                          icmp46_header_t * icmp0,
642                                          u32 sw_if_index0,
643                                          u32 rx_fib_index0,
644                                          vlib_node_runtime_t * node,
645                                          u32 next0, f64 now,
646                                          u32 thread_index,
647                                          snat_session_t ** p_s0)
648 {
649   next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
650                       next0, thread_index, p_s0, 0);
651   snat_session_t * s0 = *p_s0;
652   if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
653     {
654       /* Accounting */
655       nat44_session_update_counters (s0, now,
656                                      vlib_buffer_length_in_chain (sm->vlib_main, b0));
657       /* Per-user LRU list maintenance */
658       nat44_session_update_lru (sm, s0, thread_index);
659     }
660   return next0;
661 }
662
663 static int
664 nat_out2in_sm_unknown_proto (snat_main_t *sm,
665                              vlib_buffer_t * b,
666                              ip4_header_t * ip,
667                              u32 rx_fib_index)
668 {
669   clib_bihash_kv_8_8_t kv, value;
670   snat_static_mapping_t *m;
671   snat_session_key_t m_key;
672   u32 old_addr, new_addr;
673   ip_csum_t sum;
674
675   m_key.addr = ip->dst_address;
676   m_key.port = 0;
677   m_key.protocol = 0;
678   m_key.fib_index = 0;
679   kv.key = m_key.as_u64;
680   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
681     return 1;
682
683   m = pool_elt_at_index (sm->static_mappings, value.value);
684
685   old_addr = ip->dst_address.as_u32;
686   new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
687   sum = ip->checksum;
688   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
689   ip->checksum = ip_csum_fold (sum);
690
691   vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
692   return 0;
693 }
694
695 static uword
696 snat_out2in_node_fn (vlib_main_t * vm,
697                   vlib_node_runtime_t * node,
698                   vlib_frame_t * frame)
699 {
700   u32 n_left_from, * from, * to_next;
701   snat_out2in_next_t next_index;
702   u32 pkts_processed = 0;
703   snat_main_t * sm = &snat_main;
704   f64 now = vlib_time_now (vm);
705   u32 thread_index = vm->thread_index;
706
707   from = vlib_frame_vector_args (frame);
708   n_left_from = frame->n_vectors;
709   next_index = node->cached_next_index;
710
711   while (n_left_from > 0)
712     {
713       u32 n_left_to_next;
714
715       vlib_get_next_frame (vm, node, next_index,
716                            to_next, n_left_to_next);
717
718       while (n_left_from >= 4 && n_left_to_next >= 2)
719         {
720           u32 bi0, bi1;
721           vlib_buffer_t * b0, * b1;
722           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
723           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
724           u32 sw_if_index0, sw_if_index1;
725           ip4_header_t * ip0, *ip1;
726           ip_csum_t sum0, sum1;
727           u32 new_addr0, old_addr0;
728           u16 new_port0, old_port0;
729           u32 new_addr1, old_addr1;
730           u16 new_port1, old_port1;
731           udp_header_t * udp0, * udp1;
732           tcp_header_t * tcp0, * tcp1;
733           icmp46_header_t * icmp0, * icmp1;
734           snat_session_key_t key0, key1, sm0, sm1;
735           u32 rx_fib_index0, rx_fib_index1;
736           u32 proto0, proto1;
737           snat_session_t * s0 = 0, * s1 = 0;
738           clib_bihash_kv_8_8_t kv0, kv1, value0, value1;
739
740           /* Prefetch next iteration. */
741           {
742             vlib_buffer_t * p2, * p3;
743
744             p2 = vlib_get_buffer (vm, from[2]);
745             p3 = vlib_get_buffer (vm, from[3]);
746
747             vlib_prefetch_buffer_header (p2, LOAD);
748             vlib_prefetch_buffer_header (p3, LOAD);
749
750             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
751             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
752           }
753
754           /* speculatively enqueue b0 and b1 to the current next frame */
755           to_next[0] = bi0 = from[0];
756           to_next[1] = bi1 = from[1];
757           from += 2;
758           to_next += 2;
759           n_left_from -= 2;
760           n_left_to_next -= 2;
761
762           b0 = vlib_get_buffer (vm, bi0);
763           b1 = vlib_get_buffer (vm, bi1);
764
765           vnet_buffer (b0)->snat.flags = 0;
766           vnet_buffer (b1)->snat.flags = 0;
767
768           ip0 = vlib_buffer_get_current (b0);
769           udp0 = ip4_next_header (ip0);
770           tcp0 = (tcp_header_t *) udp0;
771           icmp0 = (icmp46_header_t *) udp0;
772
773           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
774           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
775                                    sw_if_index0);
776
777           if (PREDICT_FALSE(ip0->ttl == 1))
778             {
779               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
780               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
781                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
782                                            0);
783               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
784               goto trace0;
785             }
786
787           proto0 = ip_proto_to_snat_proto (ip0->protocol);
788
789           if (PREDICT_FALSE (proto0 == ~0))
790             {
791               if (nat_out2in_sm_unknown_proto(sm, b0, ip0, rx_fib_index0))
792                 {
793                   if (!sm->forwarding_enabled)
794                     {
795                       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
796                       next0 = SNAT_OUT2IN_NEXT_DROP;
797                     }
798                 }
799               goto trace0;
800             }
801
802           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
803             {
804               next0 = icmp_out2in_slow_path
805                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
806                  next0, now, thread_index, &s0);
807               goto trace0;
808             }
809
810           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
811             {
812               next0 = SNAT_OUT2IN_NEXT_REASS;
813               goto trace0;
814             }
815
816           key0.addr = ip0->dst_address;
817           key0.port = udp0->dst_port;
818           key0.protocol = proto0;
819           key0.fib_index = rx_fib_index0;
820
821           kv0.key = key0.as_u64;
822
823           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
824                                       &kv0, &value0))
825             {
826               /* Try to match static mapping by external address and port,
827                  destination address and port in packet */
828               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
829                 {
830                   /*
831                    * Send DHCP packets to the ipv4 stack, or we won't
832                    * be able to use dhcp client on the outside interface
833                    */
834                   if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
835                       && (udp0->dst_port ==
836                           clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
837                     {
838                       vnet_feature_next (&next0, b0);
839                       goto trace0;
840                     }
841
842                   if (!sm->forwarding_enabled)
843                     {
844                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
845                       next0 = SNAT_OUT2IN_NEXT_DROP;
846                     }
847                   goto trace0;
848                 }
849
850               /* Create session initiated by host from external network */
851               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
852                                                      thread_index, now);
853               if (!s0)
854                 {
855                   next0 = SNAT_OUT2IN_NEXT_DROP;
856                   goto trace0;
857                 }
858             }
859           else
860             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
861                                     value0.value);
862
863           old_addr0 = ip0->dst_address.as_u32;
864           ip0->dst_address = s0->in2out.addr;
865           new_addr0 = ip0->dst_address.as_u32;
866           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
867
868           sum0 = ip0->checksum;
869           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
870                                  ip4_header_t,
871                                  dst_address /* changed member */);
872           ip0->checksum = ip_csum_fold (sum0);
873
874           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
875             {
876               old_port0 = tcp0->dst_port;
877               tcp0->dst_port = s0->in2out.port;
878               new_port0 = tcp0->dst_port;
879
880               sum0 = tcp0->checksum;
881               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
882                                      ip4_header_t,
883                                      dst_address /* changed member */);
884
885               sum0 = ip_csum_update (sum0, old_port0, new_port0,
886                                      ip4_header_t /* cheat */,
887                                      length /* changed member */);
888               tcp0->checksum = ip_csum_fold(sum0);
889             }
890           else
891             {
892               old_port0 = udp0->dst_port;
893               udp0->dst_port = s0->in2out.port;
894               udp0->checksum = 0;
895             }
896
897           /* Accounting */
898           nat44_session_update_counters (s0, now,
899                                          vlib_buffer_length_in_chain (vm, b0));
900           /* Per-user LRU list maintenance */
901           nat44_session_update_lru (sm, s0, thread_index);
902         trace0:
903
904           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
905                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
906             {
907               snat_out2in_trace_t *t =
908                  vlib_add_trace (vm, node, b0, sizeof (*t));
909               t->sw_if_index = sw_if_index0;
910               t->next_index = next0;
911               t->session_index = ~0;
912               if (s0)
913                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
914             }
915
916           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
917
918
919           ip1 = vlib_buffer_get_current (b1);
920           udp1 = ip4_next_header (ip1);
921           tcp1 = (tcp_header_t *) udp1;
922           icmp1 = (icmp46_header_t *) udp1;
923
924           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
925           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
926                                    sw_if_index1);
927
928           if (PREDICT_FALSE(ip1->ttl == 1))
929             {
930               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
931               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
932                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
933                                            0);
934               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
935               goto trace1;
936             }
937
938           proto1 = ip_proto_to_snat_proto (ip1->protocol);
939
940           if (PREDICT_FALSE (proto1 == ~0))
941             {
942               if (nat_out2in_sm_unknown_proto(sm, b1, ip1, rx_fib_index1))
943                 {
944                   if (!sm->forwarding_enabled)
945                     {
946                       b1->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
947                       next1 = SNAT_OUT2IN_NEXT_DROP;
948                     }
949                 }
950               goto trace1;
951             }
952
953           if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
954             {
955               next1 = icmp_out2in_slow_path
956                 (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
957                  next1, now, thread_index, &s1);
958               goto trace1;
959             }
960
961           if (PREDICT_FALSE (ip4_is_fragment (ip1)))
962             {
963               next1 = SNAT_OUT2IN_NEXT_REASS;
964               goto trace1;
965             }
966
967           key1.addr = ip1->dst_address;
968           key1.port = udp1->dst_port;
969           key1.protocol = proto1;
970           key1.fib_index = rx_fib_index1;
971
972           kv1.key = key1.as_u64;
973
974           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
975                                       &kv1, &value1))
976             {
977               /* Try to match static mapping by external address and port,
978                  destination address and port in packet */
979               if (snat_static_mapping_match(sm, key1, &sm1, 1, 0, 0, 0, 0))
980                 {
981                   /*
982                    * Send DHCP packets to the ipv4 stack, or we won't
983                    * be able to use dhcp client on the outside interface
984                    */
985                   if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_UDP
986                       && (udp1->dst_port ==
987                           clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
988                     {
989                       vnet_feature_next (&next1, b1);
990                       goto trace1;
991                     }
992
993                   if (!sm->forwarding_enabled)
994                     {
995                       b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
996                       next1 = SNAT_OUT2IN_NEXT_DROP;
997                     }
998                   goto trace1;
999                 }
1000
1001               /* Create session initiated by host from external network */
1002               s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
1003                                                      thread_index, now);
1004               if (!s1)
1005                 {
1006                   next1 = SNAT_OUT2IN_NEXT_DROP;
1007                   goto trace1;
1008                 }
1009             }
1010           else
1011             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1012                                     value1.value);
1013
1014           old_addr1 = ip1->dst_address.as_u32;
1015           ip1->dst_address = s1->in2out.addr;
1016           new_addr1 = ip1->dst_address.as_u32;
1017           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index;
1018
1019           sum1 = ip1->checksum;
1020           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1021                                  ip4_header_t,
1022                                  dst_address /* changed member */);
1023           ip1->checksum = ip_csum_fold (sum1);
1024
1025           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1026             {
1027               old_port1 = tcp1->dst_port;
1028               tcp1->dst_port = s1->in2out.port;
1029               new_port1 = tcp1->dst_port;
1030
1031               sum1 = tcp1->checksum;
1032               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1033                                      ip4_header_t,
1034                                      dst_address /* changed member */);
1035
1036               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1037                                      ip4_header_t /* cheat */,
1038                                      length /* changed member */);
1039               tcp1->checksum = ip_csum_fold(sum1);
1040             }
1041           else
1042             {
1043               old_port1 = udp1->dst_port;
1044               udp1->dst_port = s1->in2out.port;
1045               udp1->checksum = 0;
1046             }
1047
1048           /* Accounting */
1049           nat44_session_update_counters (s1, now,
1050                                          vlib_buffer_length_in_chain (vm, b1));
1051           /* Per-user LRU list maintenance */
1052           nat44_session_update_lru (sm, s1, thread_index);
1053         trace1:
1054
1055           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1056                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1057             {
1058               snat_out2in_trace_t *t =
1059                  vlib_add_trace (vm, node, b1, sizeof (*t));
1060               t->sw_if_index = sw_if_index1;
1061               t->next_index = next1;
1062               t->session_index = ~0;
1063               if (s1)
1064                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1065             }
1066
1067           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
1068
1069           /* verify speculative enqueues, maybe switch current next frame */
1070           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1071                                            to_next, n_left_to_next,
1072                                            bi0, bi1, next0, next1);
1073         }
1074
1075       while (n_left_from > 0 && n_left_to_next > 0)
1076         {
1077           u32 bi0;
1078           vlib_buffer_t * b0;
1079           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1080           u32 sw_if_index0;
1081           ip4_header_t * ip0;
1082           ip_csum_t sum0;
1083           u32 new_addr0, old_addr0;
1084           u16 new_port0, old_port0;
1085           udp_header_t * udp0;
1086           tcp_header_t * tcp0;
1087           icmp46_header_t * icmp0;
1088           snat_session_key_t key0, sm0;
1089           u32 rx_fib_index0;
1090           u32 proto0;
1091           snat_session_t * s0 = 0;
1092           clib_bihash_kv_8_8_t kv0, value0;
1093
1094           /* speculatively enqueue b0 to the current next frame */
1095           bi0 = from[0];
1096           to_next[0] = bi0;
1097           from += 1;
1098           to_next += 1;
1099           n_left_from -= 1;
1100           n_left_to_next -= 1;
1101
1102           b0 = vlib_get_buffer (vm, bi0);
1103
1104           vnet_buffer (b0)->snat.flags = 0;
1105
1106           ip0 = vlib_buffer_get_current (b0);
1107           udp0 = ip4_next_header (ip0);
1108           tcp0 = (tcp_header_t *) udp0;
1109           icmp0 = (icmp46_header_t *) udp0;
1110
1111           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1112           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1113                                    sw_if_index0);
1114
1115           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1116
1117           if (PREDICT_FALSE (proto0 == ~0))
1118             {
1119               if (nat_out2in_sm_unknown_proto(sm, b0, ip0, rx_fib_index0))
1120                 {
1121                   if (!sm->forwarding_enabled)
1122                     {
1123                       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
1124                       next0 = SNAT_OUT2IN_NEXT_DROP;
1125                     }
1126                 }
1127               goto trace00;
1128             }
1129
1130           if (PREDICT_FALSE(ip0->ttl == 1))
1131             {
1132               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1133               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1134                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1135                                            0);
1136               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1137               goto trace00;
1138             }
1139
1140           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1141             {
1142               next0 = icmp_out2in_slow_path
1143                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1144                  next0, now, thread_index, &s0);
1145               goto trace00;
1146             }
1147
1148           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
1149             {
1150               next0 = SNAT_OUT2IN_NEXT_REASS;
1151               goto trace00;
1152             }
1153
1154           key0.addr = ip0->dst_address;
1155           key0.port = udp0->dst_port;
1156           key0.protocol = proto0;
1157           key0.fib_index = rx_fib_index0;
1158
1159           kv0.key = key0.as_u64;
1160
1161           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1162                                       &kv0, &value0))
1163             {
1164               /* Try to match static mapping by external address and port,
1165                  destination address and port in packet */
1166               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
1167                 {
1168                   /*
1169                    * Send DHCP packets to the ipv4 stack, or we won't
1170                    * be able to use dhcp client on the outside interface
1171                    */
1172                   if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
1173                       && (udp0->dst_port ==
1174                           clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
1175                     {
1176                       vnet_feature_next (&next0, b0);
1177                       goto trace00;
1178                     }
1179
1180                   if (!sm->forwarding_enabled)
1181                     {
1182                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1183                       next0 = SNAT_OUT2IN_NEXT_DROP;
1184                     }
1185                   goto trace00;
1186                 }
1187
1188               /* Create session initiated by host from external network */
1189               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1190                                                      thread_index, now);
1191               if (!s0)
1192                 {
1193                   next0 = SNAT_OUT2IN_NEXT_DROP;
1194                   goto trace00;
1195                 }
1196             }
1197           else
1198             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1199                                     value0.value);
1200
1201           old_addr0 = ip0->dst_address.as_u32;
1202           ip0->dst_address = s0->in2out.addr;
1203           new_addr0 = ip0->dst_address.as_u32;
1204           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1205
1206           sum0 = ip0->checksum;
1207           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1208                                  ip4_header_t,
1209                                  dst_address /* changed member */);
1210           ip0->checksum = ip_csum_fold (sum0);
1211
1212           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1213             {
1214               old_port0 = tcp0->dst_port;
1215               tcp0->dst_port = s0->in2out.port;
1216               new_port0 = tcp0->dst_port;
1217
1218               sum0 = tcp0->checksum;
1219               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1220                                      ip4_header_t,
1221                                      dst_address /* changed member */);
1222
1223               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1224                                      ip4_header_t /* cheat */,
1225                                      length /* changed member */);
1226               tcp0->checksum = ip_csum_fold(sum0);
1227             }
1228           else
1229             {
1230               old_port0 = udp0->dst_port;
1231               udp0->dst_port = s0->in2out.port;
1232               udp0->checksum = 0;
1233             }
1234
1235           /* Accounting */
1236           nat44_session_update_counters (s0, now,
1237                                          vlib_buffer_length_in_chain (vm, b0));
1238           /* Per-user LRU list maintenance */
1239           nat44_session_update_lru (sm, s0, thread_index);
1240         trace00:
1241
1242           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1243                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1244             {
1245               snat_out2in_trace_t *t =
1246                  vlib_add_trace (vm, node, b0, sizeof (*t));
1247               t->sw_if_index = sw_if_index0;
1248               t->next_index = next0;
1249               t->session_index = ~0;
1250               if (s0)
1251                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1252             }
1253
1254           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1255
1256           /* verify speculative enqueue, maybe switch current next frame */
1257           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1258                                            to_next, n_left_to_next,
1259                                            bi0, next0);
1260         }
1261
1262       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1263     }
1264
1265   vlib_node_increment_counter (vm, snat_out2in_node.index,
1266                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1267                                pkts_processed);
1268   return frame->n_vectors;
1269 }
1270
/*
 * Graph node registration for nat44-out2in.
 *
 * Binds snat_out2in_node_fn as the node function, uses
 * format_snat_out2in_trace for packet traces, shares the
 * snat_out2in_error_strings table for its error counters and reserves
 * sizeof (snat_runtime_t) bytes of per-node runtime data.
 * The next_nodes table maps every SNAT_OUT2IN_NEXT_* disposition used by the
 * node function to the name of the graph node that receives the buffer.
 */
1271 VLIB_REGISTER_NODE (snat_out2in_node) = {
1272   .function = snat_out2in_node_fn,
1273   .name = "nat44-out2in",
1274   .vector_size = sizeof (u32),
1275   .format_trace = format_snat_out2in_trace,
1276   .type = VLIB_NODE_TYPE_INTERNAL,
1277
1278   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1279   .error_strings = snat_out2in_error_strings,
1280
1281   .runtime_data_bytes = sizeof (snat_runtime_t),
1282
1283   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1284
1285   /* edit / add dispositions here */
1286   .next_nodes = {
1287     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1288     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1289     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1290     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1291   },
1292 };
1293 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
1294
/*
 * Node function for nat44-out2in-reass: out2in translation of IPv4
 * fragments.
 *
 * For each buffer in the frame:
 *   - a reassembly context is obtained from nat_ip4_reass_find_or_create()
 *     using the source address, destination address, fragment id and
 *     protocol of the IP header;
 *   - a first fragment resolves the NAT session (existing out2in hash entry,
 *     or a new session created from a matching static mapping), stores the
 *     session index in the reassembly context and asks
 *     nat_ip4_reass_get_frags() for fragments to loop back;
 *   - a non-first fragment reuses the session index stored in the context;
 *     when no session index is stored yet the buffer is handed to
 *     nat_ip4_reass_add_fragment() and is not enqueued to any next node;
 *   - the destination address is rewritten on every translated fragment,
 *     the destination port and L4 checksum only on the first fragment.
 *
 * When the input is exhausted and fragments_to_loopback is not empty, the
 * frame vector is refilled from it and the loop continues.
 *
 * vm    - vlib main of the calling thread
 * node  - runtime data of this node (error counters, trace flags)
 * frame - frame holding the buffer indices to process
 *
 * Returns frame->n_vectors.
 */
1295 static uword
1296 nat44_out2in_reass_node_fn (vlib_main_t * vm,
1297                             vlib_node_runtime_t * node,
1298                             vlib_frame_t * frame)
1299 {
1300   u32 n_left_from, *from, *to_next;
1301   snat_out2in_next_t next_index;
1302   u32 pkts_processed = 0;
1303   snat_main_t *sm = &snat_main;
1304   f64 now = vlib_time_now (vm);
1305   u32 thread_index = vm->thread_index;
1306   snat_main_per_thread_data_t *per_thread_data =
1307     &sm->per_thread_data[thread_index];
       /* Buffer indices collected during the loop: the first vector is sent
          to the drop node after the loop, the second is re-injected as input
          once the current input runs out. */
1308   u32 *fragments_to_drop = 0;
1309   u32 *fragments_to_loopback = 0;
1310
1311   from = vlib_frame_vector_args (frame);
1312   n_left_from = frame->n_vectors;
1313   next_index = node->cached_next_index;
1314
1315   while (n_left_from > 0)
1316     {
1317       u32 n_left_to_next;
1318
1319       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1320
1321       while (n_left_from > 0 && n_left_to_next > 0)
1322        {
1323           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
1324           vlib_buffer_t *b0;
1325           u32 next0;
               /* Set when the buffer is kept by the reassembly context
                  instead of being forwarded. */
1326           u8 cached0 = 0;
1327           ip4_header_t *ip0;
1328           nat_reass_ip4_t *reass0;
1329           udp_header_t * udp0;
1330           tcp_header_t * tcp0;
1331           snat_session_key_t key0, sm0;
1332           clib_bihash_kv_8_8_t kv0, value0;
1333           snat_session_t * s0 = 0;
1334           u16 old_port0, new_port0;
1335           ip_csum_t sum0;
1336
1337           /* speculatively enqueue b0 to the current next frame */
1338           bi0 = from[0];
1339           to_next[0] = bi0;
1340           from += 1;
1341           to_next += 1;
1342           n_left_from -= 1;
1343           n_left_to_next -= 1;
1344
1345           b0 = vlib_get_buffer (vm, bi0);
1346           next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1347
1348           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1349           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1350                                                                sw_if_index0);
1351
               /* NOTE(review): the literal 0 presumably selects the IPv4
                  (not IPv6) drop-fragment setting - confirm against
                  nat_reass.h. */
1352           if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
1353             {
1354               next0 = SNAT_OUT2IN_NEXT_DROP;
1355               b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
1356               goto trace0;
1357             }
1358
1359           ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
1360           udp0 = ip4_next_header (ip0);
1361           tcp0 = (tcp_header_t *) udp0;
1362           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1363
               /* NOTE(review): the literal 1 is presumably a create-if-missing
                  flag - confirm against nat_reass.h. */
1364           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
1365                                                  ip0->dst_address,
1366                                                  ip0->fragment_id,
1367                                                  ip0->protocol,
1368                                                  1,
1369                                                  &fragments_to_drop);
1370
1371           if (PREDICT_FALSE (!reass0))
1372             {
1373               next0 = SNAT_OUT2IN_NEXT_DROP;
1374               b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_REASS];
1375               nat_log_notice ("maximum reassemblies exceeded");
1376               goto trace0;
1377             }
1378
               /* First fragment: resolve the session from the destination
                  address and port read at the L4 header position. */
1379           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1380             {
1381               key0.addr = ip0->dst_address;
1382               key0.port = udp0->dst_port;
1383               key0.protocol = proto0;
1384               key0.fib_index = rx_fib_index0;
1385               kv0.key = key0.as_u64;
1386
                   /* Non-zero return: no entry in the per-thread out2in
                      table for this key. */
1387               if (clib_bihash_search_8_8 (&per_thread_data->out2in, &kv0, &value0))
1388                 {
1389                   /* Try to match static mapping by external address and port,
1390                      destination address and port in packet */
1391                   if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
1392                     {
1393                       /*
1394                        * Send DHCP packets to the ipv4 stack, or we won't
1395                        * be able to use dhcp client on the outside interface
1396                        */
1397                       if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
1398                           && (udp0->dst_port
1399                               == clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
1400                         {
1401                           vnet_feature_next (&next0, b0);
1402                           goto trace0;
1403                         }
1404
                           /* No mapping: drop unless forwarding is enabled,
                              in which case next0 stays at LOOKUP and the
                              packet goes on untranslated. */
1405                       if (!sm->forwarding_enabled)
1406                         {
1407                           b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1408                           next0 = SNAT_OUT2IN_NEXT_DROP;
1409                         }
1410                       goto trace0;
1411                     }
1412
1413                   /* Create session initiated by host from external network */
1414                   s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1415                                                          thread_index, now);
1416                   if (!s0)
1417                     {
1418                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1419                       next0 = SNAT_OUT2IN_NEXT_DROP;
1420                       goto trace0;
1421                     }
                       /* Remember the session so later fragments of this
                          datagram can find it without an L4 header. */
1422                   reass0->sess_index = s0 - per_thread_data->sessions;
1423                   reass0->thread_index = thread_index;
1424                 }
1425               else
1426                 {
1427                   s0 = pool_elt_at_index (per_thread_data->sessions,
1428                                           value0.value);
                       /* NOTE(review): reass0->thread_index is not set on
                          this path, unlike the session-creation path above -
                          confirm this is intentional. */
1429                   reass0->sess_index = value0.value;
1430                 }
                   /* Collect fragments held by this reassembly so they are
                      re-processed once the current input is consumed. */
1431               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
1432             }
1433           else
1434             {
                   /* Non-first fragment with no session recorded yet: hand
                      the buffer to the reassembly context. */
1435               if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
1436                 {
1437                   if (nat_ip4_reass_add_fragment (reass0, bi0, &fragments_to_drop))
1438                     {
1439                       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_FRAG];
1440                       nat_log_notice ("maximum fragments per reassembly exceeded");
1441                       next0 = SNAT_OUT2IN_NEXT_DROP;
1442                       goto trace0;
1443                     }
1444                   cached0 = 1;
1445                   goto trace0;
1446                 }
1447               s0 = pool_elt_at_index (per_thread_data->sessions,
1448                                       reass0->sess_index);
1449             }
1450
               /* Rewrite the destination address to the inside address of
                  the session and patch the IPv4 header checksum. */
1451           old_addr0 = ip0->dst_address.as_u32;
1452           ip0->dst_address = s0->in2out.addr;
1453           new_addr0 = ip0->dst_address.as_u32;
1454           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1455
1456           sum0 = ip0->checksum;
1457           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1458                                  ip4_header_t,
1459                                  dst_address /* changed member */);
1460           ip0->checksum = ip_csum_fold (sum0);
1461
               /* The destination port and L4 checksum are only touched on
                  the first fragment. */
1462           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1463             {
1464               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1465                 {
1466                   old_port0 = tcp0->dst_port;
1467                   tcp0->dst_port = s0->in2out.port;
1468                   new_port0 = tcp0->dst_port;
1469
1470                   sum0 = tcp0->checksum;
1471                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1472                                          ip4_header_t,
1473                                          dst_address /* changed member */);
1474
1475                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
1476                                          ip4_header_t /* cheat */,
1477                                          length /* changed member */);
1478                   tcp0->checksum = ip_csum_fold(sum0);
1479                 }
1480               else
1481                 {
                       /* The UDP checksum is cleared instead of being
                          incrementally updated. */
1482                   old_port0 = udp0->dst_port;
1483                   udp0->dst_port = s0->in2out.port;
1484                   udp0->checksum = 0;
1485                 }
1486             }
1487
1488           /* Accounting */
1489           nat44_session_update_counters (s0, now,
1490                                          vlib_buffer_length_in_chain (vm, b0));
1491           /* Per-user LRU list maintenance */
1492           nat44_session_update_lru (sm, s0, thread_index);
1493
1494         trace0:
1495           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1496                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1497             {
1498               nat44_out2in_reass_trace_t *t =
1499                  vlib_add_trace (vm, node, b0, sizeof (*t));
1500               t->cached = cached0;
1501               t->sw_if_index = sw_if_index0;
1502               t->next_index = next0;
1503             }
1504
1505           if (cached0)
1506             {
                   /* The buffer stays with the reassembly context: undo the
                      speculative enqueue made at the top of the loop. */
1507               n_left_to_next++;
1508               to_next--;
1509             }
1510           else
1511             {
1512               pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1513
1514               /* verify speculative enqueue, maybe switch current next frame */
1515               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1516                                                to_next, n_left_to_next,
1517                                                bi0, next0);
1518             }
1519
               /* Input exhausted but looped-back fragments are pending:
                  reuse the frame vector as input for another pass.  At most
                  VLIB_FRAME_SIZE indices are copied; when more are pending
                  the last VLIB_FRAME_SIZE entries are taken and the vector
                  is shortened so the rest is picked up on a later pass. */
1520           if (n_left_from == 0 && vec_len (fragments_to_loopback))
1521             {
1522               from = vlib_frame_vector_args (frame);
1523               u32 len = vec_len (fragments_to_loopback);
1524               if (len <= VLIB_FRAME_SIZE)
1525                 {
1526                   clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
1527                   n_left_from = len;
1528                   vec_reset_length (fragments_to_loopback);
1529                 }
1530               else
1531                 {
1532                   clib_memcpy (from,
1533                                fragments_to_loopback + (len - VLIB_FRAME_SIZE),
1534                                sizeof (u32) * VLIB_FRAME_SIZE);
1535                   n_left_from = VLIB_FRAME_SIZE;
1536                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
1537                 }
1538             }
1539        }
1540
1541       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1542     }
1543
1544   vlib_node_increment_counter (vm, nat44_out2in_reass_node.index,
1545                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1546                                pkts_processed);
1547
       /* Pass everything collected in fragments_to_drop to
          nat_send_all_to_node() with the DROP_FRAGMENT error and the DROP
          next index. */
1548   nat_send_all_to_node (vm, fragments_to_drop, node,
1549                         &node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT],
1550                         SNAT_OUT2IN_NEXT_DROP);
1551
1552   vec_free (fragments_to_drop);
1553   vec_free (fragments_to_loopback);
1554   return frame->n_vectors;
1555 }
1556
/*
 * Graph node registration for nat44-out2in-reass.
 *
 * Binds nat44_out2in_reass_node_fn as the node function and
 * format_nat44_out2in_reass_trace as the trace formatter.  The error string
 * table and the SNAT_OUT2IN_NEXT_* next-node table are the same ones used by
 * the nat44-out2in registration; no per-node runtime data is requested here.
 */
1557 VLIB_REGISTER_NODE (nat44_out2in_reass_node) = {
1558   .function = nat44_out2in_reass_node_fn,
1559   .name = "nat44-out2in-reass",
1560   .vector_size = sizeof (u32),
1561   .format_trace = format_nat44_out2in_reass_trace,
1562   .type = VLIB_NODE_TYPE_INTERNAL,
1563
1564   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1565   .error_strings = snat_out2in_error_strings,
1566
1567   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1568
1569   /* edit / add dispositions here */
1570   .next_nodes = {
1571     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1572     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1573     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1574     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1575   },
1576 };
1577 VLIB_NODE_FUNCTION_MULTIARCH (nat44_out2in_reass_node,
1578                               nat44_out2in_reass_node_fn);
1579
1580 /*******************************/
1581 /*** endpoint-dependent mode ***/
1582 /*******************************/
/*
 * Next-node dispositions for the endpoint-dependent out2in path.
 * NAT44_ED_OUT2IN_NEXT_DROP is the first enumerator (value 0) and
 * NAT44_ED_OUT2IN_N_NEXT, being last, equals the number of dispositions.
 * NOTE(review): the node names each value maps to are defined by the
 * endpoint-dependent node registrations, which are outside this excerpt.
 */
1583 typedef enum {
1584   NAT44_ED_OUT2IN_NEXT_DROP,
1585   NAT44_ED_OUT2IN_NEXT_LOOKUP,
1586   NAT44_ED_OUT2IN_NEXT_ICMP_ERROR,
1587   NAT44_ED_OUT2IN_NEXT_IN2OUT,
1588   NAT44_ED_OUT2IN_NEXT_SLOW_PATH,
1589   NAT44_ED_OUT2IN_NEXT_REASS,
1590   NAT44_ED_OUT2IN_N_NEXT,
1591 } nat44_ed_out2in_next_t;
1592
/*
 * Per-packet trace record printed by format_nat44_ed_out2in_trace.
 * is_slow_path selects the tag printed in front of the record (slow path
 * when non-zero, fast path when zero); the other three fields are printed
 * as decimal numbers.
 */
1593 typedef struct {
1594   u32 sw_if_index;
1595   u32 next_index;
1596   u32 session_index;
1597   u32 is_slow_path;
1598 } nat44_ed_out2in_trace_t;
1599
/*
 * Packet trace format function for nat44_ed_out2in_trace_t records.
 *
 * s    - output vector passed to format()
 * args - va_list holding, in order: vlib_main_t * (unused),
 *        vlib_node_t * (unused), nat44_ed_out2in_trace_t *
 *
 * Returns the vector returned by format().
 */
1600 static u8 *
1601 format_nat44_ed_out2in_trace (u8 * s, va_list * args)
1602 {
1603   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1604   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1605   nat44_ed_out2in_trace_t *t = va_arg (*args, nat44_ed_out2in_trace_t *);
1606   char * tag;
1607
       /* The tag tells which of the two paths recorded the trace. */
1608   tag = t->is_slow_path ? "NAT44_OUT2IN_SLOW_PATH" : "NAT44_OUT2IN_FAST_PATH";
1609
1610   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
1611               t->sw_if_index, t->next_index, t->session_index);
1612
1613   return s;
1614 }
1615
/*
 * ICMP handling for the endpoint-dependent out2in path.
 *
 * Delegates the packet to icmp_out2in() and, when the result is not a drop
 * and a session was reported through p_s0, updates the counters of that
 * session with the current time and the buffer chain length.
 *
 * sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, thread_index
 *       - forwarded unchanged to icmp_out2in()
 * next0 - next index on entry; replaced by the value icmp_out2in() returns
 * now   - timestamp passed to nat44_session_update_counters()
 * p_s0  - out parameter filled by icmp_out2in(); read back here
 *
 * Returns the next index chosen by icmp_out2in().
 */
1616 static inline u32
1617 icmp_out2in_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0,
1618                           ip4_header_t * ip0, icmp46_header_t * icmp0,
1619                           u32 sw_if_index0, u32 rx_fib_index0,
1620                           vlib_node_runtime_t * node, u32 next0, f64 now,
1621                           u32 thread_index, snat_session_t ** p_s0)
1622 {
1623   next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1624                       next0, thread_index, p_s0, 0);
1625   snat_session_t * s0 = *p_s0;
       /* NOTE(review): the test uses SNAT_OUT2IN_NEXT_DROP rather than
          NAT44_ED_OUT2IN_NEXT_DROP; presumably both are 0 - confirm. */
1626   if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
1627     {
           /* NOTE(review): only the counters are updated; there is no
              nat44_session_update_lru() call as in the TCP/UDP paths of
              this file - confirm this is intentional. */
1628       /* Accounting */
1629       nat44_session_update_counters (s0, now,
1630                                      vlib_buffer_length_in_chain (sm->vlib_main, b0));
1631     }
1632   return next0;
1633 }
1634
1635 int
1636 nat44_o2i_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void * arg)
1637 {
1638   snat_main_t *sm = &snat_main;
1639   nat44_is_idle_session_ctx_t *ctx = arg;
1640   snat_session_t *s;
1641   u64 sess_timeout_time;
1642   nat_ed_ses_key_t ed_key;
1643   clib_bihash_kv_16_8_t ed_kv;
1644   int i;
1645   snat_address_t *a;
1646   snat_session_key_t key;
1647   snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
1648                                                        ctx->thread_index);
1649
1650   s = pool_elt_at_index (tsm->sessions, kv->value);
1651   sess_timeout_time = s->last_heard + (f64)nat44_session_get_timeout(sm, s);
1652   if (ctx->now >= sess_timeout_time)
1653     {
1654       ed_key.l_addr = s->in2out.addr;
1655       ed_key.r_addr = s->ext_host_addr;
1656       ed_key.fib_index = s->out2in.fib_index;
1657       if (snat_is_unk_proto_session (s))
1658         {
1659           ed_key.proto = s->in2out.port;
1660           ed_key.r_port = 0;
1661           ed_key.l_port = 0;
1662         }
1663       else
1664         {
1665           ed_key.proto = snat_proto_to_ip_proto (s->in2out.protocol);
1666           ed_key.l_port = s->in2out.port;
1667           ed_key.r_port = s->ext_host_port;
1668         }
1669       if (is_twice_nat_session (s))
1670         {
1671           ed_key.r_addr = s->ext_host_nat_addr;
1672           ed_key.r_port = s->ext_host_nat_port;
1673         }
1674       ed_kv.key[0] = ed_key.as_u64[0];
1675       ed_kv.key[1] = ed_key.as_u64[1];
1676       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
1677         nat_log_warn ("in2out_ed key del failed");
1678
1679       if (snat_is_unk_proto_session (s))
1680         goto delete;
1681
1682       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
1683                                           s->out2in.addr.as_u32,
1684                                           s->in2out.protocol,
1685                                           s->in2out.port,
1686                                           s->out2in.port,
1687                                           s->in2out.fib_index);
1688
1689       if (is_twice_nat_session (s))
1690         {
1691           for (i = 0; i < vec_len (sm->twice_nat_addresses); i++)
1692             {
1693               key.protocol = s->in2out.protocol;
1694               key.port = s->ext_host_nat_port;
1695               a = sm->twice_nat_addresses + i;
1696               if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32)
1697                 {
1698                   snat_free_outside_address_and_port (sm->twice_nat_addresses,
1699                                                       ctx->thread_index, &key);
1700                   break;
1701                 }
1702             }
1703         }
1704
1705       if (snat_is_session_static (s))
1706         goto delete;
1707
1708       if (s->outside_address_index != ~0)
1709         snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
1710                                             &s->out2in);
1711     delete:
1712       nat44_delete_session (sm, s, ctx->thread_index);
1713       return 1;
1714     }
1715
1716   return 0;
1717 }
1718
1719 static snat_session_t *
1720 create_session_for_static_mapping_ed (snat_main_t * sm,
1721                                       vlib_buffer_t *b,
1722                                       snat_session_key_t l_key,
1723                                       snat_session_key_t e_key,
1724                                       vlib_node_runtime_t * node,
1725                                       u32 thread_index,
1726                                       twice_nat_type_t twice_nat,
1727                                       lb_nat_type_t lb_nat,
1728                                       f64 now)
1729 {
1730   snat_session_t *s;
1731   snat_user_t *u;
1732   ip4_header_t *ip;
1733   udp_header_t *udp;
1734   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1735   clib_bihash_kv_16_8_t kv;
1736   snat_session_key_t eh_key;
1737   u32 address_index;
1738   nat44_is_idle_session_ctx_t ctx;
1739
1740   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
1741     {
1742       b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
1743       nat_log_notice ("maximum sessions exceeded");
1744       return 0;
1745     }
1746
1747   u = nat_user_get_or_create (sm, &l_key.addr, l_key.fib_index, thread_index);
1748   if (!u)
1749     {
1750       nat_log_warn ("create NAT user failed");
1751       return 0;
1752     }
1753
1754   s = nat_ed_session_alloc (sm, u, thread_index);
1755   if (!s)
1756     {
1757       nat44_delete_user_with_no_session (sm, u, thread_index);
1758       nat_log_warn ("create NAT session failed");
1759       return 0;
1760     }
1761
1762   ip = vlib_buffer_get_current (b);
1763   udp = ip4_next_header (ip);
1764
1765   s->ext_host_addr.as_u32 = ip->src_address.as_u32;
1766   s->ext_host_port = e_key.protocol == SNAT_PROTOCOL_ICMP ? 0 : udp->src_port;
1767   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1768   if (lb_nat)
1769     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
1770   if (lb_nat == AFFINITY_LB_NAT)
1771     s->flags |= SNAT_SESSION_FLAG_AFFINITY;
1772   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
1773   s->outside_address_index = ~0;
1774   s->out2in = e_key;
1775   s->in2out = l_key;
1776   s->in2out.protocol = s->out2in.protocol;
1777   user_session_increment (sm, u, 1);
1778
1779   /* Add to lookup tables */
1780   make_ed_kv (&kv, &e_key.addr, &s->ext_host_addr, ip->protocol,
1781               e_key.fib_index, e_key.port, s->ext_host_port);
1782   kv.value = s - tsm->sessions;
1783   ctx.now = now;
1784   ctx.thread_index = thread_index;
1785   if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->out2in_ed, &kv,
1786                                                nat44_o2i_ed_is_idle_session_cb,
1787                                                &ctx))
1788     nat_log_notice ("out2in-ed key add failed");
1789
1790   if (twice_nat == TWICE_NAT || (twice_nat == TWICE_NAT_SELF &&
1791       ip->src_address.as_u32 == l_key.addr.as_u32))
1792     {
1793       eh_key.protocol = e_key.protocol;
1794       if (snat_alloc_outside_address_and_port (sm->twice_nat_addresses, 0,
1795                                                thread_index, &eh_key,
1796                                                &address_index,
1797                                                sm->port_per_thread,
1798                                                tsm->snat_thread_index))
1799         {
1800           b->error = node->errors[SNAT_OUT2IN_ERROR_OUT_OF_PORTS];
1801           nat44_delete_session (sm, s, thread_index);
1802           if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &kv, 0))
1803             nat_log_notice ("out2in-ed key del failed");
1804           return 0;
1805         }
1806       s->ext_host_nat_addr.as_u32 = eh_key.addr.as_u32;
1807       s->ext_host_nat_port = eh_key.port;
1808       s->flags |= SNAT_SESSION_FLAG_TWICE_NAT;
1809       make_ed_kv (&kv, &l_key.addr, &s->ext_host_nat_addr, ip->protocol,
1810                   l_key.fib_index, l_key.port, s->ext_host_nat_port);
1811     }
1812   else
1813     {
1814       make_ed_kv (&kv, &l_key.addr, &s->ext_host_addr, ip->protocol,
1815                   l_key.fib_index, l_key.port, s->ext_host_port);
1816     }
1817   kv.value = s - tsm->sessions;
1818   if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, &kv,
1819                                                nat44_i2o_ed_is_idle_session_cb,
1820                                                &ctx))
1821     nat_log_notice ("in2out-ed key add failed");
1822
1823   return s;
1824 }
1825
1826 static_always_inline int
1827 icmp_get_ed_key(ip4_header_t *ip0, nat_ed_ses_key_t *p_key0)
1828 {
1829   icmp46_header_t *icmp0;
1830   nat_ed_ses_key_t key0;
1831   icmp_echo_header_t *echo0, *inner_echo0 = 0;
1832   ip4_header_t *inner_ip0;
1833   void *l4_header = 0;
1834   icmp46_header_t *inner_icmp0;
1835
1836   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
1837   echo0 = (icmp_echo_header_t *)(icmp0+1);
1838
1839   if (!icmp_is_error_message (icmp0))
1840     {
1841       key0.proto = IP_PROTOCOL_ICMP;
1842       key0.l_addr = ip0->dst_address;
1843       key0.r_addr = ip0->src_address;
1844       key0.l_port = echo0->identifier;
1845       key0.r_port = 0;
1846     }
1847   else
1848     {
1849       inner_ip0 = (ip4_header_t *)(echo0+1);
1850       l4_header = ip4_next_header (inner_ip0);
1851       key0.proto = inner_ip0->protocol;
1852       key0.l_addr = inner_ip0->src_address;
1853       key0.r_addr = inner_ip0->dst_address;
1854       switch (ip_proto_to_snat_proto (inner_ip0->protocol))
1855         {
1856         case SNAT_PROTOCOL_ICMP:
1857           inner_icmp0 = (icmp46_header_t*)l4_header;
1858           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
1859           key0.l_port = inner_echo0->identifier;
1860           key0.r_port = 0;
1861           break;
1862         case SNAT_PROTOCOL_UDP:
1863         case SNAT_PROTOCOL_TCP:
1864           key0.l_port = ((tcp_udp_header_t*)l4_header)->src_port;
1865           key0.r_port = ((tcp_udp_header_t*)l4_header)->dst_port;
1866           break;
1867         default:
1868           return -1;
1869         }
1870     }
1871   *p_key0 = key0;
1872   return 0;
1873 }
1874
1875 static int
1876 next_src_nat (snat_main_t * sm, ip4_header_t * ip, u8 proto, u16 src_port,
1877               u16 dst_port, u32 thread_index, u32 rx_fib_index)
1878 {
1879   clib_bihash_kv_16_8_t kv, value;
1880   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1881
1882   make_ed_kv (&kv, &ip->src_address, &ip->dst_address, proto,
1883               rx_fib_index, src_port, dst_port);
1884   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
1885     return 1;
1886
1887   return 0;
1888 }
1889
1890 static void
1891 create_bypass_for_fwd(snat_main_t * sm, ip4_header_t * ip, u32 rx_fib_index,
1892                       u32 thread_index)
1893 {
1894   nat_ed_ses_key_t key;
1895   clib_bihash_kv_16_8_t kv, value;
1896   udp_header_t *udp;
1897   snat_user_t *u;
1898   snat_session_t *s = 0;
1899   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1900   f64 now = vlib_time_now (sm->vlib_main);
1901
1902   if (ip->protocol == IP_PROTOCOL_ICMP)
1903     {
1904       if (icmp_get_ed_key (ip, &key))
1905         return;
1906     }
1907   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
1908     {
1909       udp = ip4_next_header(ip);
1910       key.r_addr = ip->src_address;
1911       key.l_addr = ip->dst_address;
1912       key.proto = ip->protocol;
1913       key.l_port = udp->dst_port;
1914       key.r_port = udp->src_port;
1915     }
1916   else
1917     {
1918       key.r_addr = ip->src_address;
1919       key.l_addr = ip->dst_address;
1920       key.proto = ip->protocol;
1921       key.l_port = key.r_port = 0;
1922     }
1923   key.fib_index = 0;
1924   kv.key[0] = key.as_u64[0];
1925   kv.key[1] = key.as_u64[1];
1926
1927   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
1928     {
1929       s = pool_elt_at_index (tsm->sessions, value.value);
1930     }
1931   else
1932     {
1933       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
1934         return;
1935
1936       u = nat_user_get_or_create (sm, &ip->dst_address, sm->inside_fib_index,
1937                                   thread_index);
1938       if (!u)
1939         {
1940           nat_log_warn ("create NAT user failed");
1941           return;
1942         }
1943
1944       s = nat_ed_session_alloc (sm, u, thread_index);
1945       if (!s)
1946         {
1947           nat44_delete_user_with_no_session (sm, u, thread_index);
1948           nat_log_warn ("create NAT session failed");
1949           return;
1950         }
1951
1952       s->ext_host_addr = key.r_addr;
1953       s->ext_host_port = key.r_port;
1954       s->flags |= SNAT_SESSION_FLAG_FWD_BYPASS;
1955       s->outside_address_index = ~0;
1956       s->out2in.addr = key.l_addr;
1957       s->out2in.port = key.l_port;
1958       s->out2in.protocol = ip_proto_to_snat_proto (key.proto);
1959       s->out2in.fib_index = 0;
1960       s->in2out = s->out2in;
1961       user_session_increment (sm, u, 0);
1962
1963       kv.value = s - tsm->sessions;
1964       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &kv, 1))
1965         nat_log_notice ("in2out_ed key add failed");
1966     }
1967
1968   if (ip->protocol == IP_PROTOCOL_TCP)
1969     {
1970       tcp_header_t *tcp = ip4_next_header(ip);
1971       if (nat44_set_tcp_session_state_o2i (sm, s, tcp, thread_index))
1972         return;
1973     }
1974
1975   /* Accounting */
1976   nat44_session_update_counters (s, now, 0);
1977 }
1978
1979 u32
1980 icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node,
1981                       u32 thread_index, vlib_buffer_t * b, ip4_header_t * ip,
1982                       u8 * p_proto, snat_session_key_t * p_value,
1983                       u8 * p_dont_translate, void * d, void * e)
1984 {
1985   u32 next = ~0, sw_if_index, rx_fib_index;
1986   icmp46_header_t *icmp;
1987   nat_ed_ses_key_t key;
1988   clib_bihash_kv_16_8_t kv, value;
1989   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1990   snat_session_t *s = 0;
1991   u8 dont_translate = 0, is_addr_only;
1992   snat_session_key_t e_key, l_key;
1993
1994   icmp = (icmp46_header_t *) ip4_next_header (ip);
1995   sw_if_index = vnet_buffer(b)->sw_if_index[VLIB_RX];
1996   rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
1997
1998   if (icmp_get_ed_key (ip, &key))
1999     {
2000       b->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
2001       next = SNAT_OUT2IN_NEXT_DROP;
2002       goto out;
2003     }
2004   key.fib_index = rx_fib_index;
2005   kv.key[0] = key.as_u64[0];
2006   kv.key[1] = key.as_u64[1];
2007
2008   if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
2009     {
2010       /* Try to match static mapping */
2011       e_key.addr = ip->dst_address;
2012       e_key.port = key.l_port;
2013       e_key.protocol = ip_proto_to_snat_proto (key.proto);
2014       e_key.fib_index = rx_fib_index;
2015       if (snat_static_mapping_match(sm, e_key, &l_key, 1, &is_addr_only, 0, 0, 0))
2016         {
2017           if (!sm->forwarding_enabled)
2018             {
2019               /* Don't NAT packet aimed at the intfc address */
2020               if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index,
2021                                                   ip->dst_address.as_u32)))
2022                 {
2023                   dont_translate = 1;
2024                   goto out;
2025                 }
2026               b->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2027               next = NAT44_ED_OUT2IN_NEXT_DROP;
2028               goto out;
2029             }
2030           else
2031             {
2032               dont_translate = 1;
2033               if (next_src_nat(sm, ip, key.proto, key.l_port, key.r_port,
2034                                thread_index, rx_fib_index))
2035                 {
2036                   next = NAT44_ED_OUT2IN_NEXT_IN2OUT;
2037                   goto out;
2038                 }
2039               create_bypass_for_fwd(sm, ip, rx_fib_index, thread_index);
2040               goto out;
2041             }
2042         }
2043
2044       if (PREDICT_FALSE(icmp->type != ICMP4_echo_reply &&
2045                         (icmp->type != ICMP4_echo_request || !is_addr_only)))
2046         {
2047           b->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
2048           next = NAT44_ED_OUT2IN_NEXT_DROP;
2049           goto out;
2050         }
2051
2052       /* Create session initiated by host from external network */
2053       s = create_session_for_static_mapping_ed(sm, b, l_key, e_key, node,
2054                                                thread_index, 0, 0,
2055                                                vlib_time_now (sm->vlib_main));
2056
2057       if (!s)
2058         {
2059           next = NAT44_ED_OUT2IN_NEXT_DROP;
2060           goto out;
2061         }
2062     }
2063   else
2064     {
2065       if (PREDICT_FALSE(icmp->type != ICMP4_echo_reply &&
2066                         icmp->type != ICMP4_echo_request &&
2067                         !icmp_is_error_message (icmp)))
2068         {
2069           b->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
2070           next = SNAT_OUT2IN_NEXT_DROP;
2071           goto out;
2072         }
2073
2074       s = pool_elt_at_index (tsm->sessions, value.value);
2075     }
2076
2077   *p_proto = ip_proto_to_snat_proto (key.proto);
2078 out:
2079   if (s)
2080     *p_value = s->in2out;
2081   *p_dont_translate = dont_translate;
2082   if (d)
2083     *(snat_session_t**)d = s;
2084   return next;
2085 }
2086
2087 static snat_session_t *
2088 nat44_ed_out2in_unknown_proto (snat_main_t *sm,
2089                                vlib_buffer_t * b,
2090                                ip4_header_t * ip,
2091                                u32 rx_fib_index,
2092                                u32 thread_index,
2093                                f64 now,
2094                                vlib_main_t * vm,
2095                                vlib_node_runtime_t * node)
2096 {
2097   clib_bihash_kv_8_8_t kv, value;
2098   clib_bihash_kv_16_8_t s_kv, s_value;
2099   snat_static_mapping_t *m;
2100   u32 old_addr, new_addr;
2101   ip_csum_t sum;
2102   snat_session_t * s;
2103   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2104   snat_user_t *u;
2105
2106   old_addr = ip->dst_address.as_u32;
2107
2108   make_ed_kv (&s_kv, &ip->dst_address, &ip->src_address, ip->protocol,
2109               rx_fib_index, 0, 0);
2110
2111   if (!clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
2112     {
2113       s = pool_elt_at_index (tsm->sessions, s_value.value);
2114       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
2115     }
2116   else
2117     {
2118       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
2119         {
2120           b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
2121           nat_log_notice ("maximum sessions exceeded");
2122           return 0;
2123         }
2124
2125       make_sm_kv (&kv, &ip->dst_address, 0, 0, 0);
2126       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
2127         {
2128           b->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2129           return 0;
2130         }
2131
2132       m = pool_elt_at_index (sm->static_mappings, value.value);
2133
2134       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
2135
2136       u = nat_user_get_or_create (sm, &m->local_addr, m->fib_index,
2137                                   thread_index);
2138       if (!u)
2139         {
2140           nat_log_warn ("create NAT user failed");
2141           return 0;
2142         }
2143
2144       /* Create a new session */
2145       s = nat_ed_session_alloc (sm, u, thread_index);
2146       if (!s)
2147         {
2148           nat44_delete_user_with_no_session (sm, u, thread_index);
2149           nat_log_warn ("create NAT session failed");
2150           return 0;
2151         }
2152
2153       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
2154       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
2155       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
2156       s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
2157       s->outside_address_index = ~0;
2158       s->out2in.addr.as_u32 = old_addr;
2159       s->out2in.fib_index = rx_fib_index;
2160       s->in2out.addr.as_u32 = new_addr;
2161       s->in2out.fib_index = m->fib_index;
2162       s->in2out.port = s->out2in.port = ip->protocol;
2163       user_session_increment (sm, u, 1);
2164
2165       /* Add to lookup tables */
2166       s_kv.value = s - tsm->sessions;
2167       if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &s_kv, 1))
2168         nat_log_notice ("out2in key add failed");
2169
2170       make_ed_kv (&s_kv, &ip->dst_address, &ip->src_address, ip->protocol,
2171                   m->fib_index, 0, 0);
2172       s_kv.value = s - tsm->sessions;
2173       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1))
2174         nat_log_notice ("in2out key add failed");
2175    }
2176
2177   /* Update IP checksum */
2178   sum = ip->checksum;
2179   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
2180   ip->checksum = ip_csum_fold (sum);
2181
2182   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
2183
2184   /* Accounting */
2185   nat44_session_update_counters (s, now,
2186                                  vlib_buffer_length_in_chain (vm, b));
2187
2188   return s;
2189 }
2190
2191 static inline uword
2192 nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
2193                                 vlib_node_runtime_t * node,
2194                                 vlib_frame_t * frame, int is_slow_path)
2195 {
2196   u32 n_left_from, *from, *to_next, pkts_processed = 0, stats_node_index;
2197   nat44_ed_out2in_next_t next_index;
2198   snat_main_t *sm = &snat_main;
2199   f64 now = vlib_time_now (vm);
2200   u32 thread_index = vm->thread_index;
2201   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2202
2203   stats_node_index = is_slow_path ? nat44_ed_out2in_slowpath_node.index :
2204     nat44_ed_out2in_node.index;
2205
2206   from = vlib_frame_vector_args (frame);
2207   n_left_from = frame->n_vectors;
2208   next_index = node->cached_next_index;
2209
2210   while (n_left_from > 0)
2211     {
2212       u32 n_left_to_next;
2213
2214       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2215
2216       while (n_left_from >= 4 && n_left_to_next >= 2)
2217         {
2218           u32 bi0, bi1;
2219           vlib_buffer_t *b0, *b1;
2220           u32 next0, sw_if_index0, rx_fib_index0, proto0, old_addr0, new_addr0;
2221           u32 next1, sw_if_index1, rx_fib_index1, proto1, old_addr1, new_addr1;
2222           u16 old_port0, new_port0, old_port1, new_port1;
2223           ip4_header_t *ip0, *ip1;
2224           udp_header_t *udp0, *udp1;
2225           tcp_header_t *tcp0, *tcp1;
2226           icmp46_header_t *icmp0, *icmp1;
2227           snat_session_t *s0 = 0, *s1 = 0;
2228           clib_bihash_kv_16_8_t kv0, value0, kv1, value1;
2229           ip_csum_t sum0, sum1;
2230           snat_session_key_t e_key0, l_key0, e_key1, l_key1;
2231           lb_nat_type_t lb_nat0, lb_nat1;
2232           twice_nat_type_t twice_nat0, twice_nat1;
2233
2234           /* Prefetch next iteration. */
2235           {
2236             vlib_buffer_t * p2, * p3;
2237
2238             p2 = vlib_get_buffer (vm, from[2]);
2239             p3 = vlib_get_buffer (vm, from[3]);
2240
2241             vlib_prefetch_buffer_header (p2, LOAD);
2242             vlib_prefetch_buffer_header (p3, LOAD);
2243
2244             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2245             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2246           }
2247
2248           /* speculatively enqueue b0 and b1 to the current next frame */
2249           to_next[0] = bi0 = from[0];
2250           to_next[1] = bi1 = from[1];
2251           from += 2;
2252           to_next += 2;
2253           n_left_from -= 2;
2254           n_left_to_next -= 2;
2255
2256           b0 = vlib_get_buffer (vm, bi0);
2257           b1 = vlib_get_buffer (vm, bi1);
2258
2259           next0 = NAT44_ED_OUT2IN_NEXT_LOOKUP;
2260           vnet_buffer (b0)->snat.flags = 0;
2261           ip0 = vlib_buffer_get_current (b0);
2262
2263           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2264           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2265                                                                sw_if_index0);
2266
2267           if (PREDICT_FALSE(ip0->ttl == 1))
2268             {
2269               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2270               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2271                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2272                                            0);
2273               next0 = NAT44_ED_OUT2IN_NEXT_ICMP_ERROR;
2274               goto trace00;
2275             }
2276
2277           udp0 = ip4_next_header (ip0);
2278           tcp0 = (tcp_header_t *) udp0;
2279           icmp0 = (icmp46_header_t *) udp0;
2280           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2281
2282           if (is_slow_path)
2283             {
2284               if (PREDICT_FALSE (proto0 == ~0))
2285                 {
2286                   s0 = nat44_ed_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
2287                                                      thread_index, now, vm, node);
2288                   if (!sm->forwarding_enabled)
2289                     {
2290                       if (!s0)
2291                         next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2292                       goto trace00;
2293                     }
2294                 }
2295
2296               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2297                 {
2298                   next0 = icmp_out2in_ed_slow_path
2299                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
2300                      next0, now, thread_index, &s0);
2301                   goto trace00;
2302                 }
2303             }
2304           else
2305             {
2306               if (PREDICT_FALSE (proto0 == ~0))
2307                 {
2308                   next0 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2309                   goto trace00;
2310                 }
2311
2312               if (ip4_is_fragment (ip0))
2313                 {
2314                   next0 = NAT44_ED_OUT2IN_NEXT_REASS;
2315                   goto trace00;
2316                 }
2317
2318               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2319                 {
2320                   next0 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2321                   goto trace00;
2322                 }
2323             }
2324
2325           make_ed_kv (&kv0, &ip0->dst_address, &ip0->src_address, ip0->protocol,
2326                       rx_fib_index0, udp0->dst_port, udp0->src_port);
2327
2328           if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv0, &value0))
2329             {
2330               if (is_slow_path)
2331                 {
2332                   /* Try to match static mapping by external address and port,
2333                      destination address and port in packet */
2334                   e_key0.addr = ip0->dst_address;
2335                   e_key0.port = udp0->dst_port;
2336                   e_key0.protocol = proto0;
2337                   e_key0.fib_index = rx_fib_index0;
2338                   if (snat_static_mapping_match(sm, e_key0, &l_key0, 1, 0,
2339                       &twice_nat0, &lb_nat0, &ip0->src_address))
2340                     {
2341                       /*
2342                        * Send DHCP packets to the ipv4 stack, or we won't
2343                        * be able to use dhcp client on the outside interface
2344                        */
2345                       if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
2346                           && (udp0->dst_port ==
2347                           clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
2348                         {
2349                           vnet_feature_next (&next0, b0);
2350                           goto trace00;
2351                         }
2352
2353                       if (!sm->forwarding_enabled)
2354                         {
2355                           b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2356                           next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2357                         }
2358                       else
2359                         {
2360                           if (next_src_nat(sm, ip0, ip0->protocol,
2361                                            udp0->src_port, udp0->dst_port,
2362                                            thread_index, rx_fib_index0))
2363                             {
2364                               next0 = NAT44_ED_OUT2IN_NEXT_IN2OUT;
2365                               goto trace00;
2366                             }
2367                           create_bypass_for_fwd(sm, ip0, rx_fib_index0,
2368                                                 thread_index);
2369                         }
2370                       goto trace00;
2371                     }
2372
2373                   /* Create session initiated by host from external network */
2374                   s0 = create_session_for_static_mapping_ed(sm, b0, l_key0,
2375                                                             e_key0, node,
2376                                                             thread_index,
2377                                                             twice_nat0,
2378                                                             lb_nat0,
2379                                                             now);
2380
2381                   if (!s0)
2382                     {
2383                       next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2384                       goto trace00;
2385                     }
2386                 }
2387               else
2388                 {
2389                   next0 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2390                   goto trace00;
2391                 }
2392             }
2393           else
2394             {
2395               s0 = pool_elt_at_index (tsm->sessions, value0.value);
2396             }
2397
2398           old_addr0 = ip0->dst_address.as_u32;
2399           new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32;
2400           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
2401
2402           sum0 = ip0->checksum;
2403           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
2404                                  dst_address);
2405           if (PREDICT_FALSE (is_twice_nat_session (s0)))
2406             sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
2407                                    s0->ext_host_nat_addr.as_u32, ip4_header_t,
2408                                    src_address);
2409           ip0->checksum = ip_csum_fold (sum0);
2410
2411           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
2412             {
2413               old_port0 = tcp0->dst_port;
2414               new_port0 = tcp0->dst_port = s0->in2out.port;
2415
2416               sum0 = tcp0->checksum;
2417               sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
2418                                      dst_address);
2419               sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
2420                                      length);
2421               if (is_twice_nat_session (s0))
2422                 {
2423                   sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
2424                                          s0->ext_host_nat_addr.as_u32,
2425                                          ip4_header_t, dst_address);
2426                   sum0 = ip_csum_update (sum0, tcp0->src_port,
2427                                          s0->ext_host_nat_port, ip4_header_t,
2428                                          length);
2429                   tcp0->src_port = s0->ext_host_nat_port;
2430                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
2431                 }
2432               tcp0->checksum = ip_csum_fold(sum0);
2433               if (nat44_set_tcp_session_state_o2i (sm, s0, tcp0, thread_index))
2434                 goto trace00;
2435             }
2436           else
2437             {
2438               udp0->dst_port = s0->in2out.port;
2439               if (is_twice_nat_session (s0))
2440                 {
2441                   udp0->src_port = s0->ext_host_nat_port;
2442                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
2443                 }
2444               udp0->checksum = 0;
2445             }
2446
2447           /* Accounting */
2448           nat44_session_update_counters (s0, now,
2449                                          vlib_buffer_length_in_chain (vm, b0));
2450
2451         trace00:
2452           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2453                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2454             {
2455               nat44_ed_out2in_trace_t *t =
2456                 vlib_add_trace (vm, node, b0, sizeof (*t));
2457               t->is_slow_path = is_slow_path;
2458               t->sw_if_index = sw_if_index0;
2459               t->next_index = next0;
2460               t->session_index = ~0;
2461               if (s0)
2462                 t->session_index = s0 - tsm->sessions;
2463             }
2464
2465           pkts_processed += next0 != NAT44_ED_OUT2IN_NEXT_DROP;
2466
2467           next1 = NAT44_ED_OUT2IN_NEXT_LOOKUP;
2468           vnet_buffer (b1)->snat.flags = 0;
2469           ip1 = vlib_buffer_get_current (b1);
2470
2471           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2472           rx_fib_index1 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2473                                                                sw_if_index1);
2474
2475           if (PREDICT_FALSE(ip1->ttl == 1))
2476             {
2477               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2478               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2479                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2480                                            0);
2481               next1 = NAT44_ED_OUT2IN_NEXT_ICMP_ERROR;
2482               goto trace01;
2483             }
2484
2485           udp1 = ip4_next_header (ip1);
2486           tcp1 = (tcp_header_t *) udp1;
2487           icmp1 = (icmp46_header_t *) udp1;
2488           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2489
2490           if (is_slow_path)
2491             {
2492               if (PREDICT_FALSE (proto1 == ~0))
2493                 {
2494                   s1 = nat44_ed_out2in_unknown_proto(sm, b1, ip1, rx_fib_index1,
2495                                                      thread_index, now, vm, node);
2496                   if (!sm->forwarding_enabled)
2497                     {
2498                       if (!s1)
2499                         next1 = NAT44_ED_OUT2IN_NEXT_DROP;
2500                       goto trace01;
2501                     }
2502                 }
2503
2504               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
2505                 {
2506                   next1 = icmp_out2in_ed_slow_path
2507                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
2508                      next1, now, thread_index, &s1);
2509                   goto trace01;
2510                 }
2511             }
2512           else
2513             {
2514               if (PREDICT_FALSE (proto1 == ~0))
2515                 {
2516                   next1 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2517                   goto trace01;
2518                 }
2519
2520               if (ip4_is_fragment (ip1))
2521                 {
2522                   next1 = NAT44_ED_OUT2IN_NEXT_REASS;
2523                   goto trace01;
2524                 }
2525
2526               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
2527                 {
2528                   next1 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2529                   goto trace01;
2530                 }
2531             }
2532
2533           make_ed_kv (&kv1, &ip1->dst_address, &ip1->src_address, ip1->protocol,
2534                       rx_fib_index1, udp1->dst_port, udp1->src_port);
2535
2536           if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv1, &value1))
2537             {
2538               if (is_slow_path)
2539                 {
2540                   /* Try to match static mapping by external address and port,
2541                      destination address and port in packet */
2542                   e_key1.addr = ip1->dst_address;
2543                   e_key1.port = udp1->dst_port;
2544                   e_key1.protocol = proto1;
2545                   e_key1.fib_index = rx_fib_index1;
2546                   if (snat_static_mapping_match(sm, e_key1, &l_key1, 1, 0,
2547                       &twice_nat1, &lb_nat1, &ip1->src_address))
2548                     {
2549                       /*
2550                        * Send DHCP packets to the ipv4 stack, or we won't
2551                        * be able to use dhcp client on the outside interface
2552                        */
2553                       if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_UDP
2554                           && (udp1->dst_port ==
2555                           clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
2556                         {
2557                           vnet_feature_next (&next1, b1);
2558                           goto trace01;
2559                         }
2560
2561                       if (!sm->forwarding_enabled)
2562                         {
2563                           b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2564                           next1 = NAT44_ED_OUT2IN_NEXT_DROP;
2565                         }
2566                       else
2567                         {
2568                           if (next_src_nat(sm, ip1, ip1->protocol,
2569                                            udp1->src_port, udp1->dst_port,
2570                                            thread_index, rx_fib_index1))
2571                             {
2572                               next1 = NAT44_ED_OUT2IN_NEXT_IN2OUT;
2573                               goto trace01;
2574                             }
2575                           create_bypass_for_fwd(sm, ip1, rx_fib_index1,
2576                                                 thread_index);
2577                         }
2578                       goto trace01;
2579                     }
2580
2581                   /* Create session initiated by host from external network */
2582                   s1 = create_session_for_static_mapping_ed(sm, b1, l_key1,
2583                                                             e_key1, node,
2584                                                             thread_index,
2585                                                             twice_nat1,
2586                                                             lb_nat1,
2587                                                             now);
2588
2589                   if (!s1)
2590                     {
2591                       next1 = NAT44_ED_OUT2IN_NEXT_DROP;
2592                       goto trace01;
2593                     }
2594                 }
2595               else
2596                 {
2597                   next1 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2598                   goto trace01;
2599                 }
2600             }
2601           else
2602             {
2603               s1 = pool_elt_at_index (tsm->sessions, value1.value);
2604             }
2605
2606           old_addr1 = ip1->dst_address.as_u32;
2607           new_addr1 = ip1->dst_address.as_u32 = s1->in2out.addr.as_u32;
2608           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index;
2609
2610           sum1 = ip1->checksum;
2611           sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
2612                                  dst_address);
2613           if (PREDICT_FALSE (is_twice_nat_session (s1)))
2614             sum1 = ip_csum_update (sum1, ip1->src_address.as_u32,
2615                                    s1->ext_host_nat_addr.as_u32, ip4_header_t,
2616                                    src_address);
2617           ip1->checksum = ip_csum_fold (sum1);
2618
2619           if (PREDICT_TRUE (proto1 == SNAT_PROTOCOL_TCP))
2620             {
2621               old_port1 = tcp1->dst_port;
2622               new_port1 = tcp1->dst_port = s1->in2out.port;
2623
2624               sum1 = tcp1->checksum;
2625               sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
2626                                      dst_address);
2627               sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t,
2628                                      length);
2629               if (is_twice_nat_session (s1))
2630                 {
2631                   sum1 = ip_csum_update (sum1, ip1->src_address.as_u32,
2632                                          s1->ext_host_nat_addr.as_u32,
2633                                          ip4_header_t, dst_address);
2634                   sum1 = ip_csum_update (sum1, tcp1->src_port,
2635                                          s1->ext_host_nat_port, ip4_header_t,
2636                                          length);
2637                   tcp1->src_port = s1->ext_host_nat_port;
2638                   ip1->src_address.as_u32 = s1->ext_host_nat_addr.as_u32;
2639                 }
2640               tcp1->checksum = ip_csum_fold(sum1);
2641               if (nat44_set_tcp_session_state_o2i (sm, s1, tcp1, thread_index))
2642                 goto trace01;
2643             }
2644           else
2645             {
2646               udp1->dst_port = s1->in2out.port;
2647               if (is_twice_nat_session (s1))
2648                 {
2649                   udp1->src_port = s1->ext_host_nat_port;
2650                   ip1->src_address.as_u32 = s1->ext_host_nat_addr.as_u32;
2651                 }
2652               udp1->checksum = 0;
2653             }
2654
2655           /* Accounting */
2656           nat44_session_update_counters (s1, now,
2657                                          vlib_buffer_length_in_chain (vm, b1));
2658
2659         trace01:
2660           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2661                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2662             {
2663               nat44_ed_out2in_trace_t *t =
2664                 vlib_add_trace (vm, node, b1, sizeof (*t));
2665               t->is_slow_path = is_slow_path;
2666               t->sw_if_index = sw_if_index1;
2667               t->next_index = next1;
2668               t->session_index = ~0;
2669               if (s1)
2670                 t->session_index = s1 - tsm->sessions;
2671             }
2672
2673           pkts_processed += next1 != NAT44_ED_OUT2IN_NEXT_DROP;
2674
2675           /* verify speculative enqueues, maybe switch current next frame */
2676           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2677                                            to_next, n_left_to_next,
2678                                            bi0, bi1, next0, next1);
2679         }
2680
2681       while (n_left_from > 0 && n_left_to_next > 0)
2682         {
2683           u32 bi0;
2684           vlib_buffer_t *b0;
2685           u32 next0, sw_if_index0, rx_fib_index0, proto0, old_addr0, new_addr0;
2686           u16 old_port0, new_port0;
2687           ip4_header_t *ip0;
2688           udp_header_t *udp0;
2689           tcp_header_t *tcp0;
2690           icmp46_header_t * icmp0;
2691           snat_session_t *s0 = 0;
2692           clib_bihash_kv_16_8_t kv0, value0;
2693           ip_csum_t sum0;
2694           snat_session_key_t e_key0, l_key0;
2695           lb_nat_type_t lb_nat0;
2696           twice_nat_type_t twice_nat0;
2697
2698           /* speculatively enqueue b0 to the current next frame */
2699           bi0 = from[0];
2700           to_next[0] = bi0;
2701           from += 1;
2702           to_next += 1;
2703           n_left_from -= 1;
2704           n_left_to_next -= 1;
2705
2706           b0 = vlib_get_buffer (vm, bi0);
2707           next0 = NAT44_ED_OUT2IN_NEXT_LOOKUP;
2708           vnet_buffer (b0)->snat.flags = 0;
2709           ip0 = vlib_buffer_get_current (b0);
2710
2711           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2712           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2713                                                                sw_if_index0);
2714
2715           if (PREDICT_FALSE(ip0->ttl == 1))
2716             {
2717               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2718               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2719                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2720                                            0);
2721               next0 = NAT44_ED_OUT2IN_NEXT_ICMP_ERROR;
2722               goto trace0;
2723             }
2724
2725           udp0 = ip4_next_header (ip0);
2726           tcp0 = (tcp_header_t *) udp0;
2727           icmp0 = (icmp46_header_t *) udp0;
2728           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2729
2730           if (is_slow_path)
2731             {
2732               if (PREDICT_FALSE (proto0 == ~0))
2733                 {
2734                   s0 = nat44_ed_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
2735                                                      thread_index, now, vm, node);
2736                   if (!sm->forwarding_enabled)
2737                     {
2738                       if (!s0)
2739                         next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2740                       goto trace0;
2741                     }
2742                 }
2743
2744               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2745                 {
2746                   next0 = icmp_out2in_ed_slow_path
2747                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
2748                      next0, now, thread_index, &s0);
2749                   goto trace0;
2750                 }
2751             }
2752           else
2753             {
2754               if (PREDICT_FALSE (proto0 == ~0))
2755                 {
2756                   next0 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2757                   goto trace0;
2758                 }
2759
2760               if (ip4_is_fragment (ip0))
2761                 {
2762                   next0 = NAT44_ED_OUT2IN_NEXT_REASS;
2763                   goto trace0;
2764                 }
2765
2766               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2767                 {
2768                   next0 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2769                   goto trace0;
2770                 }
2771             }
2772
2773           make_ed_kv (&kv0, &ip0->dst_address, &ip0->src_address, ip0->protocol,
2774                       rx_fib_index0, udp0->dst_port, udp0->src_port);
2775
2776           if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv0, &value0))
2777             {
2778               if (is_slow_path)
2779                 {
2780                   /* Try to match static mapping by external address and port,
2781                      destination address and port in packet */
2782                   e_key0.addr = ip0->dst_address;
2783                   e_key0.port = udp0->dst_port;
2784                   e_key0.protocol = proto0;
2785                   e_key0.fib_index = rx_fib_index0;
2786                   if (snat_static_mapping_match(sm, e_key0, &l_key0, 1, 0,
2787                       &twice_nat0, &lb_nat0, &ip0->src_address))
2788                     {
2789                       /*
2790                        * Send DHCP packets to the ipv4 stack, or we won't
2791                        * be able to use dhcp client on the outside interface
2792                        */
2793                       if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
2794                           && (udp0->dst_port ==
2795                           clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
2796                         {
2797                           vnet_feature_next (&next0, b0);
2798                           goto trace0;
2799                         }
2800
2801                       if (!sm->forwarding_enabled)
2802                         {
2803                           b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2804                           next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2805                         }
2806                       else
2807                         {
2808                           if (next_src_nat(sm, ip0, ip0->protocol,
2809                                            udp0->src_port, udp0->dst_port,
2810                                            thread_index, rx_fib_index0))
2811                             {
2812                               next0 = NAT44_ED_OUT2IN_NEXT_IN2OUT;
2813                               goto trace0;
2814                             }
2815                           create_bypass_for_fwd(sm, ip0, rx_fib_index0,
2816                                                 thread_index);
2817                         }
2818                       goto trace0;
2819                     }
2820
2821                   /* Create session initiated by host from external network */
2822                   s0 = create_session_for_static_mapping_ed(sm, b0, l_key0,
2823                                                             e_key0, node,
2824                                                             thread_index,
2825                                                             twice_nat0,
2826                                                             lb_nat0,
2827                                                             now);
2828
2829                   if (!s0)
2830                     {
2831                       next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2832                       goto trace0;
2833                     }
2834                 }
2835               else
2836                 {
2837                   next0 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2838                   goto trace0;
2839                 }
2840             }
2841           else
2842             {
2843               s0 = pool_elt_at_index (tsm->sessions, value0.value);
2844             }
2845
2846           old_addr0 = ip0->dst_address.as_u32;
2847           new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32;
2848           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
2849
2850           sum0 = ip0->checksum;
2851           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
2852                                  dst_address);
2853           if (PREDICT_FALSE (is_twice_nat_session (s0)))
2854             sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
2855                                    s0->ext_host_nat_addr.as_u32, ip4_header_t,
2856                                    src_address);
2857           ip0->checksum = ip_csum_fold (sum0);
2858
2859           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
2860             {
2861               old_port0 = tcp0->dst_port;
2862               new_port0 = tcp0->dst_port = s0->in2out.port;
2863
2864               sum0 = tcp0->checksum;
2865               sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
2866                                      dst_address);
2867               sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
2868                                      length);
2869               if (is_twice_nat_session (s0))
2870                 {
2871                   sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
2872                                          s0->ext_host_nat_addr.as_u32,
2873                                          ip4_header_t, dst_address);
2874                   sum0 = ip_csum_update (sum0, tcp0->src_port,
2875                                          s0->ext_host_nat_port, ip4_header_t,
2876                                          length);
2877                   tcp0->src_port = s0->ext_host_nat_port;
2878                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
2879                 }
2880               tcp0->checksum = ip_csum_fold(sum0);
2881               if (nat44_set_tcp_session_state_o2i (sm, s0, tcp0, thread_index))
2882                 goto trace0;
2883             }
2884           else
2885             {
2886               udp0->dst_port = s0->in2out.port;
2887               if (is_twice_nat_session (s0))
2888                 {
2889                   udp0->src_port = s0->ext_host_nat_port;
2890                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
2891                 }
2892               udp0->checksum = 0;
2893             }
2894
2895           /* Accounting */
2896           nat44_session_update_counters (s0, now,
2897                                          vlib_buffer_length_in_chain (vm, b0));
2898
2899         trace0:
2900           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2901                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2902             {
2903               nat44_ed_out2in_trace_t *t =
2904                 vlib_add_trace (vm, node, b0, sizeof (*t));
2905               t->is_slow_path = is_slow_path;
2906               t->sw_if_index = sw_if_index0;
2907               t->next_index = next0;
2908               t->session_index = ~0;
2909               if (s0)
2910                 t->session_index = s0 - tsm->sessions;
2911             }
2912
2913           pkts_processed += next0 != NAT44_ED_OUT2IN_NEXT_DROP;
2914           /* verify speculative enqueue, maybe switch current next frame */
2915           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2916                                            to_next, n_left_to_next,
2917                                            bi0, next0);
2918         }
2919
2920       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2921     }
2922
2923   vlib_node_increment_counter (vm, stats_node_index,
2924                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2925                                pkts_processed);
2926   return frame->n_vectors;
2927 }
2928
2929 static uword
2930 nat44_ed_out2in_fast_path_fn (vlib_main_t * vm,
2931                               vlib_node_runtime_t * node,
2932                               vlib_frame_t * frame)
2933 {
2934   return nat44_ed_out2in_node_fn_inline (vm, node, frame, 0);
2935 }
2936
2937 VLIB_REGISTER_NODE (nat44_ed_out2in_node) = {
2938   .function = nat44_ed_out2in_fast_path_fn,
2939   .name = "nat44-ed-out2in",
2940   .vector_size = sizeof (u32),
2941   .format_trace = format_nat44_ed_out2in_trace,
2942   .type = VLIB_NODE_TYPE_INTERNAL,
2943
2944   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2945   .error_strings = snat_out2in_error_strings,
2946
2947   .runtime_data_bytes = sizeof (snat_runtime_t),
2948
2949   .n_next_nodes = NAT44_ED_OUT2IN_N_NEXT,
2950
2951   /* edit / add dispositions here */
2952   .next_nodes = {
2953     [NAT44_ED_OUT2IN_NEXT_DROP] = "error-drop",
2954     [NAT44_ED_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2955     [NAT44_ED_OUT2IN_NEXT_SLOW_PATH] = "nat44-ed-out2in-slowpath",
2956     [NAT44_ED_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2957     [NAT44_ED_OUT2IN_NEXT_IN2OUT] = "nat44-ed-in2out",
2958     [NAT44_ED_OUT2IN_NEXT_REASS] = "nat44-ed-out2in-reass",
2959   },
2960 };
2961
2962 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_out2in_node, nat44_ed_out2in_fast_path_fn);
2963
2964 static uword
2965 nat44_ed_out2in_slow_path_fn (vlib_main_t * vm,
2966                               vlib_node_runtime_t * node,
2967                               vlib_frame_t * frame)
2968 {
2969   return nat44_ed_out2in_node_fn_inline (vm, node, frame, 1);
2970 }
2971
2972 VLIB_REGISTER_NODE (nat44_ed_out2in_slowpath_node) = {
2973   .function = nat44_ed_out2in_slow_path_fn,
2974   .name = "nat44-ed-out2in-slowpath",
2975   .vector_size = sizeof (u32),
2976   .format_trace = format_nat44_ed_out2in_trace,
2977   .type = VLIB_NODE_TYPE_INTERNAL,
2978
2979   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2980   .error_strings = snat_out2in_error_strings,
2981
2982   .runtime_data_bytes = sizeof (snat_runtime_t),
2983
2984   .n_next_nodes = NAT44_ED_OUT2IN_N_NEXT,
2985
2986   /* edit / add dispositions here */
2987   .next_nodes = {
2988     [NAT44_ED_OUT2IN_NEXT_DROP] = "error-drop",
2989     [NAT44_ED_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2990     [NAT44_ED_OUT2IN_NEXT_SLOW_PATH] = "nat44-ed-out2in-slowpath",
2991     [NAT44_ED_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2992     [NAT44_ED_OUT2IN_NEXT_IN2OUT] = "nat44-ed-in2out",
2993     [NAT44_ED_OUT2IN_NEXT_REASS] = "nat44-ed-out2in-reass",
2994   },
2995 };
2996
2997 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_out2in_slowpath_node,
2998                               nat44_ed_out2in_slow_path_fn);
2999
3000 static uword
3001 nat44_ed_out2in_reass_node_fn (vlib_main_t * vm,
3002                                vlib_node_runtime_t * node,
3003                                vlib_frame_t * frame)
3004 {
3005   u32 n_left_from, *from, *to_next;
3006   snat_out2in_next_t next_index;
3007   u32 pkts_processed = 0;
3008   snat_main_t *sm = &snat_main;
3009   f64 now = vlib_time_now (vm);
3010   u32 thread_index = vm->thread_index;
3011   snat_main_per_thread_data_t *per_thread_data =
3012     &sm->per_thread_data[thread_index];
3013   u32 *fragments_to_drop = 0;
3014   u32 *fragments_to_loopback = 0;
3015
3016   from = vlib_frame_vector_args (frame);
3017   n_left_from = frame->n_vectors;
3018   next_index = node->cached_next_index;
3019
3020   while (n_left_from > 0)
3021     {
3022       u32 n_left_to_next;
3023
3024       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
3025
3026       while (n_left_from > 0 && n_left_to_next > 0)
3027        {
3028           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
3029           vlib_buffer_t *b0;
3030           u32 next0;
3031           u8 cached0 = 0;
3032           ip4_header_t *ip0;
3033           nat_reass_ip4_t *reass0;
3034           udp_header_t * udp0;
3035           tcp_header_t * tcp0;
3036           icmp46_header_t * icmp0;
3037           clib_bihash_kv_16_8_t kv0, value0;
3038           snat_session_t * s0 = 0;
3039           u16 old_port0, new_port0;
3040           ip_csum_t sum0;
3041           snat_session_key_t e_key0, l_key0;
3042           lb_nat_type_t lb0;
3043           twice_nat_type_t twice_nat0;
3044
3045           /* speculatively enqueue b0 to the current next frame */
3046           bi0 = from[0];
3047           to_next[0] = bi0;
3048           from += 1;
3049           to_next += 1;
3050           n_left_from -= 1;
3051           n_left_to_next -= 1;
3052
3053           b0 = vlib_get_buffer (vm, bi0);
3054           next0 = NAT44_ED_OUT2IN_NEXT_LOOKUP;
3055
3056           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3057           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3058                                                                sw_if_index0);
3059
3060           if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
3061             {
3062               next0 = NAT44_ED_OUT2IN_NEXT_DROP;
3063               b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
3064               goto trace0;
3065             }
3066
3067           ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
3068           udp0 = ip4_next_header (ip0);
3069           tcp0 = (tcp_header_t *) udp0;
3070           icmp0 = (icmp46_header_t *) udp0;
3071           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3072
3073           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
3074                                                  ip0->dst_address,
3075                                                  ip0->fragment_id,
3076                                                  ip0->protocol,
3077                                                  1,
3078                                                  &fragments_to_drop);
3079
3080           if (PREDICT_FALSE (!reass0))
3081             {
3082               next0 = NAT44_ED_OUT2IN_NEXT_DROP;
3083               b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_REASS];
3084               nat_log_notice ("maximum reassemblies exceeded");
3085               goto trace0;
3086             }
3087
3088           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
3089             {
3090               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3091                 {
3092                   next0 = icmp_out2in_slow_path
3093                       (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
3094                        next0, now, thread_index, &s0);
3095
3096                   if (PREDICT_TRUE(next0 != NAT44_ED_OUT2IN_NEXT_DROP))
3097                     {
3098                       if (s0)
3099                         reass0->sess_index = s0 - per_thread_data->sessions;
3100                       else
3101                         reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
3102                       reass0->thread_index = thread_index;
3103                       nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
3104                     }
3105
3106                   goto trace0;
3107                 }
3108
3109               make_ed_kv (&kv0, &ip0->dst_address, &ip0->src_address, ip0->protocol,
3110                           rx_fib_index0, udp0->dst_port, udp0->src_port);
3111
3112               if (clib_bihash_search_16_8 (&per_thread_data->out2in_ed, &kv0, &value0))
3113                 {
3114                   /* Try to match static mapping by external address and port,
3115                      destination address and port in packet */
3116                   e_key0.addr = ip0->dst_address;
3117                   e_key0.port = udp0->dst_port;
3118                   e_key0.protocol = proto0;
3119                   e_key0.fib_index = rx_fib_index0;
3120                   if (snat_static_mapping_match(sm, e_key0, &l_key0, 1, 0,
3121                       &twice_nat0, &lb0, 0))
3122                     {
3123                       /*
3124                        * Send DHCP packets to the ipv4 stack, or we won't
3125                        * be able to use dhcp client on the outside interface
3126                        */
3127                       if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
3128                           && (udp0->dst_port
3129                               == clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
3130                         {
3131                           vnet_feature_next(&next0, b0);
3132                           goto trace0;
3133                         }
3134
3135                       if (!sm->forwarding_enabled)
3136                         {
3137                           b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3138                           next0 = NAT44_ED_OUT2IN_NEXT_DROP;
3139                         }
3140                       else
3141                         {
3142                           if (next_src_nat(sm, ip0, ip0->protocol,
3143                                            udp0->src_port, udp0->dst_port,
3144                                            thread_index, rx_fib_index0))
3145                             {
3146                               next0 = NAT44_ED_OUT2IN_NEXT_IN2OUT;
3147                               goto trace0;
3148                             }
3149                           create_bypass_for_fwd(sm, ip0, rx_fib_index0,
3150                                                 thread_index);
3151                           reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
3152                           nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
3153                         }
3154                       goto trace0;
3155                     }
3156
3157                   /* Create session initiated by host from external network */
3158                   s0 = create_session_for_static_mapping_ed(sm, b0, l_key0,
3159                                                             e_key0, node,
3160                                                             thread_index,
3161                                                             twice_nat0, lb0,
3162                                                             now);
3163                   if (!s0)
3164                     {
3165                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3166                       next0 = NAT44_ED_OUT2IN_NEXT_DROP;
3167                       goto trace0;
3168                     }
3169                   reass0->sess_index = s0 - per_thread_data->sessions;
3170                   reass0->thread_index = thread_index;
3171                 }
3172               else
3173                 {
3174                   s0 = pool_elt_at_index (per_thread_data->sessions,
3175                                           value0.value);
3176                   reass0->sess_index = value0.value;
3177                 }
3178               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
3179             }
3180           else
3181             {
3182               if (reass0->flags & NAT_REASS_FLAG_ED_DONT_TRANSLATE)
3183                 goto trace0;
3184               if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
3185                 {
3186                   if (nat_ip4_reass_add_fragment (reass0, bi0, &fragments_to_drop))
3187                     {
3188                       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_FRAG];
3189                       nat_log_notice ("maximum fragments per reassembly exceeded");
3190                       next0 = NAT44_ED_OUT2IN_NEXT_DROP;
3191                       goto trace0;
3192                     }
3193                   cached0 = 1;
3194                   goto trace0;
3195                 }
3196               s0 = pool_elt_at_index (per_thread_data->sessions,
3197                                       reass0->sess_index);
3198             }
3199
3200           old_addr0 = ip0->dst_address.as_u32;
3201           ip0->dst_address = s0->in2out.addr;
3202           new_addr0 = ip0->dst_address.as_u32;
3203           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
3204
3205           sum0 = ip0->checksum;
3206           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3207                                  ip4_header_t,
3208                                  dst_address /* changed member */);
3209           if (PREDICT_FALSE (is_twice_nat_session (s0)))
3210             sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
3211                                    s0->ext_host_nat_addr.as_u32, ip4_header_t,
3212                                    src_address);
3213           ip0->checksum = ip_csum_fold (sum0);
3214
3215           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
3216             {
3217               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3218                 {
3219                   old_port0 = tcp0->dst_port;
3220                   tcp0->dst_port = s0->in2out.port;
3221                   new_port0 = tcp0->dst_port;
3222
3223                   sum0 = tcp0->checksum;
3224                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3225                                          ip4_header_t,
3226                                          dst_address /* changed member */);
3227
3228                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
3229                                          ip4_header_t /* cheat */,
3230                                          length /* changed member */);
3231                   if (is_twice_nat_session (s0))
3232                     {
3233                       sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
3234                                              s0->ext_host_nat_addr.as_u32,
3235                                              ip4_header_t, dst_address);
3236                       sum0 = ip_csum_update (sum0, tcp0->src_port,
3237                                              s0->ext_host_nat_port, ip4_header_t,
3238                                              length);
3239                       tcp0->src_port = s0->ext_host_nat_port;
3240                       ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
3241                     }
3242                   tcp0->checksum = ip_csum_fold(sum0);
3243                 }
3244               else
3245                 {
3246                   old_port0 = udp0->dst_port;
3247                   udp0->dst_port = s0->in2out.port;
3248                   if (is_twice_nat_session (s0))
3249                     {
3250                       udp0->src_port = s0->ext_host_nat_port;
3251                       ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
3252                     }
3253                   udp0->checksum = 0;
3254                 }
3255             }
3256
3257           /* Accounting */
3258           nat44_session_update_counters (s0, now,
3259                                          vlib_buffer_length_in_chain (vm, b0));
3260           /* Per-user LRU list maintenance */
3261           nat44_session_update_lru (sm, s0, thread_index);
3262
3263         trace0:
3264           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3265                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3266             {
3267               nat44_out2in_reass_trace_t *t =
3268                  vlib_add_trace (vm, node, b0, sizeof (*t));
3269               t->cached = cached0;
3270               t->sw_if_index = sw_if_index0;
3271               t->next_index = next0;
3272             }
3273
3274           if (cached0)
3275             {
3276               n_left_to_next++;
3277               to_next--;
3278             }
3279           else
3280             {
3281               pkts_processed += next0 != NAT44_ED_OUT2IN_NEXT_DROP;
3282
3283               /* verify speculative enqueue, maybe switch current next frame */
3284               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3285                                                to_next, n_left_to_next,
3286                                                bi0, next0);
3287             }
3288
3289           if (n_left_from == 0 && vec_len (fragments_to_loopback))
3290             {
3291               from = vlib_frame_vector_args (frame);
3292               u32 len = vec_len (fragments_to_loopback);
3293               if (len <= VLIB_FRAME_SIZE)
3294                 {
3295                   clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
3296                   n_left_from = len;
3297                   vec_reset_length (fragments_to_loopback);
3298                 }
3299               else
3300                 {
3301                   clib_memcpy (from,
3302                                fragments_to_loopback + (len - VLIB_FRAME_SIZE),
3303                                sizeof (u32) * VLIB_FRAME_SIZE);
3304                   n_left_from = VLIB_FRAME_SIZE;
3305                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
3306                 }
3307             }
3308        }
3309
3310       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3311     }
3312
3313   vlib_node_increment_counter (vm, nat44_out2in_reass_node.index,
3314                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
3315                                pkts_processed);
3316
3317   nat_send_all_to_node (vm, fragments_to_drop, node,
3318                         &node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT],
3319                         SNAT_OUT2IN_NEXT_DROP);
3320
3321   vec_free (fragments_to_drop);
3322   vec_free (fragments_to_loopback);
3323   return frame->n_vectors;
3324 }
3325
3326 VLIB_REGISTER_NODE (nat44_ed_out2in_reass_node) = {
3327   .function = nat44_ed_out2in_reass_node_fn,
3328   .name = "nat44-ed-out2in-reass",
3329   .vector_size = sizeof (u32),
3330   .format_trace = format_nat44_out2in_reass_trace,
3331   .type = VLIB_NODE_TYPE_INTERNAL,
3332
3333   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
3334   .error_strings = snat_out2in_error_strings,
3335
3336   .n_next_nodes = NAT44_ED_OUT2IN_N_NEXT,
3337
3338   /* edit / add dispositions here */
3339   .next_nodes = {
3340     [NAT44_ED_OUT2IN_NEXT_DROP] = "error-drop",
3341     [NAT44_ED_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
3342     [NAT44_ED_OUT2IN_NEXT_SLOW_PATH] = "nat44-ed-out2in-slowpath",
3343     [NAT44_ED_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3344     [NAT44_ED_OUT2IN_NEXT_IN2OUT] = "nat44-ed-in2out",
3345     [NAT44_ED_OUT2IN_NEXT_REASS] = "nat44-ed-out2in-reass",
3346   },
3347 };
3348
3349 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_out2in_reass_node,
3350                               nat44_ed_out2in_reass_node_fn);
3351
3352 /**************************/
3353 /*** deterministic mode ***/
3354 /**************************/
3355 static uword
3356 snat_det_out2in_node_fn (vlib_main_t * vm,
3357                          vlib_node_runtime_t * node,
3358                          vlib_frame_t * frame)
3359 {
3360   u32 n_left_from, * from, * to_next;
3361   snat_out2in_next_t next_index;
3362   u32 pkts_processed = 0;
3363   snat_main_t * sm = &snat_main;
3364   u32 thread_index = vm->thread_index;
3365
3366   from = vlib_frame_vector_args (frame);
3367   n_left_from = frame->n_vectors;
3368   next_index = node->cached_next_index;
3369
3370   while (n_left_from > 0)
3371     {
3372       u32 n_left_to_next;
3373
3374       vlib_get_next_frame (vm, node, next_index,
3375                            to_next, n_left_to_next);
3376
3377       while (n_left_from >= 4 && n_left_to_next >= 2)
3378         {
3379           u32 bi0, bi1;
3380           vlib_buffer_t * b0, * b1;
3381           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
3382           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
3383           u32 sw_if_index0, sw_if_index1;
3384           ip4_header_t * ip0, * ip1;
3385           ip_csum_t sum0, sum1;
3386           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
3387           u16 new_port0, old_port0, old_port1, new_port1;
3388           udp_header_t * udp0, * udp1;
3389           tcp_header_t * tcp0, * tcp1;
3390           u32 proto0, proto1;
3391           snat_det_out_key_t key0, key1;
3392           snat_det_map_t * dm0, * dm1;
3393           snat_det_session_t * ses0 = 0, * ses1 = 0;
3394           u32 rx_fib_index0, rx_fib_index1;
3395           icmp46_header_t * icmp0, * icmp1;
3396
3397           /* Prefetch next iteration. */
3398           {
3399             vlib_buffer_t * p2, * p3;
3400
3401             p2 = vlib_get_buffer (vm, from[2]);
3402             p3 = vlib_get_buffer (vm, from[3]);
3403
3404             vlib_prefetch_buffer_header (p2, LOAD);
3405             vlib_prefetch_buffer_header (p3, LOAD);
3406
3407             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
3408             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
3409           }
3410
3411           /* speculatively enqueue b0 and b1 to the current next frame */
3412           to_next[0] = bi0 = from[0];
3413           to_next[1] = bi1 = from[1];
3414           from += 2;
3415           to_next += 2;
3416           n_left_from -= 2;
3417           n_left_to_next -= 2;
3418
3419           b0 = vlib_get_buffer (vm, bi0);
3420           b1 = vlib_get_buffer (vm, bi1);
3421
3422           ip0 = vlib_buffer_get_current (b0);
3423           udp0 = ip4_next_header (ip0);
3424           tcp0 = (tcp_header_t *) udp0;
3425
3426           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3427
3428           if (PREDICT_FALSE(ip0->ttl == 1))
3429             {
3430               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3431               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3432                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3433                                            0);
3434               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
3435               goto trace0;
3436             }
3437
3438           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3439
3440           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
3441             {
3442               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3443               icmp0 = (icmp46_header_t *) udp0;
3444
3445               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
3446                                   rx_fib_index0, node, next0, thread_index,
3447                                   &ses0, &dm0);
3448               goto trace0;
3449             }
3450
3451           key0.ext_host_addr = ip0->src_address;
3452           key0.ext_host_port = tcp0->src;
3453           key0.out_port = tcp0->dst;
3454
3455           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
3456           if (PREDICT_FALSE(!dm0))
3457             {
3458               nat_log_info ("unknown dst address:  %U",
3459                             format_ip4_address, &ip0->dst_address);
3460               next0 = SNAT_OUT2IN_NEXT_DROP;
3461               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3462               goto trace0;
3463             }
3464
3465           snat_det_reverse(dm0, &ip0->dst_address,
3466                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
3467
3468           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
3469           if (PREDICT_FALSE(!ses0))
3470             {
3471               nat_log_info ("no match src %U:%d dst %U:%d for user %U",
3472                             format_ip4_address, &ip0->src_address,
3473                             clib_net_to_host_u16 (tcp0->src),
3474                             format_ip4_address, &ip0->dst_address,
3475                             clib_net_to_host_u16 (tcp0->dst),
3476                             format_ip4_address, &new_addr0);
3477               next0 = SNAT_OUT2IN_NEXT_DROP;
3478               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3479               goto trace0;
3480             }
3481           new_port0 = ses0->in_port;
3482
3483           old_addr0 = ip0->dst_address;
3484           ip0->dst_address = new_addr0;
3485           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
3486
3487           sum0 = ip0->checksum;
3488           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3489                                  ip4_header_t,
3490                                  dst_address /* changed member */);
3491           ip0->checksum = ip_csum_fold (sum0);
3492
3493           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3494             {
3495               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
3496                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
3497               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
3498                 snat_det_ses_close(dm0, ses0);
3499
3500               old_port0 = tcp0->dst;
3501               tcp0->dst = new_port0;
3502
3503               sum0 = tcp0->checksum;
3504               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3505                                      ip4_header_t,
3506                                      dst_address /* changed member */);
3507
3508               sum0 = ip_csum_update (sum0, old_port0, new_port0,
3509                                      ip4_header_t /* cheat */,
3510                                      length /* changed member */);
3511               tcp0->checksum = ip_csum_fold(sum0);
3512             }
3513           else
3514             {
3515               old_port0 = udp0->dst_port;
3516               udp0->dst_port = new_port0;
3517               udp0->checksum = 0;
3518             }
3519
3520         trace0:
3521
3522           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3523                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3524             {
3525               snat_out2in_trace_t *t =
3526                  vlib_add_trace (vm, node, b0, sizeof (*t));
3527               t->sw_if_index = sw_if_index0;
3528               t->next_index = next0;
3529               t->session_index = ~0;
3530               if (ses0)
3531                 t->session_index = ses0 - dm0->sessions;
3532             }
3533
3534           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
3535
3536           b1 = vlib_get_buffer (vm, bi1);
3537
3538           ip1 = vlib_buffer_get_current (b1);
3539           udp1 = ip4_next_header (ip1);
3540           tcp1 = (tcp_header_t *) udp1;
3541
3542           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
3543
3544           if (PREDICT_FALSE(ip1->ttl == 1))
3545             {
3546               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3547               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
3548                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3549                                            0);
3550               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
3551               goto trace1;
3552             }
3553
3554           proto1 = ip_proto_to_snat_proto (ip1->protocol);
3555
3556           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
3557             {
3558               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
3559               icmp1 = (icmp46_header_t *) udp1;
3560
3561               next1 = icmp_out2in(sm, b1, ip1, icmp1, sw_if_index1,
3562                                   rx_fib_index1, node, next1, thread_index,
3563                                   &ses1, &dm1);
3564               goto trace1;
3565             }
3566
3567           key1.ext_host_addr = ip1->src_address;
3568           key1.ext_host_port = tcp1->src;
3569           key1.out_port = tcp1->dst;
3570
3571           dm1 = snat_det_map_by_out(sm, &ip1->dst_address);
3572           if (PREDICT_FALSE(!dm1))
3573             {
3574               nat_log_info ("unknown dst address:  %U",
3575                             format_ip4_address, &ip1->dst_address);
3576               next1 = SNAT_OUT2IN_NEXT_DROP;
3577               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3578               goto trace1;
3579             }
3580
3581           snat_det_reverse(dm1, &ip1->dst_address,
3582                            clib_net_to_host_u16(tcp1->dst), &new_addr1);
3583
3584           ses1 = snat_det_get_ses_by_out (dm1, &new_addr1, key1.as_u64);
3585           if (PREDICT_FALSE(!ses1))
3586             {
3587               nat_log_info ("no match src %U:%d dst %U:%d for user %U",
3588                             format_ip4_address, &ip1->src_address,
3589                             clib_net_to_host_u16 (tcp1->src),
3590                             format_ip4_address, &ip1->dst_address,
3591                             clib_net_to_host_u16 (tcp1->dst),
3592                             format_ip4_address, &new_addr1);
3593               next1 = SNAT_OUT2IN_NEXT_DROP;
3594               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3595               goto trace1;
3596             }
3597           new_port1 = ses1->in_port;
3598
3599           old_addr1 = ip1->dst_address;
3600           ip1->dst_address = new_addr1;
3601           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
3602
3603           sum1 = ip1->checksum;
3604           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
3605                                  ip4_header_t,
3606                                  dst_address /* changed member */);
3607           ip1->checksum = ip_csum_fold (sum1);
3608
3609           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
3610             {
3611               if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
3612                 ses1->state = SNAT_SESSION_TCP_CLOSE_WAIT;
3613               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_LAST_ACK)
3614                 snat_det_ses_close(dm1, ses1);
3615
3616               old_port1 = tcp1->dst;
3617               tcp1->dst = new_port1;
3618
3619               sum1 = tcp1->checksum;
3620               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
3621                                      ip4_header_t,
3622                                      dst_address /* changed member */);
3623
3624               sum1 = ip_csum_update (sum1, old_port1, new_port1,
3625                                      ip4_header_t /* cheat */,
3626                                      length /* changed member */);
3627               tcp1->checksum = ip_csum_fold(sum1);
3628             }
3629           else
3630             {
3631               old_port1 = udp1->dst_port;
3632               udp1->dst_port = new_port1;
3633               udp1->checksum = 0;
3634             }
3635
3636         trace1:
3637
3638           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3639                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
3640             {
3641               snat_out2in_trace_t *t =
3642                  vlib_add_trace (vm, node, b1, sizeof (*t));
3643               t->sw_if_index = sw_if_index1;
3644               t->next_index = next1;
3645               t->session_index = ~0;
3646               if (ses1)
3647                 t->session_index = ses1 - dm1->sessions;
3648             }
3649
3650           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
3651
3652           /* verify speculative enqueues, maybe switch current next frame */
3653           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
3654                                            to_next, n_left_to_next,
3655                                            bi0, bi1, next0, next1);
3656          }
3657
3658       while (n_left_from > 0 && n_left_to_next > 0)
3659         {
3660           u32 bi0;
3661           vlib_buffer_t * b0;
3662           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
3663           u32 sw_if_index0;
3664           ip4_header_t * ip0;
3665           ip_csum_t sum0;
3666           ip4_address_t new_addr0, old_addr0;
3667           u16 new_port0, old_port0;
3668           udp_header_t * udp0;
3669           tcp_header_t * tcp0;
3670           u32 proto0;
3671           snat_det_out_key_t key0;
3672           snat_det_map_t * dm0;
3673           snat_det_session_t * ses0 = 0;
3674           u32 rx_fib_index0;
3675           icmp46_header_t * icmp0;
3676
3677           /* speculatively enqueue b0 to the current next frame */
3678           bi0 = from[0];
3679           to_next[0] = bi0;
3680           from += 1;
3681           to_next += 1;
3682           n_left_from -= 1;
3683           n_left_to_next -= 1;
3684
3685           b0 = vlib_get_buffer (vm, bi0);
3686
3687           ip0 = vlib_buffer_get_current (b0);
3688           udp0 = ip4_next_header (ip0);
3689           tcp0 = (tcp_header_t *) udp0;
3690
3691           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3692
3693           if (PREDICT_FALSE(ip0->ttl == 1))
3694             {
3695               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3696               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3697                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3698                                            0);
3699               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
3700               goto trace00;
3701             }
3702
3703           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3704
3705           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
3706             {
3707               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3708               icmp0 = (icmp46_header_t *) udp0;
3709
3710               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
3711                                   rx_fib_index0, node, next0, thread_index,
3712                                   &ses0, &dm0);
3713               goto trace00;
3714             }
3715
3716           key0.ext_host_addr = ip0->src_address;
3717           key0.ext_host_port = tcp0->src;
3718           key0.out_port = tcp0->dst;
3719
3720           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
3721           if (PREDICT_FALSE(!dm0))
3722             {
3723               nat_log_info ("unknown dst address:  %U",
3724                             format_ip4_address, &ip0->dst_address);
3725               next0 = SNAT_OUT2IN_NEXT_DROP;
3726               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3727               goto trace00;
3728             }
3729
3730           snat_det_reverse(dm0, &ip0->dst_address,
3731                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
3732
3733           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
3734           if (PREDICT_FALSE(!ses0))
3735             {
3736               nat_log_info ("no match src %U:%d dst %U:%d for user %U",
3737                             format_ip4_address, &ip0->src_address,
3738                             clib_net_to_host_u16 (tcp0->src),
3739                             format_ip4_address, &ip0->dst_address,
3740                             clib_net_to_host_u16 (tcp0->dst),
3741                             format_ip4_address, &new_addr0);
3742               next0 = SNAT_OUT2IN_NEXT_DROP;
3743               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3744               goto trace00;
3745             }
3746           new_port0 = ses0->in_port;
3747
3748           old_addr0 = ip0->dst_address;
3749           ip0->dst_address = new_addr0;
3750           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
3751
3752           sum0 = ip0->checksum;
3753           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3754                                  ip4_header_t,
3755                                  dst_address /* changed member */);
3756           ip0->checksum = ip_csum_fold (sum0);
3757
3758           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3759             {
3760               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
3761                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
3762               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
3763                 snat_det_ses_close(dm0, ses0);
3764
3765               old_port0 = tcp0->dst;
3766               tcp0->dst = new_port0;
3767
3768               sum0 = tcp0->checksum;
3769               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3770                                      ip4_header_t,
3771                                      dst_address /* changed member */);
3772
3773               sum0 = ip_csum_update (sum0, old_port0, new_port0,
3774                                      ip4_header_t /* cheat */,
3775                                      length /* changed member */);
3776               tcp0->checksum = ip_csum_fold(sum0);
3777             }
3778           else
3779             {
3780               old_port0 = udp0->dst_port;
3781               udp0->dst_port = new_port0;
3782               udp0->checksum = 0;
3783             }
3784
3785         trace00:
3786
3787           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3788                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3789             {
3790               snat_out2in_trace_t *t =
3791                  vlib_add_trace (vm, node, b0, sizeof (*t));
3792               t->sw_if_index = sw_if_index0;
3793               t->next_index = next0;
3794               t->session_index = ~0;
3795               if (ses0)
3796                 t->session_index = ses0 - dm0->sessions;
3797             }
3798
3799           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
3800
3801           /* verify speculative enqueue, maybe switch current next frame */
3802           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3803                                            to_next, n_left_to_next,
3804                                            bi0, next0);
3805         }
3806
3807       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3808     }
3809
3810   vlib_node_increment_counter (vm, snat_det_out2in_node.index,
3811                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
3812                                pkts_processed);
3813   return frame->n_vectors;
3814 }
3815
3816 VLIB_REGISTER_NODE (snat_det_out2in_node) = {
3817   .function = snat_det_out2in_node_fn,
3818   .name = "nat44-det-out2in",
3819   .vector_size = sizeof (u32),
3820   .format_trace = format_snat_out2in_trace,
3821   .type = VLIB_NODE_TYPE_INTERNAL,
3822
3823   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
3824   .error_strings = snat_out2in_error_strings,
3825
3826   .runtime_data_bytes = sizeof (snat_runtime_t),
3827
3828   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
3829
3830   /* edit / add dispositions here */
3831   .next_nodes = {
3832     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
3833     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
3834     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3835     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
3836   },
3837 };
3838 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn);
3839
3840 /**
3841  * Get address and port values to be used for ICMP packet translation
3842  * and create session if needed
3843  *
3844  * @param[in,out] sm             NAT main
3845  * @param[in,out] node           NAT node runtime
3846  * @param[in] thread_index       thread index
3847  * @param[in,out] b0             buffer containing packet to be translated
3848  * @param[out] p_proto           protocol used for matching
3849  * @param[out] p_value           address and port after NAT translation
3850  * @param[out] p_dont_translate  if packet should not be translated
3851  * @param d                      optional parameter
3852  * @param e                      optional parameter
3853  */
3854 u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node,
3855                           u32 thread_index, vlib_buffer_t *b0,
3856                           ip4_header_t *ip0, u8 *p_proto,
3857                           snat_session_key_t *p_value,
3858                           u8 *p_dont_translate, void *d, void *e)
3859 {
3860   icmp46_header_t *icmp0;
3861   u32 sw_if_index0;
3862   u8 protocol;
3863   snat_det_out_key_t key0;
3864   u8 dont_translate = 0;
3865   u32 next0 = ~0;
3866   icmp_echo_header_t *echo0, *inner_echo0 = 0;
3867   ip4_header_t *inner_ip0;
3868   void *l4_header = 0;
3869   icmp46_header_t *inner_icmp0;
3870   snat_det_map_t * dm0 = 0;
3871   ip4_address_t new_addr0 = {{0}};
3872   snat_det_session_t * ses0 = 0;
3873   ip4_address_t out_addr;
3874
3875   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
3876   echo0 = (icmp_echo_header_t *)(icmp0+1);
3877   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3878
3879   if (!icmp_is_error_message (icmp0))
3880     {
3881       protocol = SNAT_PROTOCOL_ICMP;
3882       key0.ext_host_addr = ip0->src_address;
3883       key0.ext_host_port = 0;
3884       key0.out_port = echo0->identifier;
3885       out_addr = ip0->dst_address;
3886     }
3887   else
3888     {
3889       inner_ip0 = (ip4_header_t *)(echo0+1);
3890       l4_header = ip4_next_header (inner_ip0);
3891       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
3892       key0.ext_host_addr = inner_ip0->dst_address;
3893       out_addr = inner_ip0->src_address;
3894       switch (protocol)
3895         {
3896         case SNAT_PROTOCOL_ICMP:
3897           inner_icmp0 = (icmp46_header_t*)l4_header;
3898           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
3899           key0.ext_host_port = 0;
3900           key0.out_port = inner_echo0->identifier;
3901           break;
3902         case SNAT_PROTOCOL_UDP:
3903         case SNAT_PROTOCOL_TCP:
3904           key0.ext_host_port = ((tcp_udp_header_t*)l4_header)->dst_port;
3905           key0.out_port = ((tcp_udp_header_t*)l4_header)->src_port;
3906           break;
3907         default:
3908           b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
3909           next0 = SNAT_OUT2IN_NEXT_DROP;
3910           goto out;
3911         }
3912     }
3913
3914   dm0 = snat_det_map_by_out(sm, &out_addr);
3915   if (PREDICT_FALSE(!dm0))
3916     {
3917       /* Don't NAT packet aimed at the intfc address */
3918       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
3919                                           ip0->dst_address.as_u32)))
3920         {
3921           dont_translate = 1;
3922           goto out;
3923         }
3924       nat_log_info ("unknown dst address:  %U",
3925                     format_ip4_address, &ip0->dst_address);
3926       goto out;
3927     }
3928
3929   snat_det_reverse(dm0, &ip0->dst_address,
3930                    clib_net_to_host_u16(key0.out_port), &new_addr0);
3931
3932   ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
3933   if (PREDICT_FALSE(!ses0))
3934     {
3935       /* Don't NAT packet aimed at the intfc address */
3936       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
3937                                           ip0->dst_address.as_u32)))
3938         {
3939           dont_translate = 1;
3940           goto out;
3941         }
3942       nat_log_info ("no match src %U:%d dst %U:%d for user %U",
3943                     format_ip4_address, &key0.ext_host_addr,
3944                     clib_net_to_host_u16 (key0.ext_host_port),
3945                     format_ip4_address, &out_addr,
3946                     clib_net_to_host_u16 (key0.out_port),
3947                     format_ip4_address, &new_addr0);
3948       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3949       next0 = SNAT_OUT2IN_NEXT_DROP;
3950       goto out;
3951     }
3952
3953   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
3954                     !icmp_is_error_message (icmp0)))
3955     {
3956       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
3957       next0 = SNAT_OUT2IN_NEXT_DROP;
3958       goto out;
3959     }
3960
3961   goto out;
3962
3963 out:
3964   *p_proto = protocol;
3965   if (ses0)
3966     {
3967       p_value->addr = new_addr0;
3968       p_value->fib_index = sm->inside_fib_index;
3969       p_value->port = ses0->in_port;
3970     }
3971   *p_dont_translate = dont_translate;
3972   if (d)
3973     *(snat_det_session_t**)d = ses0;
3974   if (e)
3975     *(snat_det_map_t**)e = dm0;
3976   return next0;
3977 }
3978
3979 /**********************/
3980 /*** worker handoff ***/
3981 /**********************/
3982 static uword
3983 snat_out2in_worker_handoff_fn (vlib_main_t * vm,
3984                                vlib_node_runtime_t * node,
3985                                vlib_frame_t * frame)
3986 {
3987   snat_main_t *sm = &snat_main;
3988   vlib_thread_main_t *tm = vlib_get_thread_main ();
3989   u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0;
3990   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
3991   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
3992     = 0;
3993   vlib_frame_queue_elt_t *hf = 0;
3994   vlib_frame_queue_t *fq;
3995   vlib_frame_t *f = 0;
3996   int i;
3997   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
3998   u32 next_worker_index = 0;
3999   u32 current_worker_index = ~0;
4000   u32 thread_index = vm->thread_index;
4001   vlib_frame_t *d = 0;
4002
4003   ASSERT (vec_len (sm->workers));
4004
4005   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
4006     {
4007       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
4008
4009       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
4010                                tm->n_vlib_mains - 1,
4011                                (vlib_frame_queue_t *) (~0));
4012     }
4013
4014   from = vlib_frame_vector_args (frame);
4015   n_left_from = frame->n_vectors;
4016
4017   while (n_left_from > 0)
4018     {
4019       u32 bi0;
4020       vlib_buffer_t *b0;
4021       u32 sw_if_index0;
4022       u32 rx_fib_index0;
4023       ip4_header_t * ip0;
4024       u8 do_handoff;
4025
4026       bi0 = from[0];
4027       from += 1;
4028       n_left_from -= 1;
4029
4030       b0 = vlib_get_buffer (vm, bi0);
4031
4032       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
4033       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
4034
4035       ip0 = vlib_buffer_get_current (b0);
4036
4037       next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0);
4038
4039       if (PREDICT_FALSE (next_worker_index != thread_index))
4040         {
4041           do_handoff = 1;
4042
4043           if (next_worker_index != current_worker_index)
4044             {
4045               fq = is_vlib_frame_queue_congested (
4046                 sm->fq_out2in_index, next_worker_index, NAT_FQ_NELTS - 2,
4047                 congested_handoff_queue_by_worker_index);
4048
4049               if (fq)
4050                 {
4051                   /* if this is 1st frame */
4052                   if (!d)
4053                     {
4054                       d = vlib_get_frame_to_node (vm, sm->error_node_index);
4055                       to_next_drop = vlib_frame_vector_args (d);
4056                     }
4057
4058                   to_next_drop[0] = bi0;
4059                   to_next_drop += 1;
4060                   d->n_vectors++;
4061                   b0->error = node->errors[SNAT_OUT2IN_ERROR_FQ_CONGESTED];
4062                   goto trace0;
4063                 }
4064
4065               if (hf)
4066                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
4067
4068               hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index,
4069                                                       next_worker_index,
4070                                                       handoff_queue_elt_by_worker_index);
4071
4072               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
4073               to_next_worker = &hf->buffer_index[hf->n_vectors];
4074               current_worker_index = next_worker_index;
4075             }
4076
4077           /* enqueue to correct worker thread */
4078           to_next_worker[0] = bi0;
4079           to_next_worker++;
4080           n_left_to_next_worker--;
4081
4082           if (n_left_to_next_worker == 0)
4083             {
4084               hf->n_vectors = VLIB_FRAME_SIZE;
4085               vlib_put_frame_queue_elt (hf);
4086               current_worker_index = ~0;
4087               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
4088               hf = 0;
4089             }
4090         }
4091       else
4092         {
4093           do_handoff = 0;
4094           /* if this is 1st frame */
4095           if (!f)
4096             {
4097               f = vlib_get_frame_to_node (vm, sm->out2in_node_index);
4098               to_next = vlib_frame_vector_args (f);
4099             }
4100
4101           to_next[0] = bi0;
4102           to_next += 1;
4103           f->n_vectors++;
4104         }
4105
4106 trace0:
4107       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
4108                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
4109         {
4110           snat_out2in_worker_handoff_trace_t *t =
4111             vlib_add_trace (vm, node, b0, sizeof (*t));
4112           t->next_worker_index = next_worker_index;
4113           t->do_handoff = do_handoff;
4114         }
4115     }
4116
4117   if (f)
4118     vlib_put_frame_to_node (vm, sm->out2in_node_index, f);
4119
4120   if (d)
4121     vlib_put_frame_to_node (vm, sm->error_node_index, d);
4122
4123   if (hf)
4124     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
4125
4126   /* Ship frames to the worker nodes */
4127   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
4128     {
4129       if (handoff_queue_elt_by_worker_index[i])
4130         {
4131           hf = handoff_queue_elt_by_worker_index[i];
4132           /*
4133            * It works better to let the handoff node
4134            * rate-adapt, always ship the handoff queue element.
4135            */
4136           if (1 || hf->n_vectors == hf->last_n_vectors)
4137             {
4138               vlib_put_frame_queue_elt (hf);
4139               handoff_queue_elt_by_worker_index[i] = 0;
4140             }
4141           else
4142             hf->last_n_vectors = hf->n_vectors;
4143         }
4144       congested_handoff_queue_by_worker_index[i] =
4145         (vlib_frame_queue_t *) (~0);
4146     }
4147   hf = 0;
4148   current_worker_index = ~0;
4149   return frame->n_vectors;
4150 }
4151
4152 VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = {
4153   .function = snat_out2in_worker_handoff_fn,
4154   .name = "nat44-out2in-worker-handoff",
4155   .vector_size = sizeof (u32),
4156   .format_trace = format_snat_out2in_worker_handoff_trace,
4157   .type = VLIB_NODE_TYPE_INTERNAL,
4158
4159   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
4160   .error_strings = snat_out2in_error_strings,
4161
4162   .n_next_nodes = 1,
4163
4164   .next_nodes = {
4165     [0] = "error-drop",
4166   },
4167 };
4168
4169 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn);
4170
4171 static uword
4172 snat_out2in_fast_node_fn (vlib_main_t * vm,
4173                           vlib_node_runtime_t * node,
4174                           vlib_frame_t * frame)
4175 {
4176   u32 n_left_from, * from, * to_next;
4177   snat_out2in_next_t next_index;
4178   u32 pkts_processed = 0;
4179   snat_main_t * sm = &snat_main;
4180
4181   from = vlib_frame_vector_args (frame);
4182   n_left_from = frame->n_vectors;
4183   next_index = node->cached_next_index;
4184
4185   while (n_left_from > 0)
4186     {
4187       u32 n_left_to_next;
4188
4189       vlib_get_next_frame (vm, node, next_index,
4190                            to_next, n_left_to_next);
4191
4192       while (n_left_from > 0 && n_left_to_next > 0)
4193         {
4194           u32 bi0;
4195           vlib_buffer_t * b0;
4196           u32 next0 = SNAT_OUT2IN_NEXT_DROP;
4197           u32 sw_if_index0;
4198           ip4_header_t * ip0;
4199           ip_csum_t sum0;
4200           u32 new_addr0, old_addr0;
4201           u16 new_port0, old_port0;
4202           udp_header_t * udp0;
4203           tcp_header_t * tcp0;
4204           icmp46_header_t * icmp0;
4205           snat_session_key_t key0, sm0;
4206           u32 proto0;
4207           u32 rx_fib_index0;
4208
4209           /* speculatively enqueue b0 to the current next frame */
4210           bi0 = from[0];
4211           to_next[0] = bi0;
4212           from += 1;
4213           to_next += 1;
4214           n_left_from -= 1;
4215           n_left_to_next -= 1;
4216
4217           b0 = vlib_get_buffer (vm, bi0);
4218
4219           ip0 = vlib_buffer_get_current (b0);
4220           udp0 = ip4_next_header (ip0);
4221           tcp0 = (tcp_header_t *) udp0;
4222           icmp0 = (icmp46_header_t *) udp0;
4223
4224           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
4225           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
4226
4227           vnet_feature_next (&next0, b0);
4228
4229           if (PREDICT_FALSE(ip0->ttl == 1))
4230             {
4231               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
4232               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
4233                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
4234                                            0);
4235               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
4236               goto trace00;
4237             }
4238
4239           proto0 = ip_proto_to_snat_proto (ip0->protocol);
4240
4241           if (PREDICT_FALSE (proto0 == ~0))
4242               goto trace00;
4243
4244           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
4245             {
4246               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
4247                                   rx_fib_index0, node, next0, ~0, 0, 0);
4248               goto trace00;
4249             }
4250
4251           key0.addr = ip0->dst_address;
4252           key0.port = udp0->dst_port;
4253           key0.fib_index = rx_fib_index0;
4254
4255           if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
4256             {
4257               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
4258               goto trace00;
4259             }
4260
4261           new_addr0 = sm0.addr.as_u32;
4262           new_port0 = sm0.port;
4263           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
4264           old_addr0 = ip0->dst_address.as_u32;
4265           ip0->dst_address.as_u32 = new_addr0;
4266
4267           sum0 = ip0->checksum;
4268           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
4269                                  ip4_header_t,
4270                                  dst_address /* changed member */);
4271           ip0->checksum = ip_csum_fold (sum0);
4272
4273           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
4274             {
4275                if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
4276                 {
4277                   old_port0 = tcp0->dst_port;
4278                   tcp0->dst_port = new_port0;
4279
4280                   sum0 = tcp0->checksum;
4281                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
4282                                          ip4_header_t,
4283                                          dst_address /* changed member */);
4284
4285                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
4286                                          ip4_header_t /* cheat */,
4287                                          length /* changed member */);
4288                   tcp0->checksum = ip_csum_fold(sum0);
4289                 }
4290               else
4291                 {
4292                   old_port0 = udp0->dst_port;
4293                   udp0->dst_port = new_port0;
4294                   udp0->checksum = 0;
4295                 }
4296             }
4297           else
4298             {
4299               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
4300                 {
4301                   sum0 = tcp0->checksum;
4302                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
4303                                          ip4_header_t,
4304                                          dst_address /* changed member */);
4305
4306                   tcp0->checksum = ip_csum_fold(sum0);
4307                 }
4308             }
4309
4310         trace00:
4311
4312           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
4313                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
4314             {
4315               snat_out2in_trace_t *t =
4316                  vlib_add_trace (vm, node, b0, sizeof (*t));
4317               t->sw_if_index = sw_if_index0;
4318               t->next_index = next0;
4319             }
4320
4321           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
4322
4323           /* verify speculative enqueue, maybe switch current next frame */
4324           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
4325                                            to_next, n_left_to_next,
4326                                            bi0, next0);
4327         }
4328
4329       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
4330     }
4331
4332   vlib_node_increment_counter (vm, snat_out2in_fast_node.index,
4333                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
4334                                pkts_processed);
4335   return frame->n_vectors;
4336 }
4337
4338 VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
4339   .function = snat_out2in_fast_node_fn,
4340   .name = "nat44-out2in-fast",
4341   .vector_size = sizeof (u32),
4342   .format_trace = format_snat_out2in_fast_trace,
4343   .type = VLIB_NODE_TYPE_INTERNAL,
4344
4345   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
4346   .error_strings = snat_out2in_error_strings,
4347
4348   .runtime_data_bytes = sizeof (snat_runtime_t),
4349
4350   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
4351
4352   /* edit / add dispositions here */
4353   .next_nodes = {
4354     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
4355     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
4356     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
4357     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
4358   },
4359 };
4360 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn);