NAT44: add support for session timeout (VPP-1272)
[vpp.git] / src / plugins / nat / out2in.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/udp/udp.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <nat/nat.h>
26 #include <nat/nat_ipfix_logging.h>
27 #include <nat/nat_det.h>
28 #include <nat/nat_reass.h>
29 #include <nat/nat_inlines.h>
30
31 #include <vppinfra/hash.h>
32 #include <vppinfra/error.h>
33 #include <vppinfra/elog.h>
34
/* Per-packet trace record written by the NAT44 out2in nodes and rendered
   by format_snat_out2in_trace / format_snat_out2in_fast_trace. */
typedef struct {
  /* Interface index recorded for the packet (printed as "sw_if_index") */
  u32 sw_if_index;
  /* Next-node index chosen for the packet (printed as "next index") */
  u32 next_index;
  /* Index of the matched session in the per-thread session pool;
     the node stores ~0 when no session was matched */
  u32 session_index;
} snat_out2in_trace_t;
40
/* Trace record rendered by format_snat_out2in_worker_handoff_trace. */
typedef struct {
  /* Worker index printed after the "next worker"/"same worker" label */
  u32 next_worker_index;
  /* Non-zero selects the "next worker" label, zero "same worker" */
  u8 do_handoff;
} snat_out2in_worker_handoff_trace_t;
45
46 /* packet trace format function */
47 static u8 * format_snat_out2in_trace (u8 * s, va_list * args)
48 {
49   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
50   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
51   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
52
53   s = format (s, "NAT44_OUT2IN: sw_if_index %d, next index %d, session index %d",
54               t->sw_if_index, t->next_index, t->session_index);
55   return s;
56 }
57
58 static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args)
59 {
60   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
61   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
62   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
63
64   s = format (s, "NAT44_OUT2IN_FAST: sw_if_index %d, next index %d",
65               t->sw_if_index, t->next_index);
66   return s;
67 }
68
69 static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args)
70 {
71   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
72   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
73   snat_out2in_worker_handoff_trace_t * t =
74     va_arg (*args, snat_out2in_worker_handoff_trace_t *);
75   char * m;
76
77   m = t->do_handoff ? "next worker" : "same worker";
78   s = format (s, "NAT44_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
79
80   return s;
81 }
82
/* Trace record rendered by format_nat44_out2in_reass_trace. */
typedef struct {
  /* Interface index recorded for the packet (printed as "sw_if_index") */
  u32 sw_if_index;
  /* Next-node index chosen for the packet (printed as "next index") */
  u32 next_index;
  /* Non-zero is printed as status "cached", zero as "translated" */
  u8 cached;
} nat44_out2in_reass_trace_t;
88
89 static u8 * format_nat44_out2in_reass_trace (u8 * s, va_list * args)
90 {
91   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
92   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
93   nat44_out2in_reass_trace_t * t = va_arg (*args, nat44_out2in_reass_trace_t *);
94
95   s = format (s, "NAT44_OUT2IN_REASS: sw_if_index %d, next index %d, status %s",
96               t->sw_if_index, t->next_index,
97               t->cached ? "cached" : "translated");
98
99   return s;
100 }
101
/* Graph-node registration objects for the out2in nodes, declared here so
   they can be referenced before their definitions.
   NOTE(review): presumably each is completed by a VLIB_REGISTER_NODE
   further down the file - confirm. */
vlib_node_registration_t snat_out2in_node;
vlib_node_registration_t snat_out2in_fast_node;
vlib_node_registration_t snat_out2in_worker_handoff_node;
vlib_node_registration_t snat_det_out2in_node;
vlib_node_registration_t nat44_out2in_reass_node;
vlib_node_registration_t nat44_ed_out2in_node;
vlib_node_registration_t nat44_ed_out2in_slowpath_node;
109
/* X-macro listing every out2in error counter as _(SYMBOL, "description").
   It is expanded twice below: once to build the snat_out2in_error_t enum
   and once to build the matching description table, so both always stay
   in the same order. Add new counters here only. */
#define foreach_snat_out2in_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(OUT2IN_PACKETS, "Good out2in packets processed")      \
_(OUT_OF_PORTS, "Out of ports")                         \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(NO_TRANSLATION, "No translation")                     \
_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")   \
_(DROP_FRAGMENT, "Drop fragment")                       \
_(MAX_REASS, "Maximum reassemblies exceeded")           \
_(MAX_FRAG, "Maximum fragments per reassembly exceeded")\
_(FQ_CONGESTED, "Handoff frame queue congested")

/* Error counter indices (SNAT_OUT2IN_ERROR_<SYMBOL>); used in this file to
   index node->errors[]. SNAT_OUT2IN_N_ERROR is the number of counters. */
typedef enum {
#define _(sym,str) SNAT_OUT2IN_ERROR_##sym,
  foreach_snat_out2in_error
#undef _
  SNAT_OUT2IN_N_ERROR,
} snat_out2in_error_t;

/* Human-readable descriptions, indexed by snat_out2in_error_t. */
static char * snat_out2in_error_strings[] = {
#define _(sym,string) string,
  foreach_snat_out2in_error
#undef _
};
134
/* Next-node dispositions used by the out2in nodes in this file. */
typedef enum {
  /* Packet is dropped (usually with b->error set to an out2in error) */
  SNAT_OUT2IN_NEXT_DROP,
  /* Default disposition for packets that are not diverted elsewhere */
  SNAT_OUT2IN_NEXT_LOOKUP,
  /* Chosen when the TTL is 1 and an ICMP time-exceeded reply is set up */
  SNAT_OUT2IN_NEXT_ICMP_ERROR,
  /* Chosen for IPv4 fragments */
  SNAT_OUT2IN_NEXT_REASS,
  /* Number of dispositions */
  SNAT_OUT2IN_N_NEXT,
} snat_out2in_next_t;
142
143 int
144 nat44_o2i_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void * arg)
145 {
146   snat_main_t *sm = &snat_main;
147   nat44_is_idle_session_ctx_t *ctx = arg;
148   snat_session_t *s;
149   u64 sess_timeout_time;
150   snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
151                                                        ctx->thread_index);
152   clib_bihash_kv_8_8_t s_kv;
153
154   s = pool_elt_at_index (tsm->sessions, kv->value);
155   sess_timeout_time = s->last_heard + (f64)nat44_session_get_timeout(sm, s);
156   if (ctx->now >= sess_timeout_time)
157     {
158       s_kv.key = s->in2out.as_u64;
159       if (clib_bihash_add_del_8_8 (&tsm->in2out, &s_kv, 0))
160         nat_log_warn ("out2in key del failed");
161
162       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
163                                           s->out2in.addr.as_u32,
164                                           s->in2out.protocol,
165                                           s->in2out.port,
166                                           s->out2in.port,
167                                           s->in2out.fib_index);
168
169       if (!snat_is_session_static (s))
170         snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
171                                             &s->out2in);
172
173       nat44_delete_session (sm, s, ctx->thread_index);
174       return 1;
175     }
176
177   return 0;
178 }
179
180 /**
181  * @brief Create session for static mapping.
182  *
183  * Create NAT session initiated by host from external network with static
184  * mapping.
185  *
186  * @param sm     NAT main.
187  * @param b0     Vlib buffer.
188  * @param in2out In2out NAT44 session key.
189  * @param out2in Out2in NAT44 session key.
190  * @param node   Vlib node.
191  *
192  * @returns SNAT session if successfully created otherwise 0.
193  */
194 static inline snat_session_t *
195 create_session_for_static_mapping (snat_main_t *sm,
196                                    vlib_buffer_t *b0,
197                                    snat_session_key_t in2out,
198                                    snat_session_key_t out2in,
199                                    vlib_node_runtime_t * node,
200                                    u32 thread_index,
201                                    f64 now)
202 {
203   snat_user_t *u;
204   snat_session_t *s;
205   clib_bihash_kv_8_8_t kv0;
206   ip4_header_t *ip0;
207   udp_header_t *udp0;
208   nat44_is_idle_session_ctx_t ctx0;
209
210   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
211     {
212       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
213       nat_log_notice ("maximum sessions exceeded");
214       return 0;
215     }
216
217   ip0 = vlib_buffer_get_current (b0);
218   udp0 = ip4_next_header (ip0);
219
220   u = nat_user_get_or_create (sm, &in2out.addr, in2out.fib_index, thread_index);
221   if (!u)
222     {
223       nat_log_warn ("create NAT user failed");
224       return 0;
225     }
226
227   s = nat_session_alloc_or_recycle (sm, u, thread_index);
228   if (!s)
229     {
230       nat44_delete_user_with_no_session (sm, u, thread_index);
231       nat_log_warn ("create NAT session failed");
232       return 0;
233     }
234
235   s->outside_address_index = ~0;
236   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
237   s->ext_host_addr.as_u32 = ip0->src_address.as_u32;
238   s->ext_host_port = udp0->src_port;
239   user_session_increment (sm, u, 1 /* static */);
240   s->in2out = in2out;
241   s->out2in = out2in;
242   s->in2out.protocol = out2in.protocol;
243
244   /* Add to translation hashes */
245   ctx0.now = now;
246   ctx0.thread_index = thread_index;
247   kv0.key = s->in2out.as_u64;
248   kv0.value = s - sm->per_thread_data[thread_index].sessions;
249   if (clib_bihash_add_or_overwrite_stale_8_8 (
250        &sm->per_thread_data[thread_index].in2out, &kv0,
251        nat44_i2o_is_idle_session_cb, &ctx0))
252       nat_log_notice ("in2out key add failed");
253
254   kv0.key = s->out2in.as_u64;
255
256   if (clib_bihash_add_or_overwrite_stale_8_8 (
257         &sm->per_thread_data[thread_index].out2in, &kv0,
258         nat44_o2i_is_idle_session_cb, &ctx0))
259       nat_log_notice ("out2in key add failed");
260
261   /* log NAT event */
262   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
263                                       s->out2in.addr.as_u32,
264                                       s->in2out.protocol,
265                                       s->in2out.port,
266                                       s->out2in.port,
267                                       s->in2out.fib_index);
268   return s;
269 }
270
271 static_always_inline
272 snat_out2in_error_t icmp_get_key(ip4_header_t *ip0,
273                                  snat_session_key_t *p_key0)
274 {
275   icmp46_header_t *icmp0;
276   snat_session_key_t key0;
277   icmp_echo_header_t *echo0, *inner_echo0 = 0;
278   ip4_header_t *inner_ip0;
279   void *l4_header = 0;
280   icmp46_header_t *inner_icmp0;
281
282   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
283   echo0 = (icmp_echo_header_t *)(icmp0+1);
284
285   if (!icmp_is_error_message (icmp0))
286     {
287       key0.protocol = SNAT_PROTOCOL_ICMP;
288       key0.addr = ip0->dst_address;
289       key0.port = echo0->identifier;
290     }
291   else
292     {
293       inner_ip0 = (ip4_header_t *)(echo0+1);
294       l4_header = ip4_next_header (inner_ip0);
295       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
296       key0.addr = inner_ip0->src_address;
297       switch (key0.protocol)
298         {
299         case SNAT_PROTOCOL_ICMP:
300           inner_icmp0 = (icmp46_header_t*)l4_header;
301           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
302           key0.port = inner_echo0->identifier;
303           break;
304         case SNAT_PROTOCOL_UDP:
305         case SNAT_PROTOCOL_TCP:
306           key0.port = ((tcp_udp_header_t*)l4_header)->src_port;
307           break;
308         default:
309           return SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL;
310         }
311     }
312   *p_key0 = key0;
313   return -1; /* success */
314 }
315
316 /**
317  * Get address and port values to be used for ICMP packet translation
318  * and create session if needed
319  *
320  * @param[in,out] sm             NAT main
321  * @param[in,out] node           NAT node runtime
322  * @param[in] thread_index       thread index
323  * @param[in,out] b0             buffer containing packet to be translated
324  * @param[out] p_proto           protocol used for matching
325  * @param[out] p_value           address and port after NAT translation
326  * @param[out] p_dont_translate  if packet should not be translated
327  * @param d                      optional parameter
328  * @param e                      optional parameter
329  */
330 u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
331                            u32 thread_index, vlib_buffer_t *b0,
332                            ip4_header_t *ip0, u8 *p_proto,
333                            snat_session_key_t *p_value,
334                            u8 *p_dont_translate, void *d, void *e)
335 {
336   icmp46_header_t *icmp0;
337   u32 sw_if_index0;
338   u32 rx_fib_index0;
339   snat_session_key_t key0;
340   snat_session_key_t sm0;
341   snat_session_t *s0 = 0;
342   u8 dont_translate = 0;
343   clib_bihash_kv_8_8_t kv0, value0;
344   u8 is_addr_only;
345   u32 next0 = ~0;
346   int err;
347
348   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
349   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
350   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
351
352   key0.protocol = 0;
353
354   err = icmp_get_key (ip0, &key0);
355   if (err != -1)
356     {
357       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
358       next0 = SNAT_OUT2IN_NEXT_DROP;
359       goto out;
360     }
361   key0.fib_index = rx_fib_index0;
362
363   kv0.key = key0.as_u64;
364
365   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
366                               &value0))
367     {
368       /* Try to match static mapping by external address and port,
369          destination address and port in packet */
370       if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0, 0))
371         {
372           if (!sm->forwarding_enabled)
373             {
374               /* Don't NAT packet aimed at the intfc address */
375               if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
376                                                   ip0->dst_address.as_u32)))
377                 {
378                   dont_translate = 1;
379                   goto out;
380                 }
381               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
382               next0 = SNAT_OUT2IN_NEXT_DROP;
383               goto out;
384             }
385           else
386             {
387               dont_translate = 1;
388               goto out;
389             }
390         }
391
392       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
393                         (icmp0->type != ICMP4_echo_request || !is_addr_only)))
394         {
395           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
396           next0 = SNAT_OUT2IN_NEXT_DROP;
397           goto out;
398         }
399
400       /* Create session initiated by host from external network */
401       s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
402                                              node, thread_index,
403                                              vlib_time_now (sm->vlib_main));
404
405       if (!s0)
406         {
407           next0 = SNAT_OUT2IN_NEXT_DROP;
408           goto out;
409         }
410     }
411   else
412     {
413       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
414                         icmp0->type != ICMP4_echo_request &&
415                         !icmp_is_error_message (icmp0)))
416         {
417           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
418           next0 = SNAT_OUT2IN_NEXT_DROP;
419           goto out;
420         }
421
422       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
423                               value0.value);
424     }
425
426 out:
427   *p_proto = key0.protocol;
428   if (s0)
429     *p_value = s0->in2out;
430   *p_dont_translate = dont_translate;
431   if (d)
432     *(snat_session_t**)d = s0;
433   return next0;
434 }
435
436 /**
437  * Get address and port values to be used for ICMP packet translation
438  *
439  * @param[in] sm                 NAT main
440  * @param[in,out] node           NAT node runtime
441  * @param[in] thread_index       thread index
442  * @param[in,out] b0             buffer containing packet to be translated
443  * @param[out] p_proto           protocol used for matching
444  * @param[out] p_value           address and port after NAT translation
445  * @param[out] p_dont_translate  if packet should not be translated
446  * @param d                      optional parameter
447  * @param e                      optional parameter
448  */
449 u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
450                            u32 thread_index, vlib_buffer_t *b0,
451                            ip4_header_t *ip0, u8 *p_proto,
452                            snat_session_key_t *p_value,
453                            u8 *p_dont_translate, void *d, void *e)
454 {
455   icmp46_header_t *icmp0;
456   u32 sw_if_index0;
457   u32 rx_fib_index0;
458   snat_session_key_t key0;
459   snat_session_key_t sm0;
460   u8 dont_translate = 0;
461   u8 is_addr_only;
462   u32 next0 = ~0;
463   int err;
464
465   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
466   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
467   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
468
469   err = icmp_get_key (ip0, &key0);
470   if (err != -1)
471     {
472       b0->error = node->errors[err];
473       next0 = SNAT_OUT2IN_NEXT_DROP;
474       goto out2;
475     }
476   key0.fib_index = rx_fib_index0;
477
478   if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0, 0))
479     {
480       /* Don't NAT packet aimed at the intfc address */
481       if (is_interface_addr(sm, node, sw_if_index0, ip0->dst_address.as_u32))
482         {
483           dont_translate = 1;
484           goto out;
485         }
486       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
487       next0 = SNAT_OUT2IN_NEXT_DROP;
488       goto out;
489     }
490
491   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
492                     (icmp0->type != ICMP4_echo_request || !is_addr_only) &&
493                     !icmp_is_error_message (icmp0)))
494     {
495       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
496       next0 = SNAT_OUT2IN_NEXT_DROP;
497       goto out;
498     }
499
500 out:
501   *p_value = sm0;
502 out2:
503   *p_proto = key0.protocol;
504   *p_dont_translate = dont_translate;
505   return next0;
506 }
507
508 static inline u32 icmp_out2in (snat_main_t *sm,
509                                vlib_buffer_t * b0,
510                                ip4_header_t * ip0,
511                                icmp46_header_t * icmp0,
512                                u32 sw_if_index0,
513                                u32 rx_fib_index0,
514                                vlib_node_runtime_t * node,
515                                u32 next0,
516                                u32 thread_index,
517                                void *d,
518                                void *e)
519 {
520   snat_session_key_t sm0;
521   u8 protocol;
522   icmp_echo_header_t *echo0, *inner_echo0 = 0;
523   ip4_header_t *inner_ip0 = 0;
524   void *l4_header = 0;
525   icmp46_header_t *inner_icmp0;
526   u8 dont_translate;
527   u32 new_addr0, old_addr0;
528   u16 old_id0, new_id0;
529   ip_csum_t sum0;
530   u16 checksum0;
531   u32 next0_tmp;
532
533   echo0 = (icmp_echo_header_t *)(icmp0+1);
534
535   next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0, ip0,
536                                        &protocol, &sm0, &dont_translate, d, e);
537   if (next0_tmp != ~0)
538     next0 = next0_tmp;
539   if (next0 == SNAT_OUT2IN_NEXT_DROP || dont_translate)
540     goto out;
541
542   sum0 = ip_incremental_checksum (0, icmp0,
543                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
544   checksum0 = ~ip_csum_fold (sum0);
545   if (checksum0 != 0 && checksum0 != 0xffff)
546     {
547       next0 = SNAT_OUT2IN_NEXT_DROP;
548       goto out;
549     }
550
551   old_addr0 = ip0->dst_address.as_u32;
552   new_addr0 = ip0->dst_address.as_u32 = sm0.addr.as_u32;
553   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
554
555   sum0 = ip0->checksum;
556   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
557                          dst_address /* changed member */);
558   ip0->checksum = ip_csum_fold (sum0);
559
560   if (icmp0->checksum == 0)
561     icmp0->checksum = 0xffff;
562
563   if (!icmp_is_error_message (icmp0))
564     {
565       new_id0 = sm0.port;
566       if (PREDICT_FALSE(new_id0 != echo0->identifier))
567         {
568           old_id0 = echo0->identifier;
569           new_id0 = sm0.port;
570           echo0->identifier = new_id0;
571
572           sum0 = icmp0->checksum;
573           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
574                                  identifier /* changed member */);
575           icmp0->checksum = ip_csum_fold (sum0);
576         }
577     }
578   else
579     {
580       inner_ip0 = (ip4_header_t *)(echo0+1);
581       l4_header = ip4_next_header (inner_ip0);
582
583       if (!ip4_header_checksum_is_valid (inner_ip0))
584         {
585           next0 = SNAT_OUT2IN_NEXT_DROP;
586           goto out;
587         }
588
589       old_addr0 = inner_ip0->src_address.as_u32;
590       inner_ip0->src_address = sm0.addr;
591       new_addr0 = inner_ip0->src_address.as_u32;
592
593       sum0 = icmp0->checksum;
594       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
595                              src_address /* changed member */);
596       icmp0->checksum = ip_csum_fold (sum0);
597
598       switch (protocol)
599         {
600         case SNAT_PROTOCOL_ICMP:
601           inner_icmp0 = (icmp46_header_t*)l4_header;
602           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
603
604           old_id0 = inner_echo0->identifier;
605           new_id0 = sm0.port;
606           inner_echo0->identifier = new_id0;
607
608           sum0 = icmp0->checksum;
609           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
610                                  identifier);
611           icmp0->checksum = ip_csum_fold (sum0);
612           break;
613         case SNAT_PROTOCOL_UDP:
614         case SNAT_PROTOCOL_TCP:
615           old_id0 = ((tcp_udp_header_t*)l4_header)->src_port;
616           new_id0 = sm0.port;
617           ((tcp_udp_header_t*)l4_header)->src_port = new_id0;
618
619           sum0 = icmp0->checksum;
620           sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
621                                  src_port);
622           icmp0->checksum = ip_csum_fold (sum0);
623           break;
624         default:
625           ASSERT(0);
626         }
627     }
628
629 out:
630   return next0;
631 }
632
633
634 static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
635                                          vlib_buffer_t * b0,
636                                          ip4_header_t * ip0,
637                                          icmp46_header_t * icmp0,
638                                          u32 sw_if_index0,
639                                          u32 rx_fib_index0,
640                                          vlib_node_runtime_t * node,
641                                          u32 next0, f64 now,
642                                          u32 thread_index,
643                                          snat_session_t ** p_s0)
644 {
645   next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
646                       next0, thread_index, p_s0, 0);
647   snat_session_t * s0 = *p_s0;
648   if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
649     {
650       /* Accounting */
651       nat44_session_update_counters (s0, now,
652                                      vlib_buffer_length_in_chain (sm->vlib_main, b0));
653       /* Per-user LRU list maintenance */
654       nat44_session_update_lru (sm, s0, thread_index);
655     }
656   return next0;
657 }
658
659 static int
660 nat_out2in_sm_unknown_proto (snat_main_t *sm,
661                              vlib_buffer_t * b,
662                              ip4_header_t * ip,
663                              u32 rx_fib_index)
664 {
665   clib_bihash_kv_8_8_t kv, value;
666   snat_static_mapping_t *m;
667   snat_session_key_t m_key;
668   u32 old_addr, new_addr;
669   ip_csum_t sum;
670
671   m_key.addr = ip->dst_address;
672   m_key.port = 0;
673   m_key.protocol = 0;
674   m_key.fib_index = 0;
675   kv.key = m_key.as_u64;
676   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
677     return 1;
678
679   m = pool_elt_at_index (sm->static_mappings, value.value);
680
681   old_addr = ip->dst_address.as_u32;
682   new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
683   sum = ip->checksum;
684   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
685   ip->checksum = ip_csum_fold (sum);
686
687   vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
688   return 0;
689 }
690
691 static uword
692 snat_out2in_node_fn (vlib_main_t * vm,
693                   vlib_node_runtime_t * node,
694                   vlib_frame_t * frame)
695 {
696   u32 n_left_from, * from, * to_next;
697   snat_out2in_next_t next_index;
698   u32 pkts_processed = 0;
699   snat_main_t * sm = &snat_main;
700   f64 now = vlib_time_now (vm);
701   u32 thread_index = vm->thread_index;
702
703   from = vlib_frame_vector_args (frame);
704   n_left_from = frame->n_vectors;
705   next_index = node->cached_next_index;
706
707   while (n_left_from > 0)
708     {
709       u32 n_left_to_next;
710
711       vlib_get_next_frame (vm, node, next_index,
712                            to_next, n_left_to_next);
713
714       while (n_left_from >= 4 && n_left_to_next >= 2)
715         {
716           u32 bi0, bi1;
717           vlib_buffer_t * b0, * b1;
718           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
719           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
720           u32 sw_if_index0, sw_if_index1;
721           ip4_header_t * ip0, *ip1;
722           ip_csum_t sum0, sum1;
723           u32 new_addr0, old_addr0;
724           u16 new_port0, old_port0;
725           u32 new_addr1, old_addr1;
726           u16 new_port1, old_port1;
727           udp_header_t * udp0, * udp1;
728           tcp_header_t * tcp0, * tcp1;
729           icmp46_header_t * icmp0, * icmp1;
730           snat_session_key_t key0, key1, sm0, sm1;
731           u32 rx_fib_index0, rx_fib_index1;
732           u32 proto0, proto1;
733           snat_session_t * s0 = 0, * s1 = 0;
734           clib_bihash_kv_8_8_t kv0, kv1, value0, value1;
735
736           /* Prefetch next iteration. */
737           {
738             vlib_buffer_t * p2, * p3;
739
740             p2 = vlib_get_buffer (vm, from[2]);
741             p3 = vlib_get_buffer (vm, from[3]);
742
743             vlib_prefetch_buffer_header (p2, LOAD);
744             vlib_prefetch_buffer_header (p3, LOAD);
745
746             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
747             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
748           }
749
750           /* speculatively enqueue b0 and b1 to the current next frame */
751           to_next[0] = bi0 = from[0];
752           to_next[1] = bi1 = from[1];
753           from += 2;
754           to_next += 2;
755           n_left_from -= 2;
756           n_left_to_next -= 2;
757
758           b0 = vlib_get_buffer (vm, bi0);
759           b1 = vlib_get_buffer (vm, bi1);
760
761           vnet_buffer (b0)->snat.flags = 0;
762           vnet_buffer (b1)->snat.flags = 0;
763
764           ip0 = vlib_buffer_get_current (b0);
765           udp0 = ip4_next_header (ip0);
766           tcp0 = (tcp_header_t *) udp0;
767           icmp0 = (icmp46_header_t *) udp0;
768
769           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
770           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
771                                    sw_if_index0);
772
773           if (PREDICT_FALSE(ip0->ttl == 1))
774             {
775               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
776               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
777                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
778                                            0);
779               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
780               goto trace0;
781             }
782
783           proto0 = ip_proto_to_snat_proto (ip0->protocol);
784
785           if (PREDICT_FALSE (proto0 == ~0))
786             {
787               if (nat_out2in_sm_unknown_proto(sm, b0, ip0, rx_fib_index0))
788                 {
789                   if (!sm->forwarding_enabled)
790                     {
791                       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
792                       next0 = SNAT_OUT2IN_NEXT_DROP;
793                     }
794                 }
795               goto trace0;
796             }
797
798           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
799             {
800               next0 = icmp_out2in_slow_path
801                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
802                  next0, now, thread_index, &s0);
803               goto trace0;
804             }
805
806           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
807             {
808               next0 = SNAT_OUT2IN_NEXT_REASS;
809               goto trace0;
810             }
811
812           key0.addr = ip0->dst_address;
813           key0.port = udp0->dst_port;
814           key0.protocol = proto0;
815           key0.fib_index = rx_fib_index0;
816
817           kv0.key = key0.as_u64;
818
819           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
820                                       &kv0, &value0))
821             {
822               /* Try to match static mapping by external address and port,
823                  destination address and port in packet */
824               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
825                 {
826                   /*
827                    * Send DHCP packets to the ipv4 stack, or we won't
828                    * be able to use dhcp client on the outside interface
829                    */
830                   if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
831                       && (udp0->dst_port ==
832                           clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
833                     {
834                       vnet_feature_next (&next0, b0);
835                       goto trace0;
836                     }
837
838                   if (!sm->forwarding_enabled)
839                     {
840                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
841                       next0 = SNAT_OUT2IN_NEXT_DROP;
842                     }
843                   goto trace0;
844                 }
845
846               /* Create session initiated by host from external network */
847               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
848                                                      thread_index, now);
849               if (!s0)
850                 {
851                   next0 = SNAT_OUT2IN_NEXT_DROP;
852                   goto trace0;
853                 }
854             }
855           else
856             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
857                                     value0.value);
858
859           old_addr0 = ip0->dst_address.as_u32;
860           ip0->dst_address = s0->in2out.addr;
861           new_addr0 = ip0->dst_address.as_u32;
862           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
863
864           sum0 = ip0->checksum;
865           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
866                                  ip4_header_t,
867                                  dst_address /* changed member */);
868           ip0->checksum = ip_csum_fold (sum0);
869
870           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
871             {
872               old_port0 = tcp0->dst_port;
873               tcp0->dst_port = s0->in2out.port;
874               new_port0 = tcp0->dst_port;
875
876               sum0 = tcp0->checksum;
877               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
878                                      ip4_header_t,
879                                      dst_address /* changed member */);
880
881               sum0 = ip_csum_update (sum0, old_port0, new_port0,
882                                      ip4_header_t /* cheat */,
883                                      length /* changed member */);
884               tcp0->checksum = ip_csum_fold(sum0);
885             }
886           else
887             {
888               old_port0 = udp0->dst_port;
889               udp0->dst_port = s0->in2out.port;
890               udp0->checksum = 0;
891             }
892
893           /* Accounting */
894           nat44_session_update_counters (s0, now,
895                                          vlib_buffer_length_in_chain (vm, b0));
896           /* Per-user LRU list maintenance */
897           nat44_session_update_lru (sm, s0, thread_index);
898         trace0:
899
900           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
901                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
902             {
903               snat_out2in_trace_t *t =
904                  vlib_add_trace (vm, node, b0, sizeof (*t));
905               t->sw_if_index = sw_if_index0;
906               t->next_index = next0;
907               t->session_index = ~0;
908               if (s0)
909                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
910             }
911
912           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
913
914
915           ip1 = vlib_buffer_get_current (b1);
916           udp1 = ip4_next_header (ip1);
917           tcp1 = (tcp_header_t *) udp1;
918           icmp1 = (icmp46_header_t *) udp1;
919
920           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
921           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
922                                    sw_if_index1);
923
924           if (PREDICT_FALSE(ip1->ttl == 1))
925             {
926               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
927               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
928                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
929                                            0);
930               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
931               goto trace1;
932             }
933
934           proto1 = ip_proto_to_snat_proto (ip1->protocol);
935
936           if (PREDICT_FALSE (proto1 == ~0))
937             {
938               if (nat_out2in_sm_unknown_proto(sm, b1, ip1, rx_fib_index1))
939                 {
940                   if (!sm->forwarding_enabled)
941                     {
942                       b1->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
943                       next1 = SNAT_OUT2IN_NEXT_DROP;
944                     }
945                 }
946               goto trace1;
947             }
948
949           if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
950             {
951               next1 = icmp_out2in_slow_path
952                 (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
953                  next1, now, thread_index, &s1);
954               goto trace1;
955             }
956
957           if (PREDICT_FALSE (ip4_is_fragment (ip1)))
958             {
959               next1 = SNAT_OUT2IN_NEXT_REASS;
960               goto trace1;
961             }
962
963           key1.addr = ip1->dst_address;
964           key1.port = udp1->dst_port;
965           key1.protocol = proto1;
966           key1.fib_index = rx_fib_index1;
967
968           kv1.key = key1.as_u64;
969
970           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
971                                       &kv1, &value1))
972             {
973               /* Try to match static mapping by external address and port,
974                  destination address and port in packet */
975               if (snat_static_mapping_match(sm, key1, &sm1, 1, 0, 0, 0))
976                 {
977                   /*
978                    * Send DHCP packets to the ipv4 stack, or we won't
979                    * be able to use dhcp client on the outside interface
980                    */
981                   if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_UDP
982                       && (udp1->dst_port ==
983                           clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
984                     {
985                       vnet_feature_next (&next1, b1);
986                       goto trace1;
987                     }
988
989                   if (!sm->forwarding_enabled)
990                     {
991                       b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
992                       next1 = SNAT_OUT2IN_NEXT_DROP;
993                     }
994                   goto trace1;
995                 }
996
997               /* Create session initiated by host from external network */
998               s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
999                                                      thread_index, now);
1000               if (!s1)
1001                 {
1002                   next1 = SNAT_OUT2IN_NEXT_DROP;
1003                   goto trace1;
1004                 }
1005             }
1006           else
1007             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1008                                     value1.value);
1009
1010           old_addr1 = ip1->dst_address.as_u32;
1011           ip1->dst_address = s1->in2out.addr;
1012           new_addr1 = ip1->dst_address.as_u32;
1013           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index;
1014
1015           sum1 = ip1->checksum;
1016           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1017                                  ip4_header_t,
1018                                  dst_address /* changed member */);
1019           ip1->checksum = ip_csum_fold (sum1);
1020
1021           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1022             {
1023               old_port1 = tcp1->dst_port;
1024               tcp1->dst_port = s1->in2out.port;
1025               new_port1 = tcp1->dst_port;
1026
1027               sum1 = tcp1->checksum;
1028               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1029                                      ip4_header_t,
1030                                      dst_address /* changed member */);
1031
1032               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1033                                      ip4_header_t /* cheat */,
1034                                      length /* changed member */);
1035               tcp1->checksum = ip_csum_fold(sum1);
1036             }
1037           else
1038             {
1039               old_port1 = udp1->dst_port;
1040               udp1->dst_port = s1->in2out.port;
1041               udp1->checksum = 0;
1042             }
1043
1044           /* Accounting */
1045           nat44_session_update_counters (s1, now,
1046                                          vlib_buffer_length_in_chain (vm, b1));
1047           /* Per-user LRU list maintenance */
1048           nat44_session_update_lru (sm, s1, thread_index);
1049         trace1:
1050
1051           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1052                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1053             {
1054               snat_out2in_trace_t *t =
1055                  vlib_add_trace (vm, node, b1, sizeof (*t));
1056               t->sw_if_index = sw_if_index1;
1057               t->next_index = next1;
1058               t->session_index = ~0;
1059               if (s1)
1060                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1061             }
1062
1063           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
1064
1065           /* verify speculative enqueues, maybe switch current next frame */
1066           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1067                                            to_next, n_left_to_next,
1068                                            bi0, bi1, next0, next1);
1069         }
1070
1071       while (n_left_from > 0 && n_left_to_next > 0)
1072         {
1073           u32 bi0;
1074           vlib_buffer_t * b0;
1075           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1076           u32 sw_if_index0;
1077           ip4_header_t * ip0;
1078           ip_csum_t sum0;
1079           u32 new_addr0, old_addr0;
1080           u16 new_port0, old_port0;
1081           udp_header_t * udp0;
1082           tcp_header_t * tcp0;
1083           icmp46_header_t * icmp0;
1084           snat_session_key_t key0, sm0;
1085           u32 rx_fib_index0;
1086           u32 proto0;
1087           snat_session_t * s0 = 0;
1088           clib_bihash_kv_8_8_t kv0, value0;
1089
1090           /* speculatively enqueue b0 to the current next frame */
1091           bi0 = from[0];
1092           to_next[0] = bi0;
1093           from += 1;
1094           to_next += 1;
1095           n_left_from -= 1;
1096           n_left_to_next -= 1;
1097
1098           b0 = vlib_get_buffer (vm, bi0);
1099
1100           vnet_buffer (b0)->snat.flags = 0;
1101
1102           ip0 = vlib_buffer_get_current (b0);
1103           udp0 = ip4_next_header (ip0);
1104           tcp0 = (tcp_header_t *) udp0;
1105           icmp0 = (icmp46_header_t *) udp0;
1106
1107           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1108           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1109                                    sw_if_index0);
1110
1111           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1112
1113           if (PREDICT_FALSE (proto0 == ~0))
1114             {
1115               if (nat_out2in_sm_unknown_proto(sm, b0, ip0, rx_fib_index0))
1116                 {
1117                   if (!sm->forwarding_enabled)
1118                     {
1119                       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
1120                       next0 = SNAT_OUT2IN_NEXT_DROP;
1121                     }
1122                 }
1123               goto trace00;
1124             }
1125
1126           if (PREDICT_FALSE(ip0->ttl == 1))
1127             {
1128               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1129               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1130                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1131                                            0);
1132               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1133               goto trace00;
1134             }
1135
1136           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1137             {
1138               next0 = icmp_out2in_slow_path
1139                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1140                  next0, now, thread_index, &s0);
1141               goto trace00;
1142             }
1143
1144           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
1145             {
1146               next0 = SNAT_OUT2IN_NEXT_REASS;
1147               goto trace00;
1148             }
1149
1150           key0.addr = ip0->dst_address;
1151           key0.port = udp0->dst_port;
1152           key0.protocol = proto0;
1153           key0.fib_index = rx_fib_index0;
1154
1155           kv0.key = key0.as_u64;
1156
1157           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1158                                       &kv0, &value0))
1159             {
1160               /* Try to match static mapping by external address and port,
1161                  destination address and port in packet */
1162               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
1163                 {
1164                   /*
1165                    * Send DHCP packets to the ipv4 stack, or we won't
1166                    * be able to use dhcp client on the outside interface
1167                    */
1168                   if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
1169                       && (udp0->dst_port ==
1170                           clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
1171                     {
1172                       vnet_feature_next (&next0, b0);
1173                       goto trace00;
1174                     }
1175
1176                   if (!sm->forwarding_enabled)
1177                     {
1178                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1179                       next0 = SNAT_OUT2IN_NEXT_DROP;
1180                     }
1181                   goto trace00;
1182                 }
1183
1184               /* Create session initiated by host from external network */
1185               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1186                                                      thread_index, now);
1187               if (!s0)
1188                 {
1189                   next0 = SNAT_OUT2IN_NEXT_DROP;
1190                   goto trace00;
1191                 }
1192             }
1193           else
1194             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1195                                     value0.value);
1196
1197           old_addr0 = ip0->dst_address.as_u32;
1198           ip0->dst_address = s0->in2out.addr;
1199           new_addr0 = ip0->dst_address.as_u32;
1200           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1201
1202           sum0 = ip0->checksum;
1203           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1204                                  ip4_header_t,
1205                                  dst_address /* changed member */);
1206           ip0->checksum = ip_csum_fold (sum0);
1207
1208           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1209             {
1210               old_port0 = tcp0->dst_port;
1211               tcp0->dst_port = s0->in2out.port;
1212               new_port0 = tcp0->dst_port;
1213
1214               sum0 = tcp0->checksum;
1215               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1216                                      ip4_header_t,
1217                                      dst_address /* changed member */);
1218
1219               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1220                                      ip4_header_t /* cheat */,
1221                                      length /* changed member */);
1222               tcp0->checksum = ip_csum_fold(sum0);
1223             }
1224           else
1225             {
1226               old_port0 = udp0->dst_port;
1227               udp0->dst_port = s0->in2out.port;
1228               udp0->checksum = 0;
1229             }
1230
1231           /* Accounting */
1232           nat44_session_update_counters (s0, now,
1233                                          vlib_buffer_length_in_chain (vm, b0));
1234           /* Per-user LRU list maintenance */
1235           nat44_session_update_lru (sm, s0, thread_index);
1236         trace00:
1237
1238           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1239                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1240             {
1241               snat_out2in_trace_t *t =
1242                  vlib_add_trace (vm, node, b0, sizeof (*t));
1243               t->sw_if_index = sw_if_index0;
1244               t->next_index = next0;
1245               t->session_index = ~0;
1246               if (s0)
1247                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1248             }
1249
1250           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1251
1252           /* verify speculative enqueue, maybe switch current next frame */
1253           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1254                                            to_next, n_left_to_next,
1255                                            bi0, next0);
1256         }
1257
1258       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1259     }
1260
1261   vlib_node_increment_counter (vm, snat_out2in_node.index,
1262                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1263                                pkts_processed);
1264   return frame->n_vectors;
1265 }
1266
1267 VLIB_REGISTER_NODE (snat_out2in_node) = {
1268   .function = snat_out2in_node_fn,
1269   .name = "nat44-out2in",
1270   .vector_size = sizeof (u32),
1271   .format_trace = format_snat_out2in_trace,
1272   .type = VLIB_NODE_TYPE_INTERNAL,
1273
1274   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1275   .error_strings = snat_out2in_error_strings,
1276
1277   .runtime_data_bytes = sizeof (snat_runtime_t),
1278
1279   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1280
1281   /* edit / add dispositions here */
1282   .next_nodes = {
1283     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1284     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1285     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1286     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1287   },
1288 };
1289 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
1290
/*
 * Node function registered as "nat44-out2in-reass": outside-to-inside
 * translation of IPv4 fragments.
 *
 * For every buffer a reassembly context is looked up (or created) from the
 * packet's source address, destination address, fragment id and protocol.
 *  - A first fragment resolves the NAT session (an existing out2in entry, or
 *    a new session created from a static mapping) and stores the session
 *    index in the context.
 *  - Any other fragment reuses the session index stored in the context.  If
 *    no session is recorded yet, the buffer is handed to the reassembly
 *    context instead of being enqueued ("cached").
 * Fragments collected in fragments_to_loopback are copied back into the
 * frame's vector storage and run through this same loop once the regular
 * input is exhausted.  Fragments collected in fragments_to_drop are sent to
 * the drop next at the end.
 *
 * vm    - vlib main of the calling thread (time source, buffer access).
 * node  - node runtime (cached next index, error counters, trace flag).
 * frame - input frame of buffer indices.
 *
 * Returns frame->n_vectors.
 */
static uword
nat44_out2in_reass_node_fn (vlib_main_t * vm,
                            vlib_node_runtime_t * node,
                            vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next;
  snat_out2in_next_t next_index;
  u32 pkts_processed = 0;
  snat_main_t *sm = &snat_main;
  f64 now = vlib_time_now (vm);
  u32 thread_index = vm->thread_index;
  snat_main_per_thread_data_t *per_thread_data =
    &sm->per_thread_data[thread_index];
  /* Buffer-index vectors filled by the reassembly helpers during the loop */
  u32 *fragments_to_drop = 0;
  u32 *fragments_to_loopback = 0;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
       {
          u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
          vlib_buffer_t *b0;
          u32 next0;
          /* set to 1 when b0 is kept by the reassembly context */
          u8 cached0 = 0;
          ip4_header_t *ip0;
          nat_reass_ip4_t *reass0;
          udp_header_t * udp0;
          tcp_header_t * tcp0;
          snat_session_key_t key0, sm0;
          clib_bihash_kv_8_8_t kv0, value0;
          snat_session_t * s0 = 0;
          u16 old_port0, new_port0;
          ip_csum_t sum0;

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          next0 = SNAT_OUT2IN_NEXT_LOOKUP;

          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
          rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
                                                               sw_if_index0);

          /* NOTE(review): presumably a configuration switch that drops all
             fragments, with 0 selecting IPv4 - confirm in nat_reass.h */
          if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
            {
              next0 = SNAT_OUT2IN_NEXT_DROP;
              b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
              goto trace0;
            }

          ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
          udp0 = ip4_next_header (ip0);
          tcp0 = (tcp_header_t *) udp0;
          proto0 = ip_proto_to_snat_proto (ip0->protocol);

          /* A null result is treated below as "reassembly limit reached";
             the helper may also append buffer indices to fragments_to_drop */
          reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
                                                 ip0->dst_address,
                                                 ip0->fragment_id,
                                                 ip0->protocol,
                                                 1,
                                                 &fragments_to_drop);

          if (PREDICT_FALSE (!reass0))
            {
              next0 = SNAT_OUT2IN_NEXT_DROP;
              b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_REASS];
              nat_log_notice ("maximum reassemblies exceeded");
              goto trace0;
            }

          if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
            {
              /* The session key needs the L4 destination port, which is
                 read from the header following the IPv4 header */
              key0.addr = ip0->dst_address;
              key0.port = udp0->dst_port;
              key0.protocol = proto0;
              key0.fib_index = rx_fib_index0;
              kv0.key = key0.as_u64;

              /* non-zero: no existing session for this key on this thread */
              if (clib_bihash_search_8_8 (&per_thread_data->out2in, &kv0, &value0))
                {
                  /* Try to match static mapping by external address and port,
                     destination address and port in packet */
                  if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
                    {
                      /*
                       * Send DHCP packets to the ipv4 stack, or we won't
                       * be able to use dhcp client on the outside interface
                       */
                      if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
                          && (udp0->dst_port
                              == clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
                        {
                          vnet_feature_next (&next0, b0);
                          goto trace0;
                        }

                      /* With forwarding enabled the packet continues
                         untranslated to the initial next0 (lookup) */
                      if (!sm->forwarding_enabled)
                        {
                          b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
                          next0 = SNAT_OUT2IN_NEXT_DROP;
                        }
                      goto trace0;
                    }

                  /* Create session initiated by host from external network */
                  s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
                                                         thread_index, now);
                  if (!s0)
                    {
                      b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
                      next0 = SNAT_OUT2IN_NEXT_DROP;
                      goto trace0;
                    }
                  /* Remember the session (as a pool index) and the owning
                     thread for the remaining fragments */
                  reass0->sess_index = s0 - per_thread_data->sessions;
                  reass0->thread_index = thread_index;
                }
              else
                {
                  s0 = pool_elt_at_index (per_thread_data->sessions,
                                          value0.value);
                  reass0->sess_index = value0.value;
                }
              /* NOTE(review): presumably moves the fragments cached in
                 reass0 into fragments_to_loopback - confirm in nat_reass.c */
              nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
            }
          else
            {
              /* No session recorded in the context yet: keep the fragment
                 in the context instead of forwarding it */
              if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
                {
                  if (nat_ip4_reass_add_fragment (reass0, bi0))
                    {
                      b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_FRAG];
                      nat_log_notice ("maximum fragments per reassembly exceeded");
                      next0 = SNAT_OUT2IN_NEXT_DROP;
                      goto trace0;
                    }
                  cached0 = 1;
                  goto trace0;
                }
              s0 = pool_elt_at_index (per_thread_data->sessions,
                                      reass0->sess_index);
            }

          /* Rewrite the destination address to the session's inside address
             and direct the packet to the inside FIB */
          old_addr0 = ip0->dst_address.as_u32;
          ip0->dst_address = s0->in2out.addr;
          new_addr0 = ip0->dst_address.as_u32;
          vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;

          /* Update the IPv4 header checksum for the changed address */
          sum0 = ip0->checksum;
          sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
                                 ip4_header_t,
                                 dst_address /* changed member */);
          ip0->checksum = ip_csum_fold (sum0);

          /* Port and L4 checksum are only rewritten in the first fragment */
          if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
            {
              if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
                {
                  old_port0 = tcp0->dst_port;
                  tcp0->dst_port = s0->in2out.port;
                  new_port0 = tcp0->dst_port;

                  /* TCP checksum is updated for both the address and the
                     port change */
                  sum0 = tcp0->checksum;
                  sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
                                         ip4_header_t,
                                         dst_address /* changed member */);

                  sum0 = ip_csum_update (sum0, old_port0, new_port0,
                                         ip4_header_t /* cheat */,
                                         length /* changed member */);
                  tcp0->checksum = ip_csum_fold(sum0);
                }
              else
                {
                  /* Non-TCP: the checksum is cleared rather than updated
                     (a zero UDP checksum over IPv4 means "not computed") */
                  old_port0 = udp0->dst_port;
                  udp0->dst_port = s0->in2out.port;
                  udp0->checksum = 0;
                }
            }

          /* Accounting */
          nat44_session_update_counters (s0, now,
                                         vlib_buffer_length_in_chain (vm, b0));
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s0, thread_index);

        trace0:
          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              nat44_out2in_reass_trace_t *t =
                 vlib_add_trace (vm, node, b0, sizeof (*t));
              t->cached = cached0;
              t->sw_if_index = sw_if_index0;
              t->next_index = next0;
            }

          if (cached0)
            {
              /* Undo the speculative enqueue: the buffer stays with the
                 reassembly context */
              n_left_to_next++;
              to_next--;
            }
          else
            {
              pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;

              /* verify speculative enqueue, maybe switch current next frame */
              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                               to_next, n_left_to_next,
                                               bi0, next0);
            }

          /* Input exhausted but fragments are waiting to be re-processed:
             reuse the frame's vector storage as new input, at most
             VLIB_FRAME_SIZE indices at a time (taken from the tail of the
             pending vector when it holds more than that) */
          if (n_left_from == 0 && vec_len (fragments_to_loopback))
            {
              from = vlib_frame_vector_args (frame);
              u32 len = vec_len (fragments_to_loopback);
              if (len <= VLIB_FRAME_SIZE)
                {
                  clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
                  n_left_from = len;
                  vec_reset_length (fragments_to_loopback);
                }
              else
                {
                  clib_memcpy (from,
                               fragments_to_loopback + (len - VLIB_FRAME_SIZE),
                               sizeof (u32) * VLIB_FRAME_SIZE);
                  n_left_from = VLIB_FRAME_SIZE;
                  _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
                }
            }
       }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  /* Counts buffers that were neither dropped nor cached */
  vlib_node_increment_counter (vm, nat44_out2in_reass_node.index,
                               SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
                               pkts_processed);

  /* Hand the fragments collected for dropping to the drop next, tagged
     with the DROP_FRAGMENT error */
  nat_send_all_to_node (vm, fragments_to_drop, node,
                        &node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT],
                        SNAT_OUT2IN_NEXT_DROP);

  vec_free (fragments_to_drop);
  vec_free (fragments_to_loopback);
  return frame->n_vectors;
}
1552
1553 VLIB_REGISTER_NODE (nat44_out2in_reass_node) = {
1554   .function = nat44_out2in_reass_node_fn,
1555   .name = "nat44-out2in-reass",
1556   .vector_size = sizeof (u32),
1557   .format_trace = format_nat44_out2in_reass_trace,
1558   .type = VLIB_NODE_TYPE_INTERNAL,
1559
1560   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1561   .error_strings = snat_out2in_error_strings,
1562
1563   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1564
1565   /* edit / add dispositions here */
1566   .next_nodes = {
1567     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1568     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1569     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1570     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1571   },
1572 };
1573 VLIB_NODE_FUNCTION_MULTIARCH (nat44_out2in_reass_node,
1574                               nat44_out2in_reass_node_fn);
1575
1576 /*******************************/
1577 /*** endpoint-dependent mode ***/
1578 /*******************************/
/*
 * Next-node indices for endpoint-dependent out2in processing.
 * Values are consecutive starting at zero; NAT44_ED_OUT2IN_N_NEXT is the
 * number of dispositions, not a disposition itself.
 * NOTE(review): the node registration that maps these indices to node
 * names is outside this excerpt - the per-entry meaning is taken from the
 * identifier names only.
 */
typedef enum
{
  NAT44_ED_OUT2IN_NEXT_DROP = 0,
  NAT44_ED_OUT2IN_NEXT_LOOKUP,
  NAT44_ED_OUT2IN_NEXT_ICMP_ERROR,
  NAT44_ED_OUT2IN_NEXT_IN2OUT,
  NAT44_ED_OUT2IN_NEXT_SLOW_PATH,
  NAT44_ED_OUT2IN_N_NEXT,	/* count of entries above */
} nat44_ed_out2in_next_t;
1587
1588 typedef struct {
1589   u32 sw_if_index;
1590   u32 next_index;
1591   u32 session_index;
1592   u32 is_slow_path;
1593 } nat44_ed_out2in_trace_t;
1594
1595 static u8 *
1596 format_nat44_ed_out2in_trace (u8 * s, va_list * args)
1597 {
1598   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1599   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1600   nat44_ed_out2in_trace_t *t = va_arg (*args, nat44_ed_out2in_trace_t *);
1601   char * tag;
1602
1603   tag = t->is_slow_path ? "NAT44_OUT2IN_SLOW_PATH" : "NAT44_OUT2IN_FAST_PATH";
1604
1605   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
1606               t->sw_if_index, t->next_index, t->session_index);
1607
1608   return s;
1609 }
1610
1611 static inline u32
1612 icmp_out2in_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0,
1613                           ip4_header_t * ip0, icmp46_header_t * icmp0,
1614                           u32 sw_if_index0, u32 rx_fib_index0,
1615                           vlib_node_runtime_t * node, u32 next0, f64 now,
1616                           u32 thread_index, snat_session_t ** p_s0)
1617 {
1618   next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1619                       next0, thread_index, p_s0, 0);
1620   snat_session_t * s0 = *p_s0;
1621   if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
1622     {
1623       /* Accounting */
1624       nat44_session_update_counters (s0, now,
1625                                      vlib_buffer_length_in_chain (sm->vlib_main, b0));
1626     }
1627   return next0;
1628 }
1629
1630 int
1631 nat44_o2i_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void * arg)
1632 {
1633   snat_main_t *sm = &snat_main;
1634   nat44_is_idle_session_ctx_t *ctx = arg;
1635   snat_session_t *s;
1636   u64 sess_timeout_time;
1637   nat_ed_ses_key_t ed_key;
1638   clib_bihash_kv_16_8_t ed_kv;
1639   int i;
1640   snat_address_t *a;
1641   snat_session_key_t key;
1642   snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
1643                                                        ctx->thread_index);
1644
1645   s = pool_elt_at_index (tsm->sessions, kv->value);
1646   sess_timeout_time = s->last_heard + (f64)nat44_session_get_timeout(sm, s);
1647   if (ctx->now >= sess_timeout_time)
1648     {
1649       ed_key.l_addr = s->in2out.addr;
1650       ed_key.r_addr = s->ext_host_addr;
1651       ed_key.fib_index = s->out2in.fib_index;
1652       if (snat_is_unk_proto_session (s))
1653         {
1654           ed_key.proto = s->in2out.port;
1655           ed_key.r_port = 0;
1656           ed_key.l_port = 0;
1657         }
1658       else
1659         {
1660           ed_key.proto = snat_proto_to_ip_proto (s->in2out.protocol);
1661           ed_key.l_port = s->in2out.port;
1662           ed_key.r_port = s->ext_host_port;
1663         }
1664       if (is_twice_nat_session (s))
1665         {
1666           ed_key.r_addr = s->ext_host_nat_addr;
1667           ed_key.r_port = s->ext_host_nat_port;
1668         }
1669       ed_kv.key[0] = ed_key.as_u64[0];
1670       ed_kv.key[1] = ed_key.as_u64[1];
1671       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
1672         nat_log_warn ("in2out_ed key del failed");
1673
1674       if (snat_is_unk_proto_session (s))
1675         goto delete;
1676
1677       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
1678                                           s->out2in.addr.as_u32,
1679                                           s->in2out.protocol,
1680                                           s->in2out.port,
1681                                           s->out2in.port,
1682                                           s->in2out.fib_index);
1683
1684       if (is_twice_nat_session (s))
1685         {
1686           for (i = 0; i < vec_len (sm->twice_nat_addresses); i++)
1687             {
1688               key.protocol = s->in2out.protocol;
1689               key.port = s->ext_host_nat_port;
1690               a = sm->twice_nat_addresses + i;
1691               if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32)
1692                 {
1693                   snat_free_outside_address_and_port (sm->twice_nat_addresses,
1694                                                       ctx->thread_index, &key);
1695                   break;
1696                 }
1697             }
1698         }
1699
1700       if (snat_is_session_static (s))
1701         goto delete;
1702
1703       if (s->outside_address_index != ~0)
1704         snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
1705                                             &s->out2in);
1706     delete:
1707       nat44_delete_session (sm, s, ctx->thread_index);
1708       return 1;
1709     }
1710
1711   return 0;
1712 }
1713
1714 static snat_session_t *
1715 create_session_for_static_mapping_ed (snat_main_t * sm,
1716                                       vlib_buffer_t *b,
1717                                       snat_session_key_t l_key,
1718                                       snat_session_key_t e_key,
1719                                       vlib_node_runtime_t * node,
1720                                       u32 thread_index,
1721                                       twice_nat_type_t twice_nat,
1722                                       u8 is_lb,
1723                                       f64 now)
1724 {
1725   snat_session_t *s;
1726   snat_user_t *u;
1727   ip4_header_t *ip;
1728   udp_header_t *udp;
1729   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1730   clib_bihash_kv_16_8_t kv;
1731   snat_session_key_t eh_key;
1732   u32 address_index;
1733   nat44_is_idle_session_ctx_t ctx;
1734
1735   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
1736     {
1737       b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
1738       nat_log_notice ("maximum sessions exceeded");
1739       return 0;
1740     }
1741
1742   u = nat_user_get_or_create (sm, &l_key.addr, l_key.fib_index, thread_index);
1743   if (!u)
1744     {
1745       nat_log_warn ("create NAT user failed");
1746       return 0;
1747     }
1748
1749   s = nat_ed_session_alloc (sm, u, thread_index);
1750   if (!s)
1751     {
1752       nat44_delete_user_with_no_session (sm, u, thread_index);
1753       nat_log_warn ("create NAT session failed");
1754       return 0;
1755     }
1756
1757   ip = vlib_buffer_get_current (b);
1758   udp = ip4_next_header (ip);
1759
1760   s->ext_host_addr.as_u32 = ip->src_address.as_u32;
1761   s->ext_host_port = e_key.protocol == SNAT_PROTOCOL_ICMP ? 0 : udp->src_port;
1762   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1763   if (is_lb)
1764     s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
1765   s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
1766   s->outside_address_index = ~0;
1767   s->out2in = e_key;
1768   s->in2out = l_key;
1769   s->in2out.protocol = s->out2in.protocol;
1770   user_session_increment (sm, u, 1);
1771
1772   /* Add to lookup tables */
1773   make_ed_kv (&kv, &e_key.addr, &s->ext_host_addr, ip->protocol,
1774               e_key.fib_index, e_key.port, s->ext_host_port);
1775   kv.value = s - tsm->sessions;
1776   ctx.now = now;
1777   ctx.thread_index = thread_index;
1778   if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->out2in_ed, &kv,
1779                                                nat44_o2i_ed_is_idle_session_cb,
1780                                                &ctx))
1781     nat_log_notice ("out2in-ed key add failed");
1782
1783   if (twice_nat == TWICE_NAT || (twice_nat == TWICE_NAT_SELF &&
1784       ip->src_address.as_u32 == l_key.addr.as_u32))
1785     {
1786       eh_key.protocol = e_key.protocol;
1787       if (snat_alloc_outside_address_and_port (sm->twice_nat_addresses, 0,
1788                                                thread_index, &eh_key,
1789                                                &address_index,
1790                                                sm->port_per_thread,
1791                                                tsm->snat_thread_index))
1792         {
1793           b->error = node->errors[SNAT_OUT2IN_ERROR_OUT_OF_PORTS];
1794           nat44_delete_session (sm, s, thread_index);
1795           if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &kv, 0))
1796             nat_log_notice ("out2in-ed key del failed");
1797           return 0;
1798         }
1799       s->ext_host_nat_addr.as_u32 = eh_key.addr.as_u32;
1800       s->ext_host_nat_port = eh_key.port;
1801       s->flags |= SNAT_SESSION_FLAG_TWICE_NAT;
1802       make_ed_kv (&kv, &l_key.addr, &s->ext_host_nat_addr, ip->protocol,
1803                   l_key.fib_index, l_key.port, s->ext_host_nat_port);
1804     }
1805   else
1806     {
1807       make_ed_kv (&kv, &l_key.addr, &s->ext_host_addr, ip->protocol,
1808                   l_key.fib_index, l_key.port, s->ext_host_port);
1809     }
1810   kv.value = s - tsm->sessions;
1811   if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, &kv,
1812                                                nat44_i2o_ed_is_idle_session_cb,
1813                                                &ctx))
1814     nat_log_notice ("in2out-ed key add failed");
1815
1816   return s;
1817 }
1818
1819 static_always_inline int
1820 icmp_get_ed_key(ip4_header_t *ip0, nat_ed_ses_key_t *p_key0)
1821 {
1822   icmp46_header_t *icmp0;
1823   nat_ed_ses_key_t key0;
1824   icmp_echo_header_t *echo0, *inner_echo0 = 0;
1825   ip4_header_t *inner_ip0;
1826   void *l4_header = 0;
1827   icmp46_header_t *inner_icmp0;
1828
1829   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
1830   echo0 = (icmp_echo_header_t *)(icmp0+1);
1831
1832   if (!icmp_is_error_message (icmp0))
1833     {
1834       key0.proto = IP_PROTOCOL_ICMP;
1835       key0.l_addr = ip0->dst_address;
1836       key0.r_addr = ip0->src_address;
1837       key0.l_port = echo0->identifier;
1838       key0.r_port = 0;
1839     }
1840   else
1841     {
1842       inner_ip0 = (ip4_header_t *)(echo0+1);
1843       l4_header = ip4_next_header (inner_ip0);
1844       key0.proto = inner_ip0->protocol;
1845       key0.l_addr = inner_ip0->src_address;
1846       key0.r_addr = inner_ip0->dst_address;
1847       switch (ip_proto_to_snat_proto (inner_ip0->protocol))
1848         {
1849         case SNAT_PROTOCOL_ICMP:
1850           inner_icmp0 = (icmp46_header_t*)l4_header;
1851           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
1852           key0.l_port = inner_echo0->identifier;
1853           key0.r_port = 0;
1854           break;
1855         case SNAT_PROTOCOL_UDP:
1856         case SNAT_PROTOCOL_TCP:
1857           key0.l_port = ((tcp_udp_header_t*)l4_header)->src_port;
1858           key0.r_port = ((tcp_udp_header_t*)l4_header)->dst_port;
1859           break;
1860         default:
1861           return -1;
1862         }
1863     }
1864   *p_key0 = key0;
1865   return 0;
1866 }
1867
1868 static int
1869 next_src_nat (snat_main_t * sm, ip4_header_t * ip, u8 proto, u16 src_port,
1870               u16 dst_port, u32 thread_index, u32 rx_fib_index)
1871 {
1872   clib_bihash_kv_16_8_t kv, value;
1873   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1874
1875   make_ed_kv (&kv, &ip->src_address, &ip->dst_address, proto,
1876               rx_fib_index, src_port, dst_port);
1877   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
1878     return 1;
1879
1880   return 0;
1881 }
1882
1883 static void
1884 create_bypass_for_fwd(snat_main_t * sm, ip4_header_t * ip, u32 rx_fib_index,
1885                       u32 thread_index)
1886 {
1887   nat_ed_ses_key_t key;
1888   clib_bihash_kv_16_8_t kv, value;
1889   udp_header_t *udp;
1890   snat_user_t *u;
1891   snat_session_t *s = 0;
1892   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1893   f64 now = vlib_time_now (sm->vlib_main);
1894
1895   if (ip->protocol == IP_PROTOCOL_ICMP)
1896     {
1897       if (icmp_get_ed_key (ip, &key))
1898         return;
1899     }
1900   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
1901     {
1902       udp = ip4_next_header(ip);
1903       key.r_addr = ip->src_address;
1904       key.l_addr = ip->dst_address;
1905       key.proto = ip->protocol;
1906       key.l_port = udp->dst_port;
1907       key.r_port = udp->src_port;
1908     }
1909   else
1910     {
1911       key.r_addr = ip->src_address;
1912       key.l_addr = ip->dst_address;
1913       key.proto = ip->protocol;
1914       key.l_port = key.r_port = 0;
1915     }
1916   key.fib_index = 0;
1917   kv.key[0] = key.as_u64[0];
1918   kv.key[1] = key.as_u64[1];
1919
1920   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
1921     {
1922       s = pool_elt_at_index (tsm->sessions, value.value);
1923     }
1924   else
1925     {
1926       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
1927         return;
1928
1929       u = nat_user_get_or_create (sm, &ip->dst_address, sm->inside_fib_index,
1930                                   thread_index);
1931       if (!u)
1932         {
1933           nat_log_warn ("create NAT user failed");
1934           return;
1935         }
1936
1937       s = nat_ed_session_alloc (sm, u, thread_index);
1938       if (!s)
1939         {
1940           nat44_delete_user_with_no_session (sm, u, thread_index);
1941           nat_log_warn ("create NAT session failed");
1942           return;
1943         }
1944
1945       s->ext_host_addr = key.r_addr;
1946       s->ext_host_port = key.r_port;
1947       s->flags |= SNAT_SESSION_FLAG_FWD_BYPASS;
1948       s->outside_address_index = ~0;
1949       s->out2in.addr = key.l_addr;
1950       s->out2in.port = key.l_port;
1951       s->out2in.protocol = ip_proto_to_snat_proto (key.proto);
1952       s->out2in.fib_index = 0;
1953       s->in2out = s->out2in;
1954       user_session_increment (sm, u, 0);
1955
1956       kv.value = s - tsm->sessions;
1957       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &kv, 1))
1958         nat_log_notice ("in2out_ed key add failed");
1959     }
1960
1961   if (ip->protocol == IP_PROTOCOL_TCP)
1962     {
1963       tcp_header_t *tcp = ip4_next_header(ip);
1964       if (nat44_set_tcp_session_state_o2i (sm, s, tcp, thread_index))
1965         return;
1966     }
1967
1968   /* Accounting */
1969   nat44_session_update_counters (s, now, 0);
1970 }
1971
1972 u32
1973 icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node,
1974                       u32 thread_index, vlib_buffer_t * b, ip4_header_t * ip,
1975                       u8 * p_proto, snat_session_key_t * p_value,
1976                       u8 * p_dont_translate, void * d, void * e)
1977 {
1978   u32 next = ~0, sw_if_index, rx_fib_index;
1979   icmp46_header_t *icmp;
1980   nat_ed_ses_key_t key;
1981   clib_bihash_kv_16_8_t kv, value;
1982   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1983   snat_session_t *s = 0;
1984   u8 dont_translate = 0, is_addr_only;
1985   snat_session_key_t e_key, l_key;
1986
1987   icmp = (icmp46_header_t *) ip4_next_header (ip);
1988   sw_if_index = vnet_buffer(b)->sw_if_index[VLIB_RX];
1989   rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
1990
1991   if (icmp_get_ed_key (ip, &key))
1992     {
1993       b->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
1994       next = SNAT_OUT2IN_NEXT_DROP;
1995       goto out;
1996     }
1997   key.fib_index = rx_fib_index;
1998   kv.key[0] = key.as_u64[0];
1999   kv.key[1] = key.as_u64[1];
2000
2001   if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
2002     {
2003       /* Try to match static mapping */
2004       e_key.addr = ip->dst_address;
2005       e_key.port = key.l_port;
2006       e_key.protocol = ip_proto_to_snat_proto (key.proto);
2007       e_key.fib_index = rx_fib_index;
2008       if (snat_static_mapping_match(sm, e_key, &l_key, 1, &is_addr_only, 0, 0))
2009         {
2010           if (!sm->forwarding_enabled)
2011             {
2012               /* Don't NAT packet aimed at the intfc address */
2013               if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index,
2014                                                   ip->dst_address.as_u32)))
2015                 {
2016                   dont_translate = 1;
2017                   goto out;
2018                 }
2019               b->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2020               next = NAT44_ED_OUT2IN_NEXT_DROP;
2021               goto out;
2022             }
2023           else
2024             {
2025               dont_translate = 1;
2026               if (next_src_nat(sm, ip, key.proto, key.l_port, key.r_port,
2027                                thread_index, rx_fib_index))
2028                 {
2029                   next = NAT44_ED_OUT2IN_NEXT_IN2OUT;
2030                   goto out;
2031                 }
2032               create_bypass_for_fwd(sm, ip, rx_fib_index, thread_index);
2033               goto out;
2034             }
2035         }
2036
2037       if (PREDICT_FALSE(icmp->type != ICMP4_echo_reply &&
2038                         (icmp->type != ICMP4_echo_request || !is_addr_only)))
2039         {
2040           b->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
2041           next = NAT44_ED_OUT2IN_NEXT_DROP;
2042           goto out;
2043         }
2044
2045       /* Create session initiated by host from external network */
2046       s = create_session_for_static_mapping_ed(sm, b, l_key, e_key, node,
2047                                                thread_index, 0, 0,
2048                                                vlib_time_now (sm->vlib_main));
2049
2050       if (!s)
2051         {
2052           next = NAT44_ED_OUT2IN_NEXT_DROP;
2053           goto out;
2054         }
2055     }
2056   else
2057     {
2058       if (PREDICT_FALSE(icmp->type != ICMP4_echo_reply &&
2059                         icmp->type != ICMP4_echo_request &&
2060                         !icmp_is_error_message (icmp)))
2061         {
2062           b->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
2063           next = SNAT_OUT2IN_NEXT_DROP;
2064           goto out;
2065         }
2066
2067       s = pool_elt_at_index (tsm->sessions, value.value);
2068     }
2069
2070   *p_proto = ip_proto_to_snat_proto (key.proto);
2071 out:
2072   if (s)
2073     *p_value = s->in2out;
2074   *p_dont_translate = dont_translate;
2075   if (d)
2076     *(snat_session_t**)d = s;
2077   return next;
2078 }
2079
2080 static snat_session_t *
2081 nat44_ed_out2in_unknown_proto (snat_main_t *sm,
2082                                vlib_buffer_t * b,
2083                                ip4_header_t * ip,
2084                                u32 rx_fib_index,
2085                                u32 thread_index,
2086                                f64 now,
2087                                vlib_main_t * vm,
2088                                vlib_node_runtime_t * node)
2089 {
2090   clib_bihash_kv_8_8_t kv, value;
2091   clib_bihash_kv_16_8_t s_kv, s_value;
2092   snat_static_mapping_t *m;
2093   u32 old_addr, new_addr;
2094   ip_csum_t sum;
2095   snat_session_t * s;
2096   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2097   snat_user_t *u;
2098
2099   old_addr = ip->dst_address.as_u32;
2100
2101   make_ed_kv (&s_kv, &ip->dst_address, &ip->src_address, ip->protocol,
2102               rx_fib_index, 0, 0);
2103
2104   if (!clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
2105     {
2106       s = pool_elt_at_index (tsm->sessions, s_value.value);
2107       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
2108     }
2109   else
2110     {
2111       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
2112         {
2113           b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
2114           nat_log_notice ("maximum sessions exceeded");
2115           return 0;
2116         }
2117
2118       make_sm_kv (&kv, &ip->dst_address, 0, 0, 0);
2119       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
2120         {
2121           b->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2122           return 0;
2123         }
2124
2125       m = pool_elt_at_index (sm->static_mappings, value.value);
2126
2127       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
2128
2129       u = nat_user_get_or_create (sm, &m->local_addr, m->fib_index,
2130                                   thread_index);
2131       if (!u)
2132         {
2133           nat_log_warn ("create NAT user failed");
2134           return 0;
2135         }
2136
2137       /* Create a new session */
2138       s = nat_ed_session_alloc (sm, u, thread_index);
2139       if (!s)
2140         {
2141           nat44_delete_user_with_no_session (sm, u, thread_index);
2142           nat_log_warn ("create NAT session failed");
2143           return 0;
2144         }
2145
2146       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
2147       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
2148       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
2149       s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
2150       s->outside_address_index = ~0;
2151       s->out2in.addr.as_u32 = old_addr;
2152       s->out2in.fib_index = rx_fib_index;
2153       s->in2out.addr.as_u32 = new_addr;
2154       s->in2out.fib_index = m->fib_index;
2155       s->in2out.port = s->out2in.port = ip->protocol;
2156       user_session_increment (sm, u, 1);
2157
2158       /* Add to lookup tables */
2159       s_kv.value = s - tsm->sessions;
2160       if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &s_kv, 1))
2161         nat_log_notice ("out2in key add failed");
2162
2163       make_ed_kv (&s_kv, &ip->dst_address, &ip->src_address, ip->protocol,
2164                   m->fib_index, 0, 0);
2165       s_kv.value = s - tsm->sessions;
2166       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1))
2167         nat_log_notice ("in2out key add failed");
2168    }
2169
2170   /* Update IP checksum */
2171   sum = ip->checksum;
2172   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
2173   ip->checksum = ip_csum_fold (sum);
2174
2175   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
2176
2177   /* Accounting */
2178   nat44_session_update_counters (s, now,
2179                                  vlib_buffer_length_in_chain (vm, b));
2180
2181   return s;
2182 }
2183
2184 static inline uword
2185 nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
2186                                 vlib_node_runtime_t * node,
2187                                 vlib_frame_t * frame, int is_slow_path)
2188 {
2189   u32 n_left_from, *from, *to_next, pkts_processed = 0, stats_node_index;
2190   nat44_ed_out2in_next_t next_index;
2191   snat_main_t *sm = &snat_main;
2192   f64 now = vlib_time_now (vm);
2193   u32 thread_index = vm->thread_index;
2194   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2195
2196   stats_node_index = is_slow_path ? nat44_ed_out2in_slowpath_node.index :
2197     nat44_ed_out2in_node.index;
2198
2199   from = vlib_frame_vector_args (frame);
2200   n_left_from = frame->n_vectors;
2201   next_index = node->cached_next_index;
2202
2203   while (n_left_from > 0)
2204     {
2205       u32 n_left_to_next;
2206
2207       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2208
2209       while (n_left_from >= 4 && n_left_to_next >= 2)
2210         {
2211           u32 bi0, bi1;
2212           vlib_buffer_t *b0, *b1;
2213           u32 next0, sw_if_index0, rx_fib_index0, proto0, old_addr0, new_addr0;
2214           u32 next1, sw_if_index1, rx_fib_index1, proto1, old_addr1, new_addr1;
2215           u16 old_port0, new_port0, old_port1, new_port1;
2216           ip4_header_t *ip0, *ip1;
2217           udp_header_t *udp0, *udp1;
2218           tcp_header_t *tcp0, *tcp1;
2219           icmp46_header_t *icmp0, *icmp1;
2220           snat_session_t *s0 = 0, *s1 = 0;
2221           clib_bihash_kv_16_8_t kv0, value0, kv1, value1;
2222           ip_csum_t sum0, sum1;
2223           snat_session_key_t e_key0, l_key0, e_key1, l_key1;
2224           u8 is_lb0, is_lb1;
2225           twice_nat_type_t twice_nat0, twice_nat1;
2226
2227           /* Prefetch next iteration. */
2228           {
2229             vlib_buffer_t * p2, * p3;
2230
2231             p2 = vlib_get_buffer (vm, from[2]);
2232             p3 = vlib_get_buffer (vm, from[3]);
2233
2234             vlib_prefetch_buffer_header (p2, LOAD);
2235             vlib_prefetch_buffer_header (p3, LOAD);
2236
2237             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2238             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2239           }
2240
2241           /* speculatively enqueue b0 and b1 to the current next frame */
2242           to_next[0] = bi0 = from[0];
2243           to_next[1] = bi1 = from[1];
2244           from += 2;
2245           to_next += 2;
2246           n_left_from -= 2;
2247           n_left_to_next -= 2;
2248
2249           b0 = vlib_get_buffer (vm, bi0);
2250           b1 = vlib_get_buffer (vm, bi1);
2251
2252           next0 = NAT44_ED_OUT2IN_NEXT_LOOKUP;
2253           vnet_buffer (b0)->snat.flags = 0;
2254           ip0 = vlib_buffer_get_current (b0);
2255
2256           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2257           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2258                                                                sw_if_index0);
2259
2260           if (PREDICT_FALSE(ip0->ttl == 1))
2261             {
2262               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2263               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2264                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2265                                            0);
2266               next0 = NAT44_ED_OUT2IN_NEXT_ICMP_ERROR;
2267               goto trace00;
2268             }
2269
2270           udp0 = ip4_next_header (ip0);
2271           tcp0 = (tcp_header_t *) udp0;
2272           icmp0 = (icmp46_header_t *) udp0;
2273           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2274
2275           if (is_slow_path)
2276             {
2277               if (PREDICT_FALSE (proto0 == ~0))
2278                 {
2279                   s0 = nat44_ed_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
2280                                                      thread_index, now, vm, node);
2281                   if (!sm->forwarding_enabled)
2282                     {
2283                       if (!s0)
2284                         next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2285                       goto trace00;
2286                     }
2287                 }
2288
2289               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2290                 {
2291                   next0 = icmp_out2in_ed_slow_path
2292                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
2293                      next0, now, thread_index, &s0);
2294                   goto trace00;
2295                 }
2296             }
2297           else
2298             {
2299               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
2300                 {
2301                   next0 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2302                   goto trace00;
2303                 }
2304
2305               if (ip4_is_fragment (ip0))
2306                 {
2307                   b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
2308                   next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2309                   goto trace00;
2310                 }
2311             }
2312
2313           make_ed_kv (&kv0, &ip0->dst_address, &ip0->src_address, ip0->protocol,
2314                       rx_fib_index0, udp0->dst_port, udp0->src_port);
2315
2316           if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv0, &value0))
2317             {
2318               if (is_slow_path)
2319                 {
2320                   /* Try to match static mapping by external address and port,
2321                      destination address and port in packet */
2322                   e_key0.addr = ip0->dst_address;
2323                   e_key0.port = udp0->dst_port;
2324                   e_key0.protocol = proto0;
2325                   e_key0.fib_index = rx_fib_index0;
2326                   if (snat_static_mapping_match(sm, e_key0, &l_key0, 1, 0,
2327                       &twice_nat0, &is_lb0))
2328                     {
2329                       /*
2330                        * Send DHCP packets to the ipv4 stack, or we won't
2331                        * be able to use dhcp client on the outside interface
2332                        */
2333                       if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
2334                           && (udp0->dst_port ==
2335                           clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
2336                         {
2337                           vnet_feature_next (&next0, b0);
2338                           goto trace00;
2339                         }
2340
2341                       if (!sm->forwarding_enabled)
2342                         {
2343                           b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2344                           next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2345                         }
2346                       else
2347                         {
2348                           if (next_src_nat(sm, ip0, ip0->protocol,
2349                                            udp0->src_port, udp0->dst_port,
2350                                            thread_index, rx_fib_index0))
2351                             {
2352                               next0 = NAT44_ED_OUT2IN_NEXT_IN2OUT;
2353                               goto trace00;
2354                             }
2355                           create_bypass_for_fwd(sm, ip0, rx_fib_index0,
2356                                                 thread_index);
2357                         }
2358                       goto trace00;
2359                     }
2360
2361                   /* Create session initiated by host from external network */
2362                   s0 = create_session_for_static_mapping_ed(sm, b0, l_key0,
2363                                                             e_key0, node,
2364                                                             thread_index,
2365                                                             twice_nat0, is_lb0,
2366                                                             now);
2367
2368                   if (!s0)
2369                     {
2370                       next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2371                       goto trace00;
2372                     }
2373                 }
2374               else
2375                 {
2376                   next0 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2377                   goto trace00;
2378                 }
2379             }
2380           else
2381             {
2382               s0 = pool_elt_at_index (tsm->sessions, value0.value);
2383             }
2384
2385           old_addr0 = ip0->dst_address.as_u32;
2386           new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32;
2387           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
2388
2389           sum0 = ip0->checksum;
2390           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
2391                                  dst_address);
2392           if (PREDICT_FALSE (is_twice_nat_session (s0)))
2393             sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
2394                                    s0->ext_host_nat_addr.as_u32, ip4_header_t,
2395                                    src_address);
2396           ip0->checksum = ip_csum_fold (sum0);
2397
2398           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
2399             {
2400               old_port0 = tcp0->dst_port;
2401               new_port0 = tcp0->dst_port = s0->in2out.port;
2402
2403               sum0 = tcp0->checksum;
2404               sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
2405                                      dst_address);
2406               sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
2407                                      length);
2408               if (is_twice_nat_session (s0))
2409                 {
2410                   sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
2411                                          s0->ext_host_nat_addr.as_u32,
2412                                          ip4_header_t, dst_address);
2413                   sum0 = ip_csum_update (sum0, tcp0->src_port,
2414                                          s0->ext_host_nat_port, ip4_header_t,
2415                                          length);
2416                   tcp0->src_port = s0->ext_host_nat_port;
2417                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
2418                 }
2419               tcp0->checksum = ip_csum_fold(sum0);
2420               if (nat44_set_tcp_session_state_o2i (sm, s0, tcp0, thread_index))
2421                 goto trace00;
2422             }
2423           else
2424             {
2425               udp0->dst_port = s0->in2out.port;
2426               if (is_twice_nat_session (s0))
2427                 {
2428                   udp0->src_port = s0->ext_host_nat_port;
2429                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
2430                 }
2431               udp0->checksum = 0;
2432             }
2433
2434           /* Accounting */
2435           nat44_session_update_counters (s0, now,
2436                                          vlib_buffer_length_in_chain (vm, b0));
2437
2438         trace00:
2439           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2440                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2441             {
2442               nat44_ed_out2in_trace_t *t =
2443                 vlib_add_trace (vm, node, b0, sizeof (*t));
2444               t->is_slow_path = is_slow_path;
2445               t->sw_if_index = sw_if_index0;
2446               t->next_index = next0;
2447               t->session_index = ~0;
2448               if (s0)
2449                 t->session_index = s0 - tsm->sessions;
2450             }
2451
2452           pkts_processed += next0 != NAT44_ED_OUT2IN_NEXT_DROP;
2453
2454           next1 = NAT44_ED_OUT2IN_NEXT_LOOKUP;
2455           vnet_buffer (b1)->snat.flags = 0;
2456           ip1 = vlib_buffer_get_current (b1);
2457
2458           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2459           rx_fib_index1 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2460                                                                sw_if_index1);
2461
2462           if (PREDICT_FALSE(ip1->ttl == 1))
2463             {
2464               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2465               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2466                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2467                                            0);
2468               next1 = NAT44_ED_OUT2IN_NEXT_ICMP_ERROR;
2469               goto trace01;
2470             }
2471
2472           udp1 = ip4_next_header (ip1);
2473           tcp1 = (tcp_header_t *) udp1;
2474           icmp1 = (icmp46_header_t *) udp1;
2475           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2476
2477           if (is_slow_path)
2478             {
2479               if (PREDICT_FALSE (proto1 == ~0))
2480                 {
2481                   s1 = nat44_ed_out2in_unknown_proto(sm, b1, ip1, rx_fib_index1,
2482                                                      thread_index, now, vm, node);
2483                   if (!sm->forwarding_enabled)
2484                     {
2485                       if (!s1)
2486                         next1 = NAT44_ED_OUT2IN_NEXT_DROP;
2487                       goto trace01;
2488                     }
2489                 }
2490
2491               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
2492                 {
2493                   next1 = icmp_out2in_ed_slow_path
2494                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
2495                      next1, now, thread_index, &s1);
2496                   goto trace01;
2497                 }
2498             }
2499           else
2500             {
2501               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
2502                 {
2503                   next1 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2504                   goto trace01;
2505                 }
2506
2507               if (ip4_is_fragment (ip1))
2508                 {
2509                   b1->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
2510                   next1 = NAT44_ED_OUT2IN_NEXT_DROP;
2511                   goto trace01;
2512                 }
2513             }
2514
2515           make_ed_kv (&kv1, &ip1->dst_address, &ip1->src_address, ip1->protocol,
2516                       rx_fib_index1, udp1->dst_port, udp1->src_port);
2517
2518           if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv1, &value1))
2519             {
2520               if (is_slow_path)
2521                 {
2522                   /* Try to match static mapping by external address and port,
2523                      destination address and port in packet */
2524                   e_key1.addr = ip1->dst_address;
2525                   e_key1.port = udp1->dst_port;
2526                   e_key1.protocol = proto1;
2527                   e_key1.fib_index = rx_fib_index1;
2528                   if (snat_static_mapping_match(sm, e_key1, &l_key1, 1, 0,
2529                       &twice_nat1, &is_lb1))
2530                     {
2531                       /*
2532                        * Send DHCP packets to the ipv4 stack, or we won't
2533                        * be able to use dhcp client on the outside interface
2534                        */
2535                       if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_UDP
2536                           && (udp1->dst_port ==
2537                           clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
2538                         {
2539                           vnet_feature_next (&next1, b1);
2540                           goto trace01;
2541                         }
2542
2543                       if (!sm->forwarding_enabled)
2544                         {
2545                           b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2546                           next1 = NAT44_ED_OUT2IN_NEXT_DROP;
2547                         }
2548                       else
2549                         {
2550                           if (next_src_nat(sm, ip1, ip1->protocol,
2551                                            udp1->src_port, udp1->dst_port,
2552                                            thread_index, rx_fib_index1))
2553                             {
2554                               next1 = NAT44_ED_OUT2IN_NEXT_IN2OUT;
2555                               goto trace01;
2556                             }
2557                           create_bypass_for_fwd(sm, ip1, rx_fib_index1,
2558                                                 thread_index);
2559                         }
2560                       goto trace01;
2561                     }
2562
2563                   /* Create session initiated by host from external network */
2564                   s1 = create_session_for_static_mapping_ed(sm, b1, l_key1,
2565                                                             e_key1, node,
2566                                                             thread_index,
2567                                                             twice_nat1, is_lb1,
2568                                                             now);
2569
2570                   if (!s1)
2571                     {
2572                       next1 = NAT44_ED_OUT2IN_NEXT_DROP;
2573                       goto trace01;
2574                     }
2575                 }
2576               else
2577                 {
2578                   next1 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2579                   goto trace01;
2580                 }
2581             }
2582           else
2583             {
2584               s1 = pool_elt_at_index (tsm->sessions, value1.value);
2585             }
2586
2587           old_addr1 = ip1->dst_address.as_u32;
2588           new_addr1 = ip1->dst_address.as_u32 = s1->in2out.addr.as_u32;
2589           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index;
2590
2591           sum1 = ip1->checksum;
2592           sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
2593                                  dst_address);
2594           if (PREDICT_FALSE (is_twice_nat_session (s1)))
2595             sum1 = ip_csum_update (sum1, ip1->src_address.as_u32,
2596                                    s1->ext_host_nat_addr.as_u32, ip4_header_t,
2597                                    src_address);
2598           ip1->checksum = ip_csum_fold (sum1);
2599
2600           if (PREDICT_TRUE (proto1 == SNAT_PROTOCOL_TCP))
2601             {
2602               old_port1 = tcp1->dst_port;
2603               new_port1 = tcp1->dst_port = s1->in2out.port;
2604
2605               sum1 = tcp1->checksum;
2606               sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
2607                                      dst_address);
2608               sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t,
2609                                      length);
2610               if (is_twice_nat_session (s1))
2611                 {
2612                   sum1 = ip_csum_update (sum1, ip1->src_address.as_u32,
2613                                          s1->ext_host_nat_addr.as_u32,
2614                                          ip4_header_t, dst_address);
2615                   sum1 = ip_csum_update (sum1, tcp1->src_port,
2616                                          s1->ext_host_nat_port, ip4_header_t,
2617                                          length);
2618                   tcp1->src_port = s1->ext_host_nat_port;
2619                   ip1->src_address.as_u32 = s1->ext_host_nat_addr.as_u32;
2620                 }
2621               tcp1->checksum = ip_csum_fold(sum1);
2622               if (nat44_set_tcp_session_state_o2i (sm, s1, tcp1, thread_index))
2623                 goto trace01;
2624             }
2625           else
2626             {
2627               udp1->dst_port = s1->in2out.port;
2628               if (is_twice_nat_session (s1))
2629                 {
2630                   udp1->src_port = s1->ext_host_nat_port;
2631                   ip1->src_address.as_u32 = s1->ext_host_nat_addr.as_u32;
2632                 }
2633               udp1->checksum = 0;
2634             }
2635
2636           /* Accounting */
2637           nat44_session_update_counters (s1, now,
2638                                          vlib_buffer_length_in_chain (vm, b1));
2639
2640         trace01:
2641           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2642                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2643             {
2644               nat44_ed_out2in_trace_t *t =
2645                 vlib_add_trace (vm, node, b1, sizeof (*t));
2646               t->is_slow_path = is_slow_path;
2647               t->sw_if_index = sw_if_index1;
2648               t->next_index = next1;
2649               t->session_index = ~0;
2650               if (s1)
2651                 t->session_index = s1 - tsm->sessions;
2652             }
2653
2654           pkts_processed += next1 != NAT44_ED_OUT2IN_NEXT_DROP;
2655
2656           /* verify speculative enqueues, maybe switch current next frame */
2657           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2658                                            to_next, n_left_to_next,
2659                                            bi0, bi1, next0, next1);
2660         }
2661
2662       while (n_left_from > 0 && n_left_to_next > 0)
2663         {
2664           u32 bi0;
2665           vlib_buffer_t *b0;
2666           u32 next0, sw_if_index0, rx_fib_index0, proto0, old_addr0, new_addr0;
2667           u16 old_port0, new_port0;
2668           ip4_header_t *ip0;
2669           udp_header_t *udp0;
2670           tcp_header_t *tcp0;
2671           icmp46_header_t * icmp0;
2672           snat_session_t *s0 = 0;
2673           clib_bihash_kv_16_8_t kv0, value0;
2674           ip_csum_t sum0;
2675           snat_session_key_t e_key0, l_key0;
2676           u8 is_lb0;
2677           twice_nat_type_t twice_nat0;
2678
2679           /* speculatively enqueue b0 to the current next frame */
2680           bi0 = from[0];
2681           to_next[0] = bi0;
2682           from += 1;
2683           to_next += 1;
2684           n_left_from -= 1;
2685           n_left_to_next -= 1;
2686
2687           b0 = vlib_get_buffer (vm, bi0);
2688           next0 = NAT44_ED_OUT2IN_NEXT_LOOKUP;
2689           vnet_buffer (b0)->snat.flags = 0;
2690           ip0 = vlib_buffer_get_current (b0);
2691
2692           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2693           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2694                                                                sw_if_index0);
2695
2696           if (PREDICT_FALSE(ip0->ttl == 1))
2697             {
2698               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2699               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2700                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2701                                            0);
2702               next0 = NAT44_ED_OUT2IN_NEXT_ICMP_ERROR;
2703               goto trace0;
2704             }
2705
2706           udp0 = ip4_next_header (ip0);
2707           tcp0 = (tcp_header_t *) udp0;
2708           icmp0 = (icmp46_header_t *) udp0;
2709           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2710
2711           if (is_slow_path)
2712             {
2713               if (PREDICT_FALSE (proto0 == ~0))
2714                 {
2715                   s0 = nat44_ed_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
2716                                                      thread_index, now, vm, node);
2717                   if (!sm->forwarding_enabled)
2718                     {
2719                       if (!s0)
2720                         next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2721                       goto trace0;
2722                     }
2723                 }
2724
2725               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2726                 {
2727                   next0 = icmp_out2in_ed_slow_path
2728                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
2729                      next0, now, thread_index, &s0);
2730                   goto trace0;
2731                 }
2732             }
2733           else
2734             {
2735               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
2736                 {
2737                   next0 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2738                   goto trace0;
2739                 }
2740
2741               if (ip4_is_fragment (ip0))
2742                 {
2743                   b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
2744                   next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2745                   goto trace0;
2746                 }
2747             }
2748
2749           make_ed_kv (&kv0, &ip0->dst_address, &ip0->src_address, ip0->protocol,
2750                       rx_fib_index0, udp0->dst_port, udp0->src_port);
2751
2752           if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv0, &value0))
2753             {
2754               if (is_slow_path)
2755                 {
2756                   /* Try to match static mapping by external address and port,
2757                      destination address and port in packet */
2758                   e_key0.addr = ip0->dst_address;
2759                   e_key0.port = udp0->dst_port;
2760                   e_key0.protocol = proto0;
2761                   e_key0.fib_index = rx_fib_index0;
2762                   if (snat_static_mapping_match(sm, e_key0, &l_key0, 1, 0,
2763                       &twice_nat0, &is_lb0))
2764                     {
2765                       /*
2766                        * Send DHCP packets to the ipv4 stack, or we won't
2767                        * be able to use dhcp client on the outside interface
2768                        */
2769                       if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
2770                           && (udp0->dst_port ==
2771                           clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
2772                         {
2773                           vnet_feature_next (&next0, b0);
2774                           goto trace0;
2775                         }
2776
2777                       if (!sm->forwarding_enabled)
2778                         {
2779                           b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2780                           next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2781                         }
2782                       else
2783                         {
2784                           if (next_src_nat(sm, ip0, ip0->protocol,
2785                                            udp0->src_port, udp0->dst_port,
2786                                            thread_index, rx_fib_index0))
2787                             {
2788                               next0 = NAT44_ED_OUT2IN_NEXT_IN2OUT;
2789                               goto trace0;
2790                             }
2791                           create_bypass_for_fwd(sm, ip0, rx_fib_index0,
2792                                                 thread_index);
2793                         }
2794                       goto trace0;
2795                     }
2796
2797                   /* Create session initiated by host from external network */
2798                   s0 = create_session_for_static_mapping_ed(sm, b0, l_key0,
2799                                                             e_key0, node,
2800                                                             thread_index,
2801                                                             twice_nat0, is_lb0,
2802                                                             now);
2803
2804                   if (!s0)
2805                     {
2806                       next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2807                       goto trace0;
2808                     }
2809                 }
2810               else
2811                 {
2812                   next0 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2813                   goto trace0;
2814                 }
2815             }
2816           else
2817             {
2818               s0 = pool_elt_at_index (tsm->sessions, value0.value);
2819             }
2820
2821           old_addr0 = ip0->dst_address.as_u32;
2822           new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32;
2823           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
2824
2825           sum0 = ip0->checksum;
2826           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
2827                                  dst_address);
2828           if (PREDICT_FALSE (is_twice_nat_session (s0)))
2829             sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
2830                                    s0->ext_host_nat_addr.as_u32, ip4_header_t,
2831                                    src_address);
2832           ip0->checksum = ip_csum_fold (sum0);
2833
2834           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
2835             {
2836               old_port0 = tcp0->dst_port;
2837               new_port0 = tcp0->dst_port = s0->in2out.port;
2838
2839               sum0 = tcp0->checksum;
2840               sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
2841                                      dst_address);
2842               sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
2843                                      length);
2844               if (is_twice_nat_session (s0))
2845                 {
2846                   sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
2847                                          s0->ext_host_nat_addr.as_u32,
2848                                          ip4_header_t, dst_address);
2849                   sum0 = ip_csum_update (sum0, tcp0->src_port,
2850                                          s0->ext_host_nat_port, ip4_header_t,
2851                                          length);
2852                   tcp0->src_port = s0->ext_host_nat_port;
2853                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
2854                 }
2855               tcp0->checksum = ip_csum_fold(sum0);
2856               if (nat44_set_tcp_session_state_o2i (sm, s0, tcp0, thread_index))
2857                 goto trace0;
2858             }
2859           else
2860             {
2861               udp0->dst_port = s0->in2out.port;
2862               if (is_twice_nat_session (s0))
2863                 {
2864                   udp0->src_port = s0->ext_host_nat_port;
2865                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
2866                 }
2867               udp0->checksum = 0;
2868             }
2869
2870           /* Accounting */
2871           nat44_session_update_counters (s0, now,
2872                                          vlib_buffer_length_in_chain (vm, b0));
2873
2874         trace0:
2875           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2876                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2877             {
2878               nat44_ed_out2in_trace_t *t =
2879                 vlib_add_trace (vm, node, b0, sizeof (*t));
2880               t->is_slow_path = is_slow_path;
2881               t->sw_if_index = sw_if_index0;
2882               t->next_index = next0;
2883               t->session_index = ~0;
2884               if (s0)
2885                 t->session_index = s0 - tsm->sessions;
2886             }
2887
2888           pkts_processed += next0 != NAT44_ED_OUT2IN_NEXT_DROP;
2889
2890           /* verify speculative enqueue, maybe switch current next frame */
2891           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2892                                            to_next, n_left_to_next,
2893                                            bi0, next0);
2894         }
2895
2896       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2897     }
2898
2899   vlib_node_increment_counter (vm, stats_node_index,
2900                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2901                                pkts_processed);
2902   return frame->n_vectors;
2903 }
2904
2905 static uword
2906 nat44_ed_out2in_fast_path_fn (vlib_main_t * vm,
2907                               vlib_node_runtime_t * node,
2908                               vlib_frame_t * frame)
2909 {
2910   return nat44_ed_out2in_node_fn_inline (vm, node, frame, 0);
2911 }
2912
2913 VLIB_REGISTER_NODE (nat44_ed_out2in_node) = {
2914   .function = nat44_ed_out2in_fast_path_fn,
2915   .name = "nat44-ed-out2in",
2916   .vector_size = sizeof (u32),
2917   .format_trace = format_nat44_ed_out2in_trace,
2918   .type = VLIB_NODE_TYPE_INTERNAL,
2919
2920   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2921   .error_strings = snat_out2in_error_strings,
2922
2923   .runtime_data_bytes = sizeof (snat_runtime_t),
2924
2925   .n_next_nodes = NAT44_ED_OUT2IN_N_NEXT,
2926
2927   /* edit / add dispositions here */
2928   .next_nodes = {
2929     [NAT44_ED_OUT2IN_NEXT_DROP] = "error-drop",
2930     [NAT44_ED_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2931     [NAT44_ED_OUT2IN_NEXT_SLOW_PATH] = "nat44-ed-out2in-slowpath",
2932     [NAT44_ED_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2933     [NAT44_ED_OUT2IN_NEXT_IN2OUT] = "nat44-ed-in2out",
2934   },
2935 };
2936
2937 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_out2in_node, nat44_ed_out2in_fast_path_fn);
2938
2939 static uword
2940 nat44_ed_out2in_slow_path_fn (vlib_main_t * vm,
2941                               vlib_node_runtime_t * node,
2942                               vlib_frame_t * frame)
2943 {
2944   return nat44_ed_out2in_node_fn_inline (vm, node, frame, 1);
2945 }
2946
2947 VLIB_REGISTER_NODE (nat44_ed_out2in_slowpath_node) = {
2948   .function = nat44_ed_out2in_slow_path_fn,
2949   .name = "nat44-ed-out2in-slowpath",
2950   .vector_size = sizeof (u32),
2951   .format_trace = format_nat44_ed_out2in_trace,
2952   .type = VLIB_NODE_TYPE_INTERNAL,
2953
2954   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2955   .error_strings = snat_out2in_error_strings,
2956
2957   .runtime_data_bytes = sizeof (snat_runtime_t),
2958
2959   .n_next_nodes = NAT44_ED_OUT2IN_N_NEXT,
2960
2961   /* edit / add dispositions here */
2962   .next_nodes = {
2963     [NAT44_ED_OUT2IN_NEXT_DROP] = "error-drop",
2964     [NAT44_ED_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2965     [NAT44_ED_OUT2IN_NEXT_SLOW_PATH] = "nat44-ed-out2in-slowpath",
2966     [NAT44_ED_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2967     [NAT44_ED_OUT2IN_NEXT_IN2OUT] = "nat44-ed-in2out",
2968   },
2969 };
2970
2971 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_out2in_slowpath_node,
2972                               nat44_ed_out2in_slow_path_fn);
2973
2974 /**************************/
2975 /*** deterministic mode ***/
2976 /**************************/
2977 static uword
2978 snat_det_out2in_node_fn (vlib_main_t * vm,
2979                          vlib_node_runtime_t * node,
2980                          vlib_frame_t * frame)
2981 {
2982   u32 n_left_from, * from, * to_next;
2983   snat_out2in_next_t next_index;
2984   u32 pkts_processed = 0;
2985   snat_main_t * sm = &snat_main;
2986   u32 thread_index = vm->thread_index;
2987
2988   from = vlib_frame_vector_args (frame);
2989   n_left_from = frame->n_vectors;
2990   next_index = node->cached_next_index;
2991
2992   while (n_left_from > 0)
2993     {
2994       u32 n_left_to_next;
2995
2996       vlib_get_next_frame (vm, node, next_index,
2997                            to_next, n_left_to_next);
2998
2999       while (n_left_from >= 4 && n_left_to_next >= 2)
3000         {
3001           u32 bi0, bi1;
3002           vlib_buffer_t * b0, * b1;
3003           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
3004           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
3005           u32 sw_if_index0, sw_if_index1;
3006           ip4_header_t * ip0, * ip1;
3007           ip_csum_t sum0, sum1;
3008           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
3009           u16 new_port0, old_port0, old_port1, new_port1;
3010           udp_header_t * udp0, * udp1;
3011           tcp_header_t * tcp0, * tcp1;
3012           u32 proto0, proto1;
3013           snat_det_out_key_t key0, key1;
3014           snat_det_map_t * dm0, * dm1;
3015           snat_det_session_t * ses0 = 0, * ses1 = 0;
3016           u32 rx_fib_index0, rx_fib_index1;
3017           icmp46_header_t * icmp0, * icmp1;
3018
3019           /* Prefetch next iteration. */
3020           {
3021             vlib_buffer_t * p2, * p3;
3022
3023             p2 = vlib_get_buffer (vm, from[2]);
3024             p3 = vlib_get_buffer (vm, from[3]);
3025
3026             vlib_prefetch_buffer_header (p2, LOAD);
3027             vlib_prefetch_buffer_header (p3, LOAD);
3028
3029             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
3030             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
3031           }
3032
3033           /* speculatively enqueue b0 and b1 to the current next frame */
3034           to_next[0] = bi0 = from[0];
3035           to_next[1] = bi1 = from[1];
3036           from += 2;
3037           to_next += 2;
3038           n_left_from -= 2;
3039           n_left_to_next -= 2;
3040
3041           b0 = vlib_get_buffer (vm, bi0);
3042           b1 = vlib_get_buffer (vm, bi1);
3043
3044           ip0 = vlib_buffer_get_current (b0);
3045           udp0 = ip4_next_header (ip0);
3046           tcp0 = (tcp_header_t *) udp0;
3047
3048           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3049
3050           if (PREDICT_FALSE(ip0->ttl == 1))
3051             {
3052               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3053               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3054                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3055                                            0);
3056               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
3057               goto trace0;
3058             }
3059
3060           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3061
3062           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
3063             {
3064               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3065               icmp0 = (icmp46_header_t *) udp0;
3066
3067               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
3068                                   rx_fib_index0, node, next0, thread_index,
3069                                   &ses0, &dm0);
3070               goto trace0;
3071             }
3072
3073           key0.ext_host_addr = ip0->src_address;
3074           key0.ext_host_port = tcp0->src;
3075           key0.out_port = tcp0->dst;
3076
3077           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
3078           if (PREDICT_FALSE(!dm0))
3079             {
3080               nat_log_info ("unknown dst address:  %U",
3081                             format_ip4_address, &ip0->dst_address);
3082               next0 = SNAT_OUT2IN_NEXT_DROP;
3083               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3084               goto trace0;
3085             }
3086
3087           snat_det_reverse(dm0, &ip0->dst_address,
3088                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
3089
3090           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
3091           if (PREDICT_FALSE(!ses0))
3092             {
3093               nat_log_info ("no match src %U:%d dst %U:%d for user %U",
3094                             format_ip4_address, &ip0->src_address,
3095                             clib_net_to_host_u16 (tcp0->src),
3096                             format_ip4_address, &ip0->dst_address,
3097                             clib_net_to_host_u16 (tcp0->dst),
3098                             format_ip4_address, &new_addr0);
3099               next0 = SNAT_OUT2IN_NEXT_DROP;
3100               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3101               goto trace0;
3102             }
3103           new_port0 = ses0->in_port;
3104
3105           old_addr0 = ip0->dst_address;
3106           ip0->dst_address = new_addr0;
3107           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
3108
3109           sum0 = ip0->checksum;
3110           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3111                                  ip4_header_t,
3112                                  dst_address /* changed member */);
3113           ip0->checksum = ip_csum_fold (sum0);
3114
3115           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3116             {
3117               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
3118                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
3119               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
3120                 snat_det_ses_close(dm0, ses0);
3121
3122               old_port0 = tcp0->dst;
3123               tcp0->dst = new_port0;
3124
3125               sum0 = tcp0->checksum;
3126               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3127                                      ip4_header_t,
3128                                      dst_address /* changed member */);
3129
3130               sum0 = ip_csum_update (sum0, old_port0, new_port0,
3131                                      ip4_header_t /* cheat */,
3132                                      length /* changed member */);
3133               tcp0->checksum = ip_csum_fold(sum0);
3134             }
3135           else
3136             {
3137               old_port0 = udp0->dst_port;
3138               udp0->dst_port = new_port0;
3139               udp0->checksum = 0;
3140             }
3141
3142         trace0:
3143
3144           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3145                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3146             {
3147               snat_out2in_trace_t *t =
3148                  vlib_add_trace (vm, node, b0, sizeof (*t));
3149               t->sw_if_index = sw_if_index0;
3150               t->next_index = next0;
3151               t->session_index = ~0;
3152               if (ses0)
3153                 t->session_index = ses0 - dm0->sessions;
3154             }
3155
3156           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
3157
3158           b1 = vlib_get_buffer (vm, bi1);
3159
3160           ip1 = vlib_buffer_get_current (b1);
3161           udp1 = ip4_next_header (ip1);
3162           tcp1 = (tcp_header_t *) udp1;
3163
3164           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
3165
3166           if (PREDICT_FALSE(ip1->ttl == 1))
3167             {
3168               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3169               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
3170                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3171                                            0);
3172               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
3173               goto trace1;
3174             }
3175
3176           proto1 = ip_proto_to_snat_proto (ip1->protocol);
3177
3178           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
3179             {
3180               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
3181               icmp1 = (icmp46_header_t *) udp1;
3182
3183               next1 = icmp_out2in(sm, b1, ip1, icmp1, sw_if_index1,
3184                                   rx_fib_index1, node, next1, thread_index,
3185                                   &ses1, &dm1);
3186               goto trace1;
3187             }
3188
3189           key1.ext_host_addr = ip1->src_address;
3190           key1.ext_host_port = tcp1->src;
3191           key1.out_port = tcp1->dst;
3192
3193           dm1 = snat_det_map_by_out(sm, &ip1->dst_address);
3194           if (PREDICT_FALSE(!dm1))
3195             {
3196               nat_log_info ("unknown dst address:  %U",
3197                             format_ip4_address, &ip1->dst_address);
3198               next1 = SNAT_OUT2IN_NEXT_DROP;
3199               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3200               goto trace1;
3201             }
3202
3203           snat_det_reverse(dm1, &ip1->dst_address,
3204                            clib_net_to_host_u16(tcp1->dst), &new_addr1);
3205
3206           ses1 = snat_det_get_ses_by_out (dm1, &new_addr1, key1.as_u64);
3207           if (PREDICT_FALSE(!ses1))
3208             {
3209               nat_log_info ("no match src %U:%d dst %U:%d for user %U",
3210                             format_ip4_address, &ip1->src_address,
3211                             clib_net_to_host_u16 (tcp1->src),
3212                             format_ip4_address, &ip1->dst_address,
3213                             clib_net_to_host_u16 (tcp1->dst),
3214                             format_ip4_address, &new_addr1);
3215               next1 = SNAT_OUT2IN_NEXT_DROP;
3216               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3217               goto trace1;
3218             }
3219           new_port1 = ses1->in_port;
3220
3221           old_addr1 = ip1->dst_address;
3222           ip1->dst_address = new_addr1;
3223           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
3224
3225           sum1 = ip1->checksum;
3226           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
3227                                  ip4_header_t,
3228                                  dst_address /* changed member */);
3229           ip1->checksum = ip_csum_fold (sum1);
3230
3231           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
3232             {
3233               if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
3234                 ses1->state = SNAT_SESSION_TCP_CLOSE_WAIT;
3235               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_LAST_ACK)
3236                 snat_det_ses_close(dm1, ses1);
3237
3238               old_port1 = tcp1->dst;
3239               tcp1->dst = new_port1;
3240
3241               sum1 = tcp1->checksum;
3242               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
3243                                      ip4_header_t,
3244                                      dst_address /* changed member */);
3245
3246               sum1 = ip_csum_update (sum1, old_port1, new_port1,
3247                                      ip4_header_t /* cheat */,
3248                                      length /* changed member */);
3249               tcp1->checksum = ip_csum_fold(sum1);
3250             }
3251           else
3252             {
3253               old_port1 = udp1->dst_port;
3254               udp1->dst_port = new_port1;
3255               udp1->checksum = 0;
3256             }
3257
3258         trace1:
3259
3260           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3261                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
3262             {
3263               snat_out2in_trace_t *t =
3264                  vlib_add_trace (vm, node, b1, sizeof (*t));
3265               t->sw_if_index = sw_if_index1;
3266               t->next_index = next1;
3267               t->session_index = ~0;
3268               if (ses1)
3269                 t->session_index = ses1 - dm1->sessions;
3270             }
3271
3272           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
3273
3274           /* verify speculative enqueues, maybe switch current next frame */
3275           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
3276                                            to_next, n_left_to_next,
3277                                            bi0, bi1, next0, next1);
3278          }
3279
3280       while (n_left_from > 0 && n_left_to_next > 0)
3281         {
3282           u32 bi0;
3283           vlib_buffer_t * b0;
3284           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
3285           u32 sw_if_index0;
3286           ip4_header_t * ip0;
3287           ip_csum_t sum0;
3288           ip4_address_t new_addr0, old_addr0;
3289           u16 new_port0, old_port0;
3290           udp_header_t * udp0;
3291           tcp_header_t * tcp0;
3292           u32 proto0;
3293           snat_det_out_key_t key0;
3294           snat_det_map_t * dm0;
3295           snat_det_session_t * ses0 = 0;
3296           u32 rx_fib_index0;
3297           icmp46_header_t * icmp0;
3298
3299           /* speculatively enqueue b0 to the current next frame */
3300           bi0 = from[0];
3301           to_next[0] = bi0;
3302           from += 1;
3303           to_next += 1;
3304           n_left_from -= 1;
3305           n_left_to_next -= 1;
3306
3307           b0 = vlib_get_buffer (vm, bi0);
3308
3309           ip0 = vlib_buffer_get_current (b0);
3310           udp0 = ip4_next_header (ip0);
3311           tcp0 = (tcp_header_t *) udp0;
3312
3313           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3314
3315           if (PREDICT_FALSE(ip0->ttl == 1))
3316             {
3317               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3318               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3319                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3320                                            0);
3321               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
3322               goto trace00;
3323             }
3324
3325           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3326
3327           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
3328             {
3329               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3330               icmp0 = (icmp46_header_t *) udp0;
3331
3332               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
3333                                   rx_fib_index0, node, next0, thread_index,
3334                                   &ses0, &dm0);
3335               goto trace00;
3336             }
3337
3338           key0.ext_host_addr = ip0->src_address;
3339           key0.ext_host_port = tcp0->src;
3340           key0.out_port = tcp0->dst;
3341
3342           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
3343           if (PREDICT_FALSE(!dm0))
3344             {
3345               nat_log_info ("unknown dst address:  %U",
3346                             format_ip4_address, &ip0->dst_address);
3347               next0 = SNAT_OUT2IN_NEXT_DROP;
3348               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3349               goto trace00;
3350             }
3351
3352           snat_det_reverse(dm0, &ip0->dst_address,
3353                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
3354
3355           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
3356           if (PREDICT_FALSE(!ses0))
3357             {
3358               nat_log_info ("no match src %U:%d dst %U:%d for user %U",
3359                             format_ip4_address, &ip0->src_address,
3360                             clib_net_to_host_u16 (tcp0->src),
3361                             format_ip4_address, &ip0->dst_address,
3362                             clib_net_to_host_u16 (tcp0->dst),
3363                             format_ip4_address, &new_addr0);
3364               next0 = SNAT_OUT2IN_NEXT_DROP;
3365               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3366               goto trace00;
3367             }
3368           new_port0 = ses0->in_port;
3369
3370           old_addr0 = ip0->dst_address;
3371           ip0->dst_address = new_addr0;
3372           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
3373
3374           sum0 = ip0->checksum;
3375           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3376                                  ip4_header_t,
3377                                  dst_address /* changed member */);
3378           ip0->checksum = ip_csum_fold (sum0);
3379
3380           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3381             {
3382               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
3383                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
3384               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
3385                 snat_det_ses_close(dm0, ses0);
3386
3387               old_port0 = tcp0->dst;
3388               tcp0->dst = new_port0;
3389
3390               sum0 = tcp0->checksum;
3391               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3392                                      ip4_header_t,
3393                                      dst_address /* changed member */);
3394
3395               sum0 = ip_csum_update (sum0, old_port0, new_port0,
3396                                      ip4_header_t /* cheat */,
3397                                      length /* changed member */);
3398               tcp0->checksum = ip_csum_fold(sum0);
3399             }
3400           else
3401             {
3402               old_port0 = udp0->dst_port;
3403               udp0->dst_port = new_port0;
3404               udp0->checksum = 0;
3405             }
3406
3407         trace00:
3408
3409           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3410                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3411             {
3412               snat_out2in_trace_t *t =
3413                  vlib_add_trace (vm, node, b0, sizeof (*t));
3414               t->sw_if_index = sw_if_index0;
3415               t->next_index = next0;
3416               t->session_index = ~0;
3417               if (ses0)
3418                 t->session_index = ses0 - dm0->sessions;
3419             }
3420
3421           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
3422
3423           /* verify speculative enqueue, maybe switch current next frame */
3424           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3425                                            to_next, n_left_to_next,
3426                                            bi0, next0);
3427         }
3428
3429       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3430     }
3431
3432   vlib_node_increment_counter (vm, snat_det_out2in_node.index,
3433                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
3434                                pkts_processed);
3435   return frame->n_vectors;
3436 }
3437
3438 VLIB_REGISTER_NODE (snat_det_out2in_node) = {
3439   .function = snat_det_out2in_node_fn,
3440   .name = "nat44-det-out2in",
3441   .vector_size = sizeof (u32),
3442   .format_trace = format_snat_out2in_trace,
3443   .type = VLIB_NODE_TYPE_INTERNAL,
3444
3445   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
3446   .error_strings = snat_out2in_error_strings,
3447
3448   .runtime_data_bytes = sizeof (snat_runtime_t),
3449
3450   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
3451
3452   /* edit / add dispositions here */
3453   .next_nodes = {
3454     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
3455     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
3456     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3457     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
3458   },
3459 };
3460 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn);
3461
3462 /**
3463  * Get address and port values to be used for ICMP packet translation
3464  * and create session if needed
3465  *
3466  * @param[in,out] sm             NAT main
3467  * @param[in,out] node           NAT node runtime
3468  * @param[in] thread_index       thread index
3469  * @param[in,out] b0             buffer containing packet to be translated
3470  * @param[out] p_proto           protocol used for matching
3471  * @param[out] p_value           address and port after NAT translation
3472  * @param[out] p_dont_translate  if packet should not be translated
3473  * @param d                      optional parameter
3474  * @param e                      optional parameter
3475  */
3476 u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node,
3477                           u32 thread_index, vlib_buffer_t *b0,
3478                           ip4_header_t *ip0, u8 *p_proto,
3479                           snat_session_key_t *p_value,
3480                           u8 *p_dont_translate, void *d, void *e)
3481 {
3482   icmp46_header_t *icmp0;
3483   u32 sw_if_index0;
3484   u8 protocol;
3485   snat_det_out_key_t key0;
3486   u8 dont_translate = 0;
3487   u32 next0 = ~0;
3488   icmp_echo_header_t *echo0, *inner_echo0 = 0;
3489   ip4_header_t *inner_ip0;
3490   void *l4_header = 0;
3491   icmp46_header_t *inner_icmp0;
3492   snat_det_map_t * dm0 = 0;
3493   ip4_address_t new_addr0 = {{0}};
3494   snat_det_session_t * ses0 = 0;
3495   ip4_address_t out_addr;
3496
3497   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
3498   echo0 = (icmp_echo_header_t *)(icmp0+1);
3499   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3500
3501   if (!icmp_is_error_message (icmp0))
3502     {
3503       protocol = SNAT_PROTOCOL_ICMP;
3504       key0.ext_host_addr = ip0->src_address;
3505       key0.ext_host_port = 0;
3506       key0.out_port = echo0->identifier;
3507       out_addr = ip0->dst_address;
3508     }
3509   else
3510     {
3511       inner_ip0 = (ip4_header_t *)(echo0+1);
3512       l4_header = ip4_next_header (inner_ip0);
3513       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
3514       key0.ext_host_addr = inner_ip0->dst_address;
3515       out_addr = inner_ip0->src_address;
3516       switch (protocol)
3517         {
3518         case SNAT_PROTOCOL_ICMP:
3519           inner_icmp0 = (icmp46_header_t*)l4_header;
3520           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
3521           key0.ext_host_port = 0;
3522           key0.out_port = inner_echo0->identifier;
3523           break;
3524         case SNAT_PROTOCOL_UDP:
3525         case SNAT_PROTOCOL_TCP:
3526           key0.ext_host_port = ((tcp_udp_header_t*)l4_header)->dst_port;
3527           key0.out_port = ((tcp_udp_header_t*)l4_header)->src_port;
3528           break;
3529         default:
3530           b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
3531           next0 = SNAT_OUT2IN_NEXT_DROP;
3532           goto out;
3533         }
3534     }
3535
3536   dm0 = snat_det_map_by_out(sm, &out_addr);
3537   if (PREDICT_FALSE(!dm0))
3538     {
3539       /* Don't NAT packet aimed at the intfc address */
3540       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
3541                                           ip0->dst_address.as_u32)))
3542         {
3543           dont_translate = 1;
3544           goto out;
3545         }
3546       nat_log_info ("unknown dst address:  %U",
3547                     format_ip4_address, &ip0->dst_address);
3548       goto out;
3549     }
3550
3551   snat_det_reverse(dm0, &ip0->dst_address,
3552                    clib_net_to_host_u16(key0.out_port), &new_addr0);
3553
3554   ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
3555   if (PREDICT_FALSE(!ses0))
3556     {
3557       /* Don't NAT packet aimed at the intfc address */
3558       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
3559                                           ip0->dst_address.as_u32)))
3560         {
3561           dont_translate = 1;
3562           goto out;
3563         }
3564       nat_log_info ("no match src %U:%d dst %U:%d for user %U",
3565                     format_ip4_address, &key0.ext_host_addr,
3566                     clib_net_to_host_u16 (key0.ext_host_port),
3567                     format_ip4_address, &out_addr,
3568                     clib_net_to_host_u16 (key0.out_port),
3569                     format_ip4_address, &new_addr0);
3570       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3571       next0 = SNAT_OUT2IN_NEXT_DROP;
3572       goto out;
3573     }
3574
3575   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
3576                     !icmp_is_error_message (icmp0)))
3577     {
3578       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
3579       next0 = SNAT_OUT2IN_NEXT_DROP;
3580       goto out;
3581     }
3582
3583   goto out;
3584
3585 out:
3586   *p_proto = protocol;
3587   if (ses0)
3588     {
3589       p_value->addr = new_addr0;
3590       p_value->fib_index = sm->inside_fib_index;
3591       p_value->port = ses0->in_port;
3592     }
3593   *p_dont_translate = dont_translate;
3594   if (d)
3595     *(snat_det_session_t**)d = ses0;
3596   if (e)
3597     *(snat_det_map_t**)e = dm0;
3598   return next0;
3599 }
3600
3601 /**********************/
3602 /*** worker handoff ***/
3603 /**********************/
3604 static uword
3605 snat_out2in_worker_handoff_fn (vlib_main_t * vm,
3606                                vlib_node_runtime_t * node,
3607                                vlib_frame_t * frame)
3608 {
3609   snat_main_t *sm = &snat_main;
3610   vlib_thread_main_t *tm = vlib_get_thread_main ();
3611   u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0;
3612   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
3613   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
3614     = 0;
3615   vlib_frame_queue_elt_t *hf = 0;
3616   vlib_frame_queue_t *fq;
3617   vlib_frame_t *f = 0;
3618   int i;
3619   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
3620   u32 next_worker_index = 0;
3621   u32 current_worker_index = ~0;
3622   u32 thread_index = vm->thread_index;
3623   vlib_frame_t *d = 0;
3624
3625   ASSERT (vec_len (sm->workers));
3626
3627   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
3628     {
3629       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
3630
3631       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
3632                                tm->n_vlib_mains - 1,
3633                                (vlib_frame_queue_t *) (~0));
3634     }
3635
3636   from = vlib_frame_vector_args (frame);
3637   n_left_from = frame->n_vectors;
3638
3639   while (n_left_from > 0)
3640     {
3641       u32 bi0;
3642       vlib_buffer_t *b0;
3643       u32 sw_if_index0;
3644       u32 rx_fib_index0;
3645       ip4_header_t * ip0;
3646       u8 do_handoff;
3647
3648       bi0 = from[0];
3649       from += 1;
3650       n_left_from -= 1;
3651
3652       b0 = vlib_get_buffer (vm, bi0);
3653
3654       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
3655       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3656
3657       ip0 = vlib_buffer_get_current (b0);
3658
3659       next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0);
3660
3661       if (PREDICT_FALSE (next_worker_index != thread_index))
3662         {
3663           do_handoff = 1;
3664
3665           if (next_worker_index != current_worker_index)
3666             {
3667               fq = is_vlib_frame_queue_congested (
3668                 sm->fq_out2in_index, next_worker_index, NAT_FQ_NELTS - 2,
3669                 congested_handoff_queue_by_worker_index);
3670
3671               if (fq)
3672                 {
3673                   /* if this is 1st frame */
3674                   if (!d)
3675                     {
3676                       d = vlib_get_frame_to_node (vm, sm->error_node_index);
3677                       to_next_drop = vlib_frame_vector_args (d);
3678                     }
3679
3680                   to_next_drop[0] = bi0;
3681                   to_next_drop += 1;
3682                   d->n_vectors++;
3683                   b0->error = node->errors[SNAT_OUT2IN_ERROR_FQ_CONGESTED];
3684                   goto trace0;
3685                 }
3686
3687               if (hf)
3688                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3689
3690               hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index,
3691                                                       next_worker_index,
3692                                                       handoff_queue_elt_by_worker_index);
3693
3694               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
3695               to_next_worker = &hf->buffer_index[hf->n_vectors];
3696               current_worker_index = next_worker_index;
3697             }
3698
3699           /* enqueue to correct worker thread */
3700           to_next_worker[0] = bi0;
3701           to_next_worker++;
3702           n_left_to_next_worker--;
3703
3704           if (n_left_to_next_worker == 0)
3705             {
3706               hf->n_vectors = VLIB_FRAME_SIZE;
3707               vlib_put_frame_queue_elt (hf);
3708               current_worker_index = ~0;
3709               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
3710               hf = 0;
3711             }
3712         }
3713       else
3714         {
3715           do_handoff = 0;
3716           /* if this is 1st frame */
3717           if (!f)
3718             {
3719               f = vlib_get_frame_to_node (vm, sm->out2in_node_index);
3720               to_next = vlib_frame_vector_args (f);
3721             }
3722
3723           to_next[0] = bi0;
3724           to_next += 1;
3725           f->n_vectors++;
3726         }
3727
3728 trace0:
3729       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
3730                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3731         {
3732           snat_out2in_worker_handoff_trace_t *t =
3733             vlib_add_trace (vm, node, b0, sizeof (*t));
3734           t->next_worker_index = next_worker_index;
3735           t->do_handoff = do_handoff;
3736         }
3737     }
3738
3739   if (f)
3740     vlib_put_frame_to_node (vm, sm->out2in_node_index, f);
3741
3742   if (d)
3743     vlib_put_frame_to_node (vm, sm->error_node_index, d);
3744
3745   if (hf)
3746     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3747
3748   /* Ship frames to the worker nodes */
3749   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
3750     {
3751       if (handoff_queue_elt_by_worker_index[i])
3752         {
3753           hf = handoff_queue_elt_by_worker_index[i];
3754           /*
3755            * It works better to let the handoff node
3756            * rate-adapt, always ship the handoff queue element.
3757            */
3758           if (1 || hf->n_vectors == hf->last_n_vectors)
3759             {
3760               vlib_put_frame_queue_elt (hf);
3761               handoff_queue_elt_by_worker_index[i] = 0;
3762             }
3763           else
3764             hf->last_n_vectors = hf->n_vectors;
3765         }
3766       congested_handoff_queue_by_worker_index[i] =
3767         (vlib_frame_queue_t *) (~0);
3768     }
3769   hf = 0;
3770   current_worker_index = ~0;
3771   return frame->n_vectors;
3772 }
3773
3774 VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = {
3775   .function = snat_out2in_worker_handoff_fn,
3776   .name = "nat44-out2in-worker-handoff",
3777   .vector_size = sizeof (u32),
3778   .format_trace = format_snat_out2in_worker_handoff_trace,
3779   .type = VLIB_NODE_TYPE_INTERNAL,
3780
3781   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
3782   .error_strings = snat_out2in_error_strings,
3783
3784   .n_next_nodes = 1,
3785
3786   .next_nodes = {
3787     [0] = "error-drop",
3788   },
3789 };
3790
3791 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn);
3792
3793 static uword
3794 snat_out2in_fast_node_fn (vlib_main_t * vm,
3795                           vlib_node_runtime_t * node,
3796                           vlib_frame_t * frame)
3797 {
3798   u32 n_left_from, * from, * to_next;
3799   snat_out2in_next_t next_index;
3800   u32 pkts_processed = 0;
3801   snat_main_t * sm = &snat_main;
3802
3803   from = vlib_frame_vector_args (frame);
3804   n_left_from = frame->n_vectors;
3805   next_index = node->cached_next_index;
3806
3807   while (n_left_from > 0)
3808     {
3809       u32 n_left_to_next;
3810
3811       vlib_get_next_frame (vm, node, next_index,
3812                            to_next, n_left_to_next);
3813
3814       while (n_left_from > 0 && n_left_to_next > 0)
3815         {
3816           u32 bi0;
3817           vlib_buffer_t * b0;
3818           u32 next0 = SNAT_OUT2IN_NEXT_DROP;
3819           u32 sw_if_index0;
3820           ip4_header_t * ip0;
3821           ip_csum_t sum0;
3822           u32 new_addr0, old_addr0;
3823           u16 new_port0, old_port0;
3824           udp_header_t * udp0;
3825           tcp_header_t * tcp0;
3826           icmp46_header_t * icmp0;
3827           snat_session_key_t key0, sm0;
3828           u32 proto0;
3829           u32 rx_fib_index0;
3830
3831           /* speculatively enqueue b0 to the current next frame */
3832           bi0 = from[0];
3833           to_next[0] = bi0;
3834           from += 1;
3835           to_next += 1;
3836           n_left_from -= 1;
3837           n_left_to_next -= 1;
3838
3839           b0 = vlib_get_buffer (vm, bi0);
3840
3841           ip0 = vlib_buffer_get_current (b0);
3842           udp0 = ip4_next_header (ip0);
3843           tcp0 = (tcp_header_t *) udp0;
3844           icmp0 = (icmp46_header_t *) udp0;
3845
3846           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3847           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3848
3849           vnet_feature_next (&next0, b0);
3850
3851           if (PREDICT_FALSE(ip0->ttl == 1))
3852             {
3853               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3854               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3855                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3856                                            0);
3857               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
3858               goto trace00;
3859             }
3860
3861           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3862
3863           if (PREDICT_FALSE (proto0 == ~0))
3864               goto trace00;
3865
3866           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3867             {
3868               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
3869                                   rx_fib_index0, node, next0, ~0, 0, 0);
3870               goto trace00;
3871             }
3872
3873           key0.addr = ip0->dst_address;
3874           key0.port = udp0->dst_port;
3875           key0.fib_index = rx_fib_index0;
3876
3877           if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
3878             {
3879               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3880               goto trace00;
3881             }
3882
3883           new_addr0 = sm0.addr.as_u32;
3884           new_port0 = sm0.port;
3885           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
3886           old_addr0 = ip0->dst_address.as_u32;
3887           ip0->dst_address.as_u32 = new_addr0;
3888
3889           sum0 = ip0->checksum;
3890           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3891                                  ip4_header_t,
3892                                  dst_address /* changed member */);
3893           ip0->checksum = ip_csum_fold (sum0);
3894
3895           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
3896             {
3897                if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3898                 {
3899                   old_port0 = tcp0->dst_port;
3900                   tcp0->dst_port = new_port0;
3901
3902                   sum0 = tcp0->checksum;
3903                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3904                                          ip4_header_t,
3905                                          dst_address /* changed member */);
3906
3907                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
3908                                          ip4_header_t /* cheat */,
3909                                          length /* changed member */);
3910                   tcp0->checksum = ip_csum_fold(sum0);
3911                 }
3912               else
3913                 {
3914                   old_port0 = udp0->dst_port;
3915                   udp0->dst_port = new_port0;
3916                   udp0->checksum = 0;
3917                 }
3918             }
3919           else
3920             {
3921               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3922                 {
3923                   sum0 = tcp0->checksum;
3924                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3925                                          ip4_header_t,
3926                                          dst_address /* changed member */);
3927
3928                   tcp0->checksum = ip_csum_fold(sum0);
3929                 }
3930             }
3931
3932         trace00:
3933
3934           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3935                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3936             {
3937               snat_out2in_trace_t *t =
3938                  vlib_add_trace (vm, node, b0, sizeof (*t));
3939               t->sw_if_index = sw_if_index0;
3940               t->next_index = next0;
3941             }
3942
3943           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
3944
3945           /* verify speculative enqueue, maybe switch current next frame */
3946           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3947                                            to_next, n_left_to_next,
3948                                            bi0, next0);
3949         }
3950
3951       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3952     }
3953
3954   vlib_node_increment_counter (vm, snat_out2in_fast_node.index,
3955                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
3956                                pkts_processed);
3957   return frame->n_vectors;
3958 }
3959
3960 VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
3961   .function = snat_out2in_fast_node_fn,
3962   .name = "nat44-out2in-fast",
3963   .vector_size = sizeof (u32),
3964   .format_trace = format_snat_out2in_fast_trace,
3965   .type = VLIB_NODE_TYPE_INTERNAL,
3966
3967   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
3968   .error_strings = snat_out2in_error_strings,
3969
3970   .runtime_data_bytes = sizeof (snat_runtime_t),
3971
3972   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
3973
3974   /* edit / add dispositions here */
3975   .next_nodes = {
3976     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
3977     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
3978     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3979     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
3980   },
3981 };
3982 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn);