NAT44: client-IP based session affinity for load-balancing (VPP-1297)
[vpp.git] / src / plugins / nat / out2in.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/udp/udp.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <nat/nat.h>
26 #include <nat/nat_ipfix_logging.h>
27 #include <nat/nat_det.h>
28 #include <nat/nat_reass.h>
29 #include <nat/nat_inlines.h>
30
31 #include <vppinfra/hash.h>
32 #include <vppinfra/error.h>
33 #include <vppinfra/elog.h>
34
35 typedef struct {
36   u32 sw_if_index;
37   u32 next_index;
38   u32 session_index;
39 } snat_out2in_trace_t;
40
41 typedef struct {
42   u32 next_worker_index;
43   u8 do_handoff;
44 } snat_out2in_worker_handoff_trace_t;
45
46 /* packet trace format function */
47 static u8 * format_snat_out2in_trace (u8 * s, va_list * args)
48 {
49   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
50   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
51   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
52
53   s = format (s, "NAT44_OUT2IN: sw_if_index %d, next index %d, session index %d",
54               t->sw_if_index, t->next_index, t->session_index);
55   return s;
56 }
57
58 static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args)
59 {
60   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
61   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
62   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
63
64   s = format (s, "NAT44_OUT2IN_FAST: sw_if_index %d, next index %d",
65               t->sw_if_index, t->next_index);
66   return s;
67 }
68
69 static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args)
70 {
71   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
72   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
73   snat_out2in_worker_handoff_trace_t * t =
74     va_arg (*args, snat_out2in_worker_handoff_trace_t *);
75   char * m;
76
77   m = t->do_handoff ? "next worker" : "same worker";
78   s = format (s, "NAT44_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
79
80   return s;
81 }
82
83 typedef struct {
84   u32 sw_if_index;
85   u32 next_index;
86   u8 cached;
87 } nat44_out2in_reass_trace_t;
88
89 static u8 * format_nat44_out2in_reass_trace (u8 * s, va_list * args)
90 {
91   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
92   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
93   nat44_out2in_reass_trace_t * t = va_arg (*args, nat44_out2in_reass_trace_t *);
94
95   s = format (s, "NAT44_OUT2IN_REASS: sw_if_index %d, next index %d, status %s",
96               t->sw_if_index, t->next_index,
97               t->cached ? "cached" : "translated");
98
99   return s;
100 }
101
102 vlib_node_registration_t snat_out2in_node;
103 vlib_node_registration_t snat_out2in_fast_node;
104 vlib_node_registration_t snat_out2in_worker_handoff_node;
105 vlib_node_registration_t snat_det_out2in_node;
106 vlib_node_registration_t nat44_out2in_reass_node;
107 vlib_node_registration_t nat44_ed_out2in_node;
108 vlib_node_registration_t nat44_ed_out2in_slowpath_node;
109
/*
 * X-macro table of out2in error counters.
 * Each entry is _(SYMBOL, "description"); users define _() before expanding.
 */
#define foreach_snat_out2in_error                               \
  _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")               \
  _(OUT2IN_PACKETS, "Good out2in packets processed")            \
  _(OUT_OF_PORTS, "Out of ports")                               \
  _(BAD_ICMP_TYPE, "unsupported ICMP type")                     \
  _(NO_TRANSLATION, "No translation")                           \
  _(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")         \
  _(DROP_FRAGMENT, "Drop fragment")                             \
  _(MAX_REASS, "Maximum reassemblies exceeded")                 \
  _(MAX_FRAG, "Maximum fragments per reassembly exceeded")      \
  _(FQ_CONGESTED, "Handoff frame queue congested")
121
122 typedef enum {
123 #define _(sym,str) SNAT_OUT2IN_ERROR_##sym,
124   foreach_snat_out2in_error
125 #undef _
126   SNAT_OUT2IN_N_ERROR,
127 } snat_out2in_error_t;
128
129 static char * snat_out2in_error_strings[] = {
130 #define _(sym,string) string,
131   foreach_snat_out2in_error
132 #undef _
133 };
134
/* Next indices used by the out2in node. */
typedef enum
{
  SNAT_OUT2IN_NEXT_DROP,        /* packet is dropped */
  SNAT_OUT2IN_NEXT_LOOKUP,      /* default next for packets that are not dropped */
  SNAT_OUT2IN_NEXT_ICMP_ERROR,  /* selected when an ICMP error was set on the buffer */
  SNAT_OUT2IN_NEXT_REASS,       /* selected for IPv4 fragments */
  SNAT_OUT2IN_N_NEXT,           /* number of next indices */
} snat_out2in_next_t;
142
143 int
144 nat44_o2i_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void * arg)
145 {
146   snat_main_t *sm = &snat_main;
147   nat44_is_idle_session_ctx_t *ctx = arg;
148   snat_session_t *s;
149   u64 sess_timeout_time;
150   snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
151                                                        ctx->thread_index);
152   clib_bihash_kv_8_8_t s_kv;
153
154   s = pool_elt_at_index (tsm->sessions, kv->value);
155   sess_timeout_time = s->last_heard + (f64)nat44_session_get_timeout(sm, s);
156   if (ctx->now >= sess_timeout_time)
157     {
158       s_kv.key = s->in2out.as_u64;
159       if (clib_bihash_add_del_8_8 (&tsm->in2out, &s_kv, 0))
160         nat_log_warn ("out2in key del failed");
161
162       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
163                                           s->out2in.addr.as_u32,
164                                           s->in2out.protocol,
165                                           s->in2out.port,
166                                           s->out2in.port,
167                                           s->in2out.fib_index);
168
169       if (!snat_is_session_static (s))
170         snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
171                                             &s->out2in);
172
173       nat44_delete_session (sm, s, ctx->thread_index);
174       return 1;
175     }
176
177   return 0;
178 }
179
180 /**
181  * @brief Create session for static mapping.
182  *
183  * Create NAT session initiated by host from external network with static
184  * mapping.
185  *
186  * @param sm     NAT main.
187  * @param b0     Vlib buffer.
188  * @param in2out In2out NAT44 session key.
189  * @param out2in Out2in NAT44 session key.
190  * @param node   Vlib node.
191  *
192  * @returns SNAT session if successfully created otherwise 0.
193  */
194 static inline snat_session_t *
195 create_session_for_static_mapping (snat_main_t *sm,
196                                    vlib_buffer_t *b0,
197                                    snat_session_key_t in2out,
198                                    snat_session_key_t out2in,
199                                    vlib_node_runtime_t * node,
200                                    u32 thread_index,
201                                    f64 now)
202 {
203   snat_user_t *u;
204   snat_session_t *s;
205   clib_bihash_kv_8_8_t kv0;
206   ip4_header_t *ip0;
207   udp_header_t *udp0;
208   nat44_is_idle_session_ctx_t ctx0;
209
210   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
211     {
212       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
213       nat_log_notice ("maximum sessions exceeded");
214       return 0;
215     }
216
217   ip0 = vlib_buffer_get_current (b0);
218   udp0 = ip4_next_header (ip0);
219
220   u = nat_user_get_or_create (sm, &in2out.addr, in2out.fib_index, thread_index);
221   if (!u)
222     {
223       nat_log_warn ("create NAT user failed");
224       return 0;
225     }
226
227   s = nat_session_alloc_or_recycle (sm, u, thread_index);
228   if (!s)
229     {
230       nat44_delete_user_with_no_session (sm, u, thread_index);
231       nat_log_warn ("create NAT session failed");
232       return 0;
233     }
234
235   s->outside_address_index = ~0;
236   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
237   s->ext_host_addr.as_u32 = ip0->src_address.as_u32;
238   s->ext_host_port = udp0->src_port;
239   user_session_increment (sm, u, 1 /* static */);
240   s->in2out = in2out;
241   s->out2in = out2in;
242   s->in2out.protocol = out2in.protocol;
243
244   /* Add to translation hashes */
245   ctx0.now = now;
246   ctx0.thread_index = thread_index;
247   kv0.key = s->in2out.as_u64;
248   kv0.value = s - sm->per_thread_data[thread_index].sessions;
249   if (clib_bihash_add_or_overwrite_stale_8_8 (
250        &sm->per_thread_data[thread_index].in2out, &kv0,
251        nat44_i2o_is_idle_session_cb, &ctx0))
252       nat_log_notice ("in2out key add failed");
253
254   kv0.key = s->out2in.as_u64;
255
256   if (clib_bihash_add_or_overwrite_stale_8_8 (
257         &sm->per_thread_data[thread_index].out2in, &kv0,
258         nat44_o2i_is_idle_session_cb, &ctx0))
259       nat_log_notice ("out2in key add failed");
260
261   /* log NAT event */
262   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
263                                       s->out2in.addr.as_u32,
264                                       s->in2out.protocol,
265                                       s->in2out.port,
266                                       s->out2in.port,
267                                       s->in2out.fib_index);
268   return s;
269 }
270
271 static_always_inline
272 snat_out2in_error_t icmp_get_key(ip4_header_t *ip0,
273                                  snat_session_key_t *p_key0)
274 {
275   icmp46_header_t *icmp0;
276   snat_session_key_t key0;
277   icmp_echo_header_t *echo0, *inner_echo0 = 0;
278   ip4_header_t *inner_ip0;
279   void *l4_header = 0;
280   icmp46_header_t *inner_icmp0;
281
282   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
283   echo0 = (icmp_echo_header_t *)(icmp0+1);
284
285   if (!icmp_is_error_message (icmp0))
286     {
287       key0.protocol = SNAT_PROTOCOL_ICMP;
288       key0.addr = ip0->dst_address;
289       key0.port = echo0->identifier;
290     }
291   else
292     {
293       inner_ip0 = (ip4_header_t *)(echo0+1);
294       l4_header = ip4_next_header (inner_ip0);
295       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
296       key0.addr = inner_ip0->src_address;
297       switch (key0.protocol)
298         {
299         case SNAT_PROTOCOL_ICMP:
300           inner_icmp0 = (icmp46_header_t*)l4_header;
301           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
302           key0.port = inner_echo0->identifier;
303           break;
304         case SNAT_PROTOCOL_UDP:
305         case SNAT_PROTOCOL_TCP:
306           key0.port = ((tcp_udp_header_t*)l4_header)->src_port;
307           break;
308         default:
309           return SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL;
310         }
311     }
312   *p_key0 = key0;
313   return -1; /* success */
314 }
315
316 /**
317  * Get address and port values to be used for ICMP packet translation
318  * and create session if needed
319  *
320  * @param[in,out] sm             NAT main
321  * @param[in,out] node           NAT node runtime
322  * @param[in] thread_index       thread index
323  * @param[in,out] b0             buffer containing packet to be translated
324  * @param[out] p_proto           protocol used for matching
325  * @param[out] p_value           address and port after NAT translation
326  * @param[out] p_dont_translate  if packet should not be translated
327  * @param d                      optional parameter
328  * @param e                      optional parameter
329  */
330 u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
331                            u32 thread_index, vlib_buffer_t *b0,
332                            ip4_header_t *ip0, u8 *p_proto,
333                            snat_session_key_t *p_value,
334                            u8 *p_dont_translate, void *d, void *e)
335 {
336   icmp46_header_t *icmp0;
337   u32 sw_if_index0;
338   u32 rx_fib_index0;
339   snat_session_key_t key0;
340   snat_session_key_t sm0;
341   snat_session_t *s0 = 0;
342   u8 dont_translate = 0;
343   clib_bihash_kv_8_8_t kv0, value0;
344   u8 is_addr_only;
345   u32 next0 = ~0;
346   int err;
347
348   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
349   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
350   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
351
352   key0.protocol = 0;
353
354   err = icmp_get_key (ip0, &key0);
355   if (err != -1)
356     {
357       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
358       next0 = SNAT_OUT2IN_NEXT_DROP;
359       goto out;
360     }
361   key0.fib_index = rx_fib_index0;
362
363   kv0.key = key0.as_u64;
364
365   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
366                               &value0))
367     {
368       /* Try to match static mapping by external address and port,
369          destination address and port in packet */
370       if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0, 0, 0))
371         {
372           if (!sm->forwarding_enabled)
373             {
374               /* Don't NAT packet aimed at the intfc address */
375               if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
376                                                   ip0->dst_address.as_u32)))
377                 {
378                   dont_translate = 1;
379                   goto out;
380                 }
381               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
382               next0 = SNAT_OUT2IN_NEXT_DROP;
383               goto out;
384             }
385           else
386             {
387               dont_translate = 1;
388               goto out;
389             }
390         }
391
392       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
393                         (icmp0->type != ICMP4_echo_request || !is_addr_only)))
394         {
395           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
396           next0 = SNAT_OUT2IN_NEXT_DROP;
397           goto out;
398         }
399
400       /* Create session initiated by host from external network */
401       s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
402                                              node, thread_index,
403                                              vlib_time_now (sm->vlib_main));
404
405       if (!s0)
406         {
407           next0 = SNAT_OUT2IN_NEXT_DROP;
408           goto out;
409         }
410     }
411   else
412     {
413       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
414                         icmp0->type != ICMP4_echo_request &&
415                         !icmp_is_error_message (icmp0)))
416         {
417           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
418           next0 = SNAT_OUT2IN_NEXT_DROP;
419           goto out;
420         }
421
422       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
423                               value0.value);
424     }
425
426 out:
427   *p_proto = key0.protocol;
428   if (s0)
429     *p_value = s0->in2out;
430   *p_dont_translate = dont_translate;
431   if (d)
432     *(snat_session_t**)d = s0;
433   return next0;
434 }
435
436 /**
437  * Get address and port values to be used for ICMP packet translation
438  *
439  * @param[in] sm                 NAT main
440  * @param[in,out] node           NAT node runtime
441  * @param[in] thread_index       thread index
442  * @param[in,out] b0             buffer containing packet to be translated
443  * @param[out] p_proto           protocol used for matching
444  * @param[out] p_value           address and port after NAT translation
445  * @param[out] p_dont_translate  if packet should not be translated
446  * @param d                      optional parameter
447  * @param e                      optional parameter
448  */
449 u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
450                            u32 thread_index, vlib_buffer_t *b0,
451                            ip4_header_t *ip0, u8 *p_proto,
452                            snat_session_key_t *p_value,
453                            u8 *p_dont_translate, void *d, void *e)
454 {
455   icmp46_header_t *icmp0;
456   u32 sw_if_index0;
457   u32 rx_fib_index0;
458   snat_session_key_t key0;
459   snat_session_key_t sm0;
460   u8 dont_translate = 0;
461   u8 is_addr_only;
462   u32 next0 = ~0;
463   int err;
464
465   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
466   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
467   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
468
469   err = icmp_get_key (ip0, &key0);
470   if (err != -1)
471     {
472       b0->error = node->errors[err];
473       next0 = SNAT_OUT2IN_NEXT_DROP;
474       goto out2;
475     }
476   key0.fib_index = rx_fib_index0;
477
478   if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0, 0, 0))
479     {
480       /* Don't NAT packet aimed at the intfc address */
481       if (is_interface_addr(sm, node, sw_if_index0, ip0->dst_address.as_u32))
482         {
483           dont_translate = 1;
484           goto out;
485         }
486       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
487       next0 = SNAT_OUT2IN_NEXT_DROP;
488       goto out;
489     }
490
491   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
492                     (icmp0->type != ICMP4_echo_request || !is_addr_only) &&
493                     !icmp_is_error_message (icmp0)))
494     {
495       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
496       next0 = SNAT_OUT2IN_NEXT_DROP;
497       goto out;
498     }
499
500 out:
501   *p_value = sm0;
502 out2:
503   *p_proto = key0.protocol;
504   *p_dont_translate = dont_translate;
505   return next0;
506 }
507
508 static inline u32 icmp_out2in (snat_main_t *sm,
509                                vlib_buffer_t * b0,
510                                ip4_header_t * ip0,
511                                icmp46_header_t * icmp0,
512                                u32 sw_if_index0,
513                                u32 rx_fib_index0,
514                                vlib_node_runtime_t * node,
515                                u32 next0,
516                                u32 thread_index,
517                                void *d,
518                                void *e)
519 {
520   snat_session_key_t sm0;
521   u8 protocol;
522   icmp_echo_header_t *echo0, *inner_echo0 = 0;
523   ip4_header_t *inner_ip0 = 0;
524   void *l4_header = 0;
525   icmp46_header_t *inner_icmp0;
526   u8 dont_translate;
527   u32 new_addr0, old_addr0;
528   u16 old_id0, new_id0;
529   ip_csum_t sum0;
530   u16 checksum0;
531   u32 next0_tmp;
532
533   echo0 = (icmp_echo_header_t *)(icmp0+1);
534
535   next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0, ip0,
536                                        &protocol, &sm0, &dont_translate, d, e);
537   if (next0_tmp != ~0)
538     next0 = next0_tmp;
539   if (next0 == SNAT_OUT2IN_NEXT_DROP || dont_translate)
540     goto out;
541
542   sum0 = ip_incremental_checksum (0, icmp0,
543                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
544   checksum0 = ~ip_csum_fold (sum0);
545   if (checksum0 != 0 && checksum0 != 0xffff)
546     {
547       next0 = SNAT_OUT2IN_NEXT_DROP;
548       goto out;
549     }
550
551   old_addr0 = ip0->dst_address.as_u32;
552   new_addr0 = ip0->dst_address.as_u32 = sm0.addr.as_u32;
553   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
554
555   sum0 = ip0->checksum;
556   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
557                          dst_address /* changed member */);
558   ip0->checksum = ip_csum_fold (sum0);
559
560   if (icmp0->checksum == 0)
561     icmp0->checksum = 0xffff;
562
563   if (!icmp_is_error_message (icmp0))
564     {
565       new_id0 = sm0.port;
566       if (PREDICT_FALSE(new_id0 != echo0->identifier))
567         {
568           old_id0 = echo0->identifier;
569           new_id0 = sm0.port;
570           echo0->identifier = new_id0;
571
572           sum0 = icmp0->checksum;
573           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
574                                  identifier /* changed member */);
575           icmp0->checksum = ip_csum_fold (sum0);
576         }
577     }
578   else
579     {
580       inner_ip0 = (ip4_header_t *)(echo0+1);
581       l4_header = ip4_next_header (inner_ip0);
582
583       if (!ip4_header_checksum_is_valid (inner_ip0))
584         {
585           next0 = SNAT_OUT2IN_NEXT_DROP;
586           goto out;
587         }
588
589       old_addr0 = inner_ip0->src_address.as_u32;
590       inner_ip0->src_address = sm0.addr;
591       new_addr0 = inner_ip0->src_address.as_u32;
592
593       sum0 = icmp0->checksum;
594       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
595                              src_address /* changed member */);
596       icmp0->checksum = ip_csum_fold (sum0);
597
598       switch (protocol)
599         {
600         case SNAT_PROTOCOL_ICMP:
601           inner_icmp0 = (icmp46_header_t*)l4_header;
602           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
603
604           old_id0 = inner_echo0->identifier;
605           new_id0 = sm0.port;
606           inner_echo0->identifier = new_id0;
607
608           sum0 = icmp0->checksum;
609           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
610                                  identifier);
611           icmp0->checksum = ip_csum_fold (sum0);
612           break;
613         case SNAT_PROTOCOL_UDP:
614         case SNAT_PROTOCOL_TCP:
615           old_id0 = ((tcp_udp_header_t*)l4_header)->src_port;
616           new_id0 = sm0.port;
617           ((tcp_udp_header_t*)l4_header)->src_port = new_id0;
618
619           sum0 = icmp0->checksum;
620           sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
621                                  src_port);
622           icmp0->checksum = ip_csum_fold (sum0);
623           break;
624         default:
625           ASSERT(0);
626         }
627     }
628
629 out:
630   return next0;
631 }
632
633
634 static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
635                                          vlib_buffer_t * b0,
636                                          ip4_header_t * ip0,
637                                          icmp46_header_t * icmp0,
638                                          u32 sw_if_index0,
639                                          u32 rx_fib_index0,
640                                          vlib_node_runtime_t * node,
641                                          u32 next0, f64 now,
642                                          u32 thread_index,
643                                          snat_session_t ** p_s0)
644 {
645   next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
646                       next0, thread_index, p_s0, 0);
647   snat_session_t * s0 = *p_s0;
648   if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
649     {
650       /* Accounting */
651       nat44_session_update_counters (s0, now,
652                                      vlib_buffer_length_in_chain (sm->vlib_main, b0));
653       /* Per-user LRU list maintenance */
654       nat44_session_update_lru (sm, s0, thread_index);
655     }
656   return next0;
657 }
658
659 static int
660 nat_out2in_sm_unknown_proto (snat_main_t *sm,
661                              vlib_buffer_t * b,
662                              ip4_header_t * ip,
663                              u32 rx_fib_index)
664 {
665   clib_bihash_kv_8_8_t kv, value;
666   snat_static_mapping_t *m;
667   snat_session_key_t m_key;
668   u32 old_addr, new_addr;
669   ip_csum_t sum;
670
671   m_key.addr = ip->dst_address;
672   m_key.port = 0;
673   m_key.protocol = 0;
674   m_key.fib_index = 0;
675   kv.key = m_key.as_u64;
676   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
677     return 1;
678
679   m = pool_elt_at_index (sm->static_mappings, value.value);
680
681   old_addr = ip->dst_address.as_u32;
682   new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
683   sum = ip->checksum;
684   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
685   ip->checksum = ip_csum_fold (sum);
686
687   vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
688   return 0;
689 }
690
691 static uword
692 snat_out2in_node_fn (vlib_main_t * vm,
693                   vlib_node_runtime_t * node,
694                   vlib_frame_t * frame)
695 {
696   u32 n_left_from, * from, * to_next;
697   snat_out2in_next_t next_index;
698   u32 pkts_processed = 0;
699   snat_main_t * sm = &snat_main;
700   f64 now = vlib_time_now (vm);
701   u32 thread_index = vm->thread_index;
702
703   from = vlib_frame_vector_args (frame);
704   n_left_from = frame->n_vectors;
705   next_index = node->cached_next_index;
706
707   while (n_left_from > 0)
708     {
709       u32 n_left_to_next;
710
711       vlib_get_next_frame (vm, node, next_index,
712                            to_next, n_left_to_next);
713
714       while (n_left_from >= 4 && n_left_to_next >= 2)
715         {
716           u32 bi0, bi1;
717           vlib_buffer_t * b0, * b1;
718           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
719           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
720           u32 sw_if_index0, sw_if_index1;
721           ip4_header_t * ip0, *ip1;
722           ip_csum_t sum0, sum1;
723           u32 new_addr0, old_addr0;
724           u16 new_port0, old_port0;
725           u32 new_addr1, old_addr1;
726           u16 new_port1, old_port1;
727           udp_header_t * udp0, * udp1;
728           tcp_header_t * tcp0, * tcp1;
729           icmp46_header_t * icmp0, * icmp1;
730           snat_session_key_t key0, key1, sm0, sm1;
731           u32 rx_fib_index0, rx_fib_index1;
732           u32 proto0, proto1;
733           snat_session_t * s0 = 0, * s1 = 0;
734           clib_bihash_kv_8_8_t kv0, kv1, value0, value1;
735
736           /* Prefetch next iteration. */
737           {
738             vlib_buffer_t * p2, * p3;
739
740             p2 = vlib_get_buffer (vm, from[2]);
741             p3 = vlib_get_buffer (vm, from[3]);
742
743             vlib_prefetch_buffer_header (p2, LOAD);
744             vlib_prefetch_buffer_header (p3, LOAD);
745
746             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
747             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
748           }
749
750           /* speculatively enqueue b0 and b1 to the current next frame */
751           to_next[0] = bi0 = from[0];
752           to_next[1] = bi1 = from[1];
753           from += 2;
754           to_next += 2;
755           n_left_from -= 2;
756           n_left_to_next -= 2;
757
758           b0 = vlib_get_buffer (vm, bi0);
759           b1 = vlib_get_buffer (vm, bi1);
760
761           vnet_buffer (b0)->snat.flags = 0;
762           vnet_buffer (b1)->snat.flags = 0;
763
764           ip0 = vlib_buffer_get_current (b0);
765           udp0 = ip4_next_header (ip0);
766           tcp0 = (tcp_header_t *) udp0;
767           icmp0 = (icmp46_header_t *) udp0;
768
769           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
770           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
771                                    sw_if_index0);
772
773           if (PREDICT_FALSE(ip0->ttl == 1))
774             {
775               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
776               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
777                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
778                                            0);
779               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
780               goto trace0;
781             }
782
783           proto0 = ip_proto_to_snat_proto (ip0->protocol);
784
785           if (PREDICT_FALSE (proto0 == ~0))
786             {
787               if (nat_out2in_sm_unknown_proto(sm, b0, ip0, rx_fib_index0))
788                 {
789                   if (!sm->forwarding_enabled)
790                     {
791                       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
792                       next0 = SNAT_OUT2IN_NEXT_DROP;
793                     }
794                 }
795               goto trace0;
796             }
797
798           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
799             {
800               next0 = icmp_out2in_slow_path
801                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
802                  next0, now, thread_index, &s0);
803               goto trace0;
804             }
805
806           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
807             {
808               next0 = SNAT_OUT2IN_NEXT_REASS;
809               goto trace0;
810             }
811
812           key0.addr = ip0->dst_address;
813           key0.port = udp0->dst_port;
814           key0.protocol = proto0;
815           key0.fib_index = rx_fib_index0;
816
817           kv0.key = key0.as_u64;
818
819           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
820                                       &kv0, &value0))
821             {
822               /* Try to match static mapping by external address and port,
823                  destination address and port in packet */
824               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
825                 {
826                   /*
827                    * Send DHCP packets to the ipv4 stack, or we won't
828                    * be able to use dhcp client on the outside interface
829                    */
830                   if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
831                       && (udp0->dst_port ==
832                           clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
833                     {
834                       vnet_feature_next (&next0, b0);
835                       goto trace0;
836                     }
837
838                   if (!sm->forwarding_enabled)
839                     {
840                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
841                       next0 = SNAT_OUT2IN_NEXT_DROP;
842                     }
843                   goto trace0;
844                 }
845
846               /* Create session initiated by host from external network */
847               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
848                                                      thread_index, now);
849               if (!s0)
850                 {
851                   next0 = SNAT_OUT2IN_NEXT_DROP;
852                   goto trace0;
853                 }
854             }
855           else
856             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
857                                     value0.value);
858
859           old_addr0 = ip0->dst_address.as_u32;
860           ip0->dst_address = s0->in2out.addr;
861           new_addr0 = ip0->dst_address.as_u32;
862           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
863
864           sum0 = ip0->checksum;
865           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
866                                  ip4_header_t,
867                                  dst_address /* changed member */);
868           ip0->checksum = ip_csum_fold (sum0);
869
870           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
871             {
872               old_port0 = tcp0->dst_port;
873               tcp0->dst_port = s0->in2out.port;
874               new_port0 = tcp0->dst_port;
875
876               sum0 = tcp0->checksum;
877               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
878                                      ip4_header_t,
879                                      dst_address /* changed member */);
880
881               sum0 = ip_csum_update (sum0, old_port0, new_port0,
882                                      ip4_header_t /* cheat */,
883                                      length /* changed member */);
884               tcp0->checksum = ip_csum_fold(sum0);
885             }
886           else
887             {
888               old_port0 = udp0->dst_port;
889               udp0->dst_port = s0->in2out.port;
890               udp0->checksum = 0;
891             }
892
893           /* Accounting */
894           nat44_session_update_counters (s0, now,
895                                          vlib_buffer_length_in_chain (vm, b0));
896           /* Per-user LRU list maintenance */
897           nat44_session_update_lru (sm, s0, thread_index);
898         trace0:
899
900           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
901                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
902             {
903               snat_out2in_trace_t *t =
904                  vlib_add_trace (vm, node, b0, sizeof (*t));
905               t->sw_if_index = sw_if_index0;
906               t->next_index = next0;
907               t->session_index = ~0;
908               if (s0)
909                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
910             }
911
912           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
913
914
915           ip1 = vlib_buffer_get_current (b1);
916           udp1 = ip4_next_header (ip1);
917           tcp1 = (tcp_header_t *) udp1;
918           icmp1 = (icmp46_header_t *) udp1;
919
920           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
921           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
922                                    sw_if_index1);
923
924           if (PREDICT_FALSE(ip1->ttl == 1))
925             {
926               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
927               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
928                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
929                                            0);
930               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
931               goto trace1;
932             }
933
934           proto1 = ip_proto_to_snat_proto (ip1->protocol);
935
936           if (PREDICT_FALSE (proto1 == ~0))
937             {
938               if (nat_out2in_sm_unknown_proto(sm, b1, ip1, rx_fib_index1))
939                 {
940                   if (!sm->forwarding_enabled)
941                     {
942                       b1->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
943                       next1 = SNAT_OUT2IN_NEXT_DROP;
944                     }
945                 }
946               goto trace1;
947             }
948
949           if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
950             {
951               next1 = icmp_out2in_slow_path
952                 (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
953                  next1, now, thread_index, &s1);
954               goto trace1;
955             }
956
957           if (PREDICT_FALSE (ip4_is_fragment (ip1)))
958             {
959               next1 = SNAT_OUT2IN_NEXT_REASS;
960               goto trace1;
961             }
962
963           key1.addr = ip1->dst_address;
964           key1.port = udp1->dst_port;
965           key1.protocol = proto1;
966           key1.fib_index = rx_fib_index1;
967
968           kv1.key = key1.as_u64;
969
970           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
971                                       &kv1, &value1))
972             {
973               /* Try to match static mapping by external address and port,
974                  destination address and port in packet */
975               if (snat_static_mapping_match(sm, key1, &sm1, 1, 0, 0, 0, 0))
976                 {
977                   /*
978                    * Send DHCP packets to the ipv4 stack, or we won't
979                    * be able to use dhcp client on the outside interface
980                    */
981                   if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_UDP
982                       && (udp1->dst_port ==
983                           clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
984                     {
985                       vnet_feature_next (&next1, b1);
986                       goto trace1;
987                     }
988
989                   if (!sm->forwarding_enabled)
990                     {
991                       b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
992                       next1 = SNAT_OUT2IN_NEXT_DROP;
993                     }
994                   goto trace1;
995                 }
996
997               /* Create session initiated by host from external network */
998               s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
999                                                      thread_index, now);
1000               if (!s1)
1001                 {
1002                   next1 = SNAT_OUT2IN_NEXT_DROP;
1003                   goto trace1;
1004                 }
1005             }
1006           else
1007             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1008                                     value1.value);
1009
1010           old_addr1 = ip1->dst_address.as_u32;
1011           ip1->dst_address = s1->in2out.addr;
1012           new_addr1 = ip1->dst_address.as_u32;
1013           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index;
1014
1015           sum1 = ip1->checksum;
1016           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1017                                  ip4_header_t,
1018                                  dst_address /* changed member */);
1019           ip1->checksum = ip_csum_fold (sum1);
1020
1021           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1022             {
1023               old_port1 = tcp1->dst_port;
1024               tcp1->dst_port = s1->in2out.port;
1025               new_port1 = tcp1->dst_port;
1026
1027               sum1 = tcp1->checksum;
1028               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1029                                      ip4_header_t,
1030                                      dst_address /* changed member */);
1031
1032               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1033                                      ip4_header_t /* cheat */,
1034                                      length /* changed member */);
1035               tcp1->checksum = ip_csum_fold(sum1);
1036             }
1037           else
1038             {
1039               old_port1 = udp1->dst_port;
1040               udp1->dst_port = s1->in2out.port;
1041               udp1->checksum = 0;
1042             }
1043
1044           /* Accounting */
1045           nat44_session_update_counters (s1, now,
1046                                          vlib_buffer_length_in_chain (vm, b1));
1047           /* Per-user LRU list maintenance */
1048           nat44_session_update_lru (sm, s1, thread_index);
1049         trace1:
1050
1051           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1052                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1053             {
1054               snat_out2in_trace_t *t =
1055                  vlib_add_trace (vm, node, b1, sizeof (*t));
1056               t->sw_if_index = sw_if_index1;
1057               t->next_index = next1;
1058               t->session_index = ~0;
1059               if (s1)
1060                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1061             }
1062
1063           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
1064
1065           /* verify speculative enqueues, maybe switch current next frame */
1066           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1067                                            to_next, n_left_to_next,
1068                                            bi0, bi1, next0, next1);
1069         }
1070
1071       while (n_left_from > 0 && n_left_to_next > 0)
1072         {
1073           u32 bi0;
1074           vlib_buffer_t * b0;
1075           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1076           u32 sw_if_index0;
1077           ip4_header_t * ip0;
1078           ip_csum_t sum0;
1079           u32 new_addr0, old_addr0;
1080           u16 new_port0, old_port0;
1081           udp_header_t * udp0;
1082           tcp_header_t * tcp0;
1083           icmp46_header_t * icmp0;
1084           snat_session_key_t key0, sm0;
1085           u32 rx_fib_index0;
1086           u32 proto0;
1087           snat_session_t * s0 = 0;
1088           clib_bihash_kv_8_8_t kv0, value0;
1089
1090           /* speculatively enqueue b0 to the current next frame */
1091           bi0 = from[0];
1092           to_next[0] = bi0;
1093           from += 1;
1094           to_next += 1;
1095           n_left_from -= 1;
1096           n_left_to_next -= 1;
1097
1098           b0 = vlib_get_buffer (vm, bi0);
1099
1100           vnet_buffer (b0)->snat.flags = 0;
1101
1102           ip0 = vlib_buffer_get_current (b0);
1103           udp0 = ip4_next_header (ip0);
1104           tcp0 = (tcp_header_t *) udp0;
1105           icmp0 = (icmp46_header_t *) udp0;
1106
1107           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1108           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1109                                    sw_if_index0);
1110
1111           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1112
1113           if (PREDICT_FALSE (proto0 == ~0))
1114             {
1115               if (nat_out2in_sm_unknown_proto(sm, b0, ip0, rx_fib_index0))
1116                 {
1117                   if (!sm->forwarding_enabled)
1118                     {
1119                       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
1120                       next0 = SNAT_OUT2IN_NEXT_DROP;
1121                     }
1122                 }
1123               goto trace00;
1124             }
1125
1126           if (PREDICT_FALSE(ip0->ttl == 1))
1127             {
1128               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1129               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1130                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1131                                            0);
1132               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1133               goto trace00;
1134             }
1135
1136           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1137             {
1138               next0 = icmp_out2in_slow_path
1139                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1140                  next0, now, thread_index, &s0);
1141               goto trace00;
1142             }
1143
1144           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
1145             {
1146               next0 = SNAT_OUT2IN_NEXT_REASS;
1147               goto trace00;
1148             }
1149
1150           key0.addr = ip0->dst_address;
1151           key0.port = udp0->dst_port;
1152           key0.protocol = proto0;
1153           key0.fib_index = rx_fib_index0;
1154
1155           kv0.key = key0.as_u64;
1156
1157           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1158                                       &kv0, &value0))
1159             {
1160               /* Try to match static mapping by external address and port,
1161                  destination address and port in packet */
1162               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
1163                 {
1164                   /*
1165                    * Send DHCP packets to the ipv4 stack, or we won't
1166                    * be able to use dhcp client on the outside interface
1167                    */
1168                   if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
1169                       && (udp0->dst_port ==
1170                           clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
1171                     {
1172                       vnet_feature_next (&next0, b0);
1173                       goto trace00;
1174                     }
1175
1176                   if (!sm->forwarding_enabled)
1177                     {
1178                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1179                       next0 = SNAT_OUT2IN_NEXT_DROP;
1180                     }
1181                   goto trace00;
1182                 }
1183
1184               /* Create session initiated by host from external network */
1185               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1186                                                      thread_index, now);
1187               if (!s0)
1188                 {
1189                   next0 = SNAT_OUT2IN_NEXT_DROP;
1190                   goto trace00;
1191                 }
1192             }
1193           else
1194             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1195                                     value0.value);
1196
1197           old_addr0 = ip0->dst_address.as_u32;
1198           ip0->dst_address = s0->in2out.addr;
1199           new_addr0 = ip0->dst_address.as_u32;
1200           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1201
1202           sum0 = ip0->checksum;
1203           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1204                                  ip4_header_t,
1205                                  dst_address /* changed member */);
1206           ip0->checksum = ip_csum_fold (sum0);
1207
1208           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1209             {
1210               old_port0 = tcp0->dst_port;
1211               tcp0->dst_port = s0->in2out.port;
1212               new_port0 = tcp0->dst_port;
1213
1214               sum0 = tcp0->checksum;
1215               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1216                                      ip4_header_t,
1217                                      dst_address /* changed member */);
1218
1219               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1220                                      ip4_header_t /* cheat */,
1221                                      length /* changed member */);
1222               tcp0->checksum = ip_csum_fold(sum0);
1223             }
1224           else
1225             {
1226               old_port0 = udp0->dst_port;
1227               udp0->dst_port = s0->in2out.port;
1228               udp0->checksum = 0;
1229             }
1230
1231           /* Accounting */
1232           nat44_session_update_counters (s0, now,
1233                                          vlib_buffer_length_in_chain (vm, b0));
1234           /* Per-user LRU list maintenance */
1235           nat44_session_update_lru (sm, s0, thread_index);
1236         trace00:
1237
1238           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1239                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1240             {
1241               snat_out2in_trace_t *t =
1242                  vlib_add_trace (vm, node, b0, sizeof (*t));
1243               t->sw_if_index = sw_if_index0;
1244               t->next_index = next0;
1245               t->session_index = ~0;
1246               if (s0)
1247                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1248             }
1249
1250           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1251
1252           /* verify speculative enqueue, maybe switch current next frame */
1253           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1254                                            to_next, n_left_to_next,
1255                                            bi0, next0);
1256         }
1257
1258       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1259     }
1260
1261   vlib_node_increment_counter (vm, snat_out2in_node.index,
1262                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1263                                pkts_processed);
1264   return frame->n_vectors;
1265 }
1266
1267 VLIB_REGISTER_NODE (snat_out2in_node) = {
1268   .function = snat_out2in_node_fn,
1269   .name = "nat44-out2in",
1270   .vector_size = sizeof (u32),
1271   .format_trace = format_snat_out2in_trace,
1272   .type = VLIB_NODE_TYPE_INTERNAL,
1273
1274   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1275   .error_strings = snat_out2in_error_strings,
1276
1277   .runtime_data_bytes = sizeof (snat_runtime_t),
1278
1279   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1280
1281   /* edit / add dispositions here */
1282   .next_nodes = {
1283     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1284     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1285     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1286     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1287   },
1288 };
1289 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
1290
1291 static uword
1292 nat44_out2in_reass_node_fn (vlib_main_t * vm,
1293                             vlib_node_runtime_t * node,
1294                             vlib_frame_t * frame)
1295 {
1296   u32 n_left_from, *from, *to_next;
1297   snat_out2in_next_t next_index;
1298   u32 pkts_processed = 0;
1299   snat_main_t *sm = &snat_main;
1300   f64 now = vlib_time_now (vm);
1301   u32 thread_index = vm->thread_index;
1302   snat_main_per_thread_data_t *per_thread_data =
1303     &sm->per_thread_data[thread_index];
1304   u32 *fragments_to_drop = 0;
1305   u32 *fragments_to_loopback = 0;
1306
1307   from = vlib_frame_vector_args (frame);
1308   n_left_from = frame->n_vectors;
1309   next_index = node->cached_next_index;
1310
1311   while (n_left_from > 0)
1312     {
1313       u32 n_left_to_next;
1314
1315       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1316
1317       while (n_left_from > 0 && n_left_to_next > 0)
1318        {
1319           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
1320           vlib_buffer_t *b0;
1321           u32 next0;
1322           u8 cached0 = 0;
1323           ip4_header_t *ip0;
1324           nat_reass_ip4_t *reass0;
1325           udp_header_t * udp0;
1326           tcp_header_t * tcp0;
1327           snat_session_key_t key0, sm0;
1328           clib_bihash_kv_8_8_t kv0, value0;
1329           snat_session_t * s0 = 0;
1330           u16 old_port0, new_port0;
1331           ip_csum_t sum0;
1332
1333           /* speculatively enqueue b0 to the current next frame */
1334           bi0 = from[0];
1335           to_next[0] = bi0;
1336           from += 1;
1337           to_next += 1;
1338           n_left_from -= 1;
1339           n_left_to_next -= 1;
1340
1341           b0 = vlib_get_buffer (vm, bi0);
1342           next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1343
1344           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1345           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1346                                                                sw_if_index0);
1347
1348           if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
1349             {
1350               next0 = SNAT_OUT2IN_NEXT_DROP;
1351               b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
1352               goto trace0;
1353             }
1354
1355           ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
1356           udp0 = ip4_next_header (ip0);
1357           tcp0 = (tcp_header_t *) udp0;
1358           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1359
1360           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
1361                                                  ip0->dst_address,
1362                                                  ip0->fragment_id,
1363                                                  ip0->protocol,
1364                                                  1,
1365                                                  &fragments_to_drop);
1366
1367           if (PREDICT_FALSE (!reass0))
1368             {
1369               next0 = SNAT_OUT2IN_NEXT_DROP;
1370               b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_REASS];
1371               nat_log_notice ("maximum reassemblies exceeded");
1372               goto trace0;
1373             }
1374
1375           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1376             {
1377               key0.addr = ip0->dst_address;
1378               key0.port = udp0->dst_port;
1379               key0.protocol = proto0;
1380               key0.fib_index = rx_fib_index0;
1381               kv0.key = key0.as_u64;
1382
1383               if (clib_bihash_search_8_8 (&per_thread_data->out2in, &kv0, &value0))
1384                 {
1385                   /* Try to match static mapping by external address and port,
1386                      destination address and port in packet */
1387                   if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
1388                     {
1389                       /*
1390                        * Send DHCP packets to the ipv4 stack, or we won't
1391                        * be able to use dhcp client on the outside interface
1392                        */
1393                       if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
1394                           && (udp0->dst_port
1395                               == clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
1396                         {
1397                           vnet_feature_next (&next0, b0);
1398                           goto trace0;
1399                         }
1400
1401                       if (!sm->forwarding_enabled)
1402                         {
1403                           b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1404                           next0 = SNAT_OUT2IN_NEXT_DROP;
1405                         }
1406                       goto trace0;
1407                     }
1408
1409                   /* Create session initiated by host from external network */
1410                   s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1411                                                          thread_index, now);
1412                   if (!s0)
1413                     {
1414                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1415                       next0 = SNAT_OUT2IN_NEXT_DROP;
1416                       goto trace0;
1417                     }
1418                   reass0->sess_index = s0 - per_thread_data->sessions;
1419                   reass0->thread_index = thread_index;
1420                 }
1421               else
1422                 {
1423                   s0 = pool_elt_at_index (per_thread_data->sessions,
1424                                           value0.value);
1425                   reass0->sess_index = value0.value;
1426                 }
1427               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
1428             }
1429           else
1430             {
1431               if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
1432                 {
1433                   if (nat_ip4_reass_add_fragment (reass0, bi0))
1434                     {
1435                       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_FRAG];
1436                       nat_log_notice ("maximum fragments per reassembly exceeded");
1437                       next0 = SNAT_OUT2IN_NEXT_DROP;
1438                       goto trace0;
1439                     }
1440                   cached0 = 1;
1441                   goto trace0;
1442                 }
1443               s0 = pool_elt_at_index (per_thread_data->sessions,
1444                                       reass0->sess_index);
1445             }
1446
1447           old_addr0 = ip0->dst_address.as_u32;
1448           ip0->dst_address = s0->in2out.addr;
1449           new_addr0 = ip0->dst_address.as_u32;
1450           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1451
1452           sum0 = ip0->checksum;
1453           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1454                                  ip4_header_t,
1455                                  dst_address /* changed member */);
1456           ip0->checksum = ip_csum_fold (sum0);
1457
1458           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1459             {
1460               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1461                 {
1462                   old_port0 = tcp0->dst_port;
1463                   tcp0->dst_port = s0->in2out.port;
1464                   new_port0 = tcp0->dst_port;
1465
1466                   sum0 = tcp0->checksum;
1467                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1468                                          ip4_header_t,
1469                                          dst_address /* changed member */);
1470
1471                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
1472                                          ip4_header_t /* cheat */,
1473                                          length /* changed member */);
1474                   tcp0->checksum = ip_csum_fold(sum0);
1475                 }
1476               else
1477                 {
1478                   old_port0 = udp0->dst_port;
1479                   udp0->dst_port = s0->in2out.port;
1480                   udp0->checksum = 0;
1481                 }
1482             }
1483
1484           /* Accounting */
1485           nat44_session_update_counters (s0, now,
1486                                          vlib_buffer_length_in_chain (vm, b0));
1487           /* Per-user LRU list maintenance */
1488           nat44_session_update_lru (sm, s0, thread_index);
1489
1490         trace0:
1491           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1492                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1493             {
1494               nat44_out2in_reass_trace_t *t =
1495                  vlib_add_trace (vm, node, b0, sizeof (*t));
1496               t->cached = cached0;
1497               t->sw_if_index = sw_if_index0;
1498               t->next_index = next0;
1499             }
1500
1501           if (cached0)
1502             {
1503               n_left_to_next++;
1504               to_next--;
1505             }
1506           else
1507             {
1508               pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1509
1510               /* verify speculative enqueue, maybe switch current next frame */
1511               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1512                                                to_next, n_left_to_next,
1513                                                bi0, next0);
1514             }
1515
1516           if (n_left_from == 0 && vec_len (fragments_to_loopback))
1517             {
1518               from = vlib_frame_vector_args (frame);
1519               u32 len = vec_len (fragments_to_loopback);
1520               if (len <= VLIB_FRAME_SIZE)
1521                 {
1522                   clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
1523                   n_left_from = len;
1524                   vec_reset_length (fragments_to_loopback);
1525                 }
1526               else
1527                 {
1528                   clib_memcpy (from,
1529                                fragments_to_loopback + (len - VLIB_FRAME_SIZE),
1530                                sizeof (u32) * VLIB_FRAME_SIZE);
1531                   n_left_from = VLIB_FRAME_SIZE;
1532                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
1533                 }
1534             }
1535        }
1536
1537       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1538     }
1539
1540   vlib_node_increment_counter (vm, nat44_out2in_reass_node.index,
1541                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1542                                pkts_processed);
1543
1544   nat_send_all_to_node (vm, fragments_to_drop, node,
1545                         &node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT],
1546                         SNAT_OUT2IN_NEXT_DROP);
1547
1548   vec_free (fragments_to_drop);
1549   vec_free (fragments_to_loopback);
1550   return frame->n_vectors;
1551 }
1552
1553 VLIB_REGISTER_NODE (nat44_out2in_reass_node) = {
1554   .function = nat44_out2in_reass_node_fn,
1555   .name = "nat44-out2in-reass",
1556   .vector_size = sizeof (u32),
1557   .format_trace = format_nat44_out2in_reass_trace,
1558   .type = VLIB_NODE_TYPE_INTERNAL,
1559
1560   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1561   .error_strings = snat_out2in_error_strings,
1562
1563   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1564
1565   /* edit / add dispositions here */
1566   .next_nodes = {
1567     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1568     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1569     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1570     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1571   },
1572 };
1573 VLIB_NODE_FUNCTION_MULTIARCH (nat44_out2in_reass_node,
1574                               nat44_out2in_reass_node_fn);
1575
1576 /*******************************/
1577 /*** endpoint-dependent mode ***/
1578 /*******************************/
/*
 * Next-node indices of the endpoint-dependent out2in nodes.
 * NAT44_ED_OUT2IN_N_NEXT is the number of dispositions, not a disposition.
 */
typedef enum
{
  NAT44_ED_OUT2IN_NEXT_DROP = 0,
  NAT44_ED_OUT2IN_NEXT_LOOKUP = 1,
  NAT44_ED_OUT2IN_NEXT_ICMP_ERROR = 2,
  NAT44_ED_OUT2IN_NEXT_IN2OUT = 3,
  NAT44_ED_OUT2IN_NEXT_SLOW_PATH = 4,
  NAT44_ED_OUT2IN_N_NEXT = 5,
} nat44_ed_out2in_next_t;
1587
1588 typedef struct {
1589   u32 sw_if_index;
1590   u32 next_index;
1591   u32 session_index;
1592   u32 is_slow_path;
1593 } nat44_ed_out2in_trace_t;
1594
1595 static u8 *
1596 format_nat44_ed_out2in_trace (u8 * s, va_list * args)
1597 {
1598   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1599   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1600   nat44_ed_out2in_trace_t *t = va_arg (*args, nat44_ed_out2in_trace_t *);
1601   char * tag;
1602
1603   tag = t->is_slow_path ? "NAT44_OUT2IN_SLOW_PATH" : "NAT44_OUT2IN_FAST_PATH";
1604
1605   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
1606               t->sw_if_index, t->next_index, t->session_index);
1607
1608   return s;
1609 }
1610
1611 static inline u32
1612 icmp_out2in_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0,
1613                           ip4_header_t * ip0, icmp46_header_t * icmp0,
1614                           u32 sw_if_index0, u32 rx_fib_index0,
1615                           vlib_node_runtime_t * node, u32 next0, f64 now,
1616                           u32 thread_index, snat_session_t ** p_s0)
1617 {
1618   next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1619                       next0, thread_index, p_s0, 0);
1620   snat_session_t * s0 = *p_s0;
1621   if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
1622     {
1623       /* Accounting */
1624       nat44_session_update_counters (s0, now,
1625                                      vlib_buffer_length_in_chain (sm->vlib_main, b0));
1626     }
1627   return next0;
1628 }
1629
1630 int
1631 nat44_o2i_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void * arg)
1632 {
1633   snat_main_t *sm = &snat_main;
1634   nat44_is_idle_session_ctx_t *ctx = arg;
1635   snat_session_t *s;
1636   u64 sess_timeout_time;
1637   nat_ed_ses_key_t ed_key;
1638   clib_bihash_kv_16_8_t ed_kv;
1639   int i;
1640   snat_address_t *a;
1641   snat_session_key_t key;
1642   snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
1643                                                        ctx->thread_index);
1644
1645   s = pool_elt_at_index (tsm->sessions, kv->value);
1646   sess_timeout_time = s->last_heard + (f64)nat44_session_get_timeout(sm, s);
1647   if (ctx->now >= sess_timeout_time)
1648     {
1649       ed_key.l_addr = s->in2out.addr;
1650       ed_key.r_addr = s->ext_host_addr;
1651       ed_key.fib_index = s->out2in.fib_index;
1652       if (snat_is_unk_proto_session (s))
1653         {
1654           ed_key.proto = s->in2out.port;
1655           ed_key.r_port = 0;
1656           ed_key.l_port = 0;
1657         }
1658       else
1659         {
1660           ed_key.proto = snat_proto_to_ip_proto (s->in2out.protocol);
1661           ed_key.l_port = s->in2out.port;
1662           ed_key.r_port = s->ext_host_port;
1663         }
1664       if (is_twice_nat_session (s))
1665         {
1666           ed_key.r_addr = s->ext_host_nat_addr;
1667           ed_key.r_port = s->ext_host_nat_port;
1668         }
1669       ed_kv.key[0] = ed_key.as_u64[0];
1670       ed_kv.key[1] = ed_key.as_u64[1];
1671       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
1672         nat_log_warn ("in2out_ed key del failed");
1673
1674       if (snat_is_unk_proto_session (s))
1675         goto delete;
1676
1677       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
1678                                           s->out2in.addr.as_u32,
1679                                           s->in2out.protocol,
1680                                           s->in2out.port,
1681                                           s->out2in.port,
1682                                           s->in2out.fib_index);
1683
1684       if (is_twice_nat_session (s))
1685         {
1686           for (i = 0; i < vec_len (sm->twice_nat_addresses); i++)
1687             {
1688               key.protocol = s->in2out.protocol;
1689               key.port = s->ext_host_nat_port;
1690               a = sm->twice_nat_addresses + i;
1691               if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32)
1692                 {
1693                   snat_free_outside_address_and_port (sm->twice_nat_addresses,
1694                                                       ctx->thread_index, &key);
1695                   break;
1696                 }
1697             }
1698         }
1699
1700       if (snat_is_session_static (s))
1701         goto delete;
1702
1703       if (s->outside_address_index != ~0)
1704         snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
1705                                             &s->out2in);
1706     delete:
1707       nat44_delete_session (sm, s, ctx->thread_index);
1708       return 1;
1709     }
1710
1711   return 0;
1712 }
1713
/**
 * Create an endpoint-dependent session for a packet that arrived from the
 * outside and matched a static mapping.
 *
 * @param sm            NAT main structure.
 * @param b             packet buffer; its current data must be the IPv4
 *                      header.  b->error is set on the max-sessions and
 *                      out-of-ports failures.
 * @param l_key         local (inside) address/port/fib of the mapping.
 * @param e_key         external (outside) address/port/protocol/fib.
 * @param node          node runtime, used only to look up error counters.
 * @param thread_index  thread whose session pool and hashes are used.
 * @param twice_nat     twice-NAT mode of the mapping.
 * @param lb_nat        load-balancing mode of the mapping.
 * @param now           current time, handed to the stale-entry callbacks.
 * @return the new session, or 0 when the session limit is hit, the user or
 *         session cannot be allocated, or no twice-NAT port is available.
 */
static snat_session_t *
create_session_for_static_mapping_ed (snat_main_t * sm,
                                      vlib_buffer_t *b,
                                      snat_session_key_t l_key,
                                      snat_session_key_t e_key,
                                      vlib_node_runtime_t * node,
                                      u32 thread_index,
                                      twice_nat_type_t twice_nat,
                                      lb_nat_type_t lb_nat,
                                      f64 now)
{
  snat_session_t *s;
  snat_user_t *u;
  ip4_header_t *ip;
  udp_header_t *udp;
  snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
  clib_bihash_kv_16_8_t kv;
  snat_session_key_t eh_key;
  u32 address_index;
  nat44_is_idle_session_ctx_t ctx;

  if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
    {
      b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
      nat_log_notice ("maximum sessions exceeded");
      return 0;
    }

  /* The NAT user is identified by the local address and its FIB. */
  u = nat_user_get_or_create (sm, &l_key.addr, l_key.fib_index, thread_index);
  if (!u)
    {
      nat_log_warn ("create NAT user failed");
      return 0;
    }

  s = nat_ed_session_alloc (sm, u, thread_index);
  if (!s)
    {
      nat44_delete_user_with_no_session (sm, u, thread_index);
      nat_log_warn ("create NAT session failed");
      return 0;
    }

  ip = vlib_buffer_get_current (b);
  udp = ip4_next_header (ip);

  /* Record the external host; ICMP sessions carry no external port. */
  s->ext_host_addr.as_u32 = ip->src_address.as_u32;
  s->ext_host_port = e_key.protocol == SNAT_PROTOCOL_ICMP ? 0 : udp->src_port;
  s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
  if (lb_nat)
    s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
  if (lb_nat == AFFINITY_LB_NAT)
    s->flags |= SNAT_SESSION_FLAG_AFFINITY;
  s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
  /* ~0: no address index from the dynamic pool is associated. */
  s->outside_address_index = ~0;
  s->out2in = e_key;
  s->in2out = l_key;
  /* The protocol is taken from the external key for both directions. */
  s->in2out.protocol = s->out2in.protocol;
  user_session_increment (sm, u, 1);

  /* Add to lookup tables */
  /* out2in_ed key: external address/port vs. external host, outside FIB. */
  make_ed_kv (&kv, &e_key.addr, &s->ext_host_addr, ip->protocol,
              e_key.fib_index, e_key.port, s->ext_host_port);
  kv.value = s - tsm->sessions;
  ctx.now = now;
  ctx.thread_index = thread_index;
  /* A failed insert is only logged; the function carries on. */
  if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->out2in_ed, &kv,
                                               nat44_o2i_ed_is_idle_session_cb,
                                               &ctx))
    nat_log_notice ("out2in-ed key add failed");

  /* Twice-NAT applies unconditionally for TWICE_NAT, and for TWICE_NAT_SELF
     only when the packet's source is the mapping's own local address. */
  if (twice_nat == TWICE_NAT || (twice_nat == TWICE_NAT_SELF &&
      ip->src_address.as_u32 == l_key.addr.as_u32))
    {
      eh_key.protocol = e_key.protocol;
      /* A non-zero return is treated as failure; on success eh_key.addr and
         eh_key.port are read back below. */
      if (snat_alloc_outside_address_and_port (sm->twice_nat_addresses, 0,
                                               thread_index, &eh_key,
                                               &address_index,
                                               sm->port_per_thread,
                                               tsm->snat_thread_index))
        {
          /* Roll back: drop the session and the out2in_ed entry (kv still
             holds the out2in key at this point). */
          b->error = node->errors[SNAT_OUT2IN_ERROR_OUT_OF_PORTS];
          nat44_delete_session (sm, s, thread_index);
          if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &kv, 0))
            nat_log_notice ("out2in-ed key del failed");
          return 0;
        }
      s->ext_host_nat_addr.as_u32 = eh_key.addr.as_u32;
      s->ext_host_nat_port = eh_key.port;
      s->flags |= SNAT_SESSION_FLAG_TWICE_NAT;
      /* in2out_ed key uses the translated external endpoint. */
      make_ed_kv (&kv, &l_key.addr, &s->ext_host_nat_addr, ip->protocol,
                  l_key.fib_index, l_key.port, s->ext_host_nat_port);
    }
  else
    {
      /* in2out_ed key uses the real external endpoint. */
      make_ed_kv (&kv, &l_key.addr, &s->ext_host_addr, ip->protocol,
                  l_key.fib_index, l_key.port, s->ext_host_port);
    }
  kv.value = s - tsm->sessions;
  if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, &kv,
                                               nat44_i2o_ed_is_idle_session_cb,
                                               &ctx))
    nat_log_notice ("in2out-ed key add failed");

  return s;
}
1820
1821 static_always_inline int
1822 icmp_get_ed_key(ip4_header_t *ip0, nat_ed_ses_key_t *p_key0)
1823 {
1824   icmp46_header_t *icmp0;
1825   nat_ed_ses_key_t key0;
1826   icmp_echo_header_t *echo0, *inner_echo0 = 0;
1827   ip4_header_t *inner_ip0;
1828   void *l4_header = 0;
1829   icmp46_header_t *inner_icmp0;
1830
1831   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
1832   echo0 = (icmp_echo_header_t *)(icmp0+1);
1833
1834   if (!icmp_is_error_message (icmp0))
1835     {
1836       key0.proto = IP_PROTOCOL_ICMP;
1837       key0.l_addr = ip0->dst_address;
1838       key0.r_addr = ip0->src_address;
1839       key0.l_port = echo0->identifier;
1840       key0.r_port = 0;
1841     }
1842   else
1843     {
1844       inner_ip0 = (ip4_header_t *)(echo0+1);
1845       l4_header = ip4_next_header (inner_ip0);
1846       key0.proto = inner_ip0->protocol;
1847       key0.l_addr = inner_ip0->src_address;
1848       key0.r_addr = inner_ip0->dst_address;
1849       switch (ip_proto_to_snat_proto (inner_ip0->protocol))
1850         {
1851         case SNAT_PROTOCOL_ICMP:
1852           inner_icmp0 = (icmp46_header_t*)l4_header;
1853           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
1854           key0.l_port = inner_echo0->identifier;
1855           key0.r_port = 0;
1856           break;
1857         case SNAT_PROTOCOL_UDP:
1858         case SNAT_PROTOCOL_TCP:
1859           key0.l_port = ((tcp_udp_header_t*)l4_header)->src_port;
1860           key0.r_port = ((tcp_udp_header_t*)l4_header)->dst_port;
1861           break;
1862         default:
1863           return -1;
1864         }
1865     }
1866   *p_key0 = key0;
1867   return 0;
1868 }
1869
1870 static int
1871 next_src_nat (snat_main_t * sm, ip4_header_t * ip, u8 proto, u16 src_port,
1872               u16 dst_port, u32 thread_index, u32 rx_fib_index)
1873 {
1874   clib_bihash_kv_16_8_t kv, value;
1875   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1876
1877   make_ed_kv (&kv, &ip->src_address, &ip->dst_address, proto,
1878               rx_fib_index, src_port, dst_port);
1879   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
1880     return 1;
1881
1882   return 0;
1883 }
1884
1885 static void
1886 create_bypass_for_fwd(snat_main_t * sm, ip4_header_t * ip, u32 rx_fib_index,
1887                       u32 thread_index)
1888 {
1889   nat_ed_ses_key_t key;
1890   clib_bihash_kv_16_8_t kv, value;
1891   udp_header_t *udp;
1892   snat_user_t *u;
1893   snat_session_t *s = 0;
1894   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1895   f64 now = vlib_time_now (sm->vlib_main);
1896
1897   if (ip->protocol == IP_PROTOCOL_ICMP)
1898     {
1899       if (icmp_get_ed_key (ip, &key))
1900         return;
1901     }
1902   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
1903     {
1904       udp = ip4_next_header(ip);
1905       key.r_addr = ip->src_address;
1906       key.l_addr = ip->dst_address;
1907       key.proto = ip->protocol;
1908       key.l_port = udp->dst_port;
1909       key.r_port = udp->src_port;
1910     }
1911   else
1912     {
1913       key.r_addr = ip->src_address;
1914       key.l_addr = ip->dst_address;
1915       key.proto = ip->protocol;
1916       key.l_port = key.r_port = 0;
1917     }
1918   key.fib_index = 0;
1919   kv.key[0] = key.as_u64[0];
1920   kv.key[1] = key.as_u64[1];
1921
1922   if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
1923     {
1924       s = pool_elt_at_index (tsm->sessions, value.value);
1925     }
1926   else
1927     {
1928       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
1929         return;
1930
1931       u = nat_user_get_or_create (sm, &ip->dst_address, sm->inside_fib_index,
1932                                   thread_index);
1933       if (!u)
1934         {
1935           nat_log_warn ("create NAT user failed");
1936           return;
1937         }
1938
1939       s = nat_ed_session_alloc (sm, u, thread_index);
1940       if (!s)
1941         {
1942           nat44_delete_user_with_no_session (sm, u, thread_index);
1943           nat_log_warn ("create NAT session failed");
1944           return;
1945         }
1946
1947       s->ext_host_addr = key.r_addr;
1948       s->ext_host_port = key.r_port;
1949       s->flags |= SNAT_SESSION_FLAG_FWD_BYPASS;
1950       s->outside_address_index = ~0;
1951       s->out2in.addr = key.l_addr;
1952       s->out2in.port = key.l_port;
1953       s->out2in.protocol = ip_proto_to_snat_proto (key.proto);
1954       s->out2in.fib_index = 0;
1955       s->in2out = s->out2in;
1956       user_session_increment (sm, u, 0);
1957
1958       kv.value = s - tsm->sessions;
1959       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &kv, 1))
1960         nat_log_notice ("in2out_ed key add failed");
1961     }
1962
1963   if (ip->protocol == IP_PROTOCOL_TCP)
1964     {
1965       tcp_header_t *tcp = ip4_next_header(ip);
1966       if (nat44_set_tcp_session_state_o2i (sm, s, tcp, thread_index))
1967         return;
1968     }
1969
1970   /* Accounting */
1971   nat44_session_update_counters (s, now, 0);
1972 }
1973
/**
 * Endpoint-dependent out2in session match for ICMP packets.
 *
 * Looks the packet up in the thread's out2in_ed table.  On a miss it tries
 * the static mappings and, when one matches and the ICMP type is acceptable,
 * creates a new session; when none matches the packet is dropped, left
 * untranslated, or sent to in2out / given a forwarding-bypass session,
 * depending on sm->forwarding_enabled.
 *
 * @param sm               NAT main structure.
 * @param node             node runtime, used for error counters and the
 *                         interface-address check.
 * @param thread_index     thread whose session tables are used.
 * @param b                packet buffer; b->error is set on drop paths.
 * @param ip               IPv4 header of the packet.
 * @param p_proto          out: NAT protocol of the key.  Written only when
 *                         the function does not leave through a goto, i.e.
 *                         after a session was found or created.
 * @param p_value          out: in2out key of the session; written only when
 *                         a session was found or created.
 * @param p_dont_translate out: 1 when the packet must not be translated;
 *                         always written.
 * @param d                when non-null, treated as snat_session_t ** and
 *                         receives the session pointer (possibly 0).
 * @param e                not used by this function.
 * @return a next index override, or ~0 when no override was chosen.
 *
 * NOTE(review): drop paths mix SNAT_OUT2IN_NEXT_DROP and
 * NAT44_ED_OUT2IN_NEXT_DROP; presumably both have the same value - confirm.
 */
u32
icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node,
                      u32 thread_index, vlib_buffer_t * b, ip4_header_t * ip,
                      u8 * p_proto, snat_session_key_t * p_value,
                      u8 * p_dont_translate, void * d, void * e)
{
  u32 next = ~0, sw_if_index, rx_fib_index;
  icmp46_header_t *icmp;
  nat_ed_ses_key_t key;
  clib_bihash_kv_16_8_t kv, value;
  snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
  snat_session_t *s = 0;
  u8 dont_translate = 0, is_addr_only;
  snat_session_key_t e_key, l_key;

  icmp = (icmp46_header_t *) ip4_next_header (ip);
  sw_if_index = vnet_buffer(b)->sw_if_index[VLIB_RX];
  rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);

  /* Fails for ICMP errors that embed a protocol other than ICMP/UDP/TCP. */
  if (icmp_get_ed_key (ip, &key))
    {
      b->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
      next = SNAT_OUT2IN_NEXT_DROP;
      goto out;
    }
  /* icmp_get_ed_key() does not fill in the FIB index. */
  key.fib_index = rx_fib_index;
  kv.key[0] = key.as_u64[0];
  kv.key[1] = key.as_u64[1];

  /* Non-zero search result: no existing out2in session for this key. */
  if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
    {
      /* Try to match static mapping */
      e_key.addr = ip->dst_address;
      e_key.port = key.l_port;
      e_key.protocol = ip_proto_to_snat_proto (key.proto);
      e_key.fib_index = rx_fib_index;
      /* A non-zero return is handled here as "no static mapping". */
      if (snat_static_mapping_match(sm, e_key, &l_key, 1, &is_addr_only, 0, 0, 0))
        {
          if (!sm->forwarding_enabled)
            {
              /* Don't NAT packet aimed at the intfc address */
              if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index,
                                                  ip->dst_address.as_u32)))
                {
                  dont_translate = 1;
                  goto out;
                }
              b->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
              next = NAT44_ED_OUT2IN_NEXT_DROP;
              goto out;
            }
          else
            {
              /* Forwarding: pass the packet untranslated, either via the
                 in2out node (an in2out_ed entry exists for it) or with a
                 bypass session. */
              dont_translate = 1;
              if (next_src_nat(sm, ip, key.proto, key.l_port, key.r_port,
                               thread_index, rx_fib_index))
                {
                  next = NAT44_ED_OUT2IN_NEXT_IN2OUT;
                  goto out;
                }
              create_bypass_for_fwd(sm, ip, rx_fib_index, thread_index);
              goto out;
            }
        }

      /* Only echo replies, or echo requests to an address-only mapping,
         may create a session from the outside. */
      if (PREDICT_FALSE(icmp->type != ICMP4_echo_reply &&
                        (icmp->type != ICMP4_echo_request || !is_addr_only)))
        {
          b->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
          next = NAT44_ED_OUT2IN_NEXT_DROP;
          goto out;
        }

      /* Create session initiated by host from external network */
      s = create_session_for_static_mapping_ed(sm, b, l_key, e_key, node,
                                               thread_index, 0, 0,
                                               vlib_time_now (sm->vlib_main));

      if (!s)
        {
          next = NAT44_ED_OUT2IN_NEXT_DROP;
          goto out;
        }
    }
  else
    {
      /* Existing session: accept echo request/reply and ICMP errors only. */
      if (PREDICT_FALSE(icmp->type != ICMP4_echo_reply &&
                        icmp->type != ICMP4_echo_request &&
                        !icmp_is_error_message (icmp)))
        {
          b->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
          next = SNAT_OUT2IN_NEXT_DROP;
          goto out;
        }

      s = pool_elt_at_index (tsm->sessions, value.value);
    }

  *p_proto = ip_proto_to_snat_proto (key.proto);
out:
  if (s)
    *p_value = s->in2out;
  *p_dont_translate = dont_translate;
  if (d)
    *(snat_session_t**)d = s;
  return next;
}
2081
2082 static snat_session_t *
2083 nat44_ed_out2in_unknown_proto (snat_main_t *sm,
2084                                vlib_buffer_t * b,
2085                                ip4_header_t * ip,
2086                                u32 rx_fib_index,
2087                                u32 thread_index,
2088                                f64 now,
2089                                vlib_main_t * vm,
2090                                vlib_node_runtime_t * node)
2091 {
2092   clib_bihash_kv_8_8_t kv, value;
2093   clib_bihash_kv_16_8_t s_kv, s_value;
2094   snat_static_mapping_t *m;
2095   u32 old_addr, new_addr;
2096   ip_csum_t sum;
2097   snat_session_t * s;
2098   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2099   snat_user_t *u;
2100
2101   old_addr = ip->dst_address.as_u32;
2102
2103   make_ed_kv (&s_kv, &ip->dst_address, &ip->src_address, ip->protocol,
2104               rx_fib_index, 0, 0);
2105
2106   if (!clib_bihash_search_16_8 (&tsm->out2in_ed, &s_kv, &s_value))
2107     {
2108       s = pool_elt_at_index (tsm->sessions, s_value.value);
2109       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
2110     }
2111   else
2112     {
2113       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
2114         {
2115           b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
2116           nat_log_notice ("maximum sessions exceeded");
2117           return 0;
2118         }
2119
2120       make_sm_kv (&kv, &ip->dst_address, 0, 0, 0);
2121       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
2122         {
2123           b->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2124           return 0;
2125         }
2126
2127       m = pool_elt_at_index (sm->static_mappings, value.value);
2128
2129       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
2130
2131       u = nat_user_get_or_create (sm, &m->local_addr, m->fib_index,
2132                                   thread_index);
2133       if (!u)
2134         {
2135           nat_log_warn ("create NAT user failed");
2136           return 0;
2137         }
2138
2139       /* Create a new session */
2140       s = nat_ed_session_alloc (sm, u, thread_index);
2141       if (!s)
2142         {
2143           nat44_delete_user_with_no_session (sm, u, thread_index);
2144           nat_log_warn ("create NAT session failed");
2145           return 0;
2146         }
2147
2148       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
2149       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
2150       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
2151       s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
2152       s->outside_address_index = ~0;
2153       s->out2in.addr.as_u32 = old_addr;
2154       s->out2in.fib_index = rx_fib_index;
2155       s->in2out.addr.as_u32 = new_addr;
2156       s->in2out.fib_index = m->fib_index;
2157       s->in2out.port = s->out2in.port = ip->protocol;
2158       user_session_increment (sm, u, 1);
2159
2160       /* Add to lookup tables */
2161       s_kv.value = s - tsm->sessions;
2162       if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &s_kv, 1))
2163         nat_log_notice ("out2in key add failed");
2164
2165       make_ed_kv (&s_kv, &ip->dst_address, &ip->src_address, ip->protocol,
2166                   m->fib_index, 0, 0);
2167       s_kv.value = s - tsm->sessions;
2168       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1))
2169         nat_log_notice ("in2out key add failed");
2170    }
2171
2172   /* Update IP checksum */
2173   sum = ip->checksum;
2174   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
2175   ip->checksum = ip_csum_fold (sum);
2176
2177   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
2178
2179   /* Accounting */
2180   nat44_session_update_counters (s, now,
2181                                  vlib_buffer_length_in_chain (vm, b));
2182
2183   return s;
2184 }
2185
2186 static inline uword
2187 nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
2188                                 vlib_node_runtime_t * node,
2189                                 vlib_frame_t * frame, int is_slow_path)
2190 {
2191   u32 n_left_from, *from, *to_next, pkts_processed = 0, stats_node_index;
2192   nat44_ed_out2in_next_t next_index;
2193   snat_main_t *sm = &snat_main;
2194   f64 now = vlib_time_now (vm);
2195   u32 thread_index = vm->thread_index;
2196   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
2197
2198   stats_node_index = is_slow_path ? nat44_ed_out2in_slowpath_node.index :
2199     nat44_ed_out2in_node.index;
2200
2201   from = vlib_frame_vector_args (frame);
2202   n_left_from = frame->n_vectors;
2203   next_index = node->cached_next_index;
2204
2205   while (n_left_from > 0)
2206     {
2207       u32 n_left_to_next;
2208
2209       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2210
2211       while (n_left_from >= 4 && n_left_to_next >= 2)
2212         {
2213           u32 bi0, bi1;
2214           vlib_buffer_t *b0, *b1;
2215           u32 next0, sw_if_index0, rx_fib_index0, proto0, old_addr0, new_addr0;
2216           u32 next1, sw_if_index1, rx_fib_index1, proto1, old_addr1, new_addr1;
2217           u16 old_port0, new_port0, old_port1, new_port1;
2218           ip4_header_t *ip0, *ip1;
2219           udp_header_t *udp0, *udp1;
2220           tcp_header_t *tcp0, *tcp1;
2221           icmp46_header_t *icmp0, *icmp1;
2222           snat_session_t *s0 = 0, *s1 = 0;
2223           clib_bihash_kv_16_8_t kv0, value0, kv1, value1;
2224           ip_csum_t sum0, sum1;
2225           snat_session_key_t e_key0, l_key0, e_key1, l_key1;
2226           lb_nat_type_t lb_nat0, lb_nat1;
2227           twice_nat_type_t twice_nat0, twice_nat1;
2228
2229           /* Prefetch next iteration. */
2230           {
2231             vlib_buffer_t * p2, * p3;
2232
2233             p2 = vlib_get_buffer (vm, from[2]);
2234             p3 = vlib_get_buffer (vm, from[3]);
2235
2236             vlib_prefetch_buffer_header (p2, LOAD);
2237             vlib_prefetch_buffer_header (p3, LOAD);
2238
2239             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2240             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2241           }
2242
2243           /* speculatively enqueue b0 and b1 to the current next frame */
2244           to_next[0] = bi0 = from[0];
2245           to_next[1] = bi1 = from[1];
2246           from += 2;
2247           to_next += 2;
2248           n_left_from -= 2;
2249           n_left_to_next -= 2;
2250
2251           b0 = vlib_get_buffer (vm, bi0);
2252           b1 = vlib_get_buffer (vm, bi1);
2253
2254           next0 = NAT44_ED_OUT2IN_NEXT_LOOKUP;
2255           vnet_buffer (b0)->snat.flags = 0;
2256           ip0 = vlib_buffer_get_current (b0);
2257
2258           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2259           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2260                                                                sw_if_index0);
2261
2262           if (PREDICT_FALSE(ip0->ttl == 1))
2263             {
2264               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2265               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2266                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2267                                            0);
2268               next0 = NAT44_ED_OUT2IN_NEXT_ICMP_ERROR;
2269               goto trace00;
2270             }
2271
2272           udp0 = ip4_next_header (ip0);
2273           tcp0 = (tcp_header_t *) udp0;
2274           icmp0 = (icmp46_header_t *) udp0;
2275           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2276
2277           if (is_slow_path)
2278             {
2279               if (PREDICT_FALSE (proto0 == ~0))
2280                 {
2281                   s0 = nat44_ed_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
2282                                                      thread_index, now, vm, node);
2283                   if (!sm->forwarding_enabled)
2284                     {
2285                       if (!s0)
2286                         next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2287                       goto trace00;
2288                     }
2289                 }
2290
2291               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2292                 {
2293                   next0 = icmp_out2in_ed_slow_path
2294                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
2295                      next0, now, thread_index, &s0);
2296                   goto trace00;
2297                 }
2298             }
2299           else
2300             {
2301               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
2302                 {
2303                   next0 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2304                   goto trace00;
2305                 }
2306
2307               if (ip4_is_fragment (ip0))
2308                 {
2309                   b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
2310                   next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2311                   goto trace00;
2312                 }
2313             }
2314
2315           make_ed_kv (&kv0, &ip0->dst_address, &ip0->src_address, ip0->protocol,
2316                       rx_fib_index0, udp0->dst_port, udp0->src_port);
2317
2318           if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv0, &value0))
2319             {
2320               if (is_slow_path)
2321                 {
2322                   /* Try to match static mapping by external address and port,
2323                      destination address and port in packet */
2324                   e_key0.addr = ip0->dst_address;
2325                   e_key0.port = udp0->dst_port;
2326                   e_key0.protocol = proto0;
2327                   e_key0.fib_index = rx_fib_index0;
2328                   if (snat_static_mapping_match(sm, e_key0, &l_key0, 1, 0,
2329                       &twice_nat0, &lb_nat0, &ip0->src_address))
2330                     {
2331                       /*
2332                        * Send DHCP packets to the ipv4 stack, or we won't
2333                        * be able to use dhcp client on the outside interface
2334                        */
2335                       if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
2336                           && (udp0->dst_port ==
2337                           clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
2338                         {
2339                           vnet_feature_next (&next0, b0);
2340                           goto trace00;
2341                         }
2342
2343                       if (!sm->forwarding_enabled)
2344                         {
2345                           b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2346                           next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2347                         }
2348                       else
2349                         {
2350                           if (next_src_nat(sm, ip0, ip0->protocol,
2351                                            udp0->src_port, udp0->dst_port,
2352                                            thread_index, rx_fib_index0))
2353                             {
2354                               next0 = NAT44_ED_OUT2IN_NEXT_IN2OUT;
2355                               goto trace00;
2356                             }
2357                           create_bypass_for_fwd(sm, ip0, rx_fib_index0,
2358                                                 thread_index);
2359                         }
2360                       goto trace00;
2361                     }
2362
2363                   /* Create session initiated by host from external network */
2364                   s0 = create_session_for_static_mapping_ed(sm, b0, l_key0,
2365                                                             e_key0, node,
2366                                                             thread_index,
2367                                                             twice_nat0,
2368                                                             lb_nat0,
2369                                                             now);
2370
2371                   if (!s0)
2372                     {
2373                       next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2374                       goto trace00;
2375                     }
2376                 }
2377               else
2378                 {
2379                   next0 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2380                   goto trace00;
2381                 }
2382             }
2383           else
2384             {
2385               s0 = pool_elt_at_index (tsm->sessions, value0.value);
2386             }
2387
2388           old_addr0 = ip0->dst_address.as_u32;
2389           new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32;
2390           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
2391
2392           sum0 = ip0->checksum;
2393           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
2394                                  dst_address);
2395           if (PREDICT_FALSE (is_twice_nat_session (s0)))
2396             sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
2397                                    s0->ext_host_nat_addr.as_u32, ip4_header_t,
2398                                    src_address);
2399           ip0->checksum = ip_csum_fold (sum0);
2400
2401           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
2402             {
2403               old_port0 = tcp0->dst_port;
2404               new_port0 = tcp0->dst_port = s0->in2out.port;
2405
2406               sum0 = tcp0->checksum;
2407               sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
2408                                      dst_address);
2409               sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
2410                                      length);
2411               if (is_twice_nat_session (s0))
2412                 {
2413                   sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
2414                                          s0->ext_host_nat_addr.as_u32,
2415                                          ip4_header_t, dst_address);
2416                   sum0 = ip_csum_update (sum0, tcp0->src_port,
2417                                          s0->ext_host_nat_port, ip4_header_t,
2418                                          length);
2419                   tcp0->src_port = s0->ext_host_nat_port;
2420                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
2421                 }
2422               tcp0->checksum = ip_csum_fold(sum0);
2423               if (nat44_set_tcp_session_state_o2i (sm, s0, tcp0, thread_index))
2424                 goto trace00;
2425             }
2426           else
2427             {
2428               udp0->dst_port = s0->in2out.port;
2429               if (is_twice_nat_session (s0))
2430                 {
2431                   udp0->src_port = s0->ext_host_nat_port;
2432                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
2433                 }
2434               udp0->checksum = 0;
2435             }
2436
2437           /* Accounting */
2438           nat44_session_update_counters (s0, now,
2439                                          vlib_buffer_length_in_chain (vm, b0));
2440
2441         trace00:
2442           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2443                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2444             {
2445               nat44_ed_out2in_trace_t *t =
2446                 vlib_add_trace (vm, node, b0, sizeof (*t));
2447               t->is_slow_path = is_slow_path;
2448               t->sw_if_index = sw_if_index0;
2449               t->next_index = next0;
2450               t->session_index = ~0;
2451               if (s0)
2452                 t->session_index = s0 - tsm->sessions;
2453             }
2454
2455           pkts_processed += next0 != NAT44_ED_OUT2IN_NEXT_DROP;
2456
2457           next1 = NAT44_ED_OUT2IN_NEXT_LOOKUP;
2458           vnet_buffer (b1)->snat.flags = 0;
2459           ip1 = vlib_buffer_get_current (b1);
2460
2461           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2462           rx_fib_index1 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2463                                                                sw_if_index1);
2464
2465           if (PREDICT_FALSE(ip1->ttl == 1))
2466             {
2467               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2468               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2469                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2470                                            0);
2471               next1 = NAT44_ED_OUT2IN_NEXT_ICMP_ERROR;
2472               goto trace01;
2473             }
2474
2475           udp1 = ip4_next_header (ip1);
2476           tcp1 = (tcp_header_t *) udp1;
2477           icmp1 = (icmp46_header_t *) udp1;
2478           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2479
2480           if (is_slow_path)
2481             {
2482               if (PREDICT_FALSE (proto1 == ~0))
2483                 {
2484                   s1 = nat44_ed_out2in_unknown_proto(sm, b1, ip1, rx_fib_index1,
2485                                                      thread_index, now, vm, node);
2486                   if (!sm->forwarding_enabled)
2487                     {
2488                       if (!s1)
2489                         next1 = NAT44_ED_OUT2IN_NEXT_DROP;
2490                       goto trace01;
2491                     }
2492                 }
2493
2494               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
2495                 {
2496                   next1 = icmp_out2in_ed_slow_path
2497                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
2498                      next1, now, thread_index, &s1);
2499                   goto trace01;
2500                 }
2501             }
2502           else
2503             {
2504               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
2505                 {
2506                   next1 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2507                   goto trace01;
2508                 }
2509
2510               if (ip4_is_fragment (ip1))
2511                 {
2512                   b1->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
2513                   next1 = NAT44_ED_OUT2IN_NEXT_DROP;
2514                   goto trace01;
2515                 }
2516             }
2517
2518           make_ed_kv (&kv1, &ip1->dst_address, &ip1->src_address, ip1->protocol,
2519                       rx_fib_index1, udp1->dst_port, udp1->src_port);
2520
2521           if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv1, &value1))
2522             {
2523               if (is_slow_path)
2524                 {
2525                   /* Try to match static mapping by external address and port,
2526                      destination address and port in packet */
2527                   e_key1.addr = ip1->dst_address;
2528                   e_key1.port = udp1->dst_port;
2529                   e_key1.protocol = proto1;
2530                   e_key1.fib_index = rx_fib_index1;
2531                   if (snat_static_mapping_match(sm, e_key1, &l_key1, 1, 0,
2532                       &twice_nat1, &lb_nat1, &ip1->src_address))
2533                     {
2534                       /*
2535                        * Send DHCP packets to the ipv4 stack, or we won't
2536                        * be able to use dhcp client on the outside interface
2537                        */
2538                       if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_UDP
2539                           && (udp1->dst_port ==
2540                           clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
2541                         {
2542                           vnet_feature_next (&next1, b1);
2543                           goto trace01;
2544                         }
2545
2546                       if (!sm->forwarding_enabled)
2547                         {
2548                           b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2549                           next1 = NAT44_ED_OUT2IN_NEXT_DROP;
2550                         }
2551                       else
2552                         {
2553                           if (next_src_nat(sm, ip1, ip1->protocol,
2554                                            udp1->src_port, udp1->dst_port,
2555                                            thread_index, rx_fib_index1))
2556                             {
2557                               next1 = NAT44_ED_OUT2IN_NEXT_IN2OUT;
2558                               goto trace01;
2559                             }
2560                           create_bypass_for_fwd(sm, ip1, rx_fib_index1,
2561                                                 thread_index);
2562                         }
2563                       goto trace01;
2564                     }
2565
2566                   /* Create session initiated by host from external network */
2567                   s1 = create_session_for_static_mapping_ed(sm, b1, l_key1,
2568                                                             e_key1, node,
2569                                                             thread_index,
2570                                                             twice_nat1,
2571                                                             lb_nat1,
2572                                                             now);
2573
2574                   if (!s1)
2575                     {
2576                       next1 = NAT44_ED_OUT2IN_NEXT_DROP;
2577                       goto trace01;
2578                     }
2579                 }
2580               else
2581                 {
2582                   next1 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2583                   goto trace01;
2584                 }
2585             }
2586           else
2587             {
2588               s1 = pool_elt_at_index (tsm->sessions, value1.value);
2589             }
2590
2591           old_addr1 = ip1->dst_address.as_u32;
2592           new_addr1 = ip1->dst_address.as_u32 = s1->in2out.addr.as_u32;
2593           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index;
2594
2595           sum1 = ip1->checksum;
2596           sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
2597                                  dst_address);
2598           if (PREDICT_FALSE (is_twice_nat_session (s1)))
2599             sum1 = ip_csum_update (sum1, ip1->src_address.as_u32,
2600                                    s1->ext_host_nat_addr.as_u32, ip4_header_t,
2601                                    src_address);
2602           ip1->checksum = ip_csum_fold (sum1);
2603
2604           if (PREDICT_TRUE (proto1 == SNAT_PROTOCOL_TCP))
2605             {
2606               old_port1 = tcp1->dst_port;
2607               new_port1 = tcp1->dst_port = s1->in2out.port;
2608
2609               sum1 = tcp1->checksum;
2610               sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
2611                                      dst_address);
2612               sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t,
2613                                      length);
2614               if (is_twice_nat_session (s1))
2615                 {
2616                   sum1 = ip_csum_update (sum1, ip1->src_address.as_u32,
2617                                          s1->ext_host_nat_addr.as_u32,
2618                                          ip4_header_t, dst_address);
2619                   sum1 = ip_csum_update (sum1, tcp1->src_port,
2620                                          s1->ext_host_nat_port, ip4_header_t,
2621                                          length);
2622                   tcp1->src_port = s1->ext_host_nat_port;
2623                   ip1->src_address.as_u32 = s1->ext_host_nat_addr.as_u32;
2624                 }
2625               tcp1->checksum = ip_csum_fold(sum1);
2626               if (nat44_set_tcp_session_state_o2i (sm, s1, tcp1, thread_index))
2627                 goto trace01;
2628             }
2629           else
2630             {
2631               udp1->dst_port = s1->in2out.port;
2632               if (is_twice_nat_session (s1))
2633                 {
2634                   udp1->src_port = s1->ext_host_nat_port;
2635                   ip1->src_address.as_u32 = s1->ext_host_nat_addr.as_u32;
2636                 }
2637               udp1->checksum = 0;
2638             }
2639
2640           /* Accounting */
2641           nat44_session_update_counters (s1, now,
2642                                          vlib_buffer_length_in_chain (vm, b1));
2643
2644         trace01:
2645           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2646                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2647             {
2648               nat44_ed_out2in_trace_t *t =
2649                 vlib_add_trace (vm, node, b1, sizeof (*t));
2650               t->is_slow_path = is_slow_path;
2651               t->sw_if_index = sw_if_index1;
2652               t->next_index = next1;
2653               t->session_index = ~0;
2654               if (s1)
2655                 t->session_index = s1 - tsm->sessions;
2656             }
2657
2658           pkts_processed += next1 != NAT44_ED_OUT2IN_NEXT_DROP;
2659
2660           /* verify speculative enqueues, maybe switch current next frame */
2661           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2662                                            to_next, n_left_to_next,
2663                                            bi0, bi1, next0, next1);
2664         }
2665
2666       while (n_left_from > 0 && n_left_to_next > 0)
2667         {
2668           u32 bi0;
2669           vlib_buffer_t *b0;
2670           u32 next0, sw_if_index0, rx_fib_index0, proto0, old_addr0, new_addr0;
2671           u16 old_port0, new_port0;
2672           ip4_header_t *ip0;
2673           udp_header_t *udp0;
2674           tcp_header_t *tcp0;
2675           icmp46_header_t * icmp0;
2676           snat_session_t *s0 = 0;
2677           clib_bihash_kv_16_8_t kv0, value0;
2678           ip_csum_t sum0;
2679           snat_session_key_t e_key0, l_key0;
2680           lb_nat_type_t lb_nat0;
2681           twice_nat_type_t twice_nat0;
2682
2683           /* speculatively enqueue b0 to the current next frame */
2684           bi0 = from[0];
2685           to_next[0] = bi0;
2686           from += 1;
2687           to_next += 1;
2688           n_left_from -= 1;
2689           n_left_to_next -= 1;
2690
2691           b0 = vlib_get_buffer (vm, bi0);
2692           next0 = NAT44_ED_OUT2IN_NEXT_LOOKUP;
2693           vnet_buffer (b0)->snat.flags = 0;
2694           ip0 = vlib_buffer_get_current (b0);
2695
2696           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2697           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2698                                                                sw_if_index0);
2699
2700           if (PREDICT_FALSE(ip0->ttl == 1))
2701             {
2702               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2703               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2704                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2705                                            0);
2706               next0 = NAT44_ED_OUT2IN_NEXT_ICMP_ERROR;
2707               goto trace0;
2708             }
2709
2710           udp0 = ip4_next_header (ip0);
2711           tcp0 = (tcp_header_t *) udp0;
2712           icmp0 = (icmp46_header_t *) udp0;
2713           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2714
2715           if (is_slow_path)
2716             {
2717               if (PREDICT_FALSE (proto0 == ~0))
2718                 {
2719                   s0 = nat44_ed_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
2720                                                      thread_index, now, vm, node);
2721                   if (!sm->forwarding_enabled)
2722                     {
2723                       if (!s0)
2724                         next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2725                       goto trace0;
2726                     }
2727                 }
2728
2729               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2730                 {
2731                   next0 = icmp_out2in_ed_slow_path
2732                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
2733                      next0, now, thread_index, &s0);
2734                   goto trace0;
2735                 }
2736             }
2737           else
2738             {
2739               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
2740                 {
2741                   next0 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2742                   goto trace0;
2743                 }
2744
2745               if (ip4_is_fragment (ip0))
2746                 {
2747                   b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
2748                   next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2749                   goto trace0;
2750                 }
2751             }
2752
2753           make_ed_kv (&kv0, &ip0->dst_address, &ip0->src_address, ip0->protocol,
2754                       rx_fib_index0, udp0->dst_port, udp0->src_port);
2755
2756           if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv0, &value0))
2757             {
2758               if (is_slow_path)
2759                 {
2760                   /* Try to match static mapping by external address and port,
2761                      destination address and port in packet */
2762                   e_key0.addr = ip0->dst_address;
2763                   e_key0.port = udp0->dst_port;
2764                   e_key0.protocol = proto0;
2765                   e_key0.fib_index = rx_fib_index0;
2766                   if (snat_static_mapping_match(sm, e_key0, &l_key0, 1, 0,
2767                       &twice_nat0, &lb_nat0, &ip0->src_address))
2768                     {
2769                       /*
2770                        * Send DHCP packets to the ipv4 stack, or we won't
2771                        * be able to use dhcp client on the outside interface
2772                        */
2773                       if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
2774                           && (udp0->dst_port ==
2775                           clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
2776                         {
2777                           vnet_feature_next (&next0, b0);
2778                           goto trace0;
2779                         }
2780
2781                       if (!sm->forwarding_enabled)
2782                         {
2783                           b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2784                           next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2785                         }
2786                       else
2787                         {
2788                           if (next_src_nat(sm, ip0, ip0->protocol,
2789                                            udp0->src_port, udp0->dst_port,
2790                                            thread_index, rx_fib_index0))
2791                             {
2792                               next0 = NAT44_ED_OUT2IN_NEXT_IN2OUT;
2793                               goto trace0;
2794                             }
2795                           create_bypass_for_fwd(sm, ip0, rx_fib_index0,
2796                                                 thread_index);
2797                         }
2798                       goto trace0;
2799                     }
2800
2801                   /* Create session initiated by host from external network */
2802                   s0 = create_session_for_static_mapping_ed(sm, b0, l_key0,
2803                                                             e_key0, node,
2804                                                             thread_index,
2805                                                             twice_nat0,
2806                                                             lb_nat0,
2807                                                             now);
2808
2809                   if (!s0)
2810                     {
2811                       next0 = NAT44_ED_OUT2IN_NEXT_DROP;
2812                       goto trace0;
2813                     }
2814                 }
2815               else
2816                 {
2817                   next0 = NAT44_ED_OUT2IN_NEXT_SLOW_PATH;
2818                   goto trace0;
2819                 }
2820             }
2821           else
2822             {
2823               s0 = pool_elt_at_index (tsm->sessions, value0.value);
2824             }
2825
2826           old_addr0 = ip0->dst_address.as_u32;
2827           new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32;
2828           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
2829
2830           sum0 = ip0->checksum;
2831           sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
2832                                  dst_address);
2833           if (PREDICT_FALSE (is_twice_nat_session (s0)))
2834             sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
2835                                    s0->ext_host_nat_addr.as_u32, ip4_header_t,
2836                                    src_address);
2837           ip0->checksum = ip_csum_fold (sum0);
2838
2839           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
2840             {
2841               old_port0 = tcp0->dst_port;
2842               new_port0 = tcp0->dst_port = s0->in2out.port;
2843
2844               sum0 = tcp0->checksum;
2845               sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
2846                                      dst_address);
2847               sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
2848                                      length);
2849               if (is_twice_nat_session (s0))
2850                 {
2851                   sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
2852                                          s0->ext_host_nat_addr.as_u32,
2853                                          ip4_header_t, dst_address);
2854                   sum0 = ip_csum_update (sum0, tcp0->src_port,
2855                                          s0->ext_host_nat_port, ip4_header_t,
2856                                          length);
2857                   tcp0->src_port = s0->ext_host_nat_port;
2858                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
2859                 }
2860               tcp0->checksum = ip_csum_fold(sum0);
2861               if (nat44_set_tcp_session_state_o2i (sm, s0, tcp0, thread_index))
2862                 goto trace0;
2863             }
2864           else
2865             {
2866               udp0->dst_port = s0->in2out.port;
2867               if (is_twice_nat_session (s0))
2868                 {
2869                   udp0->src_port = s0->ext_host_nat_port;
2870                   ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
2871                 }
2872               udp0->checksum = 0;
2873             }
2874
2875           /* Accounting */
2876           nat44_session_update_counters (s0, now,
2877                                          vlib_buffer_length_in_chain (vm, b0));
2878
2879         trace0:
2880           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2881                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2882             {
2883               nat44_ed_out2in_trace_t *t =
2884                 vlib_add_trace (vm, node, b0, sizeof (*t));
2885               t->is_slow_path = is_slow_path;
2886               t->sw_if_index = sw_if_index0;
2887               t->next_index = next0;
2888               t->session_index = ~0;
2889               if (s0)
2890                 t->session_index = s0 - tsm->sessions;
2891             }
2892
2893           pkts_processed += next0 != NAT44_ED_OUT2IN_NEXT_DROP;
2894
2895           /* verify speculative enqueue, maybe switch current next frame */
2896           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2897                                            to_next, n_left_to_next,
2898                                            bi0, next0);
2899         }
2900
2901       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2902     }
2903
2904   vlib_node_increment_counter (vm, stats_node_index,
2905                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2906                                pkts_processed);
2907   return frame->n_vectors;
2908 }
2909
2910 static uword
2911 nat44_ed_out2in_fast_path_fn (vlib_main_t * vm,
2912                               vlib_node_runtime_t * node,
2913                               vlib_frame_t * frame)
2914 {
2915   return nat44_ed_out2in_node_fn_inline (vm, node, frame, 0);
2916 }
2917
2918 VLIB_REGISTER_NODE (nat44_ed_out2in_node) = {
2919   .function = nat44_ed_out2in_fast_path_fn,
2920   .name = "nat44-ed-out2in",
2921   .vector_size = sizeof (u32),
2922   .format_trace = format_nat44_ed_out2in_trace,
2923   .type = VLIB_NODE_TYPE_INTERNAL,
2924
2925   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2926   .error_strings = snat_out2in_error_strings,
2927
2928   .runtime_data_bytes = sizeof (snat_runtime_t),
2929
2930   .n_next_nodes = NAT44_ED_OUT2IN_N_NEXT,
2931
2932   /* edit / add dispositions here */
2933   .next_nodes = {
2934     [NAT44_ED_OUT2IN_NEXT_DROP] = "error-drop",
2935     [NAT44_ED_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2936     [NAT44_ED_OUT2IN_NEXT_SLOW_PATH] = "nat44-ed-out2in-slowpath",
2937     [NAT44_ED_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2938     [NAT44_ED_OUT2IN_NEXT_IN2OUT] = "nat44-ed-in2out",
2939   },
2940 };
2941
2942 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_out2in_node, nat44_ed_out2in_fast_path_fn);
2943
2944 static uword
2945 nat44_ed_out2in_slow_path_fn (vlib_main_t * vm,
2946                               vlib_node_runtime_t * node,
2947                               vlib_frame_t * frame)
2948 {
2949   return nat44_ed_out2in_node_fn_inline (vm, node, frame, 1);
2950 }
2951
2952 VLIB_REGISTER_NODE (nat44_ed_out2in_slowpath_node) = {
2953   .function = nat44_ed_out2in_slow_path_fn,
2954   .name = "nat44-ed-out2in-slowpath",
2955   .vector_size = sizeof (u32),
2956   .format_trace = format_nat44_ed_out2in_trace,
2957   .type = VLIB_NODE_TYPE_INTERNAL,
2958
2959   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2960   .error_strings = snat_out2in_error_strings,
2961
2962   .runtime_data_bytes = sizeof (snat_runtime_t),
2963
2964   .n_next_nodes = NAT44_ED_OUT2IN_N_NEXT,
2965
2966   /* edit / add dispositions here */
2967   .next_nodes = {
2968     [NAT44_ED_OUT2IN_NEXT_DROP] = "error-drop",
2969     [NAT44_ED_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2970     [NAT44_ED_OUT2IN_NEXT_SLOW_PATH] = "nat44-ed-out2in-slowpath",
2971     [NAT44_ED_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2972     [NAT44_ED_OUT2IN_NEXT_IN2OUT] = "nat44-ed-in2out",
2973   },
2974 };
2975
2976 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_out2in_slowpath_node,
2977                               nat44_ed_out2in_slow_path_fn);
2978
2979 /**************************/
2980 /*** deterministic mode ***/
2981 /**************************/
2982 static uword
2983 snat_det_out2in_node_fn (vlib_main_t * vm,
2984                          vlib_node_runtime_t * node,
2985                          vlib_frame_t * frame)
2986 {
2987   u32 n_left_from, * from, * to_next;
2988   snat_out2in_next_t next_index;
2989   u32 pkts_processed = 0;
2990   snat_main_t * sm = &snat_main;
2991   u32 thread_index = vm->thread_index;
2992
2993   from = vlib_frame_vector_args (frame);
2994   n_left_from = frame->n_vectors;
2995   next_index = node->cached_next_index;
2996
2997   while (n_left_from > 0)
2998     {
2999       u32 n_left_to_next;
3000
3001       vlib_get_next_frame (vm, node, next_index,
3002                            to_next, n_left_to_next);
3003
3004       while (n_left_from >= 4 && n_left_to_next >= 2)
3005         {
3006           u32 bi0, bi1;
3007           vlib_buffer_t * b0, * b1;
3008           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
3009           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
3010           u32 sw_if_index0, sw_if_index1;
3011           ip4_header_t * ip0, * ip1;
3012           ip_csum_t sum0, sum1;
3013           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
3014           u16 new_port0, old_port0, old_port1, new_port1;
3015           udp_header_t * udp0, * udp1;
3016           tcp_header_t * tcp0, * tcp1;
3017           u32 proto0, proto1;
3018           snat_det_out_key_t key0, key1;
3019           snat_det_map_t * dm0, * dm1;
3020           snat_det_session_t * ses0 = 0, * ses1 = 0;
3021           u32 rx_fib_index0, rx_fib_index1;
3022           icmp46_header_t * icmp0, * icmp1;
3023
3024           /* Prefetch next iteration. */
3025           {
3026             vlib_buffer_t * p2, * p3;
3027
3028             p2 = vlib_get_buffer (vm, from[2]);
3029             p3 = vlib_get_buffer (vm, from[3]);
3030
3031             vlib_prefetch_buffer_header (p2, LOAD);
3032             vlib_prefetch_buffer_header (p3, LOAD);
3033
3034             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
3035             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
3036           }
3037
3038           /* speculatively enqueue b0 and b1 to the current next frame */
3039           to_next[0] = bi0 = from[0];
3040           to_next[1] = bi1 = from[1];
3041           from += 2;
3042           to_next += 2;
3043           n_left_from -= 2;
3044           n_left_to_next -= 2;
3045
3046           b0 = vlib_get_buffer (vm, bi0);
3047           b1 = vlib_get_buffer (vm, bi1);
3048
3049           ip0 = vlib_buffer_get_current (b0);
3050           udp0 = ip4_next_header (ip0);
3051           tcp0 = (tcp_header_t *) udp0;
3052
3053           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3054
3055           if (PREDICT_FALSE(ip0->ttl == 1))
3056             {
3057               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3058               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3059                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3060                                            0);
3061               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
3062               goto trace0;
3063             }
3064
3065           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3066
3067           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
3068             {
3069               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3070               icmp0 = (icmp46_header_t *) udp0;
3071
3072               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
3073                                   rx_fib_index0, node, next0, thread_index,
3074                                   &ses0, &dm0);
3075               goto trace0;
3076             }
3077
3078           key0.ext_host_addr = ip0->src_address;
3079           key0.ext_host_port = tcp0->src;
3080           key0.out_port = tcp0->dst;
3081
3082           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
3083           if (PREDICT_FALSE(!dm0))
3084             {
3085               nat_log_info ("unknown dst address:  %U",
3086                             format_ip4_address, &ip0->dst_address);
3087               next0 = SNAT_OUT2IN_NEXT_DROP;
3088               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3089               goto trace0;
3090             }
3091
3092           snat_det_reverse(dm0, &ip0->dst_address,
3093                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
3094
3095           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
3096           if (PREDICT_FALSE(!ses0))
3097             {
3098               nat_log_info ("no match src %U:%d dst %U:%d for user %U",
3099                             format_ip4_address, &ip0->src_address,
3100                             clib_net_to_host_u16 (tcp0->src),
3101                             format_ip4_address, &ip0->dst_address,
3102                             clib_net_to_host_u16 (tcp0->dst),
3103                             format_ip4_address, &new_addr0);
3104               next0 = SNAT_OUT2IN_NEXT_DROP;
3105               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3106               goto trace0;
3107             }
3108           new_port0 = ses0->in_port;
3109
3110           old_addr0 = ip0->dst_address;
3111           ip0->dst_address = new_addr0;
3112           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
3113
3114           sum0 = ip0->checksum;
3115           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3116                                  ip4_header_t,
3117                                  dst_address /* changed member */);
3118           ip0->checksum = ip_csum_fold (sum0);
3119
3120           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3121             {
3122               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
3123                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
3124               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
3125                 snat_det_ses_close(dm0, ses0);
3126
3127               old_port0 = tcp0->dst;
3128               tcp0->dst = new_port0;
3129
3130               sum0 = tcp0->checksum;
3131               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3132                                      ip4_header_t,
3133                                      dst_address /* changed member */);
3134
3135               sum0 = ip_csum_update (sum0, old_port0, new_port0,
3136                                      ip4_header_t /* cheat */,
3137                                      length /* changed member */);
3138               tcp0->checksum = ip_csum_fold(sum0);
3139             }
3140           else
3141             {
3142               old_port0 = udp0->dst_port;
3143               udp0->dst_port = new_port0;
3144               udp0->checksum = 0;
3145             }
3146
3147         trace0:
3148
3149           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3150                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3151             {
3152               snat_out2in_trace_t *t =
3153                  vlib_add_trace (vm, node, b0, sizeof (*t));
3154               t->sw_if_index = sw_if_index0;
3155               t->next_index = next0;
3156               t->session_index = ~0;
3157               if (ses0)
3158                 t->session_index = ses0 - dm0->sessions;
3159             }
3160
3161           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
3162
3163           b1 = vlib_get_buffer (vm, bi1);
3164
3165           ip1 = vlib_buffer_get_current (b1);
3166           udp1 = ip4_next_header (ip1);
3167           tcp1 = (tcp_header_t *) udp1;
3168
3169           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
3170
3171           if (PREDICT_FALSE(ip1->ttl == 1))
3172             {
3173               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3174               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
3175                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3176                                            0);
3177               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
3178               goto trace1;
3179             }
3180
3181           proto1 = ip_proto_to_snat_proto (ip1->protocol);
3182
3183           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
3184             {
3185               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
3186               icmp1 = (icmp46_header_t *) udp1;
3187
3188               next1 = icmp_out2in(sm, b1, ip1, icmp1, sw_if_index1,
3189                                   rx_fib_index1, node, next1, thread_index,
3190                                   &ses1, &dm1);
3191               goto trace1;
3192             }
3193
3194           key1.ext_host_addr = ip1->src_address;
3195           key1.ext_host_port = tcp1->src;
3196           key1.out_port = tcp1->dst;
3197
3198           dm1 = snat_det_map_by_out(sm, &ip1->dst_address);
3199           if (PREDICT_FALSE(!dm1))
3200             {
3201               nat_log_info ("unknown dst address:  %U",
3202                             format_ip4_address, &ip1->dst_address);
3203               next1 = SNAT_OUT2IN_NEXT_DROP;
3204               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3205               goto trace1;
3206             }
3207
3208           snat_det_reverse(dm1, &ip1->dst_address,
3209                            clib_net_to_host_u16(tcp1->dst), &new_addr1);
3210
3211           ses1 = snat_det_get_ses_by_out (dm1, &new_addr1, key1.as_u64);
3212           if (PREDICT_FALSE(!ses1))
3213             {
3214               nat_log_info ("no match src %U:%d dst %U:%d for user %U",
3215                             format_ip4_address, &ip1->src_address,
3216                             clib_net_to_host_u16 (tcp1->src),
3217                             format_ip4_address, &ip1->dst_address,
3218                             clib_net_to_host_u16 (tcp1->dst),
3219                             format_ip4_address, &new_addr1);
3220               next1 = SNAT_OUT2IN_NEXT_DROP;
3221               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3222               goto trace1;
3223             }
3224           new_port1 = ses1->in_port;
3225
3226           old_addr1 = ip1->dst_address;
3227           ip1->dst_address = new_addr1;
3228           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
3229
3230           sum1 = ip1->checksum;
3231           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
3232                                  ip4_header_t,
3233                                  dst_address /* changed member */);
3234           ip1->checksum = ip_csum_fold (sum1);
3235
3236           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
3237             {
3238               if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
3239                 ses1->state = SNAT_SESSION_TCP_CLOSE_WAIT;
3240               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_LAST_ACK)
3241                 snat_det_ses_close(dm1, ses1);
3242
3243               old_port1 = tcp1->dst;
3244               tcp1->dst = new_port1;
3245
3246               sum1 = tcp1->checksum;
3247               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
3248                                      ip4_header_t,
3249                                      dst_address /* changed member */);
3250
3251               sum1 = ip_csum_update (sum1, old_port1, new_port1,
3252                                      ip4_header_t /* cheat */,
3253                                      length /* changed member */);
3254               tcp1->checksum = ip_csum_fold(sum1);
3255             }
3256           else
3257             {
3258               old_port1 = udp1->dst_port;
3259               udp1->dst_port = new_port1;
3260               udp1->checksum = 0;
3261             }
3262
3263         trace1:
3264
3265           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3266                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
3267             {
3268               snat_out2in_trace_t *t =
3269                  vlib_add_trace (vm, node, b1, sizeof (*t));
3270               t->sw_if_index = sw_if_index1;
3271               t->next_index = next1;
3272               t->session_index = ~0;
3273               if (ses1)
3274                 t->session_index = ses1 - dm1->sessions;
3275             }
3276
3277           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
3278
3279           /* verify speculative enqueues, maybe switch current next frame */
3280           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
3281                                            to_next, n_left_to_next,
3282                                            bi0, bi1, next0, next1);
3283          }
3284
3285       while (n_left_from > 0 && n_left_to_next > 0)
3286         {
3287           u32 bi0;
3288           vlib_buffer_t * b0;
3289           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
3290           u32 sw_if_index0;
3291           ip4_header_t * ip0;
3292           ip_csum_t sum0;
3293           ip4_address_t new_addr0, old_addr0;
3294           u16 new_port0, old_port0;
3295           udp_header_t * udp0;
3296           tcp_header_t * tcp0;
3297           u32 proto0;
3298           snat_det_out_key_t key0;
3299           snat_det_map_t * dm0;
3300           snat_det_session_t * ses0 = 0;
3301           u32 rx_fib_index0;
3302           icmp46_header_t * icmp0;
3303
3304           /* speculatively enqueue b0 to the current next frame */
3305           bi0 = from[0];
3306           to_next[0] = bi0;
3307           from += 1;
3308           to_next += 1;
3309           n_left_from -= 1;
3310           n_left_to_next -= 1;
3311
3312           b0 = vlib_get_buffer (vm, bi0);
3313
3314           ip0 = vlib_buffer_get_current (b0);
3315           udp0 = ip4_next_header (ip0);
3316           tcp0 = (tcp_header_t *) udp0;
3317
3318           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3319
3320           if (PREDICT_FALSE(ip0->ttl == 1))
3321             {
3322               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3323               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3324                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3325                                            0);
3326               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
3327               goto trace00;
3328             }
3329
3330           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3331
3332           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
3333             {
3334               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3335               icmp0 = (icmp46_header_t *) udp0;
3336
3337               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
3338                                   rx_fib_index0, node, next0, thread_index,
3339                                   &ses0, &dm0);
3340               goto trace00;
3341             }
3342
3343           key0.ext_host_addr = ip0->src_address;
3344           key0.ext_host_port = tcp0->src;
3345           key0.out_port = tcp0->dst;
3346
3347           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
3348           if (PREDICT_FALSE(!dm0))
3349             {
3350               nat_log_info ("unknown dst address:  %U",
3351                             format_ip4_address, &ip0->dst_address);
3352               next0 = SNAT_OUT2IN_NEXT_DROP;
3353               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3354               goto trace00;
3355             }
3356
3357           snat_det_reverse(dm0, &ip0->dst_address,
3358                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
3359
3360           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
3361           if (PREDICT_FALSE(!ses0))
3362             {
3363               nat_log_info ("no match src %U:%d dst %U:%d for user %U",
3364                             format_ip4_address, &ip0->src_address,
3365                             clib_net_to_host_u16 (tcp0->src),
3366                             format_ip4_address, &ip0->dst_address,
3367                             clib_net_to_host_u16 (tcp0->dst),
3368                             format_ip4_address, &new_addr0);
3369               next0 = SNAT_OUT2IN_NEXT_DROP;
3370               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3371               goto trace00;
3372             }
3373           new_port0 = ses0->in_port;
3374
3375           old_addr0 = ip0->dst_address;
3376           ip0->dst_address = new_addr0;
3377           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
3378
3379           sum0 = ip0->checksum;
3380           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3381                                  ip4_header_t,
3382                                  dst_address /* changed member */);
3383           ip0->checksum = ip_csum_fold (sum0);
3384
3385           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3386             {
3387               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
3388                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
3389               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
3390                 snat_det_ses_close(dm0, ses0);
3391
3392               old_port0 = tcp0->dst;
3393               tcp0->dst = new_port0;
3394
3395               sum0 = tcp0->checksum;
3396               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3397                                      ip4_header_t,
3398                                      dst_address /* changed member */);
3399
3400               sum0 = ip_csum_update (sum0, old_port0, new_port0,
3401                                      ip4_header_t /* cheat */,
3402                                      length /* changed member */);
3403               tcp0->checksum = ip_csum_fold(sum0);
3404             }
3405           else
3406             {
3407               old_port0 = udp0->dst_port;
3408               udp0->dst_port = new_port0;
3409               udp0->checksum = 0;
3410             }
3411
3412         trace00:
3413
3414           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3415                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3416             {
3417               snat_out2in_trace_t *t =
3418                  vlib_add_trace (vm, node, b0, sizeof (*t));
3419               t->sw_if_index = sw_if_index0;
3420               t->next_index = next0;
3421               t->session_index = ~0;
3422               if (ses0)
3423                 t->session_index = ses0 - dm0->sessions;
3424             }
3425
3426           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
3427
3428           /* verify speculative enqueue, maybe switch current next frame */
3429           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3430                                            to_next, n_left_to_next,
3431                                            bi0, next0);
3432         }
3433
3434       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3435     }
3436
3437   vlib_node_increment_counter (vm, snat_det_out2in_node.index,
3438                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
3439                                pkts_processed);
3440   return frame->n_vectors;
3441 }
3442
3443 VLIB_REGISTER_NODE (snat_det_out2in_node) = {
3444   .function = snat_det_out2in_node_fn,
3445   .name = "nat44-det-out2in",
3446   .vector_size = sizeof (u32),
3447   .format_trace = format_snat_out2in_trace,
3448   .type = VLIB_NODE_TYPE_INTERNAL,
3449
3450   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
3451   .error_strings = snat_out2in_error_strings,
3452
3453   .runtime_data_bytes = sizeof (snat_runtime_t),
3454
3455   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
3456
3457   /* edit / add dispositions here */
3458   .next_nodes = {
3459     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
3460     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
3461     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3462     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
3463   },
3464 };
3465 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn);
3466
3467 /**
3468  * Get address and port values to be used for ICMP packet translation
3469  * and create session if needed
3470  *
3471  * @param[in,out] sm             NAT main
3472  * @param[in,out] node           NAT node runtime
3473  * @param[in] thread_index       thread index
3474  * @param[in,out] b0             buffer containing packet to be translated
3475  * @param[out] p_proto           protocol used for matching
3476  * @param[out] p_value           address and port after NAT translation
3477  * @param[out] p_dont_translate  if packet should not be translated
3478  * @param d                      optional parameter
3479  * @param e                      optional parameter
3480  */
3481 u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node,
3482                           u32 thread_index, vlib_buffer_t *b0,
3483                           ip4_header_t *ip0, u8 *p_proto,
3484                           snat_session_key_t *p_value,
3485                           u8 *p_dont_translate, void *d, void *e)
3486 {
3487   icmp46_header_t *icmp0;
3488   u32 sw_if_index0;
3489   u8 protocol;
3490   snat_det_out_key_t key0;
3491   u8 dont_translate = 0;
3492   u32 next0 = ~0;
3493   icmp_echo_header_t *echo0, *inner_echo0 = 0;
3494   ip4_header_t *inner_ip0;
3495   void *l4_header = 0;
3496   icmp46_header_t *inner_icmp0;
3497   snat_det_map_t * dm0 = 0;
3498   ip4_address_t new_addr0 = {{0}};
3499   snat_det_session_t * ses0 = 0;
3500   ip4_address_t out_addr;
3501
3502   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
3503   echo0 = (icmp_echo_header_t *)(icmp0+1);
3504   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3505
3506   if (!icmp_is_error_message (icmp0))
3507     {
3508       protocol = SNAT_PROTOCOL_ICMP;
3509       key0.ext_host_addr = ip0->src_address;
3510       key0.ext_host_port = 0;
3511       key0.out_port = echo0->identifier;
3512       out_addr = ip0->dst_address;
3513     }
3514   else
3515     {
3516       inner_ip0 = (ip4_header_t *)(echo0+1);
3517       l4_header = ip4_next_header (inner_ip0);
3518       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
3519       key0.ext_host_addr = inner_ip0->dst_address;
3520       out_addr = inner_ip0->src_address;
3521       switch (protocol)
3522         {
3523         case SNAT_PROTOCOL_ICMP:
3524           inner_icmp0 = (icmp46_header_t*)l4_header;
3525           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
3526           key0.ext_host_port = 0;
3527           key0.out_port = inner_echo0->identifier;
3528           break;
3529         case SNAT_PROTOCOL_UDP:
3530         case SNAT_PROTOCOL_TCP:
3531           key0.ext_host_port = ((tcp_udp_header_t*)l4_header)->dst_port;
3532           key0.out_port = ((tcp_udp_header_t*)l4_header)->src_port;
3533           break;
3534         default:
3535           b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
3536           next0 = SNAT_OUT2IN_NEXT_DROP;
3537           goto out;
3538         }
3539     }
3540
3541   dm0 = snat_det_map_by_out(sm, &out_addr);
3542   if (PREDICT_FALSE(!dm0))
3543     {
3544       /* Don't NAT packet aimed at the intfc address */
3545       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
3546                                           ip0->dst_address.as_u32)))
3547         {
3548           dont_translate = 1;
3549           goto out;
3550         }
3551       nat_log_info ("unknown dst address:  %U",
3552                     format_ip4_address, &ip0->dst_address);
3553       goto out;
3554     }
3555
3556   snat_det_reverse(dm0, &ip0->dst_address,
3557                    clib_net_to_host_u16(key0.out_port), &new_addr0);
3558
3559   ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
3560   if (PREDICT_FALSE(!ses0))
3561     {
3562       /* Don't NAT packet aimed at the intfc address */
3563       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
3564                                           ip0->dst_address.as_u32)))
3565         {
3566           dont_translate = 1;
3567           goto out;
3568         }
3569       nat_log_info ("no match src %U:%d dst %U:%d for user %U",
3570                     format_ip4_address, &key0.ext_host_addr,
3571                     clib_net_to_host_u16 (key0.ext_host_port),
3572                     format_ip4_address, &out_addr,
3573                     clib_net_to_host_u16 (key0.out_port),
3574                     format_ip4_address, &new_addr0);
3575       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3576       next0 = SNAT_OUT2IN_NEXT_DROP;
3577       goto out;
3578     }
3579
3580   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
3581                     !icmp_is_error_message (icmp0)))
3582     {
3583       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
3584       next0 = SNAT_OUT2IN_NEXT_DROP;
3585       goto out;
3586     }
3587
3588   goto out;
3589
3590 out:
3591   *p_proto = protocol;
3592   if (ses0)
3593     {
3594       p_value->addr = new_addr0;
3595       p_value->fib_index = sm->inside_fib_index;
3596       p_value->port = ses0->in_port;
3597     }
3598   *p_dont_translate = dont_translate;
3599   if (d)
3600     *(snat_det_session_t**)d = ses0;
3601   if (e)
3602     *(snat_det_map_t**)e = dm0;
3603   return next0;
3604 }
3605
3606 /**********************/
3607 /*** worker handoff ***/
3608 /**********************/
3609 static uword
3610 snat_out2in_worker_handoff_fn (vlib_main_t * vm,
3611                                vlib_node_runtime_t * node,
3612                                vlib_frame_t * frame)
3613 {
3614   snat_main_t *sm = &snat_main;
3615   vlib_thread_main_t *tm = vlib_get_thread_main ();
3616   u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0;
3617   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
3618   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
3619     = 0;
3620   vlib_frame_queue_elt_t *hf = 0;
3621   vlib_frame_queue_t *fq;
3622   vlib_frame_t *f = 0;
3623   int i;
3624   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
3625   u32 next_worker_index = 0;
3626   u32 current_worker_index = ~0;
3627   u32 thread_index = vm->thread_index;
3628   vlib_frame_t *d = 0;
3629
3630   ASSERT (vec_len (sm->workers));
3631
3632   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
3633     {
3634       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
3635
3636       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
3637                                tm->n_vlib_mains - 1,
3638                                (vlib_frame_queue_t *) (~0));
3639     }
3640
3641   from = vlib_frame_vector_args (frame);
3642   n_left_from = frame->n_vectors;
3643
3644   while (n_left_from > 0)
3645     {
3646       u32 bi0;
3647       vlib_buffer_t *b0;
3648       u32 sw_if_index0;
3649       u32 rx_fib_index0;
3650       ip4_header_t * ip0;
3651       u8 do_handoff;
3652
3653       bi0 = from[0];
3654       from += 1;
3655       n_left_from -= 1;
3656
3657       b0 = vlib_get_buffer (vm, bi0);
3658
3659       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
3660       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3661
3662       ip0 = vlib_buffer_get_current (b0);
3663
3664       next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0);
3665
3666       if (PREDICT_FALSE (next_worker_index != thread_index))
3667         {
3668           do_handoff = 1;
3669
3670           if (next_worker_index != current_worker_index)
3671             {
3672               fq = is_vlib_frame_queue_congested (
3673                 sm->fq_out2in_index, next_worker_index, NAT_FQ_NELTS - 2,
3674                 congested_handoff_queue_by_worker_index);
3675
3676               if (fq)
3677                 {
3678                   /* if this is 1st frame */
3679                   if (!d)
3680                     {
3681                       d = vlib_get_frame_to_node (vm, sm->error_node_index);
3682                       to_next_drop = vlib_frame_vector_args (d);
3683                     }
3684
3685                   to_next_drop[0] = bi0;
3686                   to_next_drop += 1;
3687                   d->n_vectors++;
3688                   b0->error = node->errors[SNAT_OUT2IN_ERROR_FQ_CONGESTED];
3689                   goto trace0;
3690                 }
3691
3692               if (hf)
3693                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3694
3695               hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index,
3696                                                       next_worker_index,
3697                                                       handoff_queue_elt_by_worker_index);
3698
3699               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
3700               to_next_worker = &hf->buffer_index[hf->n_vectors];
3701               current_worker_index = next_worker_index;
3702             }
3703
3704           /* enqueue to correct worker thread */
3705           to_next_worker[0] = bi0;
3706           to_next_worker++;
3707           n_left_to_next_worker--;
3708
3709           if (n_left_to_next_worker == 0)
3710             {
3711               hf->n_vectors = VLIB_FRAME_SIZE;
3712               vlib_put_frame_queue_elt (hf);
3713               current_worker_index = ~0;
3714               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
3715               hf = 0;
3716             }
3717         }
3718       else
3719         {
3720           do_handoff = 0;
3721           /* if this is 1st frame */
3722           if (!f)
3723             {
3724               f = vlib_get_frame_to_node (vm, sm->out2in_node_index);
3725               to_next = vlib_frame_vector_args (f);
3726             }
3727
3728           to_next[0] = bi0;
3729           to_next += 1;
3730           f->n_vectors++;
3731         }
3732
3733 trace0:
3734       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
3735                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3736         {
3737           snat_out2in_worker_handoff_trace_t *t =
3738             vlib_add_trace (vm, node, b0, sizeof (*t));
3739           t->next_worker_index = next_worker_index;
3740           t->do_handoff = do_handoff;
3741         }
3742     }
3743
3744   if (f)
3745     vlib_put_frame_to_node (vm, sm->out2in_node_index, f);
3746
3747   if (d)
3748     vlib_put_frame_to_node (vm, sm->error_node_index, d);
3749
3750   if (hf)
3751     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3752
3753   /* Ship frames to the worker nodes */
3754   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
3755     {
3756       if (handoff_queue_elt_by_worker_index[i])
3757         {
3758           hf = handoff_queue_elt_by_worker_index[i];
3759           /*
3760            * It works better to let the handoff node
3761            * rate-adapt, always ship the handoff queue element.
3762            */
3763           if (1 || hf->n_vectors == hf->last_n_vectors)
3764             {
3765               vlib_put_frame_queue_elt (hf);
3766               handoff_queue_elt_by_worker_index[i] = 0;
3767             }
3768           else
3769             hf->last_n_vectors = hf->n_vectors;
3770         }
3771       congested_handoff_queue_by_worker_index[i] =
3772         (vlib_frame_queue_t *) (~0);
3773     }
3774   hf = 0;
3775   current_worker_index = ~0;
3776   return frame->n_vectors;
3777 }
3778
3779 VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = {
3780   .function = snat_out2in_worker_handoff_fn,
3781   .name = "nat44-out2in-worker-handoff",
3782   .vector_size = sizeof (u32),
3783   .format_trace = format_snat_out2in_worker_handoff_trace,
3784   .type = VLIB_NODE_TYPE_INTERNAL,
3785
3786   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
3787   .error_strings = snat_out2in_error_strings,
3788
3789   .n_next_nodes = 1,
3790
3791   .next_nodes = {
3792     [0] = "error-drop",
3793   },
3794 };
3795
3796 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn);
3797
3798 static uword
3799 snat_out2in_fast_node_fn (vlib_main_t * vm,
3800                           vlib_node_runtime_t * node,
3801                           vlib_frame_t * frame)
3802 {
3803   u32 n_left_from, * from, * to_next;
3804   snat_out2in_next_t next_index;
3805   u32 pkts_processed = 0;
3806   snat_main_t * sm = &snat_main;
3807
3808   from = vlib_frame_vector_args (frame);
3809   n_left_from = frame->n_vectors;
3810   next_index = node->cached_next_index;
3811
3812   while (n_left_from > 0)
3813     {
3814       u32 n_left_to_next;
3815
3816       vlib_get_next_frame (vm, node, next_index,
3817                            to_next, n_left_to_next);
3818
3819       while (n_left_from > 0 && n_left_to_next > 0)
3820         {
3821           u32 bi0;
3822           vlib_buffer_t * b0;
3823           u32 next0 = SNAT_OUT2IN_NEXT_DROP;
3824           u32 sw_if_index0;
3825           ip4_header_t * ip0;
3826           ip_csum_t sum0;
3827           u32 new_addr0, old_addr0;
3828           u16 new_port0, old_port0;
3829           udp_header_t * udp0;
3830           tcp_header_t * tcp0;
3831           icmp46_header_t * icmp0;
3832           snat_session_key_t key0, sm0;
3833           u32 proto0;
3834           u32 rx_fib_index0;
3835
3836           /* speculatively enqueue b0 to the current next frame */
3837           bi0 = from[0];
3838           to_next[0] = bi0;
3839           from += 1;
3840           to_next += 1;
3841           n_left_from -= 1;
3842           n_left_to_next -= 1;
3843
3844           b0 = vlib_get_buffer (vm, bi0);
3845
3846           ip0 = vlib_buffer_get_current (b0);
3847           udp0 = ip4_next_header (ip0);
3848           tcp0 = (tcp_header_t *) udp0;
3849           icmp0 = (icmp46_header_t *) udp0;
3850
3851           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3852           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3853
3854           vnet_feature_next (&next0, b0);
3855
3856           if (PREDICT_FALSE(ip0->ttl == 1))
3857             {
3858               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3859               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3860                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
3861                                            0);
3862               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
3863               goto trace00;
3864             }
3865
3866           proto0 = ip_proto_to_snat_proto (ip0->protocol);
3867
3868           if (PREDICT_FALSE (proto0 == ~0))
3869               goto trace00;
3870
3871           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
3872             {
3873               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
3874                                   rx_fib_index0, node, next0, ~0, 0, 0);
3875               goto trace00;
3876             }
3877
3878           key0.addr = ip0->dst_address;
3879           key0.port = udp0->dst_port;
3880           key0.fib_index = rx_fib_index0;
3881
3882           if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
3883             {
3884               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
3885               goto trace00;
3886             }
3887
3888           new_addr0 = sm0.addr.as_u32;
3889           new_port0 = sm0.port;
3890           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
3891           old_addr0 = ip0->dst_address.as_u32;
3892           ip0->dst_address.as_u32 = new_addr0;
3893
3894           sum0 = ip0->checksum;
3895           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3896                                  ip4_header_t,
3897                                  dst_address /* changed member */);
3898           ip0->checksum = ip_csum_fold (sum0);
3899
3900           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
3901             {
3902                if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3903                 {
3904                   old_port0 = tcp0->dst_port;
3905                   tcp0->dst_port = new_port0;
3906
3907                   sum0 = tcp0->checksum;
3908                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3909                                          ip4_header_t,
3910                                          dst_address /* changed member */);
3911
3912                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
3913                                          ip4_header_t /* cheat */,
3914                                          length /* changed member */);
3915                   tcp0->checksum = ip_csum_fold(sum0);
3916                 }
3917               else
3918                 {
3919                   old_port0 = udp0->dst_port;
3920                   udp0->dst_port = new_port0;
3921                   udp0->checksum = 0;
3922                 }
3923             }
3924           else
3925             {
3926               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3927                 {
3928                   sum0 = tcp0->checksum;
3929                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
3930                                          ip4_header_t,
3931                                          dst_address /* changed member */);
3932
3933                   tcp0->checksum = ip_csum_fold(sum0);
3934                 }
3935             }
3936
3937         trace00:
3938
3939           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3940                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3941             {
3942               snat_out2in_trace_t *t =
3943                  vlib_add_trace (vm, node, b0, sizeof (*t));
3944               t->sw_if_index = sw_if_index0;
3945               t->next_index = next0;
3946             }
3947
3948           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
3949
3950           /* verify speculative enqueue, maybe switch current next frame */
3951           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3952                                            to_next, n_left_to_next,
3953                                            bi0, next0);
3954         }
3955
3956       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3957     }
3958
3959   vlib_node_increment_counter (vm, snat_out2in_fast_node.index,
3960                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
3961                                pkts_processed);
3962   return frame->n_vectors;
3963 }
3964
3965 VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
3966   .function = snat_out2in_fast_node_fn,
3967   .name = "nat44-out2in-fast",
3968   .vector_size = sizeof (u32),
3969   .format_trace = format_snat_out2in_fast_trace,
3970   .type = VLIB_NODE_TYPE_INTERNAL,
3971
3972   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
3973   .error_strings = snat_out2in_error_strings,
3974
3975   .runtime_data_bytes = sizeof (snat_runtime_t),
3976
3977   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
3978
3979   /* edit / add dispositions here */
3980   .next_nodes = {
3981     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
3982     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
3983     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3984     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
3985   },
3986 };
3987 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn);