NAT: Twice NAT44 (VPP-969)
[vpp.git] / src / plugins / nat / out2in.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/udp/udp.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <nat/nat.h>
26 #include <nat/nat_ipfix_logging.h>
27 #include <nat/nat_det.h>
28 #include <nat/nat_reass.h>
29
30 #include <vppinfra/hash.h>
31 #include <vppinfra/error.h>
32 #include <vppinfra/elog.h>
33
34 typedef struct {
35   u32 sw_if_index;
36   u32 next_index;
37   u32 session_index;
38 } snat_out2in_trace_t;
39
40 typedef struct {
41   u32 next_worker_index;
42   u8 do_handoff;
43 } snat_out2in_worker_handoff_trace_t;
44
45 /* packet trace format function */
46 static u8 * format_snat_out2in_trace (u8 * s, va_list * args)
47 {
48   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
49   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
50   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
51
52   s = format (s, "NAT44_OUT2IN: sw_if_index %d, next index %d, session index %d",
53               t->sw_if_index, t->next_index, t->session_index);
54   return s;
55 }
56
57 static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args)
58 {
59   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
60   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
61   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
62
63   s = format (s, "NAT44_OUT2IN_FAST: sw_if_index %d, next index %d",
64               t->sw_if_index, t->next_index);
65   return s;
66 }
67
68 static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args)
69 {
70   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
71   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
72   snat_out2in_worker_handoff_trace_t * t =
73     va_arg (*args, snat_out2in_worker_handoff_trace_t *);
74   char * m;
75
76   m = t->do_handoff ? "next worker" : "same worker";
77   s = format (s, "NAT44_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
78
79   return s;
80 }
81
82 typedef struct {
83   u32 sw_if_index;
84   u32 next_index;
85   u8 cached;
86 } nat44_out2in_reass_trace_t;
87
88 static u8 * format_nat44_out2in_reass_trace (u8 * s, va_list * args)
89 {
90   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
91   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
92   nat44_out2in_reass_trace_t * t = va_arg (*args, nat44_out2in_reass_trace_t *);
93
94   s = format (s, "NAT44_OUT2IN_REASS: sw_if_index %d, next index %d, status %s",
95               t->sw_if_index, t->next_index,
96               t->cached ? "cached" : "translated");
97
98   return s;
99 }
100
101 vlib_node_registration_t snat_out2in_node;
102 vlib_node_registration_t snat_out2in_fast_node;
103 vlib_node_registration_t snat_out2in_worker_handoff_node;
104 vlib_node_registration_t snat_det_out2in_node;
105 vlib_node_registration_t nat44_out2in_reass_node;
106
/*
 * X-macro list of out2in error counters: _(SYMBOL, "description").
 * It is expanded twice below, once into the snat_out2in_error_t enum and
 * once into the matching string table, so both stay in the same order.
 */
#define foreach_snat_out2in_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(OUT2IN_PACKETS, "Good out2in packets processed")      \
_(OUT_OF_PORTS, "Out of ports")                         \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(NO_TRANSLATION, "No translation")                     \
_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")   \
_(DROP_FRAGMENT, "Drop fragment")                       \
_(MAX_REASS, "Maximum reassemblies exceeded")           \
_(MAX_FRAG, "Maximum fragments per reassembly exceeded")

/* Error codes: SNAT_OUT2IN_ERROR_<SYMBOL>, used to index node->errors[]. */
typedef enum
{
#define _(sym,str) SNAT_OUT2IN_ERROR_##sym,
  foreach_snat_out2in_error
#undef _
  SNAT_OUT2IN_N_ERROR,		/* number of error codes */
} snat_out2in_error_t;

/* Description strings, in the same order as snat_out2in_error_t. */
static char *snat_out2in_error_strings[] = {
#define _(sym,string) string,
  foreach_snat_out2in_error
#undef _
};
130
/*
 * Next-node indices produced by the out2in processing functions in this
 * file.  SNAT_OUT2IN_N_NEXT is the number of slots.
 */
typedef enum
{
  SNAT_OUT2IN_NEXT_DROP,
  SNAT_OUT2IN_NEXT_LOOKUP,
  SNAT_OUT2IN_NEXT_ICMP_ERROR,
  SNAT_OUT2IN_NEXT_REASS,
  SNAT_OUT2IN_N_NEXT,
} snat_out2in_next_t;
138
139 /**
140  * @brief Create session for static mapping.
141  *
142  * Create NAT session initiated by host from external network with static
143  * mapping.
144  *
145  * @param sm     NAT main.
146  * @param b0     Vlib buffer.
147  * @param in2out In2out NAT44 session key.
148  * @param out2in Out2in NAT44 session key.
149  * @param node   Vlib node.
150  *
151  * @returns SNAT session if successfully created otherwise 0.
152  */
153 static inline snat_session_t *
154 create_session_for_static_mapping (snat_main_t *sm,
155                                    vlib_buffer_t *b0,
156                                    snat_session_key_t in2out,
157                                    snat_session_key_t out2in,
158                                    vlib_node_runtime_t * node,
159                                    u32 thread_index)
160 {
161   snat_user_t *u;
162   snat_session_t *s;
163   clib_bihash_kv_8_8_t kv0;
164   ip4_header_t *ip0;
165   udp_header_t *udp0;
166
167   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
168     {
169       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
170       return 0;
171     }
172
173   ip0 = vlib_buffer_get_current (b0);
174   udp0 = ip4_next_header (ip0);
175
176   u = nat_user_get_or_create (sm, &in2out.addr, in2out.fib_index, thread_index);
177   if (!u)
178     {
179       clib_warning ("create NAT user failed");
180       return 0;
181     }
182
183   s = nat_session_alloc_or_recycle (sm, u, thread_index);
184   if (!s)
185     {
186       clib_warning ("create NAT session failed");
187       return 0;
188     }
189
190   s->outside_address_index = ~0;
191   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
192   s->ext_host_addr.as_u32 = ip0->src_address.as_u32;
193   s->ext_host_port = udp0->src_port;
194   u->nstaticsessions++;
195   s->in2out = in2out;
196   s->out2in = out2in;
197   s->in2out.protocol = out2in.protocol;
198
199   /* Add to translation hashes */
200   kv0.key = s->in2out.as_u64;
201   kv0.value = s - sm->per_thread_data[thread_index].sessions;
202   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
203                                1 /* is_add */))
204       clib_warning ("in2out key add failed");
205
206   kv0.key = s->out2in.as_u64;
207
208   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
209                                1 /* is_add */))
210       clib_warning ("out2in key add failed");
211
212   /* log NAT event */
213   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
214                                       s->out2in.addr.as_u32,
215                                       s->in2out.protocol,
216                                       s->in2out.port,
217                                       s->out2in.port,
218                                       s->in2out.fib_index);
219    return s;
220 }
221
222 static_always_inline
223 snat_out2in_error_t icmp_get_key(ip4_header_t *ip0,
224                                  snat_session_key_t *p_key0)
225 {
226   icmp46_header_t *icmp0;
227   snat_session_key_t key0;
228   icmp_echo_header_t *echo0, *inner_echo0 = 0;
229   ip4_header_t *inner_ip0;
230   void *l4_header = 0;
231   icmp46_header_t *inner_icmp0;
232
233   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
234   echo0 = (icmp_echo_header_t *)(icmp0+1);
235
236   if (!icmp_is_error_message (icmp0))
237     {
238       key0.protocol = SNAT_PROTOCOL_ICMP;
239       key0.addr = ip0->dst_address;
240       key0.port = echo0->identifier;
241     }
242   else
243     {
244       inner_ip0 = (ip4_header_t *)(echo0+1);
245       l4_header = ip4_next_header (inner_ip0);
246       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
247       key0.addr = inner_ip0->src_address;
248       switch (key0.protocol)
249         {
250         case SNAT_PROTOCOL_ICMP:
251           inner_icmp0 = (icmp46_header_t*)l4_header;
252           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
253           key0.port = inner_echo0->identifier;
254           break;
255         case SNAT_PROTOCOL_UDP:
256         case SNAT_PROTOCOL_TCP:
257           key0.port = ((tcp_udp_header_t*)l4_header)->src_port;
258           break;
259         default:
260           return SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL;
261         }
262     }
263   *p_key0 = key0;
264   return -1; /* success */
265 }
266
267 /**
268  * Get address and port values to be used for ICMP packet translation
269  * and create session if needed
270  *
271  * @param[in,out] sm             NAT main
272  * @param[in,out] node           NAT node runtime
273  * @param[in] thread_index       thread index
274  * @param[in,out] b0             buffer containing packet to be translated
275  * @param[out] p_proto           protocol used for matching
276  * @param[out] p_value           address and port after NAT translation
277  * @param[out] p_dont_translate  if packet should not be translated
278  * @param d                      optional parameter
279  * @param e                      optional parameter
280  */
281 u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
282                            u32 thread_index, vlib_buffer_t *b0,
283                            ip4_header_t *ip0, u8 *p_proto,
284                            snat_session_key_t *p_value,
285                            u8 *p_dont_translate, void *d, void *e)
286 {
287   icmp46_header_t *icmp0;
288   u32 sw_if_index0;
289   u32 rx_fib_index0;
290   snat_session_key_t key0;
291   snat_session_key_t sm0;
292   snat_session_t *s0 = 0;
293   u8 dont_translate = 0;
294   clib_bihash_kv_8_8_t kv0, value0;
295   u8 is_addr_only;
296   u32 next0 = ~0;
297   int err;
298
299   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
300   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
301   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
302
303   key0.protocol = 0;
304
305   err = icmp_get_key (ip0, &key0);
306   if (err != -1)
307     {
308       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
309       next0 = SNAT_OUT2IN_NEXT_DROP;
310       goto out;
311     }
312   key0.fib_index = rx_fib_index0;
313
314   kv0.key = key0.as_u64;
315
316   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
317                               &value0))
318     {
319       /* Try to match static mapping by external address and port,
320          destination address and port in packet */
321       if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0))
322         {
323           /* Don't NAT packet aimed at the intfc address */
324           if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
325                                               ip0->dst_address.as_u32)))
326             {
327               dont_translate = 1;
328               goto out;
329             }
330           b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
331           next0 = SNAT_OUT2IN_NEXT_DROP;
332           goto out;
333         }
334
335       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
336                         (icmp0->type != ICMP4_echo_request || !is_addr_only)))
337         {
338           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
339           next0 = SNAT_OUT2IN_NEXT_DROP;
340           goto out;
341         }
342
343       /* Create session initiated by host from external network */
344       s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
345                                              node, thread_index);
346
347       if (!s0)
348         {
349           next0 = SNAT_OUT2IN_NEXT_DROP;
350           goto out;
351         }
352     }
353   else
354     {
355       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
356                         icmp0->type != ICMP4_echo_request &&
357                         !icmp_is_error_message (icmp0)))
358         {
359           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
360           next0 = SNAT_OUT2IN_NEXT_DROP;
361           goto out;
362         }
363
364       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
365                               value0.value);
366     }
367
368 out:
369   *p_proto = key0.protocol;
370   if (s0)
371     *p_value = s0->in2out;
372   *p_dont_translate = dont_translate;
373   if (d)
374     *(snat_session_t**)d = s0;
375   return next0;
376 }
377
378 /**
379  * Get address and port values to be used for ICMP packet translation
380  *
381  * @param[in] sm                 NAT main
382  * @param[in,out] node           NAT node runtime
383  * @param[in] thread_index       thread index
384  * @param[in,out] b0             buffer containing packet to be translated
385  * @param[out] p_proto           protocol used for matching
386  * @param[out] p_value           address and port after NAT translation
387  * @param[out] p_dont_translate  if packet should not be translated
388  * @param d                      optional parameter
389  * @param e                      optional parameter
390  */
391 u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
392                            u32 thread_index, vlib_buffer_t *b0,
393                            ip4_header_t *ip0, u8 *p_proto,
394                            snat_session_key_t *p_value,
395                            u8 *p_dont_translate, void *d, void *e)
396 {
397   icmp46_header_t *icmp0;
398   u32 sw_if_index0;
399   u32 rx_fib_index0;
400   snat_session_key_t key0;
401   snat_session_key_t sm0;
402   u8 dont_translate = 0;
403   u8 is_addr_only;
404   u32 next0 = ~0;
405   int err;
406
407   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
408   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
409   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
410
411   err = icmp_get_key (ip0, &key0);
412   if (err != -1)
413     {
414       b0->error = node->errors[err];
415       next0 = SNAT_OUT2IN_NEXT_DROP;
416       goto out2;
417     }
418   key0.fib_index = rx_fib_index0;
419
420   if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0))
421     {
422       /* Don't NAT packet aimed at the intfc address */
423       if (is_interface_addr(sm, node, sw_if_index0, ip0->dst_address.as_u32))
424         {
425           dont_translate = 1;
426           goto out;
427         }
428       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
429       next0 = SNAT_OUT2IN_NEXT_DROP;
430       goto out;
431     }
432
433   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
434                     (icmp0->type != ICMP4_echo_request || !is_addr_only) &&
435                     !icmp_is_error_message (icmp0)))
436     {
437       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
438       next0 = SNAT_OUT2IN_NEXT_DROP;
439       goto out;
440     }
441
442 out:
443   *p_value = sm0;
444 out2:
445   *p_proto = key0.protocol;
446   *p_dont_translate = dont_translate;
447   return next0;
448 }
449
450 static inline u32 icmp_out2in (snat_main_t *sm,
451                                vlib_buffer_t * b0,
452                                ip4_header_t * ip0,
453                                icmp46_header_t * icmp0,
454                                u32 sw_if_index0,
455                                u32 rx_fib_index0,
456                                vlib_node_runtime_t * node,
457                                u32 next0,
458                                u32 thread_index,
459                                void *d,
460                                void *e)
461 {
462   snat_session_key_t sm0;
463   u8 protocol;
464   icmp_echo_header_t *echo0, *inner_echo0 = 0;
465   ip4_header_t *inner_ip0 = 0;
466   void *l4_header = 0;
467   icmp46_header_t *inner_icmp0;
468   u8 dont_translate;
469   u32 new_addr0, old_addr0;
470   u16 old_id0, new_id0;
471   ip_csum_t sum0;
472   u16 checksum0;
473   u32 next0_tmp;
474
475   echo0 = (icmp_echo_header_t *)(icmp0+1);
476
477   next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0, ip0,
478                                        &protocol, &sm0, &dont_translate, d, e);
479   if (next0_tmp != ~0)
480     next0 = next0_tmp;
481   if (next0 == SNAT_OUT2IN_NEXT_DROP || dont_translate)
482     goto out;
483
484   sum0 = ip_incremental_checksum (0, icmp0,
485                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
486   checksum0 = ~ip_csum_fold (sum0);
487   if (checksum0 != 0 && checksum0 != 0xffff)
488     {
489       next0 = SNAT_OUT2IN_NEXT_DROP;
490       goto out;
491     }
492
493   old_addr0 = ip0->dst_address.as_u32;
494   new_addr0 = ip0->dst_address.as_u32 = sm0.addr.as_u32;
495   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
496
497   sum0 = ip0->checksum;
498   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
499                          dst_address /* changed member */);
500   ip0->checksum = ip_csum_fold (sum0);
501
502   if (!icmp_is_error_message (icmp0))
503     {
504       new_id0 = sm0.port;
505       if (PREDICT_FALSE(new_id0 != echo0->identifier))
506         {
507           old_id0 = echo0->identifier;
508           new_id0 = sm0.port;
509           echo0->identifier = new_id0;
510
511           sum0 = icmp0->checksum;
512           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
513                                  identifier /* changed member */);
514           icmp0->checksum = ip_csum_fold (sum0);
515         }
516     }
517   else
518     {
519       inner_ip0 = (ip4_header_t *)(echo0+1);
520       l4_header = ip4_next_header (inner_ip0);
521
522       if (!ip4_header_checksum_is_valid (inner_ip0))
523         {
524           next0 = SNAT_OUT2IN_NEXT_DROP;
525           goto out;
526         }
527
528       old_addr0 = inner_ip0->src_address.as_u32;
529       inner_ip0->src_address = sm0.addr;
530       new_addr0 = inner_ip0->src_address.as_u32;
531
532       sum0 = icmp0->checksum;
533       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
534                              src_address /* changed member */);
535       icmp0->checksum = ip_csum_fold (sum0);
536
537       switch (protocol)
538         {
539         case SNAT_PROTOCOL_ICMP:
540           inner_icmp0 = (icmp46_header_t*)l4_header;
541           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
542
543           old_id0 = inner_echo0->identifier;
544           new_id0 = sm0.port;
545           inner_echo0->identifier = new_id0;
546
547           sum0 = icmp0->checksum;
548           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
549                                  identifier);
550           icmp0->checksum = ip_csum_fold (sum0);
551           break;
552         case SNAT_PROTOCOL_UDP:
553         case SNAT_PROTOCOL_TCP:
554           old_id0 = ((tcp_udp_header_t*)l4_header)->src_port;
555           new_id0 = sm0.port;
556           ((tcp_udp_header_t*)l4_header)->src_port = new_id0;
557
558           sum0 = icmp0->checksum;
559           sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
560                                  src_port);
561           icmp0->checksum = ip_csum_fold (sum0);
562           break;
563         default:
564           ASSERT(0);
565         }
566     }
567
568 out:
569   return next0;
570 }
571
572
573 static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
574                                          vlib_buffer_t * b0,
575                                          ip4_header_t * ip0,
576                                          icmp46_header_t * icmp0,
577                                          u32 sw_if_index0,
578                                          u32 rx_fib_index0,
579                                          vlib_node_runtime_t * node,
580                                          u32 next0, f64 now,
581                                          u32 thread_index,
582                                          snat_session_t ** p_s0)
583 {
584   next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
585                       next0, thread_index, p_s0, 0);
586   snat_session_t * s0 = *p_s0;
587   if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
588     {
589       /* Accounting */
590       s0->last_heard = now;
591       s0->total_pkts++;
592       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
593       /* Per-user LRU list maintenance */
594       clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
595                          s0->per_user_index);
596       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
597                           s0->per_user_list_head_index,
598                           s0->per_user_index);
599     }
600   return next0;
601 }
602
603 static snat_session_t *
604 snat_out2in_unknown_proto (snat_main_t *sm,
605                            vlib_buffer_t * b,
606                            ip4_header_t * ip,
607                            u32 rx_fib_index,
608                            u32 thread_index,
609                            f64 now,
610                            vlib_main_t * vm,
611                            vlib_node_runtime_t * node)
612 {
613   clib_bihash_kv_8_8_t kv, value;
614   clib_bihash_kv_16_8_t s_kv, s_value;
615   snat_static_mapping_t *m;
616   snat_session_key_t m_key;
617   u32 old_addr, new_addr;
618   ip_csum_t sum;
619   nat_ed_ses_key_t key;
620   snat_session_t * s;
621   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
622   snat_user_t *u;
623
624   old_addr = ip->dst_address.as_u32;
625
626   key.l_addr = ip->dst_address;
627   key.r_addr = ip->src_address;
628   key.fib_index = rx_fib_index;
629   key.proto = ip->protocol;
630   key.r_port = 0;
631   key.l_port = 0;
632   s_kv.key[0] = key.as_u64[0];
633   s_kv.key[1] = key.as_u64[1];
634
635   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
636     {
637       s = pool_elt_at_index (tsm->sessions, s_value.value);
638       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
639     }
640   else
641     {
642       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
643         {
644           b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
645           return 0;
646         }
647
648       m_key.addr = ip->dst_address;
649       m_key.port = 0;
650       m_key.protocol = 0;
651       m_key.fib_index = rx_fib_index;
652       kv.key = m_key.as_u64;
653       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
654         {
655           b->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
656           return 0;
657         }
658
659       m = pool_elt_at_index (sm->static_mappings, value.value);
660
661       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
662
663       u = nat_user_get_or_create (sm, &ip->src_address, m->fib_index,
664                                   thread_index);
665       if (!u)
666         {
667           clib_warning ("create NAT user failed");
668           return 0;
669         }
670
671       /* Create a new session */
672       s = nat_session_alloc_or_recycle (sm, u, thread_index);
673       if (!s)
674         {
675           clib_warning ("create NAT session failed");
676           return 0;
677         }
678
679       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
680       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
681       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
682       s->outside_address_index = ~0;
683       s->out2in.addr.as_u32 = old_addr;
684       s->out2in.fib_index = rx_fib_index;
685       s->in2out.addr.as_u32 = new_addr;
686       s->in2out.fib_index = m->fib_index;
687       s->in2out.port = s->out2in.port = ip->protocol;
688       u->nstaticsessions++;
689
690       /* Add to lookup tables */
691       s_kv.value = s - tsm->sessions;
692       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
693         clib_warning ("out2in key add failed");
694
695       key.l_addr = ip->dst_address;
696       key.fib_index = m->fib_index;
697       s_kv.key[0] = key.as_u64[0];
698       s_kv.key[1] = key.as_u64[1];
699       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
700         clib_warning ("in2out key add failed");
701    }
702
703   /* Update IP checksum */
704   sum = ip->checksum;
705   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
706   ip->checksum = ip_csum_fold (sum);
707
708   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
709
710   /* Accounting */
711   s->last_heard = now;
712   s->total_pkts++;
713   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
714   /* Per-user LRU list maintenance */
715   clib_dlist_remove (tsm->list_pool, s->per_user_index);
716   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
717                       s->per_user_index);
718
719   return s;
720 }
721
722 static snat_session_t *
723 snat_out2in_lb (snat_main_t *sm,
724                 vlib_buffer_t * b,
725                 ip4_header_t * ip,
726                 u32 rx_fib_index,
727                 u32 thread_index,
728                 f64 now,
729                 vlib_main_t * vm,
730                 vlib_node_runtime_t * node)
731 {
732   nat_ed_ses_key_t key;
733   clib_bihash_kv_16_8_t s_kv, s_value;
734   udp_header_t *udp = ip4_next_header (ip);
735   tcp_header_t *tcp = (tcp_header_t *) udp;
736   snat_session_t *s = 0;
737   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
738   snat_session_key_t e_key, l_key;
739   u32 old_addr, new_addr;
740   u32 proto = ip_proto_to_snat_proto (ip->protocol);
741   u16 new_port, old_port;
742   ip_csum_t sum;
743   snat_user_t *u;
744   u32 address_index;
745   snat_session_key_t eh_key;
746   u8 twice_nat;
747
748   old_addr = ip->dst_address.as_u32;
749
750   key.l_addr = ip->dst_address;
751   key.r_addr = ip->src_address;
752   key.fib_index = rx_fib_index;
753   key.proto = ip->protocol;
754   key.r_port = udp->src_port;
755   key.l_port = udp->dst_port;
756   s_kv.key[0] = key.as_u64[0];
757   s_kv.key[1] = key.as_u64[1];
758
759   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
760     {
761       s = pool_elt_at_index (tsm->sessions, s_value.value);
762     }
763   else
764     {
765       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
766         {
767           b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
768           return 0;
769         }
770
771       e_key.addr = ip->dst_address;
772       e_key.port = udp->dst_port;
773       e_key.protocol = proto;
774       e_key.fib_index = rx_fib_index;
775       if (snat_static_mapping_match(sm, e_key, &l_key, 1, 0, &twice_nat))
776         return 0;
777
778       u = nat_user_get_or_create (sm, &l_key.addr, l_key.fib_index,
779                                   thread_index);
780       if (!u)
781       {
782         clib_warning ("create NAT user failed");
783         return 0;
784       }
785
786       s = nat_session_alloc_or_recycle (sm, u, thread_index);
787       if (!s)
788         {
789           clib_warning ("create NAT session failed");
790           return 0;
791         }
792
793       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
794       s->ext_host_port = udp->src_port;
795       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
796       s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
797       s->outside_address_index = ~0;
798       s->out2in = e_key;
799       s->in2out = l_key;
800       u->nstaticsessions++;
801
802       /* Add to lookup tables */
803       s_kv.value = s - tsm->sessions;
804       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
805         clib_warning ("out2in-ed key add failed");
806
807       if (twice_nat)
808         {
809           eh_key.protocol = proto;
810           if (snat_alloc_outside_address_and_port (sm->twice_nat_addresses, 0,
811                                                    thread_index, &eh_key,
812                                                    &address_index,
813                                                    sm->port_per_thread,
814                                                    sm->per_thread_data[thread_index].snat_thread_index))
815             {
816               b->error = node->errors[SNAT_OUT2IN_ERROR_OUT_OF_PORTS];
817               return 0;
818             }
819           key.r_addr.as_u32 = s->ext_host_nat_addr.as_u32 = eh_key.addr.as_u32;
820           key.r_port = s->ext_host_nat_port = eh_key.port;
821           s->flags |= SNAT_SESSION_FLAG_TWICE_NAT;
822         }
823       key.l_addr = l_key.addr;
824       key.fib_index = l_key.fib_index;
825       key.l_port = l_key.port;
826       s_kv.key[0] = key.as_u64[0];
827       s_kv.key[1] = key.as_u64[1];
828       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
829         clib_warning ("in2out-ed key add failed");
830     }
831
832   new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
833
834   /* Update IP checksum */
835   sum = ip->checksum;
836   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
837   if (is_twice_nat_session (s))
838     sum = ip_csum_update (sum, ip->src_address.as_u32,
839                           s->ext_host_nat_addr.as_u32, ip4_header_t,
840                           src_address);
841   ip->checksum = ip_csum_fold (sum);
842
843   if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
844     {
845       old_port = tcp->dst_port;
846       tcp->dst_port = s->in2out.port;
847       new_port = tcp->dst_port;
848
849       sum = tcp->checksum;
850       sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
851       sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
852       if (is_twice_nat_session (s))
853         {
854           sum = ip_csum_update (sum, ip->src_address.as_u32,
855                                 s->ext_host_nat_addr.as_u32, ip4_header_t,
856                                 dst_address);
857           sum = ip_csum_update (sum, tcp->src_port, s->ext_host_nat_port,
858                                 ip4_header_t, length);
859           tcp->src_port = s->ext_host_nat_port;
860           ip->src_address.as_u32 = s->ext_host_nat_addr.as_u32;
861         }
862       tcp->checksum = ip_csum_fold(sum);
863     }
864   else
865     {
866       udp->dst_port = s->in2out.port;
867       if (is_twice_nat_session (s))
868         {
869           udp->src_port = s->ext_host_nat_port;
870           ip->src_address.as_u32 = s->ext_host_nat_addr.as_u32;
871         }
872       udp->checksum = 0;
873     }
874
875   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
876
877   /* Accounting */
878   s->last_heard = now;
879   s->total_pkts++;
880   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
881   /* Per-user LRU list maintenance */
882   clib_dlist_remove (tsm->list_pool, s->per_user_index);
883   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
884                       s->per_user_index);
885
886   return s;
887 }
888
/*
 * Node function registered for the "nat44-out2in" graph node.
 *
 * Walks every buffer of the frame (two at a time while at least four remain,
 * then one at a time) and, per packet, with SNAT_OUT2IN_NEXT_LOOKUP as the
 * default disposition:
 *   - TTL == 1: sets an ICMP time-exceeded error on the buffer and selects
 *     SNAT_OUT2IN_NEXT_ICMP_ERROR;
 *   - protocol for which ip_proto_to_snat_proto() yields ~0: calls
 *     snat_out2in_unknown_proto() and drops when it returns no session;
 *   - ICMP: calls icmp_out2in_slow_path(), which supplies the next index;
 *   - IP fragments: selects SNAT_OUT2IN_NEXT_REASS;
 *   - otherwise: searches the per-thread out2in hash with a key made of the
 *     destination address, destination port, protocol and RX FIB index.
 *     On a miss it tries snat_static_mapping_match() and then
 *     create_session_for_static_mapping(); on a hit whose value is ~0ULL it
 *     calls snat_out2in_lb(); any other hit is used as an index into the
 *     per-thread session pool.
 * For a session found or created on that last path the destination address
 * and port are rewritten to the session's in2out values, the IP (and TCP)
 * checksums are updated, the UDP checksum is zeroed, the TX FIB index is set
 * and the session's accounting and per-user LRU position are refreshed.
 *
 * Packets whose disposition is not SNAT_OUT2IN_NEXT_DROP are added to the
 * SNAT_OUT2IN_ERROR_OUT2IN_PACKETS counter.
 *
 * Returns frame->n_vectors.
 */
889 static uword
890 snat_out2in_node_fn (vlib_main_t * vm,
891                   vlib_node_runtime_t * node,
892                   vlib_frame_t * frame)
893 {
894   u32 n_left_from, * from, * to_next;
895   snat_out2in_next_t next_index;
896   u32 pkts_processed = 0;
897   snat_main_t * sm = &snat_main;
898   f64 now = vlib_time_now (vm);
899   u32 thread_index = vlib_get_thread_index ();
900
901   from = vlib_frame_vector_args (frame);
902   n_left_from = frame->n_vectors;
903   next_index = node->cached_next_index;
904
905   while (n_left_from > 0)
906     {
907       u32 n_left_to_next;
908
909       vlib_get_next_frame (vm, node, next_index,
910                            to_next, n_left_to_next);
911
          /* Dual loop: two buffers per iteration. Four must remain because
             from[2] and from[3] are prefetched for the following iteration. */
912       while (n_left_from >= 4 && n_left_to_next >= 2)
913         {
914           u32 bi0, bi1;
915           vlib_buffer_t * b0, * b1;
916           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
917           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
918           u32 sw_if_index0, sw_if_index1;
919           ip4_header_t * ip0, *ip1;
920           ip_csum_t sum0, sum1;
921           u32 new_addr0, old_addr0;
922           u16 new_port0, old_port0;
923           u32 new_addr1, old_addr1;
924           u16 new_port1, old_port1;
925           udp_header_t * udp0, * udp1;
926           tcp_header_t * tcp0, * tcp1;
927           icmp46_header_t * icmp0, * icmp1;
928           snat_session_key_t key0, key1, sm0, sm1;
929           u32 rx_fib_index0, rx_fib_index1;
930           u32 proto0, proto1;
931           snat_session_t * s0 = 0, * s1 = 0;
932           clib_bihash_kv_8_8_t kv0, kv1, value0, value1;
933
934           /* Prefetch next iteration. */
935           {
936             vlib_buffer_t * p2, * p3;
937
938             p2 = vlib_get_buffer (vm, from[2]);
939             p3 = vlib_get_buffer (vm, from[3]);
940
941             vlib_prefetch_buffer_header (p2, LOAD);
942             vlib_prefetch_buffer_header (p3, LOAD);
943
944             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
945             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
946           }
947
948           /* speculatively enqueue b0 and b1 to the current next frame */
949           to_next[0] = bi0 = from[0];
950           to_next[1] = bi1 = from[1];
951           from += 2;
952           to_next += 2;
953           n_left_from -= 2;
954           n_left_to_next -= 2;
955
956           b0 = vlib_get_buffer (vm, bi0);
957           b1 = vlib_get_buffer (vm, bi1);
958
959           vnet_buffer (b0)->snat.flags = 0;
960           vnet_buffer (b1)->snat.flags = 0;
961
              /* The UDP, TCP and ICMP pointers all alias the header that
                 follows the IPv4 header. */
962           ip0 = vlib_buffer_get_current (b0);
963           udp0 = ip4_next_header (ip0);
964           tcp0 = (tcp_header_t *) udp0;
965           icmp0 = (icmp46_header_t *) udp0;
966
967           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
968           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
969                                    sw_if_index0);
970
              /* TTL of 1: mark the buffer with an ICMP time-exceeded error
                 and send it to the ICMP error next node. */
971           if (PREDICT_FALSE(ip0->ttl == 1))
972             {
973               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
974               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
975                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
976                                            0);
977               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
978               goto trace0;
979             }
980
981           proto0 = ip_proto_to_snat_proto (ip0->protocol);
982
              /* No NAT protocol for this IP protocol: delegate to
                 snat_out2in_unknown_proto(); drop if it yields no session. */
983           if (PREDICT_FALSE (proto0 == ~0))
984             {
985               s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
986                                              thread_index, now, vm, node);
987               if (!s0)
988                 next0 = SNAT_OUT2IN_NEXT_DROP;
989               goto trace0;
990             }
991
              /* ICMP is handled entirely by the slow path, which returns the
                 next index and receives &s0 for the session. */
992           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
993             {
994               next0 = icmp_out2in_slow_path
995                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
996                  next0, now, thread_index, &s0);
997               goto trace0;
998             }
999
               /* Fragments are passed on untouched to the reassembly next
                  node. */
1000           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
1001             {
1002               next0 = SNAT_OUT2IN_NEXT_REASS;
1003               goto trace0;
1004             }
1005
               /* Session lookup key: destination address and port, NAT
                  protocol and RX FIB index. */
1006           key0.addr = ip0->dst_address;
1007           key0.port = udp0->dst_port;
1008           key0.protocol = proto0;
1009           key0.fib_index = rx_fib_index0;
1010
1011           kv0.key = key0.as_u64;
1012
1013           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1014                                       &kv0, &value0))
1015             {
1016               /* Try to match static mapping by external address and port,
1017                  destination address and port in packet */
1018               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
1019                 {
1020                   b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1021                   /*
1022                    * Send DHCP packets to the ipv4 stack, or we won't
1023                    * be able to use dhcp client on the outside interface
1024                    */
1025                   if (proto0 != SNAT_PROTOCOL_UDP
1026                       || (udp0->dst_port
1027                           != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1028                     next0 = SNAT_OUT2IN_NEXT_DROP;
1029                   goto trace0;
1030                 }
1031
1032               /* Create session initiated by host from external network */
1033               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1034                                                      thread_index);
1035               if (!s0)
1036                 {
1037                   next0 = SNAT_OUT2IN_NEXT_DROP;
1038                   goto trace0;
1039                 }
1040             }
1041           else
1042             {
                   /* A stored value of ~0ULL is not used as a session index;
                      such packets are handed to snat_out2in_lb() instead
                      (presumably a load-balancing mapping - confirm). */
1043               if (PREDICT_FALSE (value0.value == ~0ULL))
1044                 {
1045                   s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index,
1046                                       now, vm, node);
1047                   if (!s0)
1048                     next0 = SNAT_OUT2IN_NEXT_DROP;
1049                   goto trace0;
1050                 }
1051               else
1052                 {
1053                   s0 = pool_elt_at_index (
1054                     sm->per_thread_data[thread_index].sessions,
1055                     value0.value);
1056                 }
1057             }
1058
               /* Translate: rewrite the destination address to the session's
                  in2out address, set the TX FIB index and patch the IP header
                  checksum for the changed address. */
1059           old_addr0 = ip0->dst_address.as_u32;
1060           ip0->dst_address = s0->in2out.addr;
1061           new_addr0 = ip0->dst_address.as_u32;
1062           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1063
1064           sum0 = ip0->checksum;
1065           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1066                                  ip4_header_t,
1067                                  dst_address /* changed member */);
1068           ip0->checksum = ip_csum_fold (sum0);
1069
1070           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1071             {
                   /* TCP: rewrite the destination port and update the TCP
                      checksum for both the address and the port change. */
1072               old_port0 = tcp0->dst_port;
1073               tcp0->dst_port = s0->in2out.port;
1074               new_port0 = tcp0->dst_port;
1075
1076               sum0 = tcp0->checksum;
1077               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1078                                      ip4_header_t,
1079                                      dst_address /* changed member */);
1080
1081               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1082                                      ip4_header_t /* cheat */,
1083                                      length /* changed member */);
1084               tcp0->checksum = ip_csum_fold(sum0);
1085             }
1086           else
1087             {
                   /* Not TCP: rewrite the port through the UDP header and
                      zero the checksum instead of updating it.
                      NOTE(review): old_port0 is not read after this
                      assignment. */
1088               old_port0 = udp0->dst_port;
1089               udp0->dst_port = s0->in2out.port;
1090               udp0->checksum = 0;
1091             }
1092
1093           /* Accounting */
1094           s0->last_heard = now;
1095           s0->total_pkts++;
1096           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1097           /* Per-user LRU list maintenance */
1098           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1099                              s0->per_user_index);
1100           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1101                               s0->per_user_list_head_index,
1102                               s0->per_user_index);
1103         trace0:
1104
1105           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1106                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1107             {
1108               snat_out2in_trace_t *t =
1109                  vlib_add_trace (vm, node, b0, sizeof (*t));
1110               t->sw_if_index = sw_if_index0;
1111               t->next_index = next0;
1112               t->session_index = ~0;
1113               if (s0)
1114                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1115             }
1116
               /* Count every packet that is not sent to the drop node. */
1117           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1118
1119
               /* Second buffer of the pair: the same steps as for b0, using
                  the *1 variables and the trace1 label. */
1120           ip1 = vlib_buffer_get_current (b1);
1121           udp1 = ip4_next_header (ip1);
1122           tcp1 = (tcp_header_t *) udp1;
1123           icmp1 = (icmp46_header_t *) udp1;
1124
1125           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1126           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1127                                    sw_if_index1);
1128
1129           if (PREDICT_FALSE(ip1->ttl == 1))
1130             {
1131               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1132               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1133                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1134                                            0);
1135               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1136               goto trace1;
1137             }
1138
1139           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1140
1141           if (PREDICT_FALSE (proto1 == ~0))
1142             {
1143               s1 = snat_out2in_unknown_proto(sm, b1, ip1, rx_fib_index1,
1144                                              thread_index, now, vm, node);
1145               if (!s1)
1146                 next1 = SNAT_OUT2IN_NEXT_DROP;
1147               goto trace1;
1148             }
1149
1150           if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1151             {
1152               next1 = icmp_out2in_slow_path
1153                 (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1154                  next1, now, thread_index, &s1);
1155               goto trace1;
1156             }
1157
1158           if (PREDICT_FALSE (ip4_is_fragment (ip1)))
1159             {
1160               next1 = SNAT_OUT2IN_NEXT_REASS;
1161               goto trace1;
1162             }
1163
1164           key1.addr = ip1->dst_address;
1165           key1.port = udp1->dst_port;
1166           key1.protocol = proto1;
1167           key1.fib_index = rx_fib_index1;
1168
1169           kv1.key = key1.as_u64;
1170
1171           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1172                                       &kv1, &value1))
1173             {
1174               /* Try to match static mapping by external address and port,
1175                  destination address and port in packet */
1176               if (snat_static_mapping_match(sm, key1, &sm1, 1, 0, 0))
1177                 {
1178                   b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1179                   /*
1180                    * Send DHCP packets to the ipv4 stack, or we won't
1181                    * be able to use dhcp client on the outside interface
1182                    */
1183                   if (proto1 != SNAT_PROTOCOL_UDP
1184                       || (udp1->dst_port
1185                           != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1186                     next1 = SNAT_OUT2IN_NEXT_DROP;
1187                   goto trace1;
1188                 }
1189
1190               /* Create session initiated by host from external network */
1191               s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
1192                                                      thread_index);
1193               if (!s1)
1194                 {
1195                   next1 = SNAT_OUT2IN_NEXT_DROP;
1196                   goto trace1;
1197                 }
1198             }
1199           else
1200             {
1201               if (PREDICT_FALSE (value1.value == ~0ULL))
1202                 {
1203                   s1 = snat_out2in_lb(sm, b1, ip1, rx_fib_index1, thread_index,
1204                                       now, vm, node);
1205                   if (!s1)
1206                     next1 = SNAT_OUT2IN_NEXT_DROP;
1207                   goto trace1;
1208                 }
1209               else
1210                 {
1211                   s1 = pool_elt_at_index (
1212                     sm->per_thread_data[thread_index].sessions,
1213                     value1.value);
1214                 }
1215             }
1216
1217           old_addr1 = ip1->dst_address.as_u32;
1218           ip1->dst_address = s1->in2out.addr;
1219           new_addr1 = ip1->dst_address.as_u32;
1220           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index;
1221
1222           sum1 = ip1->checksum;
1223           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1224                                  ip4_header_t,
1225                                  dst_address /* changed member */);
1226           ip1->checksum = ip_csum_fold (sum1);
1227
1228           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1229             {
1230               old_port1 = tcp1->dst_port;
1231               tcp1->dst_port = s1->in2out.port;
1232               new_port1 = tcp1->dst_port;
1233
1234               sum1 = tcp1->checksum;
1235               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1236                                      ip4_header_t,
1237                                      dst_address /* changed member */);
1238
1239               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1240                                      ip4_header_t /* cheat */,
1241                                      length /* changed member */);
1242               tcp1->checksum = ip_csum_fold(sum1);
1243             }
1244           else
1245             {
                   /* NOTE(review): old_port1 is not read after this
                      assignment. */
1246               old_port1 = udp1->dst_port;
1247               udp1->dst_port = s1->in2out.port;
1248               udp1->checksum = 0;
1249             }
1250
1251           /* Accounting */
1252           s1->last_heard = now;
1253           s1->total_pkts++;
1254           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1255           /* Per-user LRU list maintenance */
1256           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1257                              s1->per_user_index);
1258           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1259                               s1->per_user_list_head_index,
1260                               s1->per_user_index);
1261         trace1:
1262
1263           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1264                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1265             {
1266               snat_out2in_trace_t *t =
1267                  vlib_add_trace (vm, node, b1, sizeof (*t));
1268               t->sw_if_index = sw_if_index1;
1269               t->next_index = next1;
1270               t->session_index = ~0;
1271               if (s1)
1272                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1273             }
1274
1275           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
1276
1277           /* verify speculative enqueues, maybe switch current next frame */
1278           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1279                                            to_next, n_left_to_next,
1280                                            bi0, bi1, next0, next1);
1281         }
1282
           /* Single loop: handles the remaining buffers one at a time. */
1283       while (n_left_from > 0 && n_left_to_next > 0)
1284         {
1285           u32 bi0;
1286           vlib_buffer_t * b0;
1287           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1288           u32 sw_if_index0;
1289           ip4_header_t * ip0;
1290           ip_csum_t sum0;
1291           u32 new_addr0, old_addr0;
1292           u16 new_port0, old_port0;
1293           udp_header_t * udp0;
1294           tcp_header_t * tcp0;
1295           icmp46_header_t * icmp0;
1296           snat_session_key_t key0, sm0;
1297           u32 rx_fib_index0;
1298           u32 proto0;
1299           snat_session_t * s0 = 0;
1300           clib_bihash_kv_8_8_t kv0, value0;
1301
1302           /* speculatively enqueue b0 to the current next frame */
1303           bi0 = from[0];
1304           to_next[0] = bi0;
1305           from += 1;
1306           to_next += 1;
1307           n_left_from -= 1;
1308           n_left_to_next -= 1;
1309
1310           b0 = vlib_get_buffer (vm, bi0);
1311
1312           vnet_buffer (b0)->snat.flags = 0;
1313
1314           ip0 = vlib_buffer_get_current (b0);
1315           udp0 = ip4_next_header (ip0);
1316           tcp0 = (tcp_header_t *) udp0;
1317           icmp0 = (icmp46_header_t *) udp0;
1318
1319           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1320           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1321                                    sw_if_index0);
1322
               /* NOTE(review): here the unknown-protocol check runs before
                  the TTL check, whereas the dual loop above tests TTL first;
                  an unknown-protocol packet with TTL 1 is therefore treated
                  differently by the two loops - confirm this is intended. */
1323           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1324
1325           if (PREDICT_FALSE (proto0 == ~0))
1326             {
1327               s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
1328                                              thread_index, now, vm, node);
1329               if (!s0)
1330                 next0 = SNAT_OUT2IN_NEXT_DROP;
1331               goto trace00;
1332             }
1333
1334           if (PREDICT_FALSE(ip0->ttl == 1))
1335             {
1336               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1337               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1338                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1339                                            0);
1340               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1341               goto trace00;
1342             }
1343
1344           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1345             {
1346               next0 = icmp_out2in_slow_path
1347                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1348                  next0, now, thread_index, &s0);
1349               goto trace00;
1350             }
1351
1352           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
1353             {
1354               next0 = SNAT_OUT2IN_NEXT_REASS;
1355               goto trace00;
1356             }
1357
1358           key0.addr = ip0->dst_address;
1359           key0.port = udp0->dst_port;
1360           key0.protocol = proto0;
1361           key0.fib_index = rx_fib_index0;
1362
1363           kv0.key = key0.as_u64;
1364
1365           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1366                                       &kv0, &value0))
1367             {
1368               /* Try to match static mapping by external address and port,
1369                  destination address and port in packet */
1370               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
1371                 {
1372                   b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1373                   /*
1374                    * Send DHCP packets to the ipv4 stack, or we won't
1375                    * be able to use dhcp client on the outside interface
1376                    */
1377                   if (proto0 != SNAT_PROTOCOL_UDP
1378                       || (udp0->dst_port
1379                           != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1380
1381                     next0 = SNAT_OUT2IN_NEXT_DROP;
1382                   goto trace00;
1383                 }
1384
1385               /* Create session initiated by host from external network */
1386               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1387                                                      thread_index);
1388               if (!s0)
1389                 {
1390                   next0 = SNAT_OUT2IN_NEXT_DROP;
1391                   goto trace00;
1392                 }
1393             }
1394           else
1395             {
1396               if (PREDICT_FALSE (value0.value == ~0ULL))
1397                 {
1398                   s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index,
1399                                       now, vm, node);
1400                   if (!s0)
1401                     next0 = SNAT_OUT2IN_NEXT_DROP;
1402                   goto trace00;
1403                 }
1404               else
1405                 {
1406                   s0 = pool_elt_at_index (
1407                     sm->per_thread_data[thread_index].sessions,
1408                     value0.value);
1409                 }
1410             }
1411
1412           old_addr0 = ip0->dst_address.as_u32;
1413           ip0->dst_address = s0->in2out.addr;
1414           new_addr0 = ip0->dst_address.as_u32;
1415           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1416
1417           sum0 = ip0->checksum;
1418           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1419                                  ip4_header_t,
1420                                  dst_address /* changed member */);
1421           ip0->checksum = ip_csum_fold (sum0);
1422
1423           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1424             {
1425               old_port0 = tcp0->dst_port;
1426               tcp0->dst_port = s0->in2out.port;
1427               new_port0 = tcp0->dst_port;
1428
1429               sum0 = tcp0->checksum;
1430               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1431                                      ip4_header_t,
1432                                      dst_address /* changed member */);
1433
1434               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1435                                      ip4_header_t /* cheat */,
1436                                      length /* changed member */);
1437               tcp0->checksum = ip_csum_fold(sum0);
1438             }
1439           else
1440             {
                   /* NOTE(review): old_port0 is not read after this
                      assignment. */
1441               old_port0 = udp0->dst_port;
1442               udp0->dst_port = s0->in2out.port;
1443               udp0->checksum = 0;
1444             }
1445
1446           /* Accounting */
1447           s0->last_heard = now;
1448           s0->total_pkts++;
1449           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1450           /* Per-user LRU list maintenance */
1451           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1452                              s0->per_user_index);
1453           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1454                               s0->per_user_list_head_index,
1455                               s0->per_user_index);
1456         trace00:
1457
1458           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1459                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1460             {
1461               snat_out2in_trace_t *t =
1462                  vlib_add_trace (vm, node, b0, sizeof (*t));
1463               t->sw_if_index = sw_if_index0;
1464               t->next_index = next0;
1465               t->session_index = ~0;
1466               if (s0)
1467                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1468             }
1469
1470           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1471
1472           /* verify speculative enqueue, maybe switch current next frame */
1473           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1474                                            to_next, n_left_to_next,
1475                                            bi0, next0);
1476         }
1477
1478       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1479     }
1480
       /* Publish the number of packets that were not dropped by this call. */
1481   vlib_node_increment_counter (vm, snat_out2in_node.index,
1482                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1483                                pkts_processed);
1484   return frame->n_vectors;
1485 }
1486
/*
 * Registration of the "nat44-out2in" graph node.
 *
 * The node is an internal node whose function is snat_out2in_node_fn and
 * whose frame elements are u32-sized. Traces are rendered by
 * format_snat_out2in_trace, error counters are named by
 * snat_out2in_error_strings, and the runtime data area is sized for a
 * snat_runtime_t. The next-node table maps each SNAT_OUT2IN_NEXT_* value to
 * the name of the graph node that receives the packet.
 */
1487 VLIB_REGISTER_NODE (snat_out2in_node) = {
1488   .function = snat_out2in_node_fn,
1489   .name = "nat44-out2in",
1490   .vector_size = sizeof (u32),
1491   .format_trace = format_snat_out2in_trace,
1492   .type = VLIB_NODE_TYPE_INTERNAL,
1493
1494   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1495   .error_strings = snat_out2in_error_strings,
1496
1497   .runtime_data_bytes = sizeof (snat_runtime_t),
1498
1499   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1500
1501   /* edit / add dispositions here */
1502   .next_nodes = {
1503     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1504     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1505     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1506     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1507   },
1508 };
/* NOTE(review): presumably emits per-CPU-architecture variants of the node
   function - confirm against the vlib macro definition. */
1509 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
1510
1511 static uword
1512 nat44_out2in_reass_node_fn (vlib_main_t * vm,
1513                             vlib_node_runtime_t * node,
1514                             vlib_frame_t * frame)
1515 {
1516   u32 n_left_from, *from, *to_next;
1517   snat_out2in_next_t next_index;
1518   u32 pkts_processed = 0;
1519   snat_main_t *sm = &snat_main;
1520   f64 now = vlib_time_now (vm);
1521   u32 thread_index = vlib_get_thread_index ();
1522   snat_main_per_thread_data_t *per_thread_data =
1523     &sm->per_thread_data[thread_index];
1524   u32 *fragments_to_drop = 0;
1525   u32 *fragments_to_loopback = 0;
1526
1527   from = vlib_frame_vector_args (frame);
1528   n_left_from = frame->n_vectors;
1529   next_index = node->cached_next_index;
1530
1531   while (n_left_from > 0)
1532     {
1533       u32 n_left_to_next;
1534
1535       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1536
1537       while (n_left_from > 0 && n_left_to_next > 0)
1538        {
1539           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
1540           vlib_buffer_t *b0;
1541           u32 next0;
1542           u8 cached0 = 0;
1543           ip4_header_t *ip0;
1544           nat_reass_ip4_t *reass0;
1545           udp_header_t * udp0;
1546           tcp_header_t * tcp0;
1547           snat_session_key_t key0, sm0;
1548           clib_bihash_kv_8_8_t kv0, value0;
1549           snat_session_t * s0 = 0;
1550           u16 old_port0, new_port0;
1551           ip_csum_t sum0;
1552
1553           /* speculatively enqueue b0 to the current next frame */
1554           bi0 = from[0];
1555           to_next[0] = bi0;
1556           from += 1;
1557           to_next += 1;
1558           n_left_from -= 1;
1559           n_left_to_next -= 1;
1560
1561           b0 = vlib_get_buffer (vm, bi0);
1562           next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1563
1564           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1565           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1566                                                                sw_if_index0);
1567
1568           if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
1569             {
1570               next0 = SNAT_OUT2IN_NEXT_DROP;
1571               b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
1572               goto trace0;
1573             }
1574
1575           ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
1576           udp0 = ip4_next_header (ip0);
1577           tcp0 = (tcp_header_t *) udp0;
1578           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1579
1580           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
1581                                                  ip0->dst_address,
1582                                                  ip0->fragment_id,
1583                                                  ip0->protocol,
1584                                                  1,
1585                                                  &fragments_to_drop);
1586
1587           if (PREDICT_FALSE (!reass0))
1588             {
1589               next0 = SNAT_OUT2IN_NEXT_DROP;
1590               b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_REASS];
1591               goto trace0;
1592             }
1593
1594           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1595             {
1596               key0.addr = ip0->dst_address;
1597               key0.port = udp0->dst_port;
1598               key0.protocol = proto0;
1599               key0.fib_index = rx_fib_index0;
1600               kv0.key = key0.as_u64;
1601
1602               if (clib_bihash_search_8_8 (&per_thread_data->out2in, &kv0, &value0))
1603                 {
1604                   /* Try to match static mapping by external address and port,
1605                      destination address and port in packet */
1606                   if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
1607                     {
1608                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1609                       /*
1610                        * Send DHCP packets to the ipv4 stack, or we won't
1611                        * be able to use dhcp client on the outside interface
1612                        */
1613                       if (proto0 != SNAT_PROTOCOL_UDP
1614                           || (udp0->dst_port
1615                               != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1616
1617                         next0 = SNAT_OUT2IN_NEXT_DROP;
1618                       goto trace0;
1619                     }
1620
1621                   /* Create session initiated by host from external network */
1622                   s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1623                                                          thread_index);
1624                   if (!s0)
1625                     {
1626                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1627                       next0 = SNAT_OUT2IN_NEXT_DROP;
1628                       goto trace0;
1629                     }
1630                   reass0->sess_index = s0 - per_thread_data->sessions;
1631                   reass0->thread_index = thread_index;
1632                 }
1633               else
1634                 {
1635                   s0 = pool_elt_at_index (per_thread_data->sessions,
1636                                           value0.value);
1637                   reass0->sess_index = value0.value;
1638                 }
1639               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
1640             }
1641           else
1642             {
1643               if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
1644                 {
1645                   if (nat_ip4_reass_add_fragment (reass0, bi0))
1646                     {
1647                       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_FRAG];
1648                       next0 = SNAT_OUT2IN_NEXT_DROP;
1649                       goto trace0;
1650                     }
1651                   cached0 = 1;
1652                   goto trace0;
1653                 }
1654               s0 = pool_elt_at_index (per_thread_data->sessions,
1655                                       reass0->sess_index);
1656             }
1657
1658           old_addr0 = ip0->dst_address.as_u32;
1659           ip0->dst_address = s0->in2out.addr;
1660           new_addr0 = ip0->dst_address.as_u32;
1661           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1662
1663           sum0 = ip0->checksum;
1664           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1665                                  ip4_header_t,
1666                                  dst_address /* changed member */);
1667           ip0->checksum = ip_csum_fold (sum0);
1668
1669           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1670             {
1671               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1672                 {
1673                   old_port0 = tcp0->dst_port;
1674                   tcp0->dst_port = s0->in2out.port;
1675                   new_port0 = tcp0->dst_port;
1676
1677                   sum0 = tcp0->checksum;
1678                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1679                                          ip4_header_t,
1680                                          dst_address /* changed member */);
1681
1682                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
1683                                          ip4_header_t /* cheat */,
1684                                          length /* changed member */);
1685                   tcp0->checksum = ip_csum_fold(sum0);
1686                 }
1687               else
1688                 {
1689                   old_port0 = udp0->dst_port;
1690                   udp0->dst_port = s0->in2out.port;
1691                   udp0->checksum = 0;
1692                 }
1693             }
1694
1695           /* Accounting */
1696           s0->last_heard = now;
1697           s0->total_pkts++;
1698           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1699           /* Per-user LRU list maintenance */
1700           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1701                              s0->per_user_index);
1702           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1703                               s0->per_user_list_head_index,
1704                               s0->per_user_index);
1705
1706         trace0:
1707           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1708                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1709             {
1710               nat44_out2in_reass_trace_t *t =
1711                  vlib_add_trace (vm, node, b0, sizeof (*t));
1712               t->cached = cached0;
1713               t->sw_if_index = sw_if_index0;
1714               t->next_index = next0;
1715             }
1716
1717           if (cached0)
1718             {
1719               n_left_to_next++;
1720               to_next--;
1721             }
1722           else
1723             {
1724               pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1725
1726               /* verify speculative enqueue, maybe switch current next frame */
1727               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1728                                                to_next, n_left_to_next,
1729                                                bi0, next0);
1730             }
1731
1732           if (n_left_from == 0 && vec_len (fragments_to_loopback))
1733             {
1734               from = vlib_frame_vector_args (frame);
1735               u32 len = vec_len (fragments_to_loopback);
1736               if (len <= VLIB_FRAME_SIZE)
1737                 {
1738                   clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
1739                   n_left_from = len;
1740                   vec_reset_length (fragments_to_loopback);
1741                 }
1742               else
1743                 {
1744                   clib_memcpy (from,
1745                                fragments_to_loopback + (len - VLIB_FRAME_SIZE),
1746                                sizeof (u32) * VLIB_FRAME_SIZE);
1747                   n_left_from = VLIB_FRAME_SIZE;
1748                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
1749                 }
1750             }
1751        }
1752
1753       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1754     }
1755
1756   vlib_node_increment_counter (vm, nat44_out2in_reass_node.index,
1757                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1758                                pkts_processed);
1759
1760   nat_send_all_to_node (vm, fragments_to_drop, node,
1761                         &node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT],
1762                         SNAT_OUT2IN_NEXT_DROP);
1763
1764   vec_free (fragments_to_drop);
1765   vec_free (fragments_to_loopback);
1766   return frame->n_vectors;
1767 }
1768
1769 VLIB_REGISTER_NODE (nat44_out2in_reass_node) = {
1770   .function = nat44_out2in_reass_node_fn,
1771   .name = "nat44-out2in-reass",
1772   .vector_size = sizeof (u32),
1773   .format_trace = format_nat44_out2in_reass_trace,
1774   .type = VLIB_NODE_TYPE_INTERNAL,
1775
1776   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1777   .error_strings = snat_out2in_error_strings,
1778
1779   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1780
1781   /* edit / add dispositions here */
1782   .next_nodes = {
1783     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1784     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1785     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1786     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1787   },
1788 };
1789 VLIB_NODE_FUNCTION_MULTIARCH (nat44_out2in_reass_node,
1790                               nat44_out2in_reass_node_fn);
1791
1792 /**************************/
1793 /*** deterministic mode ***/
1794 /**************************/
1795 static uword
1796 snat_det_out2in_node_fn (vlib_main_t * vm,
1797                          vlib_node_runtime_t * node,
1798                          vlib_frame_t * frame)
1799 {
1800   u32 n_left_from, * from, * to_next;
1801   snat_out2in_next_t next_index;
1802   u32 pkts_processed = 0;
1803   snat_main_t * sm = &snat_main;
1804   u32 thread_index = vlib_get_thread_index ();
1805
1806   from = vlib_frame_vector_args (frame);
1807   n_left_from = frame->n_vectors;
1808   next_index = node->cached_next_index;
1809
1810   while (n_left_from > 0)
1811     {
1812       u32 n_left_to_next;
1813
1814       vlib_get_next_frame (vm, node, next_index,
1815                            to_next, n_left_to_next);
1816
1817       while (n_left_from >= 4 && n_left_to_next >= 2)
1818         {
1819           u32 bi0, bi1;
1820           vlib_buffer_t * b0, * b1;
1821           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1822           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
1823           u32 sw_if_index0, sw_if_index1;
1824           ip4_header_t * ip0, * ip1;
1825           ip_csum_t sum0, sum1;
1826           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
1827           u16 new_port0, old_port0, old_port1, new_port1;
1828           udp_header_t * udp0, * udp1;
1829           tcp_header_t * tcp0, * tcp1;
1830           u32 proto0, proto1;
1831           snat_det_out_key_t key0, key1;
1832           snat_det_map_t * dm0, * dm1;
1833           snat_det_session_t * ses0 = 0, * ses1 = 0;
1834           u32 rx_fib_index0, rx_fib_index1;
1835           icmp46_header_t * icmp0, * icmp1;
1836
1837           /* Prefetch next iteration. */
1838           {
1839             vlib_buffer_t * p2, * p3;
1840
1841             p2 = vlib_get_buffer (vm, from[2]);
1842             p3 = vlib_get_buffer (vm, from[3]);
1843
1844             vlib_prefetch_buffer_header (p2, LOAD);
1845             vlib_prefetch_buffer_header (p3, LOAD);
1846
1847             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1848             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1849           }
1850
1851           /* speculatively enqueue b0 and b1 to the current next frame */
1852           to_next[0] = bi0 = from[0];
1853           to_next[1] = bi1 = from[1];
1854           from += 2;
1855           to_next += 2;
1856           n_left_from -= 2;
1857           n_left_to_next -= 2;
1858
1859           b0 = vlib_get_buffer (vm, bi0);
1860           b1 = vlib_get_buffer (vm, bi1);
1861
1862           ip0 = vlib_buffer_get_current (b0);
1863           udp0 = ip4_next_header (ip0);
1864           tcp0 = (tcp_header_t *) udp0;
1865
1866           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1867
1868           if (PREDICT_FALSE(ip0->ttl == 1))
1869             {
1870               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1871               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1872                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1873                                            0);
1874               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1875               goto trace0;
1876             }
1877
1878           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1879
1880           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1881             {
1882               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1883               icmp0 = (icmp46_header_t *) udp0;
1884
1885               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
1886                                   rx_fib_index0, node, next0, thread_index,
1887                                   &ses0, &dm0);
1888               goto trace0;
1889             }
1890
1891           key0.ext_host_addr = ip0->src_address;
1892           key0.ext_host_port = tcp0->src;
1893           key0.out_port = tcp0->dst;
1894
1895           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
1896           if (PREDICT_FALSE(!dm0))
1897             {
1898               clib_warning("unknown dst address:  %U",
1899                            format_ip4_address, &ip0->dst_address);
1900               next0 = SNAT_OUT2IN_NEXT_DROP;
1901               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1902               goto trace0;
1903             }
1904
1905           snat_det_reverse(dm0, &ip0->dst_address,
1906                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
1907
1908           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
1909           if (PREDICT_FALSE(!ses0))
1910             {
1911               clib_warning("no match src %U:%d dst %U:%d for user %U",
1912                            format_ip4_address, &ip0->src_address,
1913                            clib_net_to_host_u16 (tcp0->src),
1914                            format_ip4_address, &ip0->dst_address,
1915                            clib_net_to_host_u16 (tcp0->dst),
1916                            format_ip4_address, &new_addr0);
1917               next0 = SNAT_OUT2IN_NEXT_DROP;
1918               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1919               goto trace0;
1920             }
1921           new_port0 = ses0->in_port;
1922
1923           old_addr0 = ip0->dst_address;
1924           ip0->dst_address = new_addr0;
1925           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
1926
1927           sum0 = ip0->checksum;
1928           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1929                                  ip4_header_t,
1930                                  dst_address /* changed member */);
1931           ip0->checksum = ip_csum_fold (sum0);
1932
1933           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1934             {
1935               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1936                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
1937               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
1938                 snat_det_ses_close(dm0, ses0);
1939
1940               old_port0 = tcp0->dst;
1941               tcp0->dst = new_port0;
1942
1943               sum0 = tcp0->checksum;
1944               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1945                                      ip4_header_t,
1946                                      dst_address /* changed member */);
1947
1948               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1949                                      ip4_header_t /* cheat */,
1950                                      length /* changed member */);
1951               tcp0->checksum = ip_csum_fold(sum0);
1952             }
1953           else
1954             {
1955               old_port0 = udp0->dst_port;
1956               udp0->dst_port = new_port0;
1957               udp0->checksum = 0;
1958             }
1959
1960         trace0:
1961
1962           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1963                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1964             {
1965               snat_out2in_trace_t *t =
1966                  vlib_add_trace (vm, node, b0, sizeof (*t));
1967               t->sw_if_index = sw_if_index0;
1968               t->next_index = next0;
1969               t->session_index = ~0;
1970               if (ses0)
1971                 t->session_index = ses0 - dm0->sessions;
1972             }
1973
1974           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1975
1976           b1 = vlib_get_buffer (vm, bi1);
1977
1978           ip1 = vlib_buffer_get_current (b1);
1979           udp1 = ip4_next_header (ip1);
1980           tcp1 = (tcp_header_t *) udp1;
1981
1982           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1983
1984           if (PREDICT_FALSE(ip1->ttl == 1))
1985             {
1986               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1987               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1988                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1989                                            0);
1990               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1991               goto trace1;
1992             }
1993
1994           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1995
1996           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
1997             {
1998               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
1999               icmp1 = (icmp46_header_t *) udp1;
2000
2001               next1 = icmp_out2in(sm, b1, ip1, icmp1, sw_if_index1,
2002                                   rx_fib_index1, node, next1, thread_index,
2003                                   &ses1, &dm1);
2004               goto trace1;
2005             }
2006
2007           key1.ext_host_addr = ip1->src_address;
2008           key1.ext_host_port = tcp1->src;
2009           key1.out_port = tcp1->dst;
2010
2011           dm1 = snat_det_map_by_out(sm, &ip1->dst_address);
2012           if (PREDICT_FALSE(!dm1))
2013             {
2014               clib_warning("unknown dst address:  %U",
2015                            format_ip4_address, &ip1->dst_address);
2016               next1 = SNAT_OUT2IN_NEXT_DROP;
2017               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2018               goto trace1;
2019             }
2020
2021           snat_det_reverse(dm1, &ip1->dst_address,
2022                            clib_net_to_host_u16(tcp1->dst), &new_addr1);
2023
2024           ses1 = snat_det_get_ses_by_out (dm1, &new_addr1, key1.as_u64);
2025           if (PREDICT_FALSE(!ses1))
2026             {
2027               clib_warning("no match src %U:%d dst %U:%d for user %U",
2028                            format_ip4_address, &ip1->src_address,
2029                            clib_net_to_host_u16 (tcp1->src),
2030                            format_ip4_address, &ip1->dst_address,
2031                            clib_net_to_host_u16 (tcp1->dst),
2032                            format_ip4_address, &new_addr1);
2033               next1 = SNAT_OUT2IN_NEXT_DROP;
2034               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2035               goto trace1;
2036             }
2037           new_port1 = ses1->in_port;
2038
2039           old_addr1 = ip1->dst_address;
2040           ip1->dst_address = new_addr1;
2041           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2042
2043           sum1 = ip1->checksum;
2044           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2045                                  ip4_header_t,
2046                                  dst_address /* changed member */);
2047           ip1->checksum = ip_csum_fold (sum1);
2048
2049           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
2050             {
2051               if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
2052                 ses1->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2053               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_LAST_ACK)
2054                 snat_det_ses_close(dm1, ses1);
2055
2056               old_port1 = tcp1->dst;
2057               tcp1->dst = new_port1;
2058
2059               sum1 = tcp1->checksum;
2060               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2061                                      ip4_header_t,
2062                                      dst_address /* changed member */);
2063
2064               sum1 = ip_csum_update (sum1, old_port1, new_port1,
2065                                      ip4_header_t /* cheat */,
2066                                      length /* changed member */);
2067               tcp1->checksum = ip_csum_fold(sum1);
2068             }
2069           else
2070             {
2071               old_port1 = udp1->dst_port;
2072               udp1->dst_port = new_port1;
2073               udp1->checksum = 0;
2074             }
2075
2076         trace1:
2077
2078           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2079                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2080             {
2081               snat_out2in_trace_t *t =
2082                  vlib_add_trace (vm, node, b1, sizeof (*t));
2083               t->sw_if_index = sw_if_index1;
2084               t->next_index = next1;
2085               t->session_index = ~0;
2086               if (ses1)
2087                 t->session_index = ses1 - dm1->sessions;
2088             }
2089
2090           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
2091
2092           /* verify speculative enqueues, maybe switch current next frame */
2093           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2094                                            to_next, n_left_to_next,
2095                                            bi0, bi1, next0, next1);
2096          }
2097
2098       while (n_left_from > 0 && n_left_to_next > 0)
2099         {
2100           u32 bi0;
2101           vlib_buffer_t * b0;
2102           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
2103           u32 sw_if_index0;
2104           ip4_header_t * ip0;
2105           ip_csum_t sum0;
2106           ip4_address_t new_addr0, old_addr0;
2107           u16 new_port0, old_port0;
2108           udp_header_t * udp0;
2109           tcp_header_t * tcp0;
2110           u32 proto0;
2111           snat_det_out_key_t key0;
2112           snat_det_map_t * dm0;
2113           snat_det_session_t * ses0 = 0;
2114           u32 rx_fib_index0;
2115           icmp46_header_t * icmp0;
2116
2117           /* speculatively enqueue b0 to the current next frame */
2118           bi0 = from[0];
2119           to_next[0] = bi0;
2120           from += 1;
2121           to_next += 1;
2122           n_left_from -= 1;
2123           n_left_to_next -= 1;
2124
2125           b0 = vlib_get_buffer (vm, bi0);
2126
2127           ip0 = vlib_buffer_get_current (b0);
2128           udp0 = ip4_next_header (ip0);
2129           tcp0 = (tcp_header_t *) udp0;
2130
2131           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2132
2133           if (PREDICT_FALSE(ip0->ttl == 1))
2134             {
2135               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2136               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2137                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2138                                            0);
2139               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2140               goto trace00;
2141             }
2142
2143           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2144
2145           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2146             {
2147               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2148               icmp0 = (icmp46_header_t *) udp0;
2149
2150               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2151                                   rx_fib_index0, node, next0, thread_index,
2152                                   &ses0, &dm0);
2153               goto trace00;
2154             }
2155
2156           key0.ext_host_addr = ip0->src_address;
2157           key0.ext_host_port = tcp0->src;
2158           key0.out_port = tcp0->dst;
2159
2160           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
2161           if (PREDICT_FALSE(!dm0))
2162             {
2163               clib_warning("unknown dst address:  %U",
2164                            format_ip4_address, &ip0->dst_address);
2165               next0 = SNAT_OUT2IN_NEXT_DROP;
2166               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2167               goto trace00;
2168             }
2169
2170           snat_det_reverse(dm0, &ip0->dst_address,
2171                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
2172
2173           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
2174           if (PREDICT_FALSE(!ses0))
2175             {
2176               clib_warning("no match src %U:%d dst %U:%d for user %U",
2177                            format_ip4_address, &ip0->src_address,
2178                            clib_net_to_host_u16 (tcp0->src),
2179                            format_ip4_address, &ip0->dst_address,
2180                            clib_net_to_host_u16 (tcp0->dst),
2181                            format_ip4_address, &new_addr0);
2182               next0 = SNAT_OUT2IN_NEXT_DROP;
2183               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2184               goto trace00;
2185             }
2186           new_port0 = ses0->in_port;
2187
2188           old_addr0 = ip0->dst_address;
2189           ip0->dst_address = new_addr0;
2190           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2191
2192           sum0 = ip0->checksum;
2193           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2194                                  ip4_header_t,
2195                                  dst_address /* changed member */);
2196           ip0->checksum = ip_csum_fold (sum0);
2197
2198           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2199             {
2200               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2201                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2202               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
2203                 snat_det_ses_close(dm0, ses0);
2204
2205               old_port0 = tcp0->dst;
2206               tcp0->dst = new_port0;
2207
2208               sum0 = tcp0->checksum;
2209               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2210                                      ip4_header_t,
2211                                      dst_address /* changed member */);
2212
2213               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2214                                      ip4_header_t /* cheat */,
2215                                      length /* changed member */);
2216               tcp0->checksum = ip_csum_fold(sum0);
2217             }
2218           else
2219             {
2220               old_port0 = udp0->dst_port;
2221               udp0->dst_port = new_port0;
2222               udp0->checksum = 0;
2223             }
2224
2225         trace00:
2226
2227           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2228                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2229             {
2230               snat_out2in_trace_t *t =
2231                  vlib_add_trace (vm, node, b0, sizeof (*t));
2232               t->sw_if_index = sw_if_index0;
2233               t->next_index = next0;
2234               t->session_index = ~0;
2235               if (ses0)
2236                 t->session_index = ses0 - dm0->sessions;
2237             }
2238
2239           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2240
2241           /* verify speculative enqueue, maybe switch current next frame */
2242           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2243                                            to_next, n_left_to_next,
2244                                            bi0, next0);
2245         }
2246
2247       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2248     }
2249
2250   vlib_node_increment_counter (vm, snat_det_out2in_node.index,
2251                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2252                                pkts_processed);
2253   return frame->n_vectors;
2254 }
2255
2256 VLIB_REGISTER_NODE (snat_det_out2in_node) = {
2257   .function = snat_det_out2in_node_fn,
2258   .name = "nat44-det-out2in",
2259   .vector_size = sizeof (u32),
2260   .format_trace = format_snat_out2in_trace,
2261   .type = VLIB_NODE_TYPE_INTERNAL,
2262
2263   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2264   .error_strings = snat_out2in_error_strings,
2265
2266   .runtime_data_bytes = sizeof (snat_runtime_t),
2267
2268   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
2269
2270   /* edit / add dispositions here */
2271   .next_nodes = {
2272     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
2273     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2274     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2275     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
2276   },
2277 };
2278 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn);
2279
2280 /**
2281  * Get address and port values to be used for ICMP packet translation
2282  * and create session if needed
2283  *
2284  * @param[in,out] sm             NAT main
2285  * @param[in,out] node           NAT node runtime
2286  * @param[in] thread_index       thread index
2287  * @param[in,out] b0             buffer containing packet to be translated
2288  * @param[out] p_proto           protocol used for matching
2289  * @param[out] p_value           address and port after NAT translation
2290  * @param[out] p_dont_translate  if packet should not be translated
2291  * @param d                      optional parameter
2292  * @param e                      optional parameter
2293  */
2294 u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node,
2295                           u32 thread_index, vlib_buffer_t *b0,
2296                           ip4_header_t *ip0, u8 *p_proto,
2297                           snat_session_key_t *p_value,
2298                           u8 *p_dont_translate, void *d, void *e)
2299 {
2300   icmp46_header_t *icmp0;
2301   u32 sw_if_index0;
2302   u8 protocol;
2303   snat_det_out_key_t key0;
2304   u8 dont_translate = 0;
2305   u32 next0 = ~0;
2306   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2307   ip4_header_t *inner_ip0;
2308   void *l4_header = 0;
2309   icmp46_header_t *inner_icmp0;
2310   snat_det_map_t * dm0 = 0;
2311   ip4_address_t new_addr0 = {{0}};
2312   snat_det_session_t * ses0 = 0;
2313   ip4_address_t out_addr;
2314
2315   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2316   echo0 = (icmp_echo_header_t *)(icmp0+1);
2317   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2318
2319   if (!icmp_is_error_message (icmp0))
2320     {
2321       protocol = SNAT_PROTOCOL_ICMP;
2322       key0.ext_host_addr = ip0->src_address;
2323       key0.ext_host_port = 0;
2324       key0.out_port = echo0->identifier;
2325       out_addr = ip0->dst_address;
2326     }
2327   else
2328     {
2329       inner_ip0 = (ip4_header_t *)(echo0+1);
2330       l4_header = ip4_next_header (inner_ip0);
2331       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2332       key0.ext_host_addr = inner_ip0->dst_address;
2333       out_addr = inner_ip0->src_address;
2334       switch (protocol)
2335         {
2336         case SNAT_PROTOCOL_ICMP:
2337           inner_icmp0 = (icmp46_header_t*)l4_header;
2338           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2339           key0.ext_host_port = 0;
2340           key0.out_port = inner_echo0->identifier;
2341           break;
2342         case SNAT_PROTOCOL_UDP:
2343         case SNAT_PROTOCOL_TCP:
2344           key0.ext_host_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2345           key0.out_port = ((tcp_udp_header_t*)l4_header)->src_port;
2346           break;
2347         default:
2348           b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
2349           next0 = SNAT_OUT2IN_NEXT_DROP;
2350           goto out;
2351         }
2352     }
2353
2354   dm0 = snat_det_map_by_out(sm, &out_addr);
2355   if (PREDICT_FALSE(!dm0))
2356     {
2357       /* Don't NAT packet aimed at the intfc address */
2358       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
2359                                           ip0->dst_address.as_u32)))
2360         {
2361           dont_translate = 1;
2362           goto out;
2363         }
2364       clib_warning("unknown dst address:  %U",
2365                    format_ip4_address, &ip0->dst_address);
2366       goto out;
2367     }
2368
2369   snat_det_reverse(dm0, &ip0->dst_address,
2370                    clib_net_to_host_u16(key0.out_port), &new_addr0);
2371
2372   ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
2373   if (PREDICT_FALSE(!ses0))
2374     {
2375       /* Don't NAT packet aimed at the intfc address */
2376       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
2377                                           ip0->dst_address.as_u32)))
2378         {
2379           dont_translate = 1;
2380           goto out;
2381         }
2382       clib_warning("no match src %U:%d dst %U:%d for user %U",
2383                    format_ip4_address, &key0.ext_host_addr,
2384                    clib_net_to_host_u16 (key0.ext_host_port),
2385                    format_ip4_address, &out_addr,
2386                    clib_net_to_host_u16 (key0.out_port),
2387                    format_ip4_address, &new_addr0);
2388       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2389       next0 = SNAT_OUT2IN_NEXT_DROP;
2390       goto out;
2391     }
2392
2393   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
2394                     !icmp_is_error_message (icmp0)))
2395     {
2396       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
2397       next0 = SNAT_OUT2IN_NEXT_DROP;
2398       goto out;
2399     }
2400
2401   goto out;
2402
2403 out:
2404   *p_proto = protocol;
2405   if (ses0)
2406     {
2407       p_value->addr = new_addr0;
2408       p_value->fib_index = sm->inside_fib_index;
2409       p_value->port = ses0->in_port;
2410     }
2411   *p_dont_translate = dont_translate;
2412   if (d)
2413     *(snat_det_session_t**)d = ses0;
2414   if (e)
2415     *(snat_det_map_t**)e = dm0;
2416   return next0;
2417 }
2418
2419 /**********************/
2420 /*** worker handoff ***/
2421 /**********************/
2422 static uword
2423 snat_out2in_worker_handoff_fn (vlib_main_t * vm,
2424                                vlib_node_runtime_t * node,
2425                                vlib_frame_t * frame)
2426 {
2427   snat_main_t *sm = &snat_main;
2428   vlib_thread_main_t *tm = vlib_get_thread_main ();
2429   u32 n_left_from, *from, *to_next = 0;
2430   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
2431   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
2432     = 0;
2433   vlib_frame_queue_elt_t *hf = 0;
2434   vlib_frame_t *f = 0;
2435   int i;
2436   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
2437   u32 next_worker_index = 0;
2438   u32 current_worker_index = ~0;
2439   u32 thread_index = vlib_get_thread_index ();
2440
2441   ASSERT (vec_len (sm->workers));
2442
2443   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
2444     {
2445       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
2446
2447       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
2448                                sm->first_worker_index + sm->num_workers - 1,
2449                                (vlib_frame_queue_t *) (~0));
2450     }
2451
2452   from = vlib_frame_vector_args (frame);
2453   n_left_from = frame->n_vectors;
2454
2455   while (n_left_from > 0)
2456     {
2457       u32 bi0;
2458       vlib_buffer_t *b0;
2459       u32 sw_if_index0;
2460       u32 rx_fib_index0;
2461       ip4_header_t * ip0;
2462       u8 do_handoff;
2463
2464       bi0 = from[0];
2465       from += 1;
2466       n_left_from -= 1;
2467
2468       b0 = vlib_get_buffer (vm, bi0);
2469
2470       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
2471       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2472
2473       ip0 = vlib_buffer_get_current (b0);
2474
2475       next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0);
2476
2477       if (PREDICT_FALSE (next_worker_index != thread_index))
2478         {
2479           do_handoff = 1;
2480
2481           if (next_worker_index != current_worker_index)
2482             {
2483               if (hf)
2484                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2485
2486               hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index,
2487                                                       next_worker_index,
2488                                                       handoff_queue_elt_by_worker_index);
2489
2490               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2491               to_next_worker = &hf->buffer_index[hf->n_vectors];
2492               current_worker_index = next_worker_index;
2493             }
2494
2495           /* enqueue to correct worker thread */
2496           to_next_worker[0] = bi0;
2497           to_next_worker++;
2498           n_left_to_next_worker--;
2499
2500           if (n_left_to_next_worker == 0)
2501             {
2502               hf->n_vectors = VLIB_FRAME_SIZE;
2503               vlib_put_frame_queue_elt (hf);
2504               current_worker_index = ~0;
2505               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2506               hf = 0;
2507             }
2508         }
2509       else
2510         {
2511           do_handoff = 0;
2512           /* if this is 1st frame */
2513           if (!f)
2514             {
2515               f = vlib_get_frame_to_node (vm, sm->out2in_node_index);
2516               to_next = vlib_frame_vector_args (f);
2517             }
2518
2519           to_next[0] = bi0;
2520           to_next += 1;
2521           f->n_vectors++;
2522         }
2523
2524       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2525                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2526         {
2527           snat_out2in_worker_handoff_trace_t *t =
2528             vlib_add_trace (vm, node, b0, sizeof (*t));
2529           t->next_worker_index = next_worker_index;
2530           t->do_handoff = do_handoff;
2531         }
2532     }
2533
2534   if (f)
2535     vlib_put_frame_to_node (vm, sm->out2in_node_index, f);
2536
2537   if (hf)
2538     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2539
2540   /* Ship frames to the worker nodes */
2541   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2542     {
2543       if (handoff_queue_elt_by_worker_index[i])
2544         {
2545           hf = handoff_queue_elt_by_worker_index[i];
2546           /*
2547            * It works better to let the handoff node
2548            * rate-adapt, always ship the handoff queue element.
2549            */
2550           if (1 || hf->n_vectors == hf->last_n_vectors)
2551             {
2552               vlib_put_frame_queue_elt (hf);
2553               handoff_queue_elt_by_worker_index[i] = 0;
2554             }
2555           else
2556             hf->last_n_vectors = hf->n_vectors;
2557         }
2558       congested_handoff_queue_by_worker_index[i] =
2559         (vlib_frame_queue_t *) (~0);
2560     }
2561   hf = 0;
2562   current_worker_index = ~0;
2563   return frame->n_vectors;
2564 }
2565
2566 VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = {
2567   .function = snat_out2in_worker_handoff_fn,
2568   .name = "nat44-out2in-worker-handoff",
2569   .vector_size = sizeof (u32),
2570   .format_trace = format_snat_out2in_worker_handoff_trace,
2571   .type = VLIB_NODE_TYPE_INTERNAL,
2572
2573   .n_next_nodes = 1,
2574
2575   .next_nodes = {
2576     [0] = "error-drop",
2577   },
2578 };
2579
2580 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn);
2581
2582 static uword
2583 snat_out2in_fast_node_fn (vlib_main_t * vm,
2584                           vlib_node_runtime_t * node,
2585                           vlib_frame_t * frame)
2586 {
2587   u32 n_left_from, * from, * to_next;
2588   snat_out2in_next_t next_index;
2589   u32 pkts_processed = 0;
2590   snat_main_t * sm = &snat_main;
2591
2592   from = vlib_frame_vector_args (frame);
2593   n_left_from = frame->n_vectors;
2594   next_index = node->cached_next_index;
2595
2596   while (n_left_from > 0)
2597     {
2598       u32 n_left_to_next;
2599
2600       vlib_get_next_frame (vm, node, next_index,
2601                            to_next, n_left_to_next);
2602
2603       while (n_left_from > 0 && n_left_to_next > 0)
2604         {
2605           u32 bi0;
2606           vlib_buffer_t * b0;
2607           u32 next0 = SNAT_OUT2IN_NEXT_DROP;
2608           u32 sw_if_index0;
2609           ip4_header_t * ip0;
2610           ip_csum_t sum0;
2611           u32 new_addr0, old_addr0;
2612           u16 new_port0, old_port0;
2613           udp_header_t * udp0;
2614           tcp_header_t * tcp0;
2615           icmp46_header_t * icmp0;
2616           snat_session_key_t key0, sm0;
2617           u32 proto0;
2618           u32 rx_fib_index0;
2619
2620           /* speculatively enqueue b0 to the current next frame */
2621           bi0 = from[0];
2622           to_next[0] = bi0;
2623           from += 1;
2624           to_next += 1;
2625           n_left_from -= 1;
2626           n_left_to_next -= 1;
2627
2628           b0 = vlib_get_buffer (vm, bi0);
2629
2630           ip0 = vlib_buffer_get_current (b0);
2631           udp0 = ip4_next_header (ip0);
2632           tcp0 = (tcp_header_t *) udp0;
2633           icmp0 = (icmp46_header_t *) udp0;
2634
2635           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2636           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2637
2638           vnet_feature_next (sw_if_index0, &next0, b0);
2639
2640           if (PREDICT_FALSE(ip0->ttl == 1))
2641             {
2642               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2643               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2644                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2645                                            0);
2646               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2647               goto trace00;
2648             }
2649
2650           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2651
2652           if (PREDICT_FALSE (proto0 == ~0))
2653               goto trace00;
2654
2655           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2656             {
2657               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2658                                   rx_fib_index0, node, next0, ~0, 0, 0);
2659               goto trace00;
2660             }
2661
2662           key0.addr = ip0->dst_address;
2663           key0.port = udp0->dst_port;
2664           key0.fib_index = rx_fib_index0;
2665
2666           if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
2667             {
2668               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2669               goto trace00;
2670             }
2671
2672           new_addr0 = sm0.addr.as_u32;
2673           new_port0 = sm0.port;
2674           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2675           old_addr0 = ip0->dst_address.as_u32;
2676           ip0->dst_address.as_u32 = new_addr0;
2677
2678           sum0 = ip0->checksum;
2679           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2680                                  ip4_header_t,
2681                                  dst_address /* changed member */);
2682           ip0->checksum = ip_csum_fold (sum0);
2683
2684           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
2685             {
2686                if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2687                 {
2688                   old_port0 = tcp0->dst_port;
2689                   tcp0->dst_port = new_port0;
2690
2691                   sum0 = tcp0->checksum;
2692                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2693                                          ip4_header_t,
2694                                          dst_address /* changed member */);
2695
2696                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2697                                          ip4_header_t /* cheat */,
2698                                          length /* changed member */);
2699                   tcp0->checksum = ip_csum_fold(sum0);
2700                 }
2701               else
2702                 {
2703                   old_port0 = udp0->dst_port;
2704                   udp0->dst_port = new_port0;
2705                   udp0->checksum = 0;
2706                 }
2707             }
2708           else
2709             {
2710               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2711                 {
2712                   sum0 = tcp0->checksum;
2713                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2714                                          ip4_header_t,
2715                                          dst_address /* changed member */);
2716
2717                   tcp0->checksum = ip_csum_fold(sum0);
2718                 }
2719             }
2720
2721         trace00:
2722
2723           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2724                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2725             {
2726               snat_out2in_trace_t *t =
2727                  vlib_add_trace (vm, node, b0, sizeof (*t));
2728               t->sw_if_index = sw_if_index0;
2729               t->next_index = next0;
2730             }
2731
2732           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2733
2734           /* verify speculative enqueue, maybe switch current next frame */
2735           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2736                                            to_next, n_left_to_next,
2737                                            bi0, next0);
2738         }
2739
2740       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2741     }
2742
2743   vlib_node_increment_counter (vm, snat_out2in_fast_node.index,
2744                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2745                                pkts_processed);
2746   return frame->n_vectors;
2747 }
2748
2749 VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
2750   .function = snat_out2in_fast_node_fn,
2751   .name = "nat44-out2in-fast",
2752   .vector_size = sizeof (u32),
2753   .format_trace = format_snat_out2in_fast_trace,
2754   .type = VLIB_NODE_TYPE_INTERNAL,
2755
2756   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2757   .error_strings = snat_out2in_error_strings,
2758
2759   .runtime_data_bytes = sizeof (snat_runtime_t),
2760
2761   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
2762
2763   /* edit / add dispositions here */
2764   .next_nodes = {
2765     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2766     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
2767     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2768     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
2769   },
2770 };
2771 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn);