NAT44 out2in DHCP client next node
[vpp.git] / src / plugins / nat / out2in.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/udp/udp.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <nat/nat.h>
26 #include <nat/nat_ipfix_logging.h>
27 #include <nat/nat_det.h>
28 #include <nat/nat_reass.h>
29
30 #include <vppinfra/hash.h>
31 #include <vppinfra/error.h>
32 #include <vppinfra/elog.h>
33
34 typedef struct {
35   u32 sw_if_index;
36   u32 next_index;
37   u32 session_index;
38 } snat_out2in_trace_t;
39
40 typedef struct {
41   u32 next_worker_index;
42   u8 do_handoff;
43 } snat_out2in_worker_handoff_trace_t;
44
45 /* packet trace format function */
46 static u8 * format_snat_out2in_trace (u8 * s, va_list * args)
47 {
48   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
49   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
50   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
51
52   s = format (s, "NAT44_OUT2IN: sw_if_index %d, next index %d, session index %d",
53               t->sw_if_index, t->next_index, t->session_index);
54   return s;
55 }
56
57 static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args)
58 {
59   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
60   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
61   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
62
63   s = format (s, "NAT44_OUT2IN_FAST: sw_if_index %d, next index %d",
64               t->sw_if_index, t->next_index);
65   return s;
66 }
67
68 static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args)
69 {
70   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
71   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
72   snat_out2in_worker_handoff_trace_t * t =
73     va_arg (*args, snat_out2in_worker_handoff_trace_t *);
74   char * m;
75
76   m = t->do_handoff ? "next worker" : "same worker";
77   s = format (s, "NAT44_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
78
79   return s;
80 }
81
82 typedef struct {
83   u32 sw_if_index;
84   u32 next_index;
85   u8 cached;
86 } nat44_out2in_reass_trace_t;
87
88 static u8 * format_nat44_out2in_reass_trace (u8 * s, va_list * args)
89 {
90   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
91   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
92   nat44_out2in_reass_trace_t * t = va_arg (*args, nat44_out2in_reass_trace_t *);
93
94   s = format (s, "NAT44_OUT2IN_REASS: sw_if_index %d, next index %d, status %s",
95               t->sw_if_index, t->next_index,
96               t->cached ? "cached" : "translated");
97
98   return s;
99 }
100
101 vlib_node_registration_t snat_out2in_node;
102 vlib_node_registration_t snat_out2in_fast_node;
103 vlib_node_registration_t snat_out2in_worker_handoff_node;
104 vlib_node_registration_t snat_det_out2in_node;
105 vlib_node_registration_t nat44_out2in_reass_node;
106
/*
 * Error counters of the out2in nodes, kept as a single X-macro list so the
 * enum values and the description strings below are generated from the
 * same entries and cannot drift apart.  Each entry is _(SYMBOL, "text").
 */
#define foreach_snat_out2in_error                               \
  _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")               \
  _(OUT2IN_PACKETS, "Good out2in packets processed")            \
  _(OUT_OF_PORTS, "Out of ports")                               \
  _(BAD_ICMP_TYPE, "unsupported ICMP type")                     \
  _(NO_TRANSLATION, "No translation")                           \
  _(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")         \
  _(DROP_FRAGMENT, "Drop fragment")                             \
  _(MAX_REASS, "Maximum reassemblies exceeded")                 \
  _(MAX_FRAG, "Maximum fragments per reassembly exceeded")

/* SNAT_OUT2IN_ERROR_<SYMBOL> for every list entry, then the entry count. */
typedef enum
{
#define _(name, text) SNAT_OUT2IN_ERROR_##name,
  foreach_snat_out2in_error
#undef _
    SNAT_OUT2IN_N_ERROR,
} snat_out2in_error_t;

/* Description strings, indexed by snat_out2in_error_t. */
static char *snat_out2in_error_strings[] = {
#define _(name, text) text,
  foreach_snat_out2in_error
#undef _
};
130
/*
 * Next-node indices used by the out2in nodes.  SNAT_OUT2IN_N_NEXT is the
 * number of entries, not a valid next index.
 */
typedef enum
{
  SNAT_OUT2IN_NEXT_DROP = 0,
  SNAT_OUT2IN_NEXT_LOOKUP = 1,
  SNAT_OUT2IN_NEXT_ICMP_ERROR = 2,
  SNAT_OUT2IN_NEXT_REASS = 3,
  SNAT_OUT2IN_N_NEXT,
} snat_out2in_next_t;
138
139 /**
140  * @brief Create session for static mapping.
141  *
142  * Create NAT session initiated by host from external network with static
143  * mapping.
144  *
145  * @param sm     NAT main.
146  * @param b0     Vlib buffer.
147  * @param in2out In2out NAT44 session key.
148  * @param out2in Out2in NAT44 session key.
149  * @param node   Vlib node.
150  *
151  * @returns SNAT session if successfully created otherwise 0.
152  */
153 static inline snat_session_t *
154 create_session_for_static_mapping (snat_main_t *sm,
155                                    vlib_buffer_t *b0,
156                                    snat_session_key_t in2out,
157                                    snat_session_key_t out2in,
158                                    vlib_node_runtime_t * node,
159                                    u32 thread_index)
160 {
161   snat_user_t *u;
162   snat_session_t *s;
163   clib_bihash_kv_8_8_t kv0;
164   ip4_header_t *ip0;
165   udp_header_t *udp0;
166
167   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
168     {
169       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
170       return 0;
171     }
172
173   ip0 = vlib_buffer_get_current (b0);
174   udp0 = ip4_next_header (ip0);
175
176   u = nat_user_get_or_create (sm, &in2out.addr, in2out.fib_index, thread_index);
177   if (!u)
178     {
179       clib_warning ("create NAT user failed");
180       return 0;
181     }
182
183   s = nat_session_alloc_or_recycle (sm, u, thread_index);
184   if (!s)
185     {
186       clib_warning ("create NAT session failed");
187       return 0;
188     }
189
190   s->outside_address_index = ~0;
191   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
192   s->ext_host_addr.as_u32 = ip0->src_address.as_u32;
193   s->ext_host_port = udp0->src_port;
194   u->nstaticsessions++;
195   s->in2out = in2out;
196   s->out2in = out2in;
197   s->in2out.protocol = out2in.protocol;
198
199   /* Add to translation hashes */
200   kv0.key = s->in2out.as_u64;
201   kv0.value = s - sm->per_thread_data[thread_index].sessions;
202   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
203                                1 /* is_add */))
204       clib_warning ("in2out key add failed");
205
206   kv0.key = s->out2in.as_u64;
207
208   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
209                                1 /* is_add */))
210       clib_warning ("out2in key add failed");
211
212   /* log NAT event */
213   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
214                                       s->out2in.addr.as_u32,
215                                       s->in2out.protocol,
216                                       s->in2out.port,
217                                       s->out2in.port,
218                                       s->in2out.fib_index);
219    return s;
220 }
221
222 static_always_inline
223 snat_out2in_error_t icmp_get_key(ip4_header_t *ip0,
224                                  snat_session_key_t *p_key0)
225 {
226   icmp46_header_t *icmp0;
227   snat_session_key_t key0;
228   icmp_echo_header_t *echo0, *inner_echo0 = 0;
229   ip4_header_t *inner_ip0;
230   void *l4_header = 0;
231   icmp46_header_t *inner_icmp0;
232
233   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
234   echo0 = (icmp_echo_header_t *)(icmp0+1);
235
236   if (!icmp_is_error_message (icmp0))
237     {
238       key0.protocol = SNAT_PROTOCOL_ICMP;
239       key0.addr = ip0->dst_address;
240       key0.port = echo0->identifier;
241     }
242   else
243     {
244       inner_ip0 = (ip4_header_t *)(echo0+1);
245       l4_header = ip4_next_header (inner_ip0);
246       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
247       key0.addr = inner_ip0->src_address;
248       switch (key0.protocol)
249         {
250         case SNAT_PROTOCOL_ICMP:
251           inner_icmp0 = (icmp46_header_t*)l4_header;
252           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
253           key0.port = inner_echo0->identifier;
254           break;
255         case SNAT_PROTOCOL_UDP:
256         case SNAT_PROTOCOL_TCP:
257           key0.port = ((tcp_udp_header_t*)l4_header)->src_port;
258           break;
259         default:
260           return SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL;
261         }
262     }
263   *p_key0 = key0;
264   return -1; /* success */
265 }
266
267 static_always_inline int
268 icmp_get_ed_key(ip4_header_t *ip0, nat_ed_ses_key_t *p_key0)
269 {
270   icmp46_header_t *icmp0;
271   nat_ed_ses_key_t key0;
272   icmp_echo_header_t *echo0, *inner_echo0 = 0;
273   ip4_header_t *inner_ip0;
274   void *l4_header = 0;
275   icmp46_header_t *inner_icmp0;
276
277   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
278   echo0 = (icmp_echo_header_t *)(icmp0+1);
279
280   if (!icmp_is_error_message (icmp0))
281     {
282       key0.proto = IP_PROTOCOL_ICMP;
283       key0.l_addr = ip0->dst_address;
284       key0.r_addr = ip0->src_address;
285       key0.l_port = key0.r_port = echo0->identifier;
286     }
287   else
288     {
289       inner_ip0 = (ip4_header_t *)(echo0+1);
290       l4_header = ip4_next_header (inner_ip0);
291       key0.proto = inner_ip0->protocol;
292       key0.l_addr = inner_ip0->src_address;
293       key0.r_addr = inner_ip0->dst_address;
294       switch (ip_proto_to_snat_proto (inner_ip0->protocol))
295         {
296         case SNAT_PROTOCOL_ICMP:
297           inner_icmp0 = (icmp46_header_t*)l4_header;
298           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
299           key0.l_port = key0.r_port = inner_echo0->identifier;
300           break;
301         case SNAT_PROTOCOL_UDP:
302         case SNAT_PROTOCOL_TCP:
303           key0.l_port = ((tcp_udp_header_t*)l4_header)->src_port;
304           key0.r_port = ((tcp_udp_header_t*)l4_header)->dst_port;
305           break;
306         default:
307           return -1;
308         }
309     }
310   *p_key0 = key0;
311   return 0;
312 }
313
314 /**
315  * Get address and port values to be used for ICMP packet translation
316  * and create session if needed
317  *
318  * @param[in,out] sm             NAT main
319  * @param[in,out] node           NAT node runtime
320  * @param[in] thread_index       thread index
321  * @param[in,out] b0             buffer containing packet to be translated
322  * @param[out] p_proto           protocol used for matching
323  * @param[out] p_value           address and port after NAT translation
324  * @param[out] p_dont_translate  if packet should not be translated
325  * @param d                      optional parameter
326  * @param e                      optional parameter
327  */
328 u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
329                            u32 thread_index, vlib_buffer_t *b0,
330                            ip4_header_t *ip0, u8 *p_proto,
331                            snat_session_key_t *p_value,
332                            u8 *p_dont_translate, void *d, void *e)
333 {
334   icmp46_header_t *icmp0;
335   u32 sw_if_index0;
336   u32 rx_fib_index0;
337   snat_session_key_t key0;
338   snat_session_key_t sm0;
339   snat_session_t *s0 = 0;
340   u8 dont_translate = 0;
341   clib_bihash_kv_8_8_t kv0, value0;
342   u8 is_addr_only;
343   u32 next0 = ~0;
344   int err;
345
346   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
347   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
348   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
349
350   key0.protocol = 0;
351
352   err = icmp_get_key (ip0, &key0);
353   if (err != -1)
354     {
355       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
356       next0 = SNAT_OUT2IN_NEXT_DROP;
357       goto out;
358     }
359   key0.fib_index = rx_fib_index0;
360
361   kv0.key = key0.as_u64;
362
363   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
364                               &value0))
365     {
366       /* Try to match static mapping by external address and port,
367          destination address and port in packet */
368       if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0))
369         {
370           if (!sm->forwarding_enabled)
371             {
372               /* Don't NAT packet aimed at the intfc address */
373               if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
374                                                   ip0->dst_address.as_u32)))
375                 {
376                   dont_translate = 1;
377                   goto out;
378                 }
379               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
380               next0 = SNAT_OUT2IN_NEXT_DROP;
381               goto out;
382             }
383           else
384             {
385               dont_translate = 1;
386               goto out;
387             }
388         }
389
390       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
391                         (icmp0->type != ICMP4_echo_request || !is_addr_only)))
392         {
393           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
394           next0 = SNAT_OUT2IN_NEXT_DROP;
395           goto out;
396         }
397
398       /* Create session initiated by host from external network */
399       s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
400                                              node, thread_index);
401
402       if (!s0)
403         {
404           next0 = SNAT_OUT2IN_NEXT_DROP;
405           goto out;
406         }
407     }
408   else
409     {
410       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
411                         icmp0->type != ICMP4_echo_request &&
412                         !icmp_is_error_message (icmp0)))
413         {
414           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
415           next0 = SNAT_OUT2IN_NEXT_DROP;
416           goto out;
417         }
418
419       if (PREDICT_FALSE (value0.value == ~0ULL))
420         {
421           nat_ed_ses_key_t key;
422           clib_bihash_kv_16_8_t s_kv, s_value;
423
424           key.as_u64[0] = 0;
425           key.as_u64[1] = 0;
426           if (icmp_get_ed_key (ip0, &key))
427             {
428               b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
429               next0 = SNAT_OUT2IN_NEXT_DROP;
430               goto out;
431             }
432           key.fib_index = rx_fib_index0;
433           s_kv.key[0] = key.as_u64[0];
434           s_kv.key[1] = key.as_u64[1];
435           if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
436             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
437                                     s_value.value);
438           else
439            {
440               next0 = SNAT_OUT2IN_NEXT_DROP;
441               goto out;
442            }
443         }
444       else
445         s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
446                                 value0.value);
447     }
448
449 out:
450   *p_proto = key0.protocol;
451   if (s0)
452     *p_value = s0->in2out;
453   *p_dont_translate = dont_translate;
454   if (d)
455     *(snat_session_t**)d = s0;
456   return next0;
457 }
458
459 /**
460  * Get address and port values to be used for ICMP packet translation
461  *
462  * @param[in] sm                 NAT main
463  * @param[in,out] node           NAT node runtime
464  * @param[in] thread_index       thread index
465  * @param[in,out] b0             buffer containing packet to be translated
466  * @param[out] p_proto           protocol used for matching
467  * @param[out] p_value           address and port after NAT translation
468  * @param[out] p_dont_translate  if packet should not be translated
469  * @param d                      optional parameter
470  * @param e                      optional parameter
471  */
472 u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
473                            u32 thread_index, vlib_buffer_t *b0,
474                            ip4_header_t *ip0, u8 *p_proto,
475                            snat_session_key_t *p_value,
476                            u8 *p_dont_translate, void *d, void *e)
477 {
478   icmp46_header_t *icmp0;
479   u32 sw_if_index0;
480   u32 rx_fib_index0;
481   snat_session_key_t key0;
482   snat_session_key_t sm0;
483   u8 dont_translate = 0;
484   u8 is_addr_only;
485   u32 next0 = ~0;
486   int err;
487
488   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
489   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
490   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
491
492   err = icmp_get_key (ip0, &key0);
493   if (err != -1)
494     {
495       b0->error = node->errors[err];
496       next0 = SNAT_OUT2IN_NEXT_DROP;
497       goto out2;
498     }
499   key0.fib_index = rx_fib_index0;
500
501   if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0))
502     {
503       /* Don't NAT packet aimed at the intfc address */
504       if (is_interface_addr(sm, node, sw_if_index0, ip0->dst_address.as_u32))
505         {
506           dont_translate = 1;
507           goto out;
508         }
509       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
510       next0 = SNAT_OUT2IN_NEXT_DROP;
511       goto out;
512     }
513
514   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
515                     (icmp0->type != ICMP4_echo_request || !is_addr_only) &&
516                     !icmp_is_error_message (icmp0)))
517     {
518       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
519       next0 = SNAT_OUT2IN_NEXT_DROP;
520       goto out;
521     }
522
523 out:
524   *p_value = sm0;
525 out2:
526   *p_proto = key0.protocol;
527   *p_dont_translate = dont_translate;
528   return next0;
529 }
530
531 static inline u32 icmp_out2in (snat_main_t *sm,
532                                vlib_buffer_t * b0,
533                                ip4_header_t * ip0,
534                                icmp46_header_t * icmp0,
535                                u32 sw_if_index0,
536                                u32 rx_fib_index0,
537                                vlib_node_runtime_t * node,
538                                u32 next0,
539                                u32 thread_index,
540                                void *d,
541                                void *e)
542 {
543   snat_session_key_t sm0;
544   u8 protocol;
545   icmp_echo_header_t *echo0, *inner_echo0 = 0;
546   ip4_header_t *inner_ip0 = 0;
547   void *l4_header = 0;
548   icmp46_header_t *inner_icmp0;
549   u8 dont_translate;
550   u32 new_addr0, old_addr0;
551   u16 old_id0, new_id0;
552   ip_csum_t sum0;
553   u16 checksum0;
554   u32 next0_tmp;
555
556   echo0 = (icmp_echo_header_t *)(icmp0+1);
557
558   next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0, ip0,
559                                        &protocol, &sm0, &dont_translate, d, e);
560   if (next0_tmp != ~0)
561     next0 = next0_tmp;
562   if (next0 == SNAT_OUT2IN_NEXT_DROP || dont_translate)
563     goto out;
564
565   sum0 = ip_incremental_checksum (0, icmp0,
566                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
567   checksum0 = ~ip_csum_fold (sum0);
568   if (checksum0 != 0 && checksum0 != 0xffff)
569     {
570       next0 = SNAT_OUT2IN_NEXT_DROP;
571       goto out;
572     }
573
574   old_addr0 = ip0->dst_address.as_u32;
575   new_addr0 = ip0->dst_address.as_u32 = sm0.addr.as_u32;
576   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
577
578   sum0 = ip0->checksum;
579   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
580                          dst_address /* changed member */);
581   ip0->checksum = ip_csum_fold (sum0);
582
583   if (!icmp_is_error_message (icmp0))
584     {
585       new_id0 = sm0.port;
586       if (PREDICT_FALSE(new_id0 != echo0->identifier))
587         {
588           old_id0 = echo0->identifier;
589           new_id0 = sm0.port;
590           echo0->identifier = new_id0;
591
592           sum0 = icmp0->checksum;
593           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
594                                  identifier /* changed member */);
595           icmp0->checksum = ip_csum_fold (sum0);
596         }
597     }
598   else
599     {
600       inner_ip0 = (ip4_header_t *)(echo0+1);
601       l4_header = ip4_next_header (inner_ip0);
602
603       if (!ip4_header_checksum_is_valid (inner_ip0))
604         {
605           next0 = SNAT_OUT2IN_NEXT_DROP;
606           goto out;
607         }
608
609       old_addr0 = inner_ip0->src_address.as_u32;
610       inner_ip0->src_address = sm0.addr;
611       new_addr0 = inner_ip0->src_address.as_u32;
612
613       sum0 = icmp0->checksum;
614       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
615                              src_address /* changed member */);
616       icmp0->checksum = ip_csum_fold (sum0);
617
618       switch (protocol)
619         {
620         case SNAT_PROTOCOL_ICMP:
621           inner_icmp0 = (icmp46_header_t*)l4_header;
622           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
623
624           old_id0 = inner_echo0->identifier;
625           new_id0 = sm0.port;
626           inner_echo0->identifier = new_id0;
627
628           sum0 = icmp0->checksum;
629           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
630                                  identifier);
631           icmp0->checksum = ip_csum_fold (sum0);
632           break;
633         case SNAT_PROTOCOL_UDP:
634         case SNAT_PROTOCOL_TCP:
635           old_id0 = ((tcp_udp_header_t*)l4_header)->src_port;
636           new_id0 = sm0.port;
637           ((tcp_udp_header_t*)l4_header)->src_port = new_id0;
638
639           sum0 = icmp0->checksum;
640           sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
641                                  src_port);
642           icmp0->checksum = ip_csum_fold (sum0);
643           break;
644         default:
645           ASSERT(0);
646         }
647     }
648
649 out:
650   return next0;
651 }
652
653
654 static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
655                                          vlib_buffer_t * b0,
656                                          ip4_header_t * ip0,
657                                          icmp46_header_t * icmp0,
658                                          u32 sw_if_index0,
659                                          u32 rx_fib_index0,
660                                          vlib_node_runtime_t * node,
661                                          u32 next0, f64 now,
662                                          u32 thread_index,
663                                          snat_session_t ** p_s0)
664 {
665   next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
666                       next0, thread_index, p_s0, 0);
667   snat_session_t * s0 = *p_s0;
668   if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
669     {
670       /* Accounting */
671       s0->last_heard = now;
672       s0->total_pkts++;
673       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
674       /* Per-user LRU list maintenance */
675       clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
676                          s0->per_user_index);
677       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
678                           s0->per_user_list_head_index,
679                           s0->per_user_index);
680     }
681   return next0;
682 }
683
684 static snat_session_t *
685 snat_out2in_unknown_proto (snat_main_t *sm,
686                            vlib_buffer_t * b,
687                            ip4_header_t * ip,
688                            u32 rx_fib_index,
689                            u32 thread_index,
690                            f64 now,
691                            vlib_main_t * vm,
692                            vlib_node_runtime_t * node)
693 {
694   clib_bihash_kv_8_8_t kv, value;
695   clib_bihash_kv_16_8_t s_kv, s_value;
696   snat_static_mapping_t *m;
697   snat_session_key_t m_key;
698   u32 old_addr, new_addr;
699   ip_csum_t sum;
700   nat_ed_ses_key_t key;
701   snat_session_t * s;
702   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
703   snat_user_t *u;
704
705   old_addr = ip->dst_address.as_u32;
706
707   key.l_addr = ip->dst_address;
708   key.r_addr = ip->src_address;
709   key.fib_index = rx_fib_index;
710   key.proto = ip->protocol;
711   key.r_port = 0;
712   key.l_port = 0;
713   s_kv.key[0] = key.as_u64[0];
714   s_kv.key[1] = key.as_u64[1];
715
716   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
717     {
718       s = pool_elt_at_index (tsm->sessions, s_value.value);
719       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
720     }
721   else
722     {
723       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
724         {
725           b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
726           return 0;
727         }
728
729       m_key.addr = ip->dst_address;
730       m_key.port = 0;
731       m_key.protocol = 0;
732       m_key.fib_index = rx_fib_index;
733       kv.key = m_key.as_u64;
734       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
735         {
736           b->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
737           return 0;
738         }
739
740       m = pool_elt_at_index (sm->static_mappings, value.value);
741
742       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
743
744       u = nat_user_get_or_create (sm, &ip->src_address, m->fib_index,
745                                   thread_index);
746       if (!u)
747         {
748           clib_warning ("create NAT user failed");
749           return 0;
750         }
751
752       /* Create a new session */
753       s = nat_session_alloc_or_recycle (sm, u, thread_index);
754       if (!s)
755         {
756           clib_warning ("create NAT session failed");
757           return 0;
758         }
759
760       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
761       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
762       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
763       s->outside_address_index = ~0;
764       s->out2in.addr.as_u32 = old_addr;
765       s->out2in.fib_index = rx_fib_index;
766       s->in2out.addr.as_u32 = new_addr;
767       s->in2out.fib_index = m->fib_index;
768       s->in2out.port = s->out2in.port = ip->protocol;
769       u->nstaticsessions++;
770
771       /* Add to lookup tables */
772       s_kv.value = s - tsm->sessions;
773       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
774         clib_warning ("out2in key add failed");
775
776       key.l_addr = ip->dst_address;
777       key.fib_index = m->fib_index;
778       s_kv.key[0] = key.as_u64[0];
779       s_kv.key[1] = key.as_u64[1];
780       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
781         clib_warning ("in2out key add failed");
782    }
783
784   /* Update IP checksum */
785   sum = ip->checksum;
786   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
787   ip->checksum = ip_csum_fold (sum);
788
789   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
790
791   /* Accounting */
792   s->last_heard = now;
793   s->total_pkts++;
794   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
795   /* Per-user LRU list maintenance */
796   clib_dlist_remove (tsm->list_pool, s->per_user_index);
797   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
798                       s->per_user_index);
799
800   return s;
801 }
802
803 static snat_session_t *
804 snat_out2in_lb (snat_main_t *sm,
805                 vlib_buffer_t * b,
806                 ip4_header_t * ip,
807                 u32 rx_fib_index,
808                 u32 thread_index,
809                 f64 now,
810                 vlib_main_t * vm,
811                 vlib_node_runtime_t * node)
812 {
813   nat_ed_ses_key_t key;
814   clib_bihash_kv_16_8_t s_kv, s_value;
815   udp_header_t *udp = ip4_next_header (ip);
816   tcp_header_t *tcp = (tcp_header_t *) udp;
817   snat_session_t *s = 0;
818   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
819   snat_session_key_t e_key, l_key;
820   u32 old_addr, new_addr;
821   u32 proto = ip_proto_to_snat_proto (ip->protocol);
822   u16 new_port, old_port;
823   ip_csum_t sum;
824   snat_user_t *u;
825   u32 address_index;
826   snat_session_key_t eh_key;
827   u8 twice_nat;
828
829   old_addr = ip->dst_address.as_u32;
830
831   key.l_addr = ip->dst_address;
832   key.r_addr = ip->src_address;
833   key.fib_index = rx_fib_index;
834   key.proto = ip->protocol;
835   key.r_port = udp->src_port;
836   key.l_port = udp->dst_port;
837   s_kv.key[0] = key.as_u64[0];
838   s_kv.key[1] = key.as_u64[1];
839
840   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
841     {
842       s = pool_elt_at_index (tsm->sessions, s_value.value);
843     }
844   else
845     {
846       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
847         {
848           b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
849           return 0;
850         }
851
852       e_key.addr = ip->dst_address;
853       e_key.port = udp->dst_port;
854       e_key.protocol = proto;
855       e_key.fib_index = rx_fib_index;
856       if (snat_static_mapping_match(sm, e_key, &l_key, 1, 0, &twice_nat))
857         return 0;
858
859       u = nat_user_get_or_create (sm, &l_key.addr, l_key.fib_index,
860                                   thread_index);
861       if (!u)
862       {
863         clib_warning ("create NAT user failed");
864         return 0;
865       }
866
867       s = nat_session_alloc_or_recycle (sm, u, thread_index);
868       if (!s)
869         {
870           clib_warning ("create NAT session failed");
871           return 0;
872         }
873
874       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
875       s->ext_host_port = udp->src_port;
876       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
877       s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
878       s->outside_address_index = ~0;
879       s->out2in = e_key;
880       s->in2out = l_key;
881       u->nstaticsessions++;
882
883       /* Add to lookup tables */
884       s_kv.value = s - tsm->sessions;
885       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
886         clib_warning ("out2in-ed key add failed");
887
888       if (twice_nat)
889         {
890           eh_key.protocol = proto;
891           if (snat_alloc_outside_address_and_port (sm->twice_nat_addresses, 0,
892                                                    thread_index, &eh_key,
893                                                    &address_index,
894                                                    sm->port_per_thread,
895                                                    sm->per_thread_data[thread_index].snat_thread_index))
896             {
897               b->error = node->errors[SNAT_OUT2IN_ERROR_OUT_OF_PORTS];
898               return 0;
899             }
900           key.r_addr.as_u32 = s->ext_host_nat_addr.as_u32 = eh_key.addr.as_u32;
901           key.r_port = s->ext_host_nat_port = eh_key.port;
902           s->flags |= SNAT_SESSION_FLAG_TWICE_NAT;
903         }
904       key.l_addr = l_key.addr;
905       key.fib_index = l_key.fib_index;
906       key.l_port = l_key.port;
907       s_kv.key[0] = key.as_u64[0];
908       s_kv.key[1] = key.as_u64[1];
909       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
910         clib_warning ("in2out-ed key add failed");
911     }
912
913   new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
914
915   /* Update IP checksum */
916   sum = ip->checksum;
917   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
918   if (is_twice_nat_session (s))
919     sum = ip_csum_update (sum, ip->src_address.as_u32,
920                           s->ext_host_nat_addr.as_u32, ip4_header_t,
921                           src_address);
922   ip->checksum = ip_csum_fold (sum);
923
924   if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
925     {
926       old_port = tcp->dst_port;
927       tcp->dst_port = s->in2out.port;
928       new_port = tcp->dst_port;
929
930       sum = tcp->checksum;
931       sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
932       sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
933       if (is_twice_nat_session (s))
934         {
935           sum = ip_csum_update (sum, ip->src_address.as_u32,
936                                 s->ext_host_nat_addr.as_u32, ip4_header_t,
937                                 dst_address);
938           sum = ip_csum_update (sum, tcp->src_port, s->ext_host_nat_port,
939                                 ip4_header_t, length);
940           tcp->src_port = s->ext_host_nat_port;
941           ip->src_address.as_u32 = s->ext_host_nat_addr.as_u32;
942         }
943       tcp->checksum = ip_csum_fold(sum);
944     }
945   else
946     {
947       udp->dst_port = s->in2out.port;
948       if (is_twice_nat_session (s))
949         {
950           udp->src_port = s->ext_host_nat_port;
951           ip->src_address.as_u32 = s->ext_host_nat_addr.as_u32;
952         }
953       udp->checksum = 0;
954     }
955
956   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
957
958   /* Accounting */
959   s->last_heard = now;
960   s->total_pkts++;
961   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
962   /* Per-user LRU list maintenance */
963   clib_dlist_remove (tsm->list_pool, s->per_user_index);
964   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
965                       s->per_user_index);
966
967   return s;
968 }
969
970 static uword
971 snat_out2in_node_fn (vlib_main_t * vm,
972                   vlib_node_runtime_t * node,
973                   vlib_frame_t * frame)
974 {
975   u32 n_left_from, * from, * to_next;
976   snat_out2in_next_t next_index;
977   u32 pkts_processed = 0;
978   snat_main_t * sm = &snat_main;
979   f64 now = vlib_time_now (vm);
980   u32 thread_index = vlib_get_thread_index ();
981
982   from = vlib_frame_vector_args (frame);
983   n_left_from = frame->n_vectors;
984   next_index = node->cached_next_index;
985
986   while (n_left_from > 0)
987     {
988       u32 n_left_to_next;
989
990       vlib_get_next_frame (vm, node, next_index,
991                            to_next, n_left_to_next);
992
993       while (n_left_from >= 4 && n_left_to_next >= 2)
994         {
995           u32 bi0, bi1;
996           vlib_buffer_t * b0, * b1;
997           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
998           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
999           u32 sw_if_index0, sw_if_index1;
1000           ip4_header_t * ip0, *ip1;
1001           ip_csum_t sum0, sum1;
1002           u32 new_addr0, old_addr0;
1003           u16 new_port0, old_port0;
1004           u32 new_addr1, old_addr1;
1005           u16 new_port1, old_port1;
1006           udp_header_t * udp0, * udp1;
1007           tcp_header_t * tcp0, * tcp1;
1008           icmp46_header_t * icmp0, * icmp1;
1009           snat_session_key_t key0, key1, sm0, sm1;
1010           u32 rx_fib_index0, rx_fib_index1;
1011           u32 proto0, proto1;
1012           snat_session_t * s0 = 0, * s1 = 0;
1013           clib_bihash_kv_8_8_t kv0, kv1, value0, value1;
1014
1015           /* Prefetch next iteration. */
1016           {
1017             vlib_buffer_t * p2, * p3;
1018
1019             p2 = vlib_get_buffer (vm, from[2]);
1020             p3 = vlib_get_buffer (vm, from[3]);
1021
1022             vlib_prefetch_buffer_header (p2, LOAD);
1023             vlib_prefetch_buffer_header (p3, LOAD);
1024
1025             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1026             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1027           }
1028
1029           /* speculatively enqueue b0 and b1 to the current next frame */
1030           to_next[0] = bi0 = from[0];
1031           to_next[1] = bi1 = from[1];
1032           from += 2;
1033           to_next += 2;
1034           n_left_from -= 2;
1035           n_left_to_next -= 2;
1036
1037           b0 = vlib_get_buffer (vm, bi0);
1038           b1 = vlib_get_buffer (vm, bi1);
1039
1040           vnet_buffer (b0)->snat.flags = 0;
1041           vnet_buffer (b1)->snat.flags = 0;
1042
1043           ip0 = vlib_buffer_get_current (b0);
1044           udp0 = ip4_next_header (ip0);
1045           tcp0 = (tcp_header_t *) udp0;
1046           icmp0 = (icmp46_header_t *) udp0;
1047
1048           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1049           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1050                                    sw_if_index0);
1051
1052           if (PREDICT_FALSE(ip0->ttl == 1))
1053             {
1054               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1055               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1056                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1057                                            0);
1058               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1059               goto trace0;
1060             }
1061
1062           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1063
1064           if (PREDICT_FALSE (proto0 == ~0))
1065             {
1066               s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
1067                                              thread_index, now, vm, node);
1068               if (!s0)
1069                 next0 = SNAT_OUT2IN_NEXT_DROP;
1070               goto trace0;
1071             }
1072
1073           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1074             {
1075               next0 = icmp_out2in_slow_path
1076                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1077                  next0, now, thread_index, &s0);
1078               goto trace0;
1079             }
1080
1081           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
1082             {
1083               next0 = SNAT_OUT2IN_NEXT_REASS;
1084               goto trace0;
1085             }
1086
1087           key0.addr = ip0->dst_address;
1088           key0.port = udp0->dst_port;
1089           key0.protocol = proto0;
1090           key0.fib_index = rx_fib_index0;
1091
1092           kv0.key = key0.as_u64;
1093
1094           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1095                                       &kv0, &value0))
1096             {
1097               /* Try to match static mapping by external address and port,
1098                  destination address and port in packet */
1099               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
1100                 {
1101                   if (!sm->forwarding_enabled)
1102                     {
1103                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1104                       /*
1105                        * Send DHCP packets to the ipv4 stack, or we won't
1106                        * be able to use dhcp client on the outside interface
1107                        */
1108                       if (PREDICT_TRUE (proto0 != SNAT_PROTOCOL_UDP
1109                           || (udp0->dst_port
1110                               != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
1111                         next0 = SNAT_OUT2IN_NEXT_DROP;
1112                       else
1113                         vnet_feature_next
1114                           (vnet_buffer (b0)->sw_if_index[VLIB_RX],
1115                            &next0, b0);
1116                       goto trace0;
1117                     }
1118                   else
1119                     goto trace0;
1120                 }
1121
1122               /* Create session initiated by host from external network */
1123               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1124                                                      thread_index);
1125               if (!s0)
1126                 {
1127                   next0 = SNAT_OUT2IN_NEXT_DROP;
1128                   goto trace0;
1129                 }
1130             }
1131           else
1132             {
1133               if (PREDICT_FALSE (value0.value == ~0ULL))
1134                 {
1135                   s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index,
1136                                       now, vm, node);
1137                   if (!s0)
1138                     next0 = SNAT_OUT2IN_NEXT_DROP;
1139                   goto trace0;
1140                 }
1141               else
1142                 {
1143                   s0 = pool_elt_at_index (
1144                     sm->per_thread_data[thread_index].sessions,
1145                     value0.value);
1146                 }
1147             }
1148
1149           old_addr0 = ip0->dst_address.as_u32;
1150           ip0->dst_address = s0->in2out.addr;
1151           new_addr0 = ip0->dst_address.as_u32;
1152           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1153
1154           sum0 = ip0->checksum;
1155           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1156                                  ip4_header_t,
1157                                  dst_address /* changed member */);
1158           ip0->checksum = ip_csum_fold (sum0);
1159
1160           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1161             {
1162               old_port0 = tcp0->dst_port;
1163               tcp0->dst_port = s0->in2out.port;
1164               new_port0 = tcp0->dst_port;
1165
1166               sum0 = tcp0->checksum;
1167               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1168                                      ip4_header_t,
1169                                      dst_address /* changed member */);
1170
1171               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1172                                      ip4_header_t /* cheat */,
1173                                      length /* changed member */);
1174               tcp0->checksum = ip_csum_fold(sum0);
1175             }
1176           else
1177             {
1178               old_port0 = udp0->dst_port;
1179               udp0->dst_port = s0->in2out.port;
1180               udp0->checksum = 0;
1181             }
1182
1183           /* Accounting */
1184           s0->last_heard = now;
1185           s0->total_pkts++;
1186           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1187           /* Per-user LRU list maintenance */
1188           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1189                              s0->per_user_index);
1190           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1191                               s0->per_user_list_head_index,
1192                               s0->per_user_index);
1193         trace0:
1194
1195           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1196                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1197             {
1198               snat_out2in_trace_t *t =
1199                  vlib_add_trace (vm, node, b0, sizeof (*t));
1200               t->sw_if_index = sw_if_index0;
1201               t->next_index = next0;
1202               t->session_index = ~0;
1203               if (s0)
1204                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1205             }
1206
1207           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1208
1209
1210           ip1 = vlib_buffer_get_current (b1);
1211           udp1 = ip4_next_header (ip1);
1212           tcp1 = (tcp_header_t *) udp1;
1213           icmp1 = (icmp46_header_t *) udp1;
1214
1215           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1216           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1217                                    sw_if_index1);
1218
1219           if (PREDICT_FALSE(ip1->ttl == 1))
1220             {
1221               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1222               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1223                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1224                                            0);
1225               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1226               goto trace1;
1227             }
1228
1229           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1230
1231           if (PREDICT_FALSE (proto1 == ~0))
1232             {
1233               s1 = snat_out2in_unknown_proto(sm, b1, ip1, rx_fib_index1,
1234                                              thread_index, now, vm, node);
1235               if (!s1)
1236                 next1 = SNAT_OUT2IN_NEXT_DROP;
1237               goto trace1;
1238             }
1239
1240           if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1241             {
1242               next1 = icmp_out2in_slow_path
1243                 (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1244                  next1, now, thread_index, &s1);
1245               goto trace1;
1246             }
1247
1248           if (PREDICT_FALSE (ip4_is_fragment (ip1)))
1249             {
1250               next1 = SNAT_OUT2IN_NEXT_REASS;
1251               goto trace1;
1252             }
1253
1254           key1.addr = ip1->dst_address;
1255           key1.port = udp1->dst_port;
1256           key1.protocol = proto1;
1257           key1.fib_index = rx_fib_index1;
1258
1259           kv1.key = key1.as_u64;
1260
1261           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1262                                       &kv1, &value1))
1263             {
1264               /* Try to match static mapping by external address and port,
1265                  destination address and port in packet */
1266               if (snat_static_mapping_match(sm, key1, &sm1, 1, 0, 0))
1267                 {
1268                   if (!sm->forwarding_enabled)
1269                     {
1270                       b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1271                       /*
1272                        * Send DHCP packets to the ipv4 stack, or we won't
1273                        * be able to use dhcp client on the outside interface
1274                        */
1275                       if (PREDICT_TRUE (proto1 != SNAT_PROTOCOL_UDP
1276                           || (udp1->dst_port
1277                               != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
1278                         next1 = SNAT_OUT2IN_NEXT_DROP;
1279                       else
1280                         vnet_feature_next
1281                           (vnet_buffer (b1)->sw_if_index[VLIB_RX],
1282                            &next1, b1);
1283                       goto trace1;
1284                     }
1285                   else
1286                     goto trace1;
1287                 }
1288
1289               /* Create session initiated by host from external network */
1290               s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
1291                                                      thread_index);
1292               if (!s1)
1293                 {
1294                   next1 = SNAT_OUT2IN_NEXT_DROP;
1295                   goto trace1;
1296                 }
1297             }
1298           else
1299             {
1300               if (PREDICT_FALSE (value1.value == ~0ULL))
1301                 {
1302                   s1 = snat_out2in_lb(sm, b1, ip1, rx_fib_index1, thread_index,
1303                                       now, vm, node);
1304                   if (!s1)
1305                     next1 = SNAT_OUT2IN_NEXT_DROP;
1306                   goto trace1;
1307                 }
1308               else
1309                 {
1310                   s1 = pool_elt_at_index (
1311                     sm->per_thread_data[thread_index].sessions,
1312                     value1.value);
1313                 }
1314             }
1315
1316           old_addr1 = ip1->dst_address.as_u32;
1317           ip1->dst_address = s1->in2out.addr;
1318           new_addr1 = ip1->dst_address.as_u32;
1319           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index;
1320
1321           sum1 = ip1->checksum;
1322           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1323                                  ip4_header_t,
1324                                  dst_address /* changed member */);
1325           ip1->checksum = ip_csum_fold (sum1);
1326
1327           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1328             {
1329               old_port1 = tcp1->dst_port;
1330               tcp1->dst_port = s1->in2out.port;
1331               new_port1 = tcp1->dst_port;
1332
1333               sum1 = tcp1->checksum;
1334               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1335                                      ip4_header_t,
1336                                      dst_address /* changed member */);
1337
1338               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1339                                      ip4_header_t /* cheat */,
1340                                      length /* changed member */);
1341               tcp1->checksum = ip_csum_fold(sum1);
1342             }
1343           else
1344             {
1345               old_port1 = udp1->dst_port;
1346               udp1->dst_port = s1->in2out.port;
1347               udp1->checksum = 0;
1348             }
1349
1350           /* Accounting */
1351           s1->last_heard = now;
1352           s1->total_pkts++;
1353           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1354           /* Per-user LRU list maintenance */
1355           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1356                              s1->per_user_index);
1357           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1358                               s1->per_user_list_head_index,
1359                               s1->per_user_index);
1360         trace1:
1361
1362           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1363                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1364             {
1365               snat_out2in_trace_t *t =
1366                  vlib_add_trace (vm, node, b1, sizeof (*t));
1367               t->sw_if_index = sw_if_index1;
1368               t->next_index = next1;
1369               t->session_index = ~0;
1370               if (s1)
1371                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1372             }
1373
1374           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
1375
1376           /* verify speculative enqueues, maybe switch current next frame */
1377           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1378                                            to_next, n_left_to_next,
1379                                            bi0, bi1, next0, next1);
1380         }
1381
1382       while (n_left_from > 0 && n_left_to_next > 0)
1383         {
1384           u32 bi0;
1385           vlib_buffer_t * b0;
1386           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1387           u32 sw_if_index0;
1388           ip4_header_t * ip0;
1389           ip_csum_t sum0;
1390           u32 new_addr0, old_addr0;
1391           u16 new_port0, old_port0;
1392           udp_header_t * udp0;
1393           tcp_header_t * tcp0;
1394           icmp46_header_t * icmp0;
1395           snat_session_key_t key0, sm0;
1396           u32 rx_fib_index0;
1397           u32 proto0;
1398           snat_session_t * s0 = 0;
1399           clib_bihash_kv_8_8_t kv0, value0;
1400
1401           /* speculatively enqueue b0 to the current next frame */
1402           bi0 = from[0];
1403           to_next[0] = bi0;
1404           from += 1;
1405           to_next += 1;
1406           n_left_from -= 1;
1407           n_left_to_next -= 1;
1408
1409           b0 = vlib_get_buffer (vm, bi0);
1410
1411           vnet_buffer (b0)->snat.flags = 0;
1412
1413           ip0 = vlib_buffer_get_current (b0);
1414           udp0 = ip4_next_header (ip0);
1415           tcp0 = (tcp_header_t *) udp0;
1416           icmp0 = (icmp46_header_t *) udp0;
1417
1418           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1419           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1420                                    sw_if_index0);
1421
1422           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1423
1424           if (PREDICT_FALSE (proto0 == ~0))
1425             {
1426               s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
1427                                              thread_index, now, vm, node);
1428               if (!s0)
1429                 next0 = SNAT_OUT2IN_NEXT_DROP;
1430               goto trace00;
1431             }
1432
1433           if (PREDICT_FALSE(ip0->ttl == 1))
1434             {
1435               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1436               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1437                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1438                                            0);
1439               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1440               goto trace00;
1441             }
1442
1443           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1444             {
1445               next0 = icmp_out2in_slow_path
1446                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1447                  next0, now, thread_index, &s0);
1448               goto trace00;
1449             }
1450
1451           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
1452             {
1453               next0 = SNAT_OUT2IN_NEXT_REASS;
1454               goto trace00;
1455             }
1456
1457           key0.addr = ip0->dst_address;
1458           key0.port = udp0->dst_port;
1459           key0.protocol = proto0;
1460           key0.fib_index = rx_fib_index0;
1461
1462           kv0.key = key0.as_u64;
1463
1464           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1465                                       &kv0, &value0))
1466             {
1467               /* Try to match static mapping by external address and port,
1468                  destination address and port in packet */
1469               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
1470                 {
1471                   if (!sm->forwarding_enabled)
1472                     {
1473                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1474                       /*
1475                        * Send DHCP packets to the ipv4 stack, or we won't
1476                        * be able to use dhcp client on the outside interface
1477                        */
1478                       if (PREDICT_TRUE (proto0 != SNAT_PROTOCOL_UDP
1479                           || (udp0->dst_port
1480                               != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
1481                         next0 = SNAT_OUT2IN_NEXT_DROP;
1482                       else
1483                         vnet_feature_next
1484                           (vnet_buffer (b0)->sw_if_index[VLIB_RX],
1485                            &next0, b0);
1486                       goto trace00;
1487                     }
1488                   else
1489                     goto trace00;
1490                 }
1491
1492               /* Create session initiated by host from external network */
1493               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1494                                                      thread_index);
1495               if (!s0)
1496                 {
1497                   next0 = SNAT_OUT2IN_NEXT_DROP;
1498                   goto trace00;
1499                 }
1500             }
1501           else
1502             {
1503               if (PREDICT_FALSE (value0.value == ~0ULL))
1504                 {
1505                   s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index,
1506                                       now, vm, node);
1507                   if (!s0)
1508                     next0 = SNAT_OUT2IN_NEXT_DROP;
1509                   goto trace00;
1510                 }
1511               else
1512                 {
1513                   s0 = pool_elt_at_index (
1514                     sm->per_thread_data[thread_index].sessions,
1515                     value0.value);
1516                 }
1517             }
1518
1519           old_addr0 = ip0->dst_address.as_u32;
1520           ip0->dst_address = s0->in2out.addr;
1521           new_addr0 = ip0->dst_address.as_u32;
1522           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1523
1524           sum0 = ip0->checksum;
1525           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1526                                  ip4_header_t,
1527                                  dst_address /* changed member */);
1528           ip0->checksum = ip_csum_fold (sum0);
1529
1530           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1531             {
1532               old_port0 = tcp0->dst_port;
1533               tcp0->dst_port = s0->in2out.port;
1534               new_port0 = tcp0->dst_port;
1535
1536               sum0 = tcp0->checksum;
1537               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1538                                      ip4_header_t,
1539                                      dst_address /* changed member */);
1540
1541               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1542                                      ip4_header_t /* cheat */,
1543                                      length /* changed member */);
1544               tcp0->checksum = ip_csum_fold(sum0);
1545             }
1546           else
1547             {
1548               old_port0 = udp0->dst_port;
1549               udp0->dst_port = s0->in2out.port;
1550               udp0->checksum = 0;
1551             }
1552
1553           /* Accounting */
1554           s0->last_heard = now;
1555           s0->total_pkts++;
1556           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1557           /* Per-user LRU list maintenance */
1558           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1559                              s0->per_user_index);
1560           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1561                               s0->per_user_list_head_index,
1562                               s0->per_user_index);
1563         trace00:
1564
1565           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1566                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1567             {
1568               snat_out2in_trace_t *t =
1569                  vlib_add_trace (vm, node, b0, sizeof (*t));
1570               t->sw_if_index = sw_if_index0;
1571               t->next_index = next0;
1572               t->session_index = ~0;
1573               if (s0)
1574                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1575             }
1576
1577           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1578
1579           /* verify speculative enqueue, maybe switch current next frame */
1580           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1581                                            to_next, n_left_to_next,
1582                                            bi0, next0);
1583         }
1584
1585       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1586     }
1587
1588   vlib_node_increment_counter (vm, snat_out2in_node.index,
1589                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1590                                pkts_processed);
1591   return frame->n_vectors;
1592 }
1593
1594 VLIB_REGISTER_NODE (snat_out2in_node) = {
1595   .function = snat_out2in_node_fn,
1596   .name = "nat44-out2in",
1597   .vector_size = sizeof (u32),
1598   .format_trace = format_snat_out2in_trace,
1599   .type = VLIB_NODE_TYPE_INTERNAL,
1600
1601   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1602   .error_strings = snat_out2in_error_strings,
1603
1604   .runtime_data_bytes = sizeof (snat_runtime_t),
1605
1606   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1607
1608   /* edit / add dispositions here */
1609   .next_nodes = {
1610     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1611     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1612     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1613     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1614   },
1615 };
1616 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
1617
     /**
      * Node function of "nat44-out2in-reass": out2in NAT44 translation of
      * IPv4 fragments.
      *
      * For each buffer a reassembly context is obtained through
      * nat_ip4_reass_find_or_create(). The first fragment resolves the NAT
      * session (out2in hash lookup, falling back to a static mapping) and
      * records its index in the reassembly context; later fragments reuse
      * that index. Fragments seen before the session index is known are
      * stored with nat_ip4_reass_add_fragment() and are not enqueued; once
      * the first fragment is handled they are fed back into this same frame
      * through fragments_to_loopback.
      *
      * @param[in,out] vm     vlib main
      * @param[in,out] node   node runtime (errors, flags, cached next index)
      * @param[in,out] frame  frame of buffer indices; its vector is
      *                       overwritten when fragments are looped back
      * @return frame->n_vectors
      */
1618 static uword
1619 nat44_out2in_reass_node_fn (vlib_main_t * vm,
1620                             vlib_node_runtime_t * node,
1621                             vlib_frame_t * frame)
1622 {
1623   u32 n_left_from, *from, *to_next;
1624   snat_out2in_next_t next_index;
1625   u32 pkts_processed = 0;
1626   snat_main_t *sm = &snat_main;
1627   f64 now = vlib_time_now (vm);
1628   u32 thread_index = vlib_get_thread_index ();
1629   snat_main_per_thread_data_t *per_thread_data =
1630     &sm->per_thread_data[thread_index];
       /* Buffer-index vectors passed by address to the nat_ip4_reass_*
          helpers below: one is sent to the drop node after the loop, the
          other is re-injected into this frame. Both are freed on exit. */
1631   u32 *fragments_to_drop = 0;
1632   u32 *fragments_to_loopback = 0;
1633
1634   from = vlib_frame_vector_args (frame);
1635   n_left_from = frame->n_vectors;
1636   next_index = node->cached_next_index;
1637
1638   while (n_left_from > 0)
1639     {
1640       u32 n_left_to_next;
1641
1642       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1643
1644       while (n_left_from > 0 && n_left_to_next > 0)
1645        {
1646           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
1647           vlib_buffer_t *b0;
1648           u32 next0;
               /* Set when the buffer was stored in the reassembly context
                  instead of being forwarded. */
1649           u8 cached0 = 0;
1650           ip4_header_t *ip0;
1651           nat_reass_ip4_t *reass0;
1652           udp_header_t * udp0;
1653           tcp_header_t * tcp0;
1654           snat_session_key_t key0, sm0;
1655           clib_bihash_kv_8_8_t kv0, value0;
1656           snat_session_t * s0 = 0;
1657           u16 old_port0, new_port0;
1658           ip_csum_t sum0;
1659
1660           /* speculatively enqueue b0 to the current next frame */
1661           bi0 = from[0];
1662           to_next[0] = bi0;
1663           from += 1;
1664           to_next += 1;
1665           n_left_from -= 1;
1666           n_left_to_next -= 1;
1667
1668           b0 = vlib_get_buffer (vm, bi0);
1669           next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1670
1671           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1672           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1673                                                                sw_if_index0);
1674
               /* nat_reass_is_drop_frag() is defined outside this chunk; when
                  it returns non-zero the fragment is dropped before any
                  lookup is attempted. */
1675           if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
1676             {
1677               next0 = SNAT_OUT2IN_NEXT_DROP;
1678               b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
1679               goto trace0;
1680             }
1681
               /* tcp0 and udp0 point at the same L4 header location. */
1682           ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
1683           udp0 = ip4_next_header (ip0);
1684           tcp0 = (tcp_header_t *) udp0;
1685           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1686
1687           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
1688                                                  ip0->dst_address,
1689                                                  ip0->fragment_id,
1690                                                  ip0->protocol,
1691                                                  1,
1692                                                  &fragments_to_drop);
1693
               /* No reassembly context available: drop and count as
                  MAX_REASS. */
1694           if (PREDICT_FALSE (!reass0))
1695             {
1696               next0 = SNAT_OUT2IN_NEXT_DROP;
1697               b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_REASS];
1698               goto trace0;
1699             }
1700
               /* The L4 destination port is read only on the first fragment,
                  so the session is resolved here. */
1701           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1702             {
1703               key0.addr = ip0->dst_address;
1704               key0.port = udp0->dst_port;
1705               key0.protocol = proto0;
1706               key0.fib_index = rx_fib_index0;
1707               kv0.key = key0.as_u64;
1708
                   /* A non-zero return is treated as "no existing session". */
1709               if (clib_bihash_search_8_8 (&per_thread_data->out2in, &kv0, &value0))
1710                 {
1711                   /* Try to match static mapping by external address and port,
1712                      destination address and port in packet */
                       /* Non-zero return is handled as "no static mapping". */
1713                   if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
1714                     {
1715                       if (!sm->forwarding_enabled)
1716                         {
1717                           b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1718                           /*
1719                            * Send DHCP packets to the ipv4 stack, or we won't
1720                            * be able to use dhcp client on the outside interface
1721                            */
1722                           if (PREDICT_TRUE (proto0 != SNAT_PROTOCOL_UDP
1723                               || (udp0->dst_port
1724                                   != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))))
1725                             next0 = SNAT_OUT2IN_NEXT_DROP;
1726                           else
1727                             vnet_feature_next
1728                               (vnet_buffer (b0)->sw_if_index[VLIB_RX],
1729                                &next0, b0);
1730                           goto trace0;
1731                         }
1732                       else
                             /* Forwarding enabled: the packet leaves untranslated
                                with next0 still SNAT_OUT2IN_NEXT_LOOKUP. */
1733                         goto trace0;
1734                     }
1735
1736                   /* Create session initiated by host from external network */
1737                   s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1738                                                          thread_index);
1739                   if (!s0)
1740                     {
1741                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1742                       next0 = SNAT_OUT2IN_NEXT_DROP;
1743                       goto trace0;
1744                     }
                       /* Remember the session so later fragments can find it. */
1745                   reass0->sess_index = s0 - per_thread_data->sessions;
1746                   reass0->thread_index = thread_index;
1747                 }
1748               else
1749                 {
                       /* Existing session: value0.value is its pool index. */
1750                   s0 = pool_elt_at_index (per_thread_data->sessions,
1751                                           value0.value);
1752                   reass0->sess_index = value0.value;
1753                 }
                   /* NOTE(review): presumably appends the fragments cached for
                      this reassembly to fragments_to_loopback - confirm in
                      nat_reass. */
1754               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
1755             }
1756           else
1757             {
                   /* Session index not recorded yet: store this fragment in the
                      reassembly context and do not forward it now. */
1758               if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
1759                 {
1760                   if (nat_ip4_reass_add_fragment (reass0, bi0))
1761                     {
1762                       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_FRAG];
1763                       next0 = SNAT_OUT2IN_NEXT_DROP;
1764                       goto trace0;
1765                     }
1766                   cached0 = 1;
1767                   goto trace0;
1768                 }
1769               s0 = pool_elt_at_index (per_thread_data->sessions,
1770                                       reass0->sess_index);
1771             }
1772
               /* Rewrite the destination address from the session and patch
                  the IP header checksum incrementally. */
1773           old_addr0 = ip0->dst_address.as_u32;
1774           ip0->dst_address = s0->in2out.addr;
1775           new_addr0 = ip0->dst_address.as_u32;
1776           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1777
1778           sum0 = ip0->checksum;
1779           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1780                                  ip4_header_t,
1781                                  dst_address /* changed member */);
1782           ip0->checksum = ip_csum_fold (sum0);
1783
               /* L4 port/checksum are touched on the first fragment only. */
1784           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1785             {
1786               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1787                 {
1788                   old_port0 = tcp0->dst_port;
1789                   tcp0->dst_port = s0->in2out.port;
1790                   new_port0 = tcp0->dst_port;
1791
1792                   sum0 = tcp0->checksum;
1793                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1794                                          ip4_header_t,
1795                                          dst_address /* changed member */);
1796
                       /* The ip4_header_t "length" member is named here only as
                          a stand-in for the port (see the "cheat" note). */
1797                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
1798                                          ip4_header_t /* cheat */,
1799                                          length /* changed member */);
1800                   tcp0->checksum = ip_csum_fold(sum0);
1801                 }
1802               else
1803                 {
                       /* Every non-TCP protocol takes this branch. The checksum
                          is zeroed instead of being updated; old_port0 is
                          assigned but not read again on this path. */
1804                   old_port0 = udp0->dst_port;
1805                   udp0->dst_port = s0->in2out.port;
1806                   udp0->checksum = 0;
1807                 }
1808             }
1809
1810           /* Accounting */
1811           s0->last_heard = now;
1812           s0->total_pkts++;
1813           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1814           /* Per-user LRU list maintenance */
1815           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1816                              s0->per_user_index);
1817           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1818                               s0->per_user_list_head_index,
1819                               s0->per_user_index);
1820
1821         trace0:
1822           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1823                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1824             {
1825               nat44_out2in_reass_trace_t *t =
1826                  vlib_add_trace (vm, node, b0, sizeof (*t));
1827               t->cached = cached0;
1828               t->sw_if_index = sw_if_index0;
1829               t->next_index = next0;
1830             }
1831
               /* A cached buffer must not stay in the next frame: undo the
                  speculative enqueue done at the top of the loop. */
1832           if (cached0)
1833             {
1834               n_left_to_next++;
1835               to_next--;
1836             }
1837           else
1838             {
1839               pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1840
1841               /* verify speculative enqueue, maybe switch current next frame */
1842               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1843                                                to_next, n_left_to_next,
1844                                                bi0, next0);
1845             }
1846
               /* Input exhausted but fragments are waiting for loopback:
                  overwrite the start of the frame's vector with them and keep
                  iterating. At most VLIB_FRAME_SIZE indices are taken per
                  refill (from the tail of the vector); the rest stay queued. */
1847           if (n_left_from == 0 && vec_len (fragments_to_loopback))
1848             {
1849               from = vlib_frame_vector_args (frame);
1850               u32 len = vec_len (fragments_to_loopback);
1851               if (len <= VLIB_FRAME_SIZE)
1852                 {
1853                   clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
1854                   n_left_from = len;
1855                   vec_reset_length (fragments_to_loopback);
1856                 }
1857               else
1858                 {
1859                   clib_memcpy (from,
1860                                fragments_to_loopback + (len - VLIB_FRAME_SIZE),
1861                                sizeof (u32) * VLIB_FRAME_SIZE);
1862                   n_left_from = VLIB_FRAME_SIZE;
1863                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
1864                 }
1865             }
1866        }
1867
1868       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1869     }
1870
       /* Counts every buffer whose next node was not the drop node. */
1871   vlib_node_increment_counter (vm, nat44_out2in_reass_node.index,
1872                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1873                                pkts_processed);
1874
       /* NOTE(review): presumably hands every index collected in
          fragments_to_drop to the drop next node with the DROP_FRAGMENT
          error - confirm in nat_send_all_to_node(). */
1875   nat_send_all_to_node (vm, fragments_to_drop, node,
1876                         &node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT],
1877                         SNAT_OUT2IN_NEXT_DROP);
1878
1879   vec_free (fragments_to_drop);
1880   vec_free (fragments_to_loopback);
1881   return frame->n_vectors;
1882 }
1883
1884 VLIB_REGISTER_NODE (nat44_out2in_reass_node) = {
1885   .function = nat44_out2in_reass_node_fn,
1886   .name = "nat44-out2in-reass",
1887   .vector_size = sizeof (u32),
1888   .format_trace = format_nat44_out2in_reass_trace,
1889   .type = VLIB_NODE_TYPE_INTERNAL,
1890
1891   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1892   .error_strings = snat_out2in_error_strings,
1893
1894   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1895
1896   /* edit / add dispositions here */
1897   .next_nodes = {
1898     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1899     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1900     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1901     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1902   },
1903 };
1904 VLIB_NODE_FUNCTION_MULTIARCH (nat44_out2in_reass_node,
1905                               nat44_out2in_reass_node_fn);
1906
1907 /**************************/
1908 /*** deterministic mode ***/
1909 /**************************/
     /**
      * Node function of "nat44-det-out2in": out2in translation in
      * deterministic NAT mode.
      *
      * Per packet:
      *  - TTL == 1: an ICMP time-exceeded error is set on the buffer and it
      *    goes to SNAT_OUT2IN_NEXT_ICMP_ERROR;
      *  - ICMP: handled entirely by icmp_out2in();
      *  - otherwise: the deterministic map is looked up by the outside
      *    (destination) address, the session by external host address/port
      *    and outside port, then the destination address and port are
      *    rewritten. Packets with no map or no session are dropped with
      *    SNAT_OUT2IN_ERROR_NO_TRANSLATION.
      *
      * The same logic appears three times: twice in the dual-packet loop
      * (suffix 0 and 1) and once in the single-packet loop.
      *
      * @param[in,out] vm     vlib main
      * @param[in,out] node   node runtime (errors, flags, cached next index)
      * @param[in]     frame  frame of buffer indices
      * @return frame->n_vectors
      */
1910 static uword
1911 snat_det_out2in_node_fn (vlib_main_t * vm,
1912                          vlib_node_runtime_t * node,
1913                          vlib_frame_t * frame)
1914 {
1915   u32 n_left_from, * from, * to_next;
1916   snat_out2in_next_t next_index;
1917   u32 pkts_processed = 0;
1918   snat_main_t * sm = &snat_main;
1919   u32 thread_index = vlib_get_thread_index ();
1920
1921   from = vlib_frame_vector_args (frame);
1922   n_left_from = frame->n_vectors;
1923   next_index = node->cached_next_index;
1924
1925   while (n_left_from > 0)
1926     {
1927       u32 n_left_to_next;
1928
1929       vlib_get_next_frame (vm, node, next_index,
1930                            to_next, n_left_to_next);
1931
           /* Dual-packet loop. Four inputs must remain because from[2] and
              from[3] are prefetched while from[0] and from[1] are handled. */
1932       while (n_left_from >= 4 && n_left_to_next >= 2)
1933         {
1934           u32 bi0, bi1;
1935           vlib_buffer_t * b0, * b1;
1936           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1937           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
1938           u32 sw_if_index0, sw_if_index1;
1939           ip4_header_t * ip0, * ip1;
1940           ip_csum_t sum0, sum1;
1941           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
1942           u16 new_port0, old_port0, old_port1, new_port1;
1943           udp_header_t * udp0, * udp1;
1944           tcp_header_t * tcp0, * tcp1;
1945           u32 proto0, proto1;
1946           snat_det_out_key_t key0, key1;
1947           snat_det_map_t * dm0, * dm1;
               /* Session pointers start as 0 so the trace code can tell
                  whether a session was found. */
1948           snat_det_session_t * ses0 = 0, * ses1 = 0;
1949           u32 rx_fib_index0, rx_fib_index1;
1950           icmp46_header_t * icmp0, * icmp1;
1951
1952           /* Prefetch next iteration. */
1953           {
1954             vlib_buffer_t * p2, * p3;
1955
1956             p2 = vlib_get_buffer (vm, from[2]);
1957             p3 = vlib_get_buffer (vm, from[3]);
1958
1959             vlib_prefetch_buffer_header (p2, LOAD);
1960             vlib_prefetch_buffer_header (p3, LOAD);
1961
1962             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1963             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1964           }
1965
1966           /* speculatively enqueue b0 and b1 to the current next frame */
1967           to_next[0] = bi0 = from[0];
1968           to_next[1] = bi1 = from[1];
1969           from += 2;
1970           to_next += 2;
1971           n_left_from -= 2;
1972           n_left_to_next -= 2;
1973
1974           b0 = vlib_get_buffer (vm, bi0);
1975           b1 = vlib_get_buffer (vm, bi1);
1976
               /* ---- first packet of the pair ---- */
               /* tcp0 and udp0 point at the same L4 header location. */
1977           ip0 = vlib_buffer_get_current (b0);
1978           udp0 = ip4_next_header (ip0);
1979           tcp0 = (tcp_header_t *) udp0;
1980
1981           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1982
               /* TTL 1: no translation, report time-exceeded instead. */
1983           if (PREDICT_FALSE(ip0->ttl == 1))
1984             {
1985               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1986               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1987                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1988                                            0);
1989               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1990               goto trace0;
1991             }
1992
1993           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1994
               /* ICMP is delegated to icmp_out2in(), which returns the next
                  index and receives ses0/dm0 by address. */
1995           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1996             {
1997               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1998               icmp0 = (icmp46_header_t *) udp0;
1999
2000               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2001                                   rx_fib_index0, node, next0, thread_index,
2002                                   &ses0, &dm0);
2003               goto trace0;
2004             }
2005
               /* Session key: external host address/port plus outside port,
                  all taken from the packet as-is. */
2006           key0.ext_host_addr = ip0->src_address;
2007           key0.ext_host_port = tcp0->src;
2008           key0.out_port = tcp0->dst;
2009
2010           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
2011           if (PREDICT_FALSE(!dm0))
2012             {
2013               clib_warning("unknown dst address:  %U",
2014                            format_ip4_address, &ip0->dst_address);
2015               next0 = SNAT_OUT2IN_NEXT_DROP;
2016               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2017               goto trace0;
2018             }
2019
               /* Fills new_addr0, which becomes the destination address
                  below; the port is passed in host byte order. */
2020           snat_det_reverse(dm0, &ip0->dst_address,
2021                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
2022
2023           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
2024           if (PREDICT_FALSE(!ses0))
2025             {
2026               clib_warning("no match src %U:%d dst %U:%d for user %U",
2027                            format_ip4_address, &ip0->src_address,
2028                            clib_net_to_host_u16 (tcp0->src),
2029                            format_ip4_address, &ip0->dst_address,
2030                            clib_net_to_host_u16 (tcp0->dst),
2031                            format_ip4_address, &new_addr0);
2032               next0 = SNAT_OUT2IN_NEXT_DROP;
2033               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2034               goto trace0;
2035             }
2036           new_port0 = ses0->in_port;
2037
               /* Rewrite the destination address and patch the IP header
                  checksum incrementally. */
2038           old_addr0 = ip0->dst_address;
2039           ip0->dst_address = new_addr0;
2040           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2041
2042           sum0 = ip0->checksum;
2043           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2044                                  ip4_header_t,
2045                                  dst_address /* changed member */);
2046           ip0->checksum = ip_csum_fold (sum0);
2047
2048           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2049             {
                   /* Close tracking: FIN while ESTABLISHED moves the session
                      to CLOSE_WAIT; ACK while in LAST_ACK closes it. */
2050               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2051                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2052               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
2053                 snat_det_ses_close(dm0, ses0);
2054
2055               old_port0 = tcp0->dst;
2056               tcp0->dst = new_port0;
2057
2058               sum0 = tcp0->checksum;
2059               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2060                                      ip4_header_t,
2061                                      dst_address /* changed member */);
2062
                   /* The ip4_header_t "length" member is named here only as a
                      stand-in for the port (see the "cheat" note). */
2063               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2064                                      ip4_header_t /* cheat */,
2065                                      length /* changed member */);
2066               tcp0->checksum = ip_csum_fold(sum0);
2067             }
2068           else
2069             {
                   /* Non-TCP: the checksum is zeroed instead of being updated;
                      old_port0 is assigned but not read again on this path. */
2070               old_port0 = udp0->dst_port;
2071               udp0->dst_port = new_port0;
2072               udp0->checksum = 0;
2073             }
2074
2075         trace0:
2076
2077           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2078                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2079             {
2080               snat_out2in_trace_t *t =
2081                  vlib_add_trace (vm, node, b0, sizeof (*t));
2082               t->sw_if_index = sw_if_index0;
2083               t->next_index = next0;
                   /* ~0 unless a session was found; otherwise its offset in
                      the map's session array. */
2084               t->session_index = ~0;
2085               if (ses0)
2086                 t->session_index = ses0 - dm0->sessions;
2087             }
2088
2089           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2090
               /* ---- second packet of the pair: same steps as above ---- */
               /* b1 was already obtained from the same bi1 before the first
                  packet was processed. */
2091           b1 = vlib_get_buffer (vm, bi1);
2092
2093           ip1 = vlib_buffer_get_current (b1);
2094           udp1 = ip4_next_header (ip1);
2095           tcp1 = (tcp_header_t *) udp1;
2096
2097           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2098
2099           if (PREDICT_FALSE(ip1->ttl == 1))
2100             {
2101               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2102               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2103                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2104                                            0);
2105               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2106               goto trace1;
2107             }
2108
2109           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2110
2111           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
2112             {
2113               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
2114               icmp1 = (icmp46_header_t *) udp1;
2115
2116               next1 = icmp_out2in(sm, b1, ip1, icmp1, sw_if_index1,
2117                                   rx_fib_index1, node, next1, thread_index,
2118                                   &ses1, &dm1);
2119               goto trace1;
2120             }
2121
2122           key1.ext_host_addr = ip1->src_address;
2123           key1.ext_host_port = tcp1->src;
2124           key1.out_port = tcp1->dst;
2125
2126           dm1 = snat_det_map_by_out(sm, &ip1->dst_address);
2127           if (PREDICT_FALSE(!dm1))
2128             {
2129               clib_warning("unknown dst address:  %U",
2130                            format_ip4_address, &ip1->dst_address);
2131               next1 = SNAT_OUT2IN_NEXT_DROP;
2132               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2133               goto trace1;
2134             }
2135
2136           snat_det_reverse(dm1, &ip1->dst_address,
2137                            clib_net_to_host_u16(tcp1->dst), &new_addr1);
2138
2139           ses1 = snat_det_get_ses_by_out (dm1, &new_addr1, key1.as_u64);
2140           if (PREDICT_FALSE(!ses1))
2141             {
2142               clib_warning("no match src %U:%d dst %U:%d for user %U",
2143                            format_ip4_address, &ip1->src_address,
2144                            clib_net_to_host_u16 (tcp1->src),
2145                            format_ip4_address, &ip1->dst_address,
2146                            clib_net_to_host_u16 (tcp1->dst),
2147                            format_ip4_address, &new_addr1);
2148               next1 = SNAT_OUT2IN_NEXT_DROP;
2149               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2150               goto trace1;
2151             }
2152           new_port1 = ses1->in_port;
2153
2154           old_addr1 = ip1->dst_address;
2155           ip1->dst_address = new_addr1;
2156           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2157
2158           sum1 = ip1->checksum;
2159           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2160                                  ip4_header_t,
2161                                  dst_address /* changed member */);
2162           ip1->checksum = ip_csum_fold (sum1);
2163
2164           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
2165             {
2166               if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
2167                 ses1->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2168               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_LAST_ACK)
2169                 snat_det_ses_close(dm1, ses1);
2170
2171               old_port1 = tcp1->dst;
2172               tcp1->dst = new_port1;
2173
2174               sum1 = tcp1->checksum;
2175               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2176                                      ip4_header_t,
2177                                      dst_address /* changed member */);
2178
2179               sum1 = ip_csum_update (sum1, old_port1, new_port1,
2180                                      ip4_header_t /* cheat */,
2181                                      length /* changed member */);
2182               tcp1->checksum = ip_csum_fold(sum1);
2183             }
2184           else
2185             {
2186               old_port1 = udp1->dst_port;
2187               udp1->dst_port = new_port1;
2188               udp1->checksum = 0;
2189             }
2190
2191         trace1:
2192
2193           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2194                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2195             {
2196               snat_out2in_trace_t *t =
2197                  vlib_add_trace (vm, node, b1, sizeof (*t));
2198               t->sw_if_index = sw_if_index1;
2199               t->next_index = next1;
2200               t->session_index = ~0;
2201               if (ses1)
2202                 t->session_index = ses1 - dm1->sessions;
2203             }
2204
2205           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
2206
2207           /* verify speculative enqueues, maybe switch current next frame */
2208           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2209                                            to_next, n_left_to_next,
2210                                            bi0, bi1, next0, next1);
2211          }
2212
           /* Single-packet loop for whatever the dual loop left over; the
              per-packet steps mirror the dual loop (exit label trace00). */
2213       while (n_left_from > 0 && n_left_to_next > 0)
2214         {
2215           u32 bi0;
2216           vlib_buffer_t * b0;
2217           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
2218           u32 sw_if_index0;
2219           ip4_header_t * ip0;
2220           ip_csum_t sum0;
2221           ip4_address_t new_addr0, old_addr0;
2222           u16 new_port0, old_port0;
2223           udp_header_t * udp0;
2224           tcp_header_t * tcp0;
2225           u32 proto0;
2226           snat_det_out_key_t key0;
2227           snat_det_map_t * dm0;
2228           snat_det_session_t * ses0 = 0;
2229           u32 rx_fib_index0;
2230           icmp46_header_t * icmp0;
2231
2232           /* speculatively enqueue b0 to the current next frame */
2233           bi0 = from[0];
2234           to_next[0] = bi0;
2235           from += 1;
2236           to_next += 1;
2237           n_left_from -= 1;
2238           n_left_to_next -= 1;
2239
2240           b0 = vlib_get_buffer (vm, bi0);
2241
2242           ip0 = vlib_buffer_get_current (b0);
2243           udp0 = ip4_next_header (ip0);
2244           tcp0 = (tcp_header_t *) udp0;
2245
2246           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2247
               /* TTL 1: no translation, report time-exceeded instead. */
2248           if (PREDICT_FALSE(ip0->ttl == 1))
2249             {
2250               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2251               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2252                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2253                                            0);
2254               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2255               goto trace00;
2256             }
2257
2258           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2259
               /* ICMP is delegated to icmp_out2in(). */
2260           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2261             {
2262               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2263               icmp0 = (icmp46_header_t *) udp0;
2264
2265               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2266                                   rx_fib_index0, node, next0, thread_index,
2267                                   &ses0, &dm0);
2268               goto trace00;
2269             }
2270
2271           key0.ext_host_addr = ip0->src_address;
2272           key0.ext_host_port = tcp0->src;
2273           key0.out_port = tcp0->dst;
2274
2275           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
2276           if (PREDICT_FALSE(!dm0))
2277             {
2278               clib_warning("unknown dst address:  %U",
2279                            format_ip4_address, &ip0->dst_address);
2280               next0 = SNAT_OUT2IN_NEXT_DROP;
2281               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2282               goto trace00;
2283             }
2284
2285           snat_det_reverse(dm0, &ip0->dst_address,
2286                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
2287
2288           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
2289           if (PREDICT_FALSE(!ses0))
2290             {
2291               clib_warning("no match src %U:%d dst %U:%d for user %U",
2292                            format_ip4_address, &ip0->src_address,
2293                            clib_net_to_host_u16 (tcp0->src),
2294                            format_ip4_address, &ip0->dst_address,
2295                            clib_net_to_host_u16 (tcp0->dst),
2296                            format_ip4_address, &new_addr0);
2297               next0 = SNAT_OUT2IN_NEXT_DROP;
2298               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2299               goto trace00;
2300             }
2301           new_port0 = ses0->in_port;
2302
               /* Rewrite the destination address and patch the IP header
                  checksum incrementally. */
2303           old_addr0 = ip0->dst_address;
2304           ip0->dst_address = new_addr0;
2305           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2306
2307           sum0 = ip0->checksum;
2308           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2309                                  ip4_header_t,
2310                                  dst_address /* changed member */);
2311           ip0->checksum = ip_csum_fold (sum0);
2312
2313           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2314             {
2315               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2316                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2317               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
2318                 snat_det_ses_close(dm0, ses0);
2319
2320               old_port0 = tcp0->dst;
2321               tcp0->dst = new_port0;
2322
2323               sum0 = tcp0->checksum;
2324               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2325                                      ip4_header_t,
2326                                      dst_address /* changed member */);
2327
2328               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2329                                      ip4_header_t /* cheat */,
2330                                      length /* changed member */);
2331               tcp0->checksum = ip_csum_fold(sum0);
2332             }
2333           else
2334             {
                   /* Non-TCP: the checksum is zeroed instead of being updated. */
2335               old_port0 = udp0->dst_port;
2336               udp0->dst_port = new_port0;
2337               udp0->checksum = 0;
2338             }
2339
2340         trace00:
2341
2342           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2343                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2344             {
2345               snat_out2in_trace_t *t =
2346                  vlib_add_trace (vm, node, b0, sizeof (*t));
2347               t->sw_if_index = sw_if_index0;
2348               t->next_index = next0;
2349               t->session_index = ~0;
2350               if (ses0)
2351                 t->session_index = ses0 - dm0->sessions;
2352             }
2353
2354           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2355
2356           /* verify speculative enqueue, maybe switch current next frame */
2357           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2358                                            to_next, n_left_to_next,
2359                                            bi0, next0);
2360         }
2361
2362       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2363     }
2364
       /* Counts every packet whose next node was not the drop node. */
2365   vlib_node_increment_counter (vm, snat_det_out2in_node.index,
2366                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2367                                pkts_processed);
2368   return frame->n_vectors;
2369 }
2370
2371 VLIB_REGISTER_NODE (snat_det_out2in_node) = {
2372   .function = snat_det_out2in_node_fn,
2373   .name = "nat44-det-out2in",
2374   .vector_size = sizeof (u32),
2375   .format_trace = format_snat_out2in_trace,
2376   .type = VLIB_NODE_TYPE_INTERNAL,
2377
2378   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2379   .error_strings = snat_out2in_error_strings,
2380
2381   .runtime_data_bytes = sizeof (snat_runtime_t),
2382
2383   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
2384
2385   /* edit / add dispositions here */
2386   .next_nodes = {
2387     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
2388     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2389     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2390     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
2391   },
2392 };
2393 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn);
2394
2395 /**
2396  * Get address and port values to be used for ICMP packet translation
2397  * and create session if needed
2398  *
2399  * @param[in,out] sm             NAT main
2400  * @param[in,out] node           NAT node runtime
2401  * @param[in] thread_index       thread index
2402  * @param[in,out] b0             buffer containing packet to be translated
2403  * @param[out] p_proto           protocol used for matching
2404  * @param[out] p_value           address and port after NAT translation
2405  * @param[out] p_dont_translate  if packet should not be translated
2406  * @param d                      optional parameter
2407  * @param e                      optional parameter
2408  */
2409 u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node,
2410                           u32 thread_index, vlib_buffer_t *b0,
2411                           ip4_header_t *ip0, u8 *p_proto,
2412                           snat_session_key_t *p_value,
2413                           u8 *p_dont_translate, void *d, void *e)
2414 {
2415   icmp46_header_t *icmp0;
2416   u32 sw_if_index0;
2417   u8 protocol;
2418   snat_det_out_key_t key0;
2419   u8 dont_translate = 0;
2420   u32 next0 = ~0;
2421   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2422   ip4_header_t *inner_ip0;
2423   void *l4_header = 0;
2424   icmp46_header_t *inner_icmp0;
2425   snat_det_map_t * dm0 = 0;
2426   ip4_address_t new_addr0 = {{0}};
2427   snat_det_session_t * ses0 = 0;
2428   ip4_address_t out_addr;
2429
2430   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2431   echo0 = (icmp_echo_header_t *)(icmp0+1);
2432   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2433
2434   if (!icmp_is_error_message (icmp0))
2435     {
2436       protocol = SNAT_PROTOCOL_ICMP;
2437       key0.ext_host_addr = ip0->src_address;
2438       key0.ext_host_port = 0;
2439       key0.out_port = echo0->identifier;
2440       out_addr = ip0->dst_address;
2441     }
2442   else
2443     {
2444       inner_ip0 = (ip4_header_t *)(echo0+1);
2445       l4_header = ip4_next_header (inner_ip0);
2446       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2447       key0.ext_host_addr = inner_ip0->dst_address;
2448       out_addr = inner_ip0->src_address;
2449       switch (protocol)
2450         {
2451         case SNAT_PROTOCOL_ICMP:
2452           inner_icmp0 = (icmp46_header_t*)l4_header;
2453           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2454           key0.ext_host_port = 0;
2455           key0.out_port = inner_echo0->identifier;
2456           break;
2457         case SNAT_PROTOCOL_UDP:
2458         case SNAT_PROTOCOL_TCP:
2459           key0.ext_host_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2460           key0.out_port = ((tcp_udp_header_t*)l4_header)->src_port;
2461           break;
2462         default:
2463           b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
2464           next0 = SNAT_OUT2IN_NEXT_DROP;
2465           goto out;
2466         }
2467     }
2468
2469   dm0 = snat_det_map_by_out(sm, &out_addr);
2470   if (PREDICT_FALSE(!dm0))
2471     {
2472       /* Don't NAT packet aimed at the intfc address */
2473       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
2474                                           ip0->dst_address.as_u32)))
2475         {
2476           dont_translate = 1;
2477           goto out;
2478         }
2479       clib_warning("unknown dst address:  %U",
2480                    format_ip4_address, &ip0->dst_address);
2481       goto out;
2482     }
2483
2484   snat_det_reverse(dm0, &ip0->dst_address,
2485                    clib_net_to_host_u16(key0.out_port), &new_addr0);
2486
2487   ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
2488   if (PREDICT_FALSE(!ses0))
2489     {
2490       /* Don't NAT packet aimed at the intfc address */
2491       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
2492                                           ip0->dst_address.as_u32)))
2493         {
2494           dont_translate = 1;
2495           goto out;
2496         }
2497       clib_warning("no match src %U:%d dst %U:%d for user %U",
2498                    format_ip4_address, &key0.ext_host_addr,
2499                    clib_net_to_host_u16 (key0.ext_host_port),
2500                    format_ip4_address, &out_addr,
2501                    clib_net_to_host_u16 (key0.out_port),
2502                    format_ip4_address, &new_addr0);
2503       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2504       next0 = SNAT_OUT2IN_NEXT_DROP;
2505       goto out;
2506     }
2507
2508   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
2509                     !icmp_is_error_message (icmp0)))
2510     {
2511       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
2512       next0 = SNAT_OUT2IN_NEXT_DROP;
2513       goto out;
2514     }
2515
2516   goto out;
2517
2518 out:
2519   *p_proto = protocol;
2520   if (ses0)
2521     {
2522       p_value->addr = new_addr0;
2523       p_value->fib_index = sm->inside_fib_index;
2524       p_value->port = ses0->in_port;
2525     }
2526   *p_dont_translate = dont_translate;
2527   if (d)
2528     *(snat_det_session_t**)d = ses0;
2529   if (e)
2530     *(snat_det_map_t**)e = dm0;
2531   return next0;
2532 }
2533
2534 /**********************/
2535 /*** worker handoff ***/
2536 /**********************/
2537 static uword
2538 snat_out2in_worker_handoff_fn (vlib_main_t * vm,
2539                                vlib_node_runtime_t * node,
2540                                vlib_frame_t * frame)
2541 {
2542   snat_main_t *sm = &snat_main;
2543   vlib_thread_main_t *tm = vlib_get_thread_main ();
2544   u32 n_left_from, *from, *to_next = 0;
2545   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
2546   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
2547     = 0;
2548   vlib_frame_queue_elt_t *hf = 0;
2549   vlib_frame_t *f = 0;
2550   int i;
2551   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
2552   u32 next_worker_index = 0;
2553   u32 current_worker_index = ~0;
2554   u32 thread_index = vlib_get_thread_index ();
2555
2556   ASSERT (vec_len (sm->workers));
2557
2558   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
2559     {
2560       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
2561
2562       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
2563                                sm->first_worker_index + sm->num_workers - 1,
2564                                (vlib_frame_queue_t *) (~0));
2565     }
2566
2567   from = vlib_frame_vector_args (frame);
2568   n_left_from = frame->n_vectors;
2569
2570   while (n_left_from > 0)
2571     {
2572       u32 bi0;
2573       vlib_buffer_t *b0;
2574       u32 sw_if_index0;
2575       u32 rx_fib_index0;
2576       ip4_header_t * ip0;
2577       u8 do_handoff;
2578
2579       bi0 = from[0];
2580       from += 1;
2581       n_left_from -= 1;
2582
2583       b0 = vlib_get_buffer (vm, bi0);
2584
2585       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
2586       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2587
2588       ip0 = vlib_buffer_get_current (b0);
2589
2590       next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0);
2591
2592       if (PREDICT_FALSE (next_worker_index != thread_index))
2593         {
2594           do_handoff = 1;
2595
2596           if (next_worker_index != current_worker_index)
2597             {
2598               if (hf)
2599                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2600
2601               hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index,
2602                                                       next_worker_index,
2603                                                       handoff_queue_elt_by_worker_index);
2604
2605               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2606               to_next_worker = &hf->buffer_index[hf->n_vectors];
2607               current_worker_index = next_worker_index;
2608             }
2609
2610           /* enqueue to correct worker thread */
2611           to_next_worker[0] = bi0;
2612           to_next_worker++;
2613           n_left_to_next_worker--;
2614
2615           if (n_left_to_next_worker == 0)
2616             {
2617               hf->n_vectors = VLIB_FRAME_SIZE;
2618               vlib_put_frame_queue_elt (hf);
2619               current_worker_index = ~0;
2620               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2621               hf = 0;
2622             }
2623         }
2624       else
2625         {
2626           do_handoff = 0;
2627           /* if this is 1st frame */
2628           if (!f)
2629             {
2630               f = vlib_get_frame_to_node (vm, sm->out2in_node_index);
2631               to_next = vlib_frame_vector_args (f);
2632             }
2633
2634           to_next[0] = bi0;
2635           to_next += 1;
2636           f->n_vectors++;
2637         }
2638
2639       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2640                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2641         {
2642           snat_out2in_worker_handoff_trace_t *t =
2643             vlib_add_trace (vm, node, b0, sizeof (*t));
2644           t->next_worker_index = next_worker_index;
2645           t->do_handoff = do_handoff;
2646         }
2647     }
2648
2649   if (f)
2650     vlib_put_frame_to_node (vm, sm->out2in_node_index, f);
2651
2652   if (hf)
2653     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2654
2655   /* Ship frames to the worker nodes */
2656   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2657     {
2658       if (handoff_queue_elt_by_worker_index[i])
2659         {
2660           hf = handoff_queue_elt_by_worker_index[i];
2661           /*
2662            * It works better to let the handoff node
2663            * rate-adapt, always ship the handoff queue element.
2664            */
2665           if (1 || hf->n_vectors == hf->last_n_vectors)
2666             {
2667               vlib_put_frame_queue_elt (hf);
2668               handoff_queue_elt_by_worker_index[i] = 0;
2669             }
2670           else
2671             hf->last_n_vectors = hf->n_vectors;
2672         }
2673       congested_handoff_queue_by_worker_index[i] =
2674         (vlib_frame_queue_t *) (~0);
2675     }
2676   hf = 0;
2677   current_worker_index = ~0;
2678   return frame->n_vectors;
2679 }
2680
2681 VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = {
2682   .function = snat_out2in_worker_handoff_fn,
2683   .name = "nat44-out2in-worker-handoff",
2684   .vector_size = sizeof (u32),
2685   .format_trace = format_snat_out2in_worker_handoff_trace,
2686   .type = VLIB_NODE_TYPE_INTERNAL,
2687
2688   .n_next_nodes = 1,
2689
2690   .next_nodes = {
2691     [0] = "error-drop",
2692   },
2693 };
2694
2695 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn);
2696
2697 static uword
2698 snat_out2in_fast_node_fn (vlib_main_t * vm,
2699                           vlib_node_runtime_t * node,
2700                           vlib_frame_t * frame)
2701 {
2702   u32 n_left_from, * from, * to_next;
2703   snat_out2in_next_t next_index;
2704   u32 pkts_processed = 0;
2705   snat_main_t * sm = &snat_main;
2706
2707   from = vlib_frame_vector_args (frame);
2708   n_left_from = frame->n_vectors;
2709   next_index = node->cached_next_index;
2710
2711   while (n_left_from > 0)
2712     {
2713       u32 n_left_to_next;
2714
2715       vlib_get_next_frame (vm, node, next_index,
2716                            to_next, n_left_to_next);
2717
2718       while (n_left_from > 0 && n_left_to_next > 0)
2719         {
2720           u32 bi0;
2721           vlib_buffer_t * b0;
2722           u32 next0 = SNAT_OUT2IN_NEXT_DROP;
2723           u32 sw_if_index0;
2724           ip4_header_t * ip0;
2725           ip_csum_t sum0;
2726           u32 new_addr0, old_addr0;
2727           u16 new_port0, old_port0;
2728           udp_header_t * udp0;
2729           tcp_header_t * tcp0;
2730           icmp46_header_t * icmp0;
2731           snat_session_key_t key0, sm0;
2732           u32 proto0;
2733           u32 rx_fib_index0;
2734
2735           /* speculatively enqueue b0 to the current next frame */
2736           bi0 = from[0];
2737           to_next[0] = bi0;
2738           from += 1;
2739           to_next += 1;
2740           n_left_from -= 1;
2741           n_left_to_next -= 1;
2742
2743           b0 = vlib_get_buffer (vm, bi0);
2744
2745           ip0 = vlib_buffer_get_current (b0);
2746           udp0 = ip4_next_header (ip0);
2747           tcp0 = (tcp_header_t *) udp0;
2748           icmp0 = (icmp46_header_t *) udp0;
2749
2750           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2751           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2752
2753           vnet_feature_next (sw_if_index0, &next0, b0);
2754
2755           if (PREDICT_FALSE(ip0->ttl == 1))
2756             {
2757               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2758               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2759                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2760                                            0);
2761               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2762               goto trace00;
2763             }
2764
2765           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2766
2767           if (PREDICT_FALSE (proto0 == ~0))
2768               goto trace00;
2769
2770           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2771             {
2772               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2773                                   rx_fib_index0, node, next0, ~0, 0, 0);
2774               goto trace00;
2775             }
2776
2777           key0.addr = ip0->dst_address;
2778           key0.port = udp0->dst_port;
2779           key0.fib_index = rx_fib_index0;
2780
2781           if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
2782             {
2783               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2784               goto trace00;
2785             }
2786
2787           new_addr0 = sm0.addr.as_u32;
2788           new_port0 = sm0.port;
2789           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2790           old_addr0 = ip0->dst_address.as_u32;
2791           ip0->dst_address.as_u32 = new_addr0;
2792
2793           sum0 = ip0->checksum;
2794           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2795                                  ip4_header_t,
2796                                  dst_address /* changed member */);
2797           ip0->checksum = ip_csum_fold (sum0);
2798
2799           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
2800             {
2801                if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2802                 {
2803                   old_port0 = tcp0->dst_port;
2804                   tcp0->dst_port = new_port0;
2805
2806                   sum0 = tcp0->checksum;
2807                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2808                                          ip4_header_t,
2809                                          dst_address /* changed member */);
2810
2811                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2812                                          ip4_header_t /* cheat */,
2813                                          length /* changed member */);
2814                   tcp0->checksum = ip_csum_fold(sum0);
2815                 }
2816               else
2817                 {
2818                   old_port0 = udp0->dst_port;
2819                   udp0->dst_port = new_port0;
2820                   udp0->checksum = 0;
2821                 }
2822             }
2823           else
2824             {
2825               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2826                 {
2827                   sum0 = tcp0->checksum;
2828                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2829                                          ip4_header_t,
2830                                          dst_address /* changed member */);
2831
2832                   tcp0->checksum = ip_csum_fold(sum0);
2833                 }
2834             }
2835
2836         trace00:
2837
2838           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2839                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2840             {
2841               snat_out2in_trace_t *t =
2842                  vlib_add_trace (vm, node, b0, sizeof (*t));
2843               t->sw_if_index = sw_if_index0;
2844               t->next_index = next0;
2845             }
2846
2847           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2848
2849           /* verify speculative enqueue, maybe switch current next frame */
2850           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2851                                            to_next, n_left_to_next,
2852                                            bi0, next0);
2853         }
2854
2855       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2856     }
2857
2858   vlib_node_increment_counter (vm, snat_out2in_fast_node.index,
2859                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2860                                pkts_processed);
2861   return frame->n_vectors;
2862 }
2863
2864 VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
2865   .function = snat_out2in_fast_node_fn,
2866   .name = "nat44-out2in-fast",
2867   .vector_size = sizeof (u32),
2868   .format_trace = format_snat_out2in_fast_trace,
2869   .type = VLIB_NODE_TYPE_INTERNAL,
2870
2871   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2872   .error_strings = snat_out2in_error_strings,
2873
2874   .runtime_data_bytes = sizeof (snat_runtime_t),
2875
2876   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
2877
2878   /* edit / add dispositions here */
2879   .next_nodes = {
2880     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2881     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
2882     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2883     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
2884   },
2885 };
2886 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn);