NAT64: multi-thread support (VPP-891)
[vpp.git] / src / plugins / nat / out2in.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/udp/udp.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <nat/nat.h>
26 #include <nat/nat_ipfix_logging.h>
27 #include <nat/nat_det.h>
28 #include <nat/nat_reass.h>
29
30 #include <vppinfra/hash.h>
31 #include <vppinfra/error.h>
32 #include <vppinfra/elog.h>
33
34 typedef struct {
35   u32 sw_if_index;
36   u32 next_index;
37   u32 session_index;
38 } snat_out2in_trace_t;
39
40 typedef struct {
41   u32 next_worker_index;
42   u8 do_handoff;
43 } snat_out2in_worker_handoff_trace_t;
44
45 /* packet trace format function */
46 static u8 * format_snat_out2in_trace (u8 * s, va_list * args)
47 {
48   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
49   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
50   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
51
52   s = format (s, "NAT44_OUT2IN: sw_if_index %d, next index %d, session index %d",
53               t->sw_if_index, t->next_index, t->session_index);
54   return s;
55 }
56
57 static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args)
58 {
59   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
60   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
61   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
62
63   s = format (s, "NAT44_OUT2IN_FAST: sw_if_index %d, next index %d",
64               t->sw_if_index, t->next_index);
65   return s;
66 }
67
68 static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args)
69 {
70   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
71   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
72   snat_out2in_worker_handoff_trace_t * t =
73     va_arg (*args, snat_out2in_worker_handoff_trace_t *);
74   char * m;
75
76   m = t->do_handoff ? "next worker" : "same worker";
77   s = format (s, "NAT44_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
78
79   return s;
80 }
81
82 typedef struct {
83   u32 sw_if_index;
84   u32 next_index;
85   u8 cached;
86 } nat44_out2in_reass_trace_t;
87
88 static u8 * format_nat44_out2in_reass_trace (u8 * s, va_list * args)
89 {
90   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
91   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
92   nat44_out2in_reass_trace_t * t = va_arg (*args, nat44_out2in_reass_trace_t *);
93
94   s = format (s, "NAT44_OUT2IN_REASS: sw_if_index %d, next index %d, status %s",
95               t->sw_if_index, t->next_index,
96               t->cached ? "cached" : "translated");
97
98   return s;
99 }
100
101 vlib_node_registration_t snat_out2in_node;
102 vlib_node_registration_t snat_out2in_fast_node;
103 vlib_node_registration_t snat_out2in_worker_handoff_node;
104 vlib_node_registration_t snat_det_out2in_node;
105 vlib_node_registration_t nat44_out2in_reass_node;
106
/*
 * Error counters as (symbol, description) pairs. The list is expanded
 * twice below: once into the snat_out2in_error_t enumerators and once into
 * the matching description strings, so both always stay in the same order.
 */
#define foreach_snat_out2in_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(OUT2IN_PACKETS, "Good out2in packets processed")      \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(NO_TRANSLATION, "No translation")                     \
_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")   \
_(DROP_FRAGMENT, "Drop fragment")                       \
_(MAX_REASS, "Maximum reassemblies exceeded")           \
_(MAX_FRAG, "Maximum fragments per reassembly exceeded")

/* One SNAT_OUT2IN_ERROR_<symbol> per list entry, plus the entry count. */
typedef enum
{
#define _(sym,str) SNAT_OUT2IN_ERROR_##sym,
  foreach_snat_out2in_error
#undef _
  SNAT_OUT2IN_N_ERROR,
} snat_out2in_error_t;

/* Description strings, indexed by snat_out2in_error_t. */
static char *snat_out2in_error_strings[] = {
#define _(sym,string) string,
  foreach_snat_out2in_error
#undef _
};
129
/* Next-node indices used by the out2in code; N_NEXT is the entry count. */
typedef enum
{
  SNAT_OUT2IN_NEXT_DROP,
  SNAT_OUT2IN_NEXT_LOOKUP,
  SNAT_OUT2IN_NEXT_ICMP_ERROR,
  SNAT_OUT2IN_NEXT_REASS,
  SNAT_OUT2IN_N_NEXT,
} snat_out2in_next_t;
137
138 /**
139  * @brief Create session for static mapping.
140  *
141  * Create NAT session initiated by host from external network with static
142  * mapping.
143  *
144  * @param sm     NAT main.
145  * @param b0     Vlib buffer.
146  * @param in2out In2out NAT44 session key.
147  * @param out2in Out2in NAT44 session key.
148  * @param node   Vlib node.
149  *
150  * @returns SNAT session if successfully created otherwise 0.
151  */
152 static inline snat_session_t *
153 create_session_for_static_mapping (snat_main_t *sm,
154                                    vlib_buffer_t *b0,
155                                    snat_session_key_t in2out,
156                                    snat_session_key_t out2in,
157                                    vlib_node_runtime_t * node,
158                                    u32 thread_index)
159 {
160   snat_user_t *u;
161   snat_user_key_t user_key;
162   snat_session_t *s;
163   clib_bihash_kv_8_8_t kv0, value0;
164   dlist_elt_t * per_user_translation_list_elt;
165   dlist_elt_t * per_user_list_head_elt;
166   ip4_header_t *ip0;
167   udp_header_t *udp0;
168
169   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
170     {
171       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
172       return 0;
173     }
174
175   ip0 = vlib_buffer_get_current (b0);
176   udp0 = ip4_next_header (ip0);
177
178   user_key.addr = in2out.addr;
179   user_key.fib_index = in2out.fib_index;
180   kv0.key = user_key.as_u64;
181
182   /* Ever heard of the "user" = inside ip4 address before? */
183   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].user_hash,
184                               &kv0, &value0))
185     {
186       /* no, make a new one */
187       pool_get (sm->per_thread_data[thread_index].users, u);
188       memset (u, 0, sizeof (*u));
189       u->addr = in2out.addr;
190       u->fib_index = in2out.fib_index;
191
192       pool_get (sm->per_thread_data[thread_index].list_pool,
193                 per_user_list_head_elt);
194
195       u->sessions_per_user_list_head_index = per_user_list_head_elt -
196         sm->per_thread_data[thread_index].list_pool;
197
198       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
199                        u->sessions_per_user_list_head_index);
200
201       kv0.value = u - sm->per_thread_data[thread_index].users;
202
203       /* add user */
204       clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].user_hash,
205                                &kv0, 1 /* is_add */);
206     }
207   else
208     {
209       u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
210                              value0.value);
211     }
212
213   pool_get (sm->per_thread_data[thread_index].sessions, s);
214   memset (s, 0, sizeof (*s));
215
216   s->outside_address_index = ~0;
217   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
218   s->ext_host_addr.as_u32 = ip0->src_address.as_u32;
219   s->ext_host_port = udp0->src_port;
220   u->nstaticsessions++;
221
222   /* Create list elts */
223   pool_get (sm->per_thread_data[thread_index].list_pool,
224             per_user_translation_list_elt);
225   clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
226                    per_user_translation_list_elt -
227                    sm->per_thread_data[thread_index].list_pool);
228
229   per_user_translation_list_elt->value =
230     s - sm->per_thread_data[thread_index].sessions;
231   s->per_user_index =
232     per_user_translation_list_elt - sm->per_thread_data[thread_index].list_pool;
233   s->per_user_list_head_index = u->sessions_per_user_list_head_index;
234
235   clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
236                       s->per_user_list_head_index,
237                       per_user_translation_list_elt -
238                       sm->per_thread_data[thread_index].list_pool);
239
240   s->in2out = in2out;
241   s->out2in = out2in;
242   s->in2out.protocol = out2in.protocol;
243
244   /* Add to translation hashes */
245   kv0.key = s->in2out.as_u64;
246   kv0.value = s - sm->per_thread_data[thread_index].sessions;
247   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
248                                1 /* is_add */))
249       clib_warning ("in2out key add failed");
250
251   kv0.key = s->out2in.as_u64;
252   kv0.value = s - sm->per_thread_data[thread_index].sessions;
253
254   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
255                                1 /* is_add */))
256       clib_warning ("out2in key add failed");
257
258   /* log NAT event */
259   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
260                                       s->out2in.addr.as_u32,
261                                       s->in2out.protocol,
262                                       s->in2out.port,
263                                       s->out2in.port,
264                                       s->in2out.fib_index);
265    return s;
266 }
267
268 static_always_inline
269 snat_out2in_error_t icmp_get_key(ip4_header_t *ip0,
270                                  snat_session_key_t *p_key0)
271 {
272   icmp46_header_t *icmp0;
273   snat_session_key_t key0;
274   icmp_echo_header_t *echo0, *inner_echo0 = 0;
275   ip4_header_t *inner_ip0;
276   void *l4_header = 0;
277   icmp46_header_t *inner_icmp0;
278
279   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
280   echo0 = (icmp_echo_header_t *)(icmp0+1);
281
282   if (!icmp_is_error_message (icmp0))
283     {
284       key0.protocol = SNAT_PROTOCOL_ICMP;
285       key0.addr = ip0->dst_address;
286       key0.port = echo0->identifier;
287     }
288   else
289     {
290       inner_ip0 = (ip4_header_t *)(echo0+1);
291       l4_header = ip4_next_header (inner_ip0);
292       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
293       key0.addr = inner_ip0->src_address;
294       switch (key0.protocol)
295         {
296         case SNAT_PROTOCOL_ICMP:
297           inner_icmp0 = (icmp46_header_t*)l4_header;
298           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
299           key0.port = inner_echo0->identifier;
300           break;
301         case SNAT_PROTOCOL_UDP:
302         case SNAT_PROTOCOL_TCP:
303           key0.port = ((tcp_udp_header_t*)l4_header)->src_port;
304           break;
305         default:
306           return SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL;
307         }
308     }
309   *p_key0 = key0;
310   return -1; /* success */
311 }
312
313 /**
314  * Get address and port values to be used for ICMP packet translation
315  * and create session if needed
316  *
317  * @param[in,out] sm             NAT main
318  * @param[in,out] node           NAT node runtime
319  * @param[in] thread_index       thread index
320  * @param[in,out] b0             buffer containing packet to be translated
321  * @param[out] p_proto           protocol used for matching
322  * @param[out] p_value           address and port after NAT translation
323  * @param[out] p_dont_translate  if packet should not be translated
324  * @param d                      optional parameter
325  * @param e                      optional parameter
326  */
327 u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
328                            u32 thread_index, vlib_buffer_t *b0,
329                            ip4_header_t *ip0, u8 *p_proto,
330                            snat_session_key_t *p_value,
331                            u8 *p_dont_translate, void *d, void *e)
332 {
333   icmp46_header_t *icmp0;
334   u32 sw_if_index0;
335   u32 rx_fib_index0;
336   snat_session_key_t key0;
337   snat_session_key_t sm0;
338   snat_session_t *s0 = 0;
339   u8 dont_translate = 0;
340   clib_bihash_kv_8_8_t kv0, value0;
341   u8 is_addr_only;
342   u32 next0 = ~0;
343   int err;
344
345   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
346   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
347   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
348
349   key0.protocol = 0;
350
351   err = icmp_get_key (ip0, &key0);
352   if (err != -1)
353     {
354       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
355       next0 = SNAT_OUT2IN_NEXT_DROP;
356       goto out;
357     }
358   key0.fib_index = rx_fib_index0;
359
360   kv0.key = key0.as_u64;
361
362   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
363                               &value0))
364     {
365       /* Try to match static mapping by external address and port,
366          destination address and port in packet */
367       if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only))
368         {
369           /* Don't NAT packet aimed at the intfc address */
370           if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
371                                               ip0->dst_address.as_u32)))
372             {
373               dont_translate = 1;
374               goto out;
375             }
376           b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
377           next0 = SNAT_OUT2IN_NEXT_DROP;
378           goto out;
379         }
380
381       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
382                         (icmp0->type != ICMP4_echo_request || !is_addr_only)))
383         {
384           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
385           next0 = SNAT_OUT2IN_NEXT_DROP;
386           goto out;
387         }
388
389       /* Create session initiated by host from external network */
390       s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
391                                              node, thread_index);
392
393       if (!s0)
394         {
395           next0 = SNAT_OUT2IN_NEXT_DROP;
396           goto out;
397         }
398     }
399   else
400     {
401       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
402                         icmp0->type != ICMP4_echo_request &&
403                         !icmp_is_error_message (icmp0)))
404         {
405           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
406           next0 = SNAT_OUT2IN_NEXT_DROP;
407           goto out;
408         }
409
410       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
411                               value0.value);
412     }
413
414 out:
415   *p_proto = key0.protocol;
416   if (s0)
417     *p_value = s0->in2out;
418   *p_dont_translate = dont_translate;
419   if (d)
420     *(snat_session_t**)d = s0;
421   return next0;
422 }
423
424 /**
425  * Get address and port values to be used for ICMP packet translation
426  *
427  * @param[in] sm                 NAT main
428  * @param[in,out] node           NAT node runtime
429  * @param[in] thread_index       thread index
430  * @param[in,out] b0             buffer containing packet to be translated
431  * @param[out] p_proto           protocol used for matching
432  * @param[out] p_value           address and port after NAT translation
433  * @param[out] p_dont_translate  if packet should not be translated
434  * @param d                      optional parameter
435  * @param e                      optional parameter
436  */
437 u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
438                            u32 thread_index, vlib_buffer_t *b0,
439                            ip4_header_t *ip0, u8 *p_proto,
440                            snat_session_key_t *p_value,
441                            u8 *p_dont_translate, void *d, void *e)
442 {
443   icmp46_header_t *icmp0;
444   u32 sw_if_index0;
445   u32 rx_fib_index0;
446   snat_session_key_t key0;
447   snat_session_key_t sm0;
448   u8 dont_translate = 0;
449   u8 is_addr_only;
450   u32 next0 = ~0;
451   int err;
452
453   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
454   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
455   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
456
457   err = icmp_get_key (ip0, &key0);
458   if (err != -1)
459     {
460       b0->error = node->errors[err];
461       next0 = SNAT_OUT2IN_NEXT_DROP;
462       goto out2;
463     }
464   key0.fib_index = rx_fib_index0;
465
466   if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only))
467     {
468       /* Don't NAT packet aimed at the intfc address */
469       if (is_interface_addr(sm, node, sw_if_index0, ip0->dst_address.as_u32))
470         {
471           dont_translate = 1;
472           goto out;
473         }
474       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
475       next0 = SNAT_OUT2IN_NEXT_DROP;
476       goto out;
477     }
478
479   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
480                     (icmp0->type != ICMP4_echo_request || !is_addr_only) &&
481                     !icmp_is_error_message (icmp0)))
482     {
483       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
484       next0 = SNAT_OUT2IN_NEXT_DROP;
485       goto out;
486     }
487
488 out:
489   *p_value = sm0;
490 out2:
491   *p_proto = key0.protocol;
492   *p_dont_translate = dont_translate;
493   return next0;
494 }
495
496 static inline u32 icmp_out2in (snat_main_t *sm,
497                                vlib_buffer_t * b0,
498                                ip4_header_t * ip0,
499                                icmp46_header_t * icmp0,
500                                u32 sw_if_index0,
501                                u32 rx_fib_index0,
502                                vlib_node_runtime_t * node,
503                                u32 next0,
504                                u32 thread_index,
505                                void *d,
506                                void *e)
507 {
508   snat_session_key_t sm0;
509   u8 protocol;
510   icmp_echo_header_t *echo0, *inner_echo0 = 0;
511   ip4_header_t *inner_ip0 = 0;
512   void *l4_header = 0;
513   icmp46_header_t *inner_icmp0;
514   u8 dont_translate;
515   u32 new_addr0, old_addr0;
516   u16 old_id0, new_id0;
517   ip_csum_t sum0;
518   u16 checksum0;
519   u32 next0_tmp;
520
521   echo0 = (icmp_echo_header_t *)(icmp0+1);
522
523   next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0, ip0,
524                                        &protocol, &sm0, &dont_translate, d, e);
525   if (next0_tmp != ~0)
526     next0 = next0_tmp;
527   if (next0 == SNAT_OUT2IN_NEXT_DROP || dont_translate)
528     goto out;
529
530   sum0 = ip_incremental_checksum (0, icmp0,
531                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
532   checksum0 = ~ip_csum_fold (sum0);
533   if (checksum0 != 0 && checksum0 != 0xffff)
534     {
535       next0 = SNAT_OUT2IN_NEXT_DROP;
536       goto out;
537     }
538
539   old_addr0 = ip0->dst_address.as_u32;
540   new_addr0 = ip0->dst_address.as_u32 = sm0.addr.as_u32;
541   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
542
543   sum0 = ip0->checksum;
544   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
545                          dst_address /* changed member */);
546   ip0->checksum = ip_csum_fold (sum0);
547
548   if (!icmp_is_error_message (icmp0))
549     {
550       new_id0 = sm0.port;
551       if (PREDICT_FALSE(new_id0 != echo0->identifier))
552         {
553           old_id0 = echo0->identifier;
554           new_id0 = sm0.port;
555           echo0->identifier = new_id0;
556
557           sum0 = icmp0->checksum;
558           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
559                                  identifier /* changed member */);
560           icmp0->checksum = ip_csum_fold (sum0);
561         }
562     }
563   else
564     {
565       inner_ip0 = (ip4_header_t *)(echo0+1);
566       l4_header = ip4_next_header (inner_ip0);
567
568       if (!ip4_header_checksum_is_valid (inner_ip0))
569         {
570           next0 = SNAT_OUT2IN_NEXT_DROP;
571           goto out;
572         }
573
574       old_addr0 = inner_ip0->src_address.as_u32;
575       inner_ip0->src_address = sm0.addr;
576       new_addr0 = inner_ip0->src_address.as_u32;
577
578       sum0 = icmp0->checksum;
579       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
580                              src_address /* changed member */);
581       icmp0->checksum = ip_csum_fold (sum0);
582
583       switch (protocol)
584         {
585         case SNAT_PROTOCOL_ICMP:
586           inner_icmp0 = (icmp46_header_t*)l4_header;
587           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
588
589           old_id0 = inner_echo0->identifier;
590           new_id0 = sm0.port;
591           inner_echo0->identifier = new_id0;
592
593           sum0 = icmp0->checksum;
594           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
595                                  identifier);
596           icmp0->checksum = ip_csum_fold (sum0);
597           break;
598         case SNAT_PROTOCOL_UDP:
599         case SNAT_PROTOCOL_TCP:
600           old_id0 = ((tcp_udp_header_t*)l4_header)->src_port;
601           new_id0 = sm0.port;
602           ((tcp_udp_header_t*)l4_header)->src_port = new_id0;
603
604           sum0 = icmp0->checksum;
605           sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
606                                  src_port);
607           icmp0->checksum = ip_csum_fold (sum0);
608           break;
609         default:
610           ASSERT(0);
611         }
612     }
613
614 out:
615   return next0;
616 }
617
618
619 static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
620                                          vlib_buffer_t * b0,
621                                          ip4_header_t * ip0,
622                                          icmp46_header_t * icmp0,
623                                          u32 sw_if_index0,
624                                          u32 rx_fib_index0,
625                                          vlib_node_runtime_t * node,
626                                          u32 next0, f64 now,
627                                          u32 thread_index,
628                                          snat_session_t ** p_s0)
629 {
630   next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
631                       next0, thread_index, p_s0, 0);
632   snat_session_t * s0 = *p_s0;
633   if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
634     {
635       /* Accounting */
636       s0->last_heard = now;
637       s0->total_pkts++;
638       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
639       /* Per-user LRU list maintenance for dynamic translation */
640       if (!snat_is_session_static (s0))
641         {
642           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
643                              s0->per_user_index);
644           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
645                               s0->per_user_list_head_index,
646                               s0->per_user_index);
647         }
648     }
649   return next0;
650 }
651
652 static snat_session_t *
653 snat_out2in_unknown_proto (snat_main_t *sm,
654                            vlib_buffer_t * b,
655                            ip4_header_t * ip,
656                            u32 rx_fib_index,
657                            u32 thread_index,
658                            f64 now,
659                            vlib_main_t * vm,
660                            vlib_node_runtime_t * node)
661 {
662   clib_bihash_kv_8_8_t kv, value;
663   clib_bihash_kv_16_8_t s_kv, s_value;
664   snat_static_mapping_t *m;
665   snat_session_key_t m_key;
666   u32 old_addr, new_addr;
667   ip_csum_t sum;
668   nat_ed_ses_key_t key;
669   snat_session_t * s;
670   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
671   snat_user_key_t u_key;
672   snat_user_t *u;
673   dlist_elt_t *head, *elt;
674
675   old_addr = ip->dst_address.as_u32;
676
677   key.l_addr = ip->dst_address;
678   key.r_addr = ip->src_address;
679   key.fib_index = rx_fib_index;
680   key.proto = ip->protocol;
681   key.rsvd = 0;
682   key.l_port = 0;
683   s_kv.key[0] = key.as_u64[0];
684   s_kv.key[1] = key.as_u64[1];
685
686   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
687     {
688       s = pool_elt_at_index (tsm->sessions, s_value.value);
689       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
690     }
691   else
692     {
693       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
694         {
695           b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
696           return 0;
697         }
698
699       m_key.addr = ip->dst_address;
700       m_key.port = 0;
701       m_key.protocol = 0;
702       m_key.fib_index = rx_fib_index;
703       kv.key = m_key.as_u64;
704       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
705         {
706           b->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
707           return 0;
708         }
709
710       m = pool_elt_at_index (sm->static_mappings, value.value);
711
712       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
713
714       u_key.addr = ip->src_address;
715       u_key.fib_index = m->fib_index;
716       kv.key = u_key.as_u64;
717
718       /* Ever heard of the "user" = src ip4 address before? */
719       if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
720         {
721           /* no, make a new one */
722           pool_get (tsm->users, u);
723           memset (u, 0, sizeof (*u));
724           u->addr = ip->src_address;
725           u->fib_index = rx_fib_index;
726
727           pool_get (tsm->list_pool, head);
728           u->sessions_per_user_list_head_index = head - tsm->list_pool;
729
730           clib_dlist_init (tsm->list_pool,
731                            u->sessions_per_user_list_head_index);
732
733           kv.value = u - tsm->users;
734
735           /* add user */
736           clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1);
737         }
738       else
739         {
740           u = pool_elt_at_index (tsm->users, value.value);
741         }
742
743       /* Create a new session */
744       pool_get (tsm->sessions, s);
745       memset (s, 0, sizeof (*s));
746
747       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
748       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
749       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
750       s->outside_address_index = ~0;
751       s->out2in.addr.as_u32 = old_addr;
752       s->out2in.fib_index = rx_fib_index;
753       s->in2out.addr.as_u32 = new_addr;
754       s->in2out.fib_index = m->fib_index;
755       s->in2out.port = s->out2in.port = ip->protocol;
756       u->nstaticsessions++;
757
758       /* Create list elts */
759       pool_get (tsm->list_pool, elt);
760       clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
761       elt->value = s - tsm->sessions;
762       s->per_user_index = elt - tsm->list_pool;
763       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
764       clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
765                           s->per_user_index);
766
767       /* Add to lookup tables */
768       s_kv.value = s - tsm->sessions;
769       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
770         clib_warning ("out2in key add failed");
771
772       key.l_addr = ip->dst_address;
773       key.fib_index = m->fib_index;
774       s_kv.key[0] = key.as_u64[0];
775       s_kv.key[1] = key.as_u64[1];
776       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
777         clib_warning ("in2out key add failed");
778    }
779
780   /* Update IP checksum */
781   sum = ip->checksum;
782   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
783   ip->checksum = ip_csum_fold (sum);
784
785   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
786
787   /* Accounting */
788   s->last_heard = now;
789   s->total_pkts++;
790   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
791   /* Per-user LRU list maintenance */
792   clib_dlist_remove (tsm->list_pool, s->per_user_index);
793   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
794                       s->per_user_index);
795
796   return s;
797 }
798
799 static snat_session_t *
800 snat_out2in_lb (snat_main_t *sm,
801                 vlib_buffer_t * b,
802                 ip4_header_t * ip,
803                 u32 rx_fib_index,
804                 u32 thread_index,
805                 f64 now,
806                 vlib_main_t * vm,
807                 vlib_node_runtime_t * node)
808 {
809   nat_ed_ses_key_t key;
810   clib_bihash_kv_16_8_t s_kv, s_value;
811   udp_header_t *udp = ip4_next_header (ip);
812   tcp_header_t *tcp = (tcp_header_t *) udp;
813   snat_session_t *s = 0;
814   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
815   snat_session_key_t e_key, l_key;
816   clib_bihash_kv_8_8_t kv, value;
817   u32 old_addr, new_addr;
818   u32 proto = ip_proto_to_snat_proto (ip->protocol);
819   u16 new_port, old_port;
820   ip_csum_t sum;
821   snat_user_key_t u_key;
822   snat_user_t *u;
823   dlist_elt_t *head, *elt;
824
825   old_addr = ip->dst_address.as_u32;
826
827   key.l_addr = ip->dst_address;
828   key.r_addr = ip->src_address;
829   key.fib_index = rx_fib_index;
830   key.proto = ip->protocol;
831   key.rsvd = 0;
832   key.l_port = udp->dst_port;
833   s_kv.key[0] = key.as_u64[0];
834   s_kv.key[1] = key.as_u64[1];
835
836   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
837     {
838       s = pool_elt_at_index (tsm->sessions, s_value.value);
839     }
840   else
841     {
842       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
843         {
844           b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
845           return 0;
846         }
847
848       e_key.addr = ip->dst_address;
849       e_key.port = udp->dst_port;
850       e_key.protocol = proto;
851       e_key.fib_index = rx_fib_index;
852       if (snat_static_mapping_match(sm, e_key, &l_key, 1, 0))
853         return 0;
854
855       u_key.addr = l_key.addr;
856       u_key.fib_index = l_key.fib_index;
857       kv.key = u_key.as_u64;
858
859       /* Ever heard of the "user" = src ip4 address before? */
860       if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
861         {
862           /* no, make a new one */
863           pool_get (tsm->users, u);
864           memset (u, 0, sizeof (*u));
865           u->addr = l_key.addr;
866           u->fib_index = l_key.fib_index;
867
868           pool_get (tsm->list_pool, head);
869           u->sessions_per_user_list_head_index = head - tsm->list_pool;
870
871           clib_dlist_init (tsm->list_pool,
872                            u->sessions_per_user_list_head_index);
873
874           kv.value = u - tsm->users;
875
876           /* add user */
877           if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1))
878             clib_warning ("user key add failed");
879         }
880       else
881         {
882           u = pool_elt_at_index (tsm->users, value.value);
883         }
884
885       /* Create a new session */
886       pool_get (tsm->sessions, s);
887       memset (s, 0, sizeof (*s));
888
889       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
890       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
891       s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
892       s->outside_address_index = ~0;
893       s->out2in = e_key;
894       s->in2out = l_key;
895       u->nstaticsessions++;
896
897       /* Create list elts */
898       pool_get (tsm->list_pool, elt);
899       clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
900       elt->value = s - tsm->sessions;
901       s->per_user_index = elt - tsm->list_pool;
902       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
903       clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
904                           s->per_user_index);
905
906       /* Add to lookup tables */
907       s_kv.value = s - tsm->sessions;
908       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
909         clib_warning ("out2in-ed key add failed");
910
911       key.l_addr = l_key.addr;
912       key.fib_index = l_key.fib_index;
913       key.l_port = l_key.port;
914       s_kv.key[0] = key.as_u64[0];
915       s_kv.key[1] = key.as_u64[1];
916       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
917         clib_warning ("in2out-ed key add failed");
918     }
919
920   new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
921
922   /* Update IP checksum */
923   sum = ip->checksum;
924   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
925   ip->checksum = ip_csum_fold (sum);
926
927   if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
928     {
929       old_port = tcp->dst_port;
930       tcp->dst_port = s->in2out.port;
931       new_port = tcp->dst_port;
932
933       sum = tcp->checksum;
934       sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
935       sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
936       tcp->checksum = ip_csum_fold(sum);
937     }
938   else
939     {
940       udp->dst_port = s->in2out.port;
941       udp->checksum = 0;
942     }
943
944   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
945
946   /* Accounting */
947   s->last_heard = now;
948   s->total_pkts++;
949   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
950   return s;
951 }
952
953 static uword
954 snat_out2in_node_fn (vlib_main_t * vm,
955                   vlib_node_runtime_t * node,
956                   vlib_frame_t * frame)
957 {
958   u32 n_left_from, * from, * to_next;
959   snat_out2in_next_t next_index;
960   u32 pkts_processed = 0;
961   snat_main_t * sm = &snat_main;
962   f64 now = vlib_time_now (vm);
963   u32 thread_index = vlib_get_thread_index ();
964
965   from = vlib_frame_vector_args (frame);
966   n_left_from = frame->n_vectors;
967   next_index = node->cached_next_index;
968
969   while (n_left_from > 0)
970     {
971       u32 n_left_to_next;
972
973       vlib_get_next_frame (vm, node, next_index,
974                            to_next, n_left_to_next);
975
976       while (n_left_from >= 4 && n_left_to_next >= 2)
977         {
978           u32 bi0, bi1;
979           vlib_buffer_t * b0, * b1;
980           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
981           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
982           u32 sw_if_index0, sw_if_index1;
983           ip4_header_t * ip0, *ip1;
984           ip_csum_t sum0, sum1;
985           u32 new_addr0, old_addr0;
986           u16 new_port0, old_port0;
987           u32 new_addr1, old_addr1;
988           u16 new_port1, old_port1;
989           udp_header_t * udp0, * udp1;
990           tcp_header_t * tcp0, * tcp1;
991           icmp46_header_t * icmp0, * icmp1;
992           snat_session_key_t key0, key1, sm0, sm1;
993           u32 rx_fib_index0, rx_fib_index1;
994           u32 proto0, proto1;
995           snat_session_t * s0 = 0, * s1 = 0;
996           clib_bihash_kv_8_8_t kv0, kv1, value0, value1;
997
998           /* Prefetch next iteration. */
999           {
1000             vlib_buffer_t * p2, * p3;
1001
1002             p2 = vlib_get_buffer (vm, from[2]);
1003             p3 = vlib_get_buffer (vm, from[3]);
1004
1005             vlib_prefetch_buffer_header (p2, LOAD);
1006             vlib_prefetch_buffer_header (p3, LOAD);
1007
1008             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1009             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1010           }
1011
1012           /* speculatively enqueue b0 and b1 to the current next frame */
1013           to_next[0] = bi0 = from[0];
1014           to_next[1] = bi1 = from[1];
1015           from += 2;
1016           to_next += 2;
1017           n_left_from -= 2;
1018           n_left_to_next -= 2;
1019
1020           b0 = vlib_get_buffer (vm, bi0);
1021           b1 = vlib_get_buffer (vm, bi1);
1022
1023           vnet_buffer (b0)->snat.flags = 0;
1024           vnet_buffer (b1)->snat.flags = 0;
1025
1026           ip0 = vlib_buffer_get_current (b0);
1027           udp0 = ip4_next_header (ip0);
1028           tcp0 = (tcp_header_t *) udp0;
1029           icmp0 = (icmp46_header_t *) udp0;
1030
1031           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1032           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1033                                    sw_if_index0);
1034
1035           if (PREDICT_FALSE(ip0->ttl == 1))
1036             {
1037               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1038               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1039                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1040                                            0);
1041               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1042               goto trace0;
1043             }
1044
1045           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1046
1047           if (PREDICT_FALSE (proto0 == ~0))
1048             {
1049               s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
1050                                              thread_index, now, vm, node);
1051               if (!s0)
1052                 next0 = SNAT_OUT2IN_NEXT_DROP;
1053               goto trace0;
1054             }
1055
1056           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1057             {
1058               next0 = icmp_out2in_slow_path
1059                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1060                  next0, now, thread_index, &s0);
1061               goto trace0;
1062             }
1063
1064           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
1065             {
1066               next0 = SNAT_OUT2IN_NEXT_REASS;
1067               goto trace0;
1068             }
1069
1070           key0.addr = ip0->dst_address;
1071           key0.port = udp0->dst_port;
1072           key0.protocol = proto0;
1073           key0.fib_index = rx_fib_index0;
1074
1075           kv0.key = key0.as_u64;
1076
1077           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1078                                       &kv0, &value0))
1079             {
1080               /* Try to match static mapping by external address and port,
1081                  destination address and port in packet */
1082               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
1083                 {
1084                   b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1085                   /*
1086                    * Send DHCP packets to the ipv4 stack, or we won't
1087                    * be able to use dhcp client on the outside interface
1088                    */
1089                   if (proto0 != SNAT_PROTOCOL_UDP
1090                       || (udp0->dst_port
1091                           != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1092                     next0 = SNAT_OUT2IN_NEXT_DROP;
1093                   goto trace0;
1094                 }
1095
1096               /* Create session initiated by host from external network */
1097               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1098                                                      thread_index);
1099               if (!s0)
1100                 {
1101                   next0 = SNAT_OUT2IN_NEXT_DROP;
1102                   goto trace0;
1103                 }
1104             }
1105           else
1106             {
1107               if (PREDICT_FALSE (value0.value == ~0ULL))
1108                 {
1109                   s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index,
1110                                       now, vm, node);
1111                   if (!s0)
1112                     next0 = SNAT_OUT2IN_NEXT_DROP;
1113                   goto trace0;
1114                 }
1115               else
1116                 {
1117                   s0 = pool_elt_at_index (
1118                     sm->per_thread_data[thread_index].sessions,
1119                     value0.value);
1120                 }
1121             }
1122
1123           old_addr0 = ip0->dst_address.as_u32;
1124           ip0->dst_address = s0->in2out.addr;
1125           new_addr0 = ip0->dst_address.as_u32;
1126           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1127
1128           sum0 = ip0->checksum;
1129           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1130                                  ip4_header_t,
1131                                  dst_address /* changed member */);
1132           ip0->checksum = ip_csum_fold (sum0);
1133
1134           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1135             {
1136               old_port0 = tcp0->dst_port;
1137               tcp0->dst_port = s0->in2out.port;
1138               new_port0 = tcp0->dst_port;
1139
1140               sum0 = tcp0->checksum;
1141               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1142                                      ip4_header_t,
1143                                      dst_address /* changed member */);
1144
1145               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1146                                      ip4_header_t /* cheat */,
1147                                      length /* changed member */);
1148               tcp0->checksum = ip_csum_fold(sum0);
1149             }
1150           else
1151             {
1152               old_port0 = udp0->dst_port;
1153               udp0->dst_port = s0->in2out.port;
1154               udp0->checksum = 0;
1155             }
1156
1157           /* Accounting */
1158           s0->last_heard = now;
1159           s0->total_pkts++;
1160           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1161           /* Per-user LRU list maintenance for dynamic translation */
1162           if (!snat_is_session_static (s0))
1163             {
1164               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1165                                  s0->per_user_index);
1166               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1167                                   s0->per_user_list_head_index,
1168                                   s0->per_user_index);
1169             }
1170         trace0:
1171
1172           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1173                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1174             {
1175               snat_out2in_trace_t *t =
1176                  vlib_add_trace (vm, node, b0, sizeof (*t));
1177               t->sw_if_index = sw_if_index0;
1178               t->next_index = next0;
1179               t->session_index = ~0;
1180               if (s0)
1181                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1182             }
1183
1184           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1185
1186
1187           ip1 = vlib_buffer_get_current (b1);
1188           udp1 = ip4_next_header (ip1);
1189           tcp1 = (tcp_header_t *) udp1;
1190           icmp1 = (icmp46_header_t *) udp1;
1191
1192           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1193           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1194                                    sw_if_index1);
1195
1196           if (PREDICT_FALSE(ip1->ttl == 1))
1197             {
1198               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1199               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1200                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1201                                            0);
1202               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1203               goto trace1;
1204             }
1205
1206           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1207
1208           if (PREDICT_FALSE (proto1 == ~0))
1209             {
1210               s1 = snat_out2in_unknown_proto(sm, b1, ip1, rx_fib_index1,
1211                                              thread_index, now, vm, node);
1212               if (!s1)
1213                 next1 = SNAT_OUT2IN_NEXT_DROP;
1214               goto trace1;
1215             }
1216
1217           if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1218             {
1219               next1 = icmp_out2in_slow_path
1220                 (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1221                  next1, now, thread_index, &s1);
1222               goto trace1;
1223             }
1224
1225           if (PREDICT_FALSE (ip4_is_fragment (ip1)))
1226             {
1227               next1 = SNAT_OUT2IN_NEXT_REASS;
1228               goto trace1;
1229             }
1230
1231           key1.addr = ip1->dst_address;
1232           key1.port = udp1->dst_port;
1233           key1.protocol = proto1;
1234           key1.fib_index = rx_fib_index1;
1235
1236           kv1.key = key1.as_u64;
1237
1238           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1239                                       &kv1, &value1))
1240             {
1241               /* Try to match static mapping by external address and port,
1242                  destination address and port in packet */
1243               if (snat_static_mapping_match(sm, key1, &sm1, 1, 0))
1244                 {
1245                   b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1246                   /*
1247                    * Send DHCP packets to the ipv4 stack, or we won't
1248                    * be able to use dhcp client on the outside interface
1249                    */
1250                   if (proto1 != SNAT_PROTOCOL_UDP
1251                       || (udp1->dst_port
1252                           != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1253                     next1 = SNAT_OUT2IN_NEXT_DROP;
1254                   goto trace1;
1255                 }
1256
1257               /* Create session initiated by host from external network */
1258               s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
1259                                                      thread_index);
1260               if (!s1)
1261                 {
1262                   next1 = SNAT_OUT2IN_NEXT_DROP;
1263                   goto trace1;
1264                 }
1265             }
1266           else
1267             {
1268               if (PREDICT_FALSE (value1.value == ~0ULL))
1269                 {
1270                   s1 = snat_out2in_lb(sm, b1, ip1, rx_fib_index1, thread_index,
1271                                       now, vm, node);
1272                   if (!s1)
1273                     next1 = SNAT_OUT2IN_NEXT_DROP;
1274                   goto trace1;
1275                 }
1276               else
1277                 {
1278                   s1 = pool_elt_at_index (
1279                     sm->per_thread_data[thread_index].sessions,
1280                     value1.value);
1281                 }
1282             }
1283
1284           old_addr1 = ip1->dst_address.as_u32;
1285           ip1->dst_address = s1->in2out.addr;
1286           new_addr1 = ip1->dst_address.as_u32;
1287           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index;
1288
1289           sum1 = ip1->checksum;
1290           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1291                                  ip4_header_t,
1292                                  dst_address /* changed member */);
1293           ip1->checksum = ip_csum_fold (sum1);
1294
1295           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1296             {
1297               old_port1 = tcp1->dst_port;
1298               tcp1->dst_port = s1->in2out.port;
1299               new_port1 = tcp1->dst_port;
1300
1301               sum1 = tcp1->checksum;
1302               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1303                                      ip4_header_t,
1304                                      dst_address /* changed member */);
1305
1306               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1307                                      ip4_header_t /* cheat */,
1308                                      length /* changed member */);
1309               tcp1->checksum = ip_csum_fold(sum1);
1310             }
1311           else
1312             {
1313               old_port1 = udp1->dst_port;
1314               udp1->dst_port = s1->in2out.port;
1315               udp1->checksum = 0;
1316             }
1317
1318           /* Accounting */
1319           s1->last_heard = now;
1320           s1->total_pkts++;
1321           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1322           /* Per-user LRU list maintenance for dynamic translation */
1323           if (!snat_is_session_static (s1))
1324             {
1325               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1326                                  s1->per_user_index);
1327               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1328                                   s1->per_user_list_head_index,
1329                                   s1->per_user_index);
1330             }
1331         trace1:
1332
1333           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1334                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1335             {
1336               snat_out2in_trace_t *t =
1337                  vlib_add_trace (vm, node, b1, sizeof (*t));
1338               t->sw_if_index = sw_if_index1;
1339               t->next_index = next1;
1340               t->session_index = ~0;
1341               if (s1)
1342                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1343             }
1344
1345           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
1346
1347           /* verify speculative enqueues, maybe switch current next frame */
1348           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1349                                            to_next, n_left_to_next,
1350                                            bi0, bi1, next0, next1);
1351         }
1352
1353       while (n_left_from > 0 && n_left_to_next > 0)
1354         {
1355           u32 bi0;
1356           vlib_buffer_t * b0;
1357           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1358           u32 sw_if_index0;
1359           ip4_header_t * ip0;
1360           ip_csum_t sum0;
1361           u32 new_addr0, old_addr0;
1362           u16 new_port0, old_port0;
1363           udp_header_t * udp0;
1364           tcp_header_t * tcp0;
1365           icmp46_header_t * icmp0;
1366           snat_session_key_t key0, sm0;
1367           u32 rx_fib_index0;
1368           u32 proto0;
1369           snat_session_t * s0 = 0;
1370           clib_bihash_kv_8_8_t kv0, value0;
1371
1372           /* speculatively enqueue b0 to the current next frame */
1373           bi0 = from[0];
1374           to_next[0] = bi0;
1375           from += 1;
1376           to_next += 1;
1377           n_left_from -= 1;
1378           n_left_to_next -= 1;
1379
1380           b0 = vlib_get_buffer (vm, bi0);
1381
1382           vnet_buffer (b0)->snat.flags = 0;
1383
1384           ip0 = vlib_buffer_get_current (b0);
1385           udp0 = ip4_next_header (ip0);
1386           tcp0 = (tcp_header_t *) udp0;
1387           icmp0 = (icmp46_header_t *) udp0;
1388
1389           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1390           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1391                                    sw_if_index0);
1392
1393           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1394
1395           if (PREDICT_FALSE (proto0 == ~0))
1396             {
1397               s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
1398                                              thread_index, now, vm, node);
1399               if (!s0)
1400                 next0 = SNAT_OUT2IN_NEXT_DROP;
1401               goto trace00;
1402             }
1403
1404           if (PREDICT_FALSE(ip0->ttl == 1))
1405             {
1406               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1407               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1408                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1409                                            0);
1410               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1411               goto trace00;
1412             }
1413
1414           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1415             {
1416               next0 = icmp_out2in_slow_path
1417                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1418                  next0, now, thread_index, &s0);
1419               goto trace00;
1420             }
1421
1422           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
1423             {
1424               next0 = SNAT_OUT2IN_NEXT_REASS;
1425               goto trace00;
1426             }
1427
1428           key0.addr = ip0->dst_address;
1429           key0.port = udp0->dst_port;
1430           key0.protocol = proto0;
1431           key0.fib_index = rx_fib_index0;
1432
1433           kv0.key = key0.as_u64;
1434
1435           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1436                                       &kv0, &value0))
1437             {
1438               /* Try to match static mapping by external address and port,
1439                  destination address and port in packet */
1440               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
1441                 {
1442                   b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1443                   /*
1444                    * Send DHCP packets to the ipv4 stack, or we won't
1445                    * be able to use dhcp client on the outside interface
1446                    */
1447                   if (proto0 != SNAT_PROTOCOL_UDP
1448                       || (udp0->dst_port
1449                           != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1450
1451                     next0 = SNAT_OUT2IN_NEXT_DROP;
1452                   goto trace00;
1453                 }
1454
1455               /* Create session initiated by host from external network */
1456               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1457                                                      thread_index);
1458               if (!s0)
1459                 {
1460                   next0 = SNAT_OUT2IN_NEXT_DROP;
1461                   goto trace00;
1462                 }
1463             }
1464           else
1465             {
1466               if (PREDICT_FALSE (value0.value == ~0ULL))
1467                 {
1468                   s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index,
1469                                       now, vm, node);
1470                   if (!s0)
1471                     next0 = SNAT_OUT2IN_NEXT_DROP;
1472                   goto trace00;
1473                 }
1474               else
1475                 {
1476                   s0 = pool_elt_at_index (
1477                     sm->per_thread_data[thread_index].sessions,
1478                     value0.value);
1479                 }
1480             }
1481
1482           old_addr0 = ip0->dst_address.as_u32;
1483           ip0->dst_address = s0->in2out.addr;
1484           new_addr0 = ip0->dst_address.as_u32;
1485           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1486
1487           sum0 = ip0->checksum;
1488           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1489                                  ip4_header_t,
1490                                  dst_address /* changed member */);
1491           ip0->checksum = ip_csum_fold (sum0);
1492
1493           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1494             {
1495               old_port0 = tcp0->dst_port;
1496               tcp0->dst_port = s0->in2out.port;
1497               new_port0 = tcp0->dst_port;
1498
1499               sum0 = tcp0->checksum;
1500               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1501                                      ip4_header_t,
1502                                      dst_address /* changed member */);
1503
1504               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1505                                      ip4_header_t /* cheat */,
1506                                      length /* changed member */);
1507               tcp0->checksum = ip_csum_fold(sum0);
1508             }
1509           else
1510             {
1511               old_port0 = udp0->dst_port;
1512               udp0->dst_port = s0->in2out.port;
1513               udp0->checksum = 0;
1514             }
1515
1516           /* Accounting */
1517           s0->last_heard = now;
1518           s0->total_pkts++;
1519           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1520           /* Per-user LRU list maintenance for dynamic translation */
1521           if (!snat_is_session_static (s0))
1522             {
1523               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1524                                  s0->per_user_index);
1525               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1526                                   s0->per_user_list_head_index,
1527                                   s0->per_user_index);
1528             }
1529         trace00:
1530
1531           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1532                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1533             {
1534               snat_out2in_trace_t *t =
1535                  vlib_add_trace (vm, node, b0, sizeof (*t));
1536               t->sw_if_index = sw_if_index0;
1537               t->next_index = next0;
1538               t->session_index = ~0;
1539               if (s0)
1540                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1541             }
1542
1543           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1544
1545           /* verify speculative enqueue, maybe switch current next frame */
1546           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1547                                            to_next, n_left_to_next,
1548                                            bi0, next0);
1549         }
1550
1551       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1552     }
1553
1554   vlib_node_increment_counter (vm, snat_out2in_node.index,
1555                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1556                                pkts_processed);
1557   return frame->n_vectors;
1558 }
1559
1560 VLIB_REGISTER_NODE (snat_out2in_node) = {
1561   .function = snat_out2in_node_fn,
1562   .name = "nat44-out2in",
1563   .vector_size = sizeof (u32),
1564   .format_trace = format_snat_out2in_trace,
1565   .type = VLIB_NODE_TYPE_INTERNAL,
1566
1567   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1568   .error_strings = snat_out2in_error_strings,
1569
1570   .runtime_data_bytes = sizeof (snat_runtime_t),
1571
1572   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1573
1574   /* edit / add dispositions here */
1575   .next_nodes = {
1576     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1577     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1578     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1579     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1580   },
1581 };
1582 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
1583
1584 static uword
1585 nat44_out2in_reass_node_fn (vlib_main_t * vm,
1586                             vlib_node_runtime_t * node,
1587                             vlib_frame_t * frame)
1588 {
1589   u32 n_left_from, *from, *to_next;
1590   snat_out2in_next_t next_index;
1591   u32 pkts_processed = 0;
1592   snat_main_t *sm = &snat_main;
1593   f64 now = vlib_time_now (vm);
1594   u32 thread_index = vlib_get_thread_index ();
1595   snat_main_per_thread_data_t *per_thread_data =
1596     &sm->per_thread_data[thread_index];
1597   u32 *fragments_to_drop = 0;
1598   u32 *fragments_to_loopback = 0;
1599
1600   from = vlib_frame_vector_args (frame);
1601   n_left_from = frame->n_vectors;
1602   next_index = node->cached_next_index;
1603
1604   while (n_left_from > 0)
1605     {
1606       u32 n_left_to_next;
1607
1608       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1609
1610       while (n_left_from > 0 && n_left_to_next > 0)
1611        {
1612           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
1613           vlib_buffer_t *b0;
1614           u32 next0;
1615           u8 cached0 = 0;
1616           ip4_header_t *ip0;
1617           nat_reass_ip4_t *reass0;
1618           udp_header_t * udp0;
1619           tcp_header_t * tcp0;
1620           snat_session_key_t key0, sm0;
1621           clib_bihash_kv_8_8_t kv0, value0;
1622           snat_session_t * s0 = 0;
1623           u16 old_port0, new_port0;
1624           ip_csum_t sum0;
1625
1626           /* speculatively enqueue b0 to the current next frame */
1627           bi0 = from[0];
1628           to_next[0] = bi0;
1629           from += 1;
1630           to_next += 1;
1631           n_left_from -= 1;
1632           n_left_to_next -= 1;
1633
1634           b0 = vlib_get_buffer (vm, bi0);
1635           next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1636
1637           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1638           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1639                                                                sw_if_index0);
1640
1641           if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
1642             {
1643               next0 = SNAT_OUT2IN_NEXT_DROP;
1644               b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
1645               goto trace0;
1646             }
1647
1648           ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
1649           udp0 = ip4_next_header (ip0);
1650           tcp0 = (tcp_header_t *) udp0;
1651           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1652
1653           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
1654                                                  ip0->dst_address,
1655                                                  ip0->fragment_id,
1656                                                  ip0->protocol,
1657                                                  1,
1658                                                  &fragments_to_drop);
1659
1660           if (PREDICT_FALSE (!reass0))
1661             {
1662               next0 = SNAT_OUT2IN_NEXT_DROP;
1663               b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_REASS];
1664               goto trace0;
1665             }
1666
1667           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1668             {
1669               key0.addr = ip0->dst_address;
1670               key0.port = udp0->dst_port;
1671               key0.protocol = proto0;
1672               key0.fib_index = rx_fib_index0;
1673               kv0.key = key0.as_u64;
1674
1675               if (clib_bihash_search_8_8 (&per_thread_data->out2in, &kv0, &value0))
1676                 {
1677                   /* Try to match static mapping by external address and port,
1678                      destination address and port in packet */
1679                   if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
1680                     {
1681                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1682                       /*
1683                        * Send DHCP packets to the ipv4 stack, or we won't
1684                        * be able to use dhcp client on the outside interface
1685                        */
1686                       if (proto0 != SNAT_PROTOCOL_UDP
1687                           || (udp0->dst_port
1688                               != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1689
1690                         next0 = SNAT_OUT2IN_NEXT_DROP;
1691                       goto trace0;
1692                     }
1693
1694                   /* Create session initiated by host from external network */
1695                   s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1696                                                          thread_index);
1697                   if (!s0)
1698                     {
1699                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1700                       next0 = SNAT_OUT2IN_NEXT_DROP;
1701                       goto trace0;
1702                     }
1703                   reass0->sess_index = s0 - per_thread_data->sessions;
1704                   reass0->thread_index = thread_index;
1705                 }
1706               else
1707                 {
1708                   s0 = pool_elt_at_index (per_thread_data->sessions,
1709                                           value0.value);
1710                   reass0->sess_index = value0.value;
1711                 }
1712               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
1713             }
1714           else
1715             {
1716               if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
1717                 {
1718                   if (nat_ip4_reass_add_fragment (reass0, bi0))
1719                     {
1720                       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_FRAG];
1721                       next0 = SNAT_OUT2IN_NEXT_DROP;
1722                       goto trace0;
1723                     }
1724                   cached0 = 1;
1725                   goto trace0;
1726                 }
1727               s0 = pool_elt_at_index (per_thread_data->sessions,
1728                                       reass0->sess_index);
1729             }
1730
1731           old_addr0 = ip0->dst_address.as_u32;
1732           ip0->dst_address = s0->in2out.addr;
1733           new_addr0 = ip0->dst_address.as_u32;
1734           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1735
1736           sum0 = ip0->checksum;
1737           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1738                                  ip4_header_t,
1739                                  dst_address /* changed member */);
1740           ip0->checksum = ip_csum_fold (sum0);
1741
1742           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1743             {
1744               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1745                 {
1746                   old_port0 = tcp0->dst_port;
1747                   tcp0->dst_port = s0->in2out.port;
1748                   new_port0 = tcp0->dst_port;
1749
1750                   sum0 = tcp0->checksum;
1751                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1752                                          ip4_header_t,
1753                                          dst_address /* changed member */);
1754
1755                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
1756                                          ip4_header_t /* cheat */,
1757                                          length /* changed member */);
1758                   tcp0->checksum = ip_csum_fold(sum0);
1759                 }
1760               else
1761                 {
1762                   old_port0 = udp0->dst_port;
1763                   udp0->dst_port = s0->in2out.port;
1764                   udp0->checksum = 0;
1765                 }
1766             }
1767
1768           /* Accounting */
1769           s0->last_heard = now;
1770           s0->total_pkts++;
1771           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1772           /* Per-user LRU list maintenance for dynamic translation */
1773           if (!snat_is_session_static (s0))
1774             {
1775               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1776                                  s0->per_user_index);
1777               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1778                                   s0->per_user_list_head_index,
1779                                   s0->per_user_index);
1780             }
1781
1782         trace0:
1783           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1784                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1785             {
1786               nat44_out2in_reass_trace_t *t =
1787                  vlib_add_trace (vm, node, b0, sizeof (*t));
1788               t->cached = cached0;
1789               t->sw_if_index = sw_if_index0;
1790               t->next_index = next0;
1791             }
1792
1793           if (cached0)
1794             {
1795               n_left_to_next++;
1796               to_next--;
1797             }
1798           else
1799             {
1800               pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1801
1802               /* verify speculative enqueue, maybe switch current next frame */
1803               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1804                                                to_next, n_left_to_next,
1805                                                bi0, next0);
1806             }
1807
1808           if (n_left_from == 0 && vec_len (fragments_to_loopback))
1809             {
1810               from = vlib_frame_vector_args (frame);
1811               u32 len = vec_len (fragments_to_loopback);
1812               if (len <= VLIB_FRAME_SIZE)
1813                 {
1814                   clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
1815                   n_left_from = len;
1816                   vec_reset_length (fragments_to_loopback);
1817                 }
1818               else
1819                 {
1820                   clib_memcpy (from,
1821                                fragments_to_loopback + (len - VLIB_FRAME_SIZE),
1822                                sizeof (u32) * VLIB_FRAME_SIZE);
1823                   n_left_from = VLIB_FRAME_SIZE;
1824                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
1825                 }
1826             }
1827        }
1828
1829       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1830     }
1831
1832   vlib_node_increment_counter (vm, nat44_out2in_reass_node.index,
1833                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1834                                pkts_processed);
1835
1836   nat_send_all_to_node (vm, fragments_to_drop, node,
1837                         &node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT],
1838                         SNAT_OUT2IN_NEXT_DROP);
1839
1840   vec_free (fragments_to_drop);
1841   vec_free (fragments_to_loopback);
1842   return frame->n_vectors;
1843 }
1844
1845 VLIB_REGISTER_NODE (nat44_out2in_reass_node) = {
1846   .function = nat44_out2in_reass_node_fn,
1847   .name = "nat44-out2in-reass",
1848   .vector_size = sizeof (u32),
1849   .format_trace = format_nat44_out2in_reass_trace,
1850   .type = VLIB_NODE_TYPE_INTERNAL,
1851
1852   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1853   .error_strings = snat_out2in_error_strings,
1854
1855   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1856
1857   /* edit / add dispositions here */
1858   .next_nodes = {
1859     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1860     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1861     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1862     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1863   },
1864 };
1865 VLIB_NODE_FUNCTION_MULTIARCH (nat44_out2in_reass_node,
1866                               nat44_out2in_reass_node_fn);
1867
1868 /**************************/
1869 /*** deterministic mode ***/
1870 /**************************/
1871 static uword
1872 snat_det_out2in_node_fn (vlib_main_t * vm,
1873                          vlib_node_runtime_t * node,
1874                          vlib_frame_t * frame)
1875 {
1876   u32 n_left_from, * from, * to_next;
1877   snat_out2in_next_t next_index;
1878   u32 pkts_processed = 0;
1879   snat_main_t * sm = &snat_main;
1880   u32 thread_index = vlib_get_thread_index ();
1881
1882   from = vlib_frame_vector_args (frame);
1883   n_left_from = frame->n_vectors;
1884   next_index = node->cached_next_index;
1885
1886   while (n_left_from > 0)
1887     {
1888       u32 n_left_to_next;
1889
1890       vlib_get_next_frame (vm, node, next_index,
1891                            to_next, n_left_to_next);
1892
1893       while (n_left_from >= 4 && n_left_to_next >= 2)
1894         {
1895           u32 bi0, bi1;
1896           vlib_buffer_t * b0, * b1;
1897           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1898           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
1899           u32 sw_if_index0, sw_if_index1;
1900           ip4_header_t * ip0, * ip1;
1901           ip_csum_t sum0, sum1;
1902           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
1903           u16 new_port0, old_port0, old_port1, new_port1;
1904           udp_header_t * udp0, * udp1;
1905           tcp_header_t * tcp0, * tcp1;
1906           u32 proto0, proto1;
1907           snat_det_out_key_t key0, key1;
1908           snat_det_map_t * dm0, * dm1;
1909           snat_det_session_t * ses0 = 0, * ses1 = 0;
1910           u32 rx_fib_index0, rx_fib_index1;
1911           icmp46_header_t * icmp0, * icmp1;
1912
1913           /* Prefetch next iteration. */
1914           {
1915             vlib_buffer_t * p2, * p3;
1916
1917             p2 = vlib_get_buffer (vm, from[2]);
1918             p3 = vlib_get_buffer (vm, from[3]);
1919
1920             vlib_prefetch_buffer_header (p2, LOAD);
1921             vlib_prefetch_buffer_header (p3, LOAD);
1922
1923             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1924             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1925           }
1926
1927           /* speculatively enqueue b0 and b1 to the current next frame */
1928           to_next[0] = bi0 = from[0];
1929           to_next[1] = bi1 = from[1];
1930           from += 2;
1931           to_next += 2;
1932           n_left_from -= 2;
1933           n_left_to_next -= 2;
1934
1935           b0 = vlib_get_buffer (vm, bi0);
1936           b1 = vlib_get_buffer (vm, bi1);
1937
1938           ip0 = vlib_buffer_get_current (b0);
1939           udp0 = ip4_next_header (ip0);
1940           tcp0 = (tcp_header_t *) udp0;
1941
1942           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1943
1944           if (PREDICT_FALSE(ip0->ttl == 1))
1945             {
1946               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1947               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1948                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1949                                            0);
1950               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1951               goto trace0;
1952             }
1953
1954           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1955
1956           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1957             {
1958               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1959               icmp0 = (icmp46_header_t *) udp0;
1960
1961               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
1962                                   rx_fib_index0, node, next0, thread_index,
1963                                   &ses0, &dm0);
1964               goto trace0;
1965             }
1966
1967           key0.ext_host_addr = ip0->src_address;
1968           key0.ext_host_port = tcp0->src;
1969           key0.out_port = tcp0->dst;
1970
1971           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
1972           if (PREDICT_FALSE(!dm0))
1973             {
1974               clib_warning("unknown dst address:  %U",
1975                            format_ip4_address, &ip0->dst_address);
1976               next0 = SNAT_OUT2IN_NEXT_DROP;
1977               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1978               goto trace0;
1979             }
1980
1981           snat_det_reverse(dm0, &ip0->dst_address,
1982                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
1983
1984           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
1985           if (PREDICT_FALSE(!ses0))
1986             {
1987               clib_warning("no match src %U:%d dst %U:%d for user %U",
1988                            format_ip4_address, &ip0->src_address,
1989                            clib_net_to_host_u16 (tcp0->src),
1990                            format_ip4_address, &ip0->dst_address,
1991                            clib_net_to_host_u16 (tcp0->dst),
1992                            format_ip4_address, &new_addr0);
1993               next0 = SNAT_OUT2IN_NEXT_DROP;
1994               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1995               goto trace0;
1996             }
1997           new_port0 = ses0->in_port;
1998
1999           old_addr0 = ip0->dst_address;
2000           ip0->dst_address = new_addr0;
2001           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2002
2003           sum0 = ip0->checksum;
2004           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2005                                  ip4_header_t,
2006                                  dst_address /* changed member */);
2007           ip0->checksum = ip_csum_fold (sum0);
2008
2009           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2010             {
2011               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2012                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2013               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
2014                 snat_det_ses_close(dm0, ses0);
2015
2016               old_port0 = tcp0->dst;
2017               tcp0->dst = new_port0;
2018
2019               sum0 = tcp0->checksum;
2020               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2021                                      ip4_header_t,
2022                                      dst_address /* changed member */);
2023
2024               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2025                                      ip4_header_t /* cheat */,
2026                                      length /* changed member */);
2027               tcp0->checksum = ip_csum_fold(sum0);
2028             }
2029           else
2030             {
2031               old_port0 = udp0->dst_port;
2032               udp0->dst_port = new_port0;
2033               udp0->checksum = 0;
2034             }
2035
2036         trace0:
2037
2038           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2039                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2040             {
2041               snat_out2in_trace_t *t =
2042                  vlib_add_trace (vm, node, b0, sizeof (*t));
2043               t->sw_if_index = sw_if_index0;
2044               t->next_index = next0;
2045               t->session_index = ~0;
2046               if (ses0)
2047                 t->session_index = ses0 - dm0->sessions;
2048             }
2049
2050           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2051
2052           b1 = vlib_get_buffer (vm, bi1);
2053
2054           ip1 = vlib_buffer_get_current (b1);
2055           udp1 = ip4_next_header (ip1);
2056           tcp1 = (tcp_header_t *) udp1;
2057
2058           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2059
2060           if (PREDICT_FALSE(ip1->ttl == 1))
2061             {
2062               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2063               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2064                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2065                                            0);
2066               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2067               goto trace1;
2068             }
2069
2070           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2071
2072           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
2073             {
2074               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
2075               icmp1 = (icmp46_header_t *) udp1;
2076
2077               next1 = icmp_out2in(sm, b1, ip1, icmp1, sw_if_index1,
2078                                   rx_fib_index1, node, next1, thread_index,
2079                                   &ses1, &dm1);
2080               goto trace1;
2081             }
2082
2083           key1.ext_host_addr = ip1->src_address;
2084           key1.ext_host_port = tcp1->src;
2085           key1.out_port = tcp1->dst;
2086
2087           dm1 = snat_det_map_by_out(sm, &ip1->dst_address);
2088           if (PREDICT_FALSE(!dm1))
2089             {
2090               clib_warning("unknown dst address:  %U",
2091                            format_ip4_address, &ip1->dst_address);
2092               next1 = SNAT_OUT2IN_NEXT_DROP;
2093               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2094               goto trace1;
2095             }
2096
2097           snat_det_reverse(dm1, &ip1->dst_address,
2098                            clib_net_to_host_u16(tcp1->dst), &new_addr1);
2099
2100           ses1 = snat_det_get_ses_by_out (dm1, &new_addr1, key1.as_u64);
2101           if (PREDICT_FALSE(!ses1))
2102             {
2103               clib_warning("no match src %U:%d dst %U:%d for user %U",
2104                            format_ip4_address, &ip1->src_address,
2105                            clib_net_to_host_u16 (tcp1->src),
2106                            format_ip4_address, &ip1->dst_address,
2107                            clib_net_to_host_u16 (tcp1->dst),
2108                            format_ip4_address, &new_addr1);
2109               next1 = SNAT_OUT2IN_NEXT_DROP;
2110               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2111               goto trace1;
2112             }
2113           new_port1 = ses1->in_port;
2114
2115           old_addr1 = ip1->dst_address;
2116           ip1->dst_address = new_addr1;
2117           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2118
2119           sum1 = ip1->checksum;
2120           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2121                                  ip4_header_t,
2122                                  dst_address /* changed member */);
2123           ip1->checksum = ip_csum_fold (sum1);
2124
2125           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
2126             {
2127               if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
2128                 ses1->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2129               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_LAST_ACK)
2130                 snat_det_ses_close(dm1, ses1);
2131
2132               old_port1 = tcp1->dst;
2133               tcp1->dst = new_port1;
2134
2135               sum1 = tcp1->checksum;
2136               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2137                                      ip4_header_t,
2138                                      dst_address /* changed member */);
2139
2140               sum1 = ip_csum_update (sum1, old_port1, new_port1,
2141                                      ip4_header_t /* cheat */,
2142                                      length /* changed member */);
2143               tcp1->checksum = ip_csum_fold(sum1);
2144             }
2145           else
2146             {
2147               old_port1 = udp1->dst_port;
2148               udp1->dst_port = new_port1;
2149               udp1->checksum = 0;
2150             }
2151
2152         trace1:
2153
2154           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2155                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2156             {
2157               snat_out2in_trace_t *t =
2158                  vlib_add_trace (vm, node, b1, sizeof (*t));
2159               t->sw_if_index = sw_if_index1;
2160               t->next_index = next1;
2161               t->session_index = ~0;
2162               if (ses1)
2163                 t->session_index = ses1 - dm1->sessions;
2164             }
2165
2166           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
2167
2168           /* verify speculative enqueues, maybe switch current next frame */
2169           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2170                                            to_next, n_left_to_next,
2171                                            bi0, bi1, next0, next1);
2172          }
2173
2174       while (n_left_from > 0 && n_left_to_next > 0)
2175         {
2176           u32 bi0;
2177           vlib_buffer_t * b0;
2178           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
2179           u32 sw_if_index0;
2180           ip4_header_t * ip0;
2181           ip_csum_t sum0;
2182           ip4_address_t new_addr0, old_addr0;
2183           u16 new_port0, old_port0;
2184           udp_header_t * udp0;
2185           tcp_header_t * tcp0;
2186           u32 proto0;
2187           snat_det_out_key_t key0;
2188           snat_det_map_t * dm0;
2189           snat_det_session_t * ses0 = 0;
2190           u32 rx_fib_index0;
2191           icmp46_header_t * icmp0;
2192
2193           /* speculatively enqueue b0 to the current next frame */
2194           bi0 = from[0];
2195           to_next[0] = bi0;
2196           from += 1;
2197           to_next += 1;
2198           n_left_from -= 1;
2199           n_left_to_next -= 1;
2200
2201           b0 = vlib_get_buffer (vm, bi0);
2202
2203           ip0 = vlib_buffer_get_current (b0);
2204           udp0 = ip4_next_header (ip0);
2205           tcp0 = (tcp_header_t *) udp0;
2206
2207           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2208
2209           if (PREDICT_FALSE(ip0->ttl == 1))
2210             {
2211               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2212               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2213                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2214                                            0);
2215               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2216               goto trace00;
2217             }
2218
2219           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2220
2221           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2222             {
2223               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2224               icmp0 = (icmp46_header_t *) udp0;
2225
2226               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2227                                   rx_fib_index0, node, next0, thread_index,
2228                                   &ses0, &dm0);
2229               goto trace00;
2230             }
2231
2232           key0.ext_host_addr = ip0->src_address;
2233           key0.ext_host_port = tcp0->src;
2234           key0.out_port = tcp0->dst;
2235
2236           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
2237           if (PREDICT_FALSE(!dm0))
2238             {
2239               clib_warning("unknown dst address:  %U",
2240                            format_ip4_address, &ip0->dst_address);
2241               next0 = SNAT_OUT2IN_NEXT_DROP;
2242               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2243               goto trace00;
2244             }
2245
2246           snat_det_reverse(dm0, &ip0->dst_address,
2247                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
2248
2249           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
2250           if (PREDICT_FALSE(!ses0))
2251             {
2252               clib_warning("no match src %U:%d dst %U:%d for user %U",
2253                            format_ip4_address, &ip0->src_address,
2254                            clib_net_to_host_u16 (tcp0->src),
2255                            format_ip4_address, &ip0->dst_address,
2256                            clib_net_to_host_u16 (tcp0->dst),
2257                            format_ip4_address, &new_addr0);
2258               next0 = SNAT_OUT2IN_NEXT_DROP;
2259               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2260               goto trace00;
2261             }
2262           new_port0 = ses0->in_port;
2263
2264           old_addr0 = ip0->dst_address;
2265           ip0->dst_address = new_addr0;
2266           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2267
2268           sum0 = ip0->checksum;
2269           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2270                                  ip4_header_t,
2271                                  dst_address /* changed member */);
2272           ip0->checksum = ip_csum_fold (sum0);
2273
2274           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2275             {
2276               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2277                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2278               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
2279                 snat_det_ses_close(dm0, ses0);
2280
2281               old_port0 = tcp0->dst;
2282               tcp0->dst = new_port0;
2283
2284               sum0 = tcp0->checksum;
2285               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2286                                      ip4_header_t,
2287                                      dst_address /* changed member */);
2288
2289               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2290                                      ip4_header_t /* cheat */,
2291                                      length /* changed member */);
2292               tcp0->checksum = ip_csum_fold(sum0);
2293             }
2294           else
2295             {
2296               old_port0 = udp0->dst_port;
2297               udp0->dst_port = new_port0;
2298               udp0->checksum = 0;
2299             }
2300
2301         trace00:
2302
2303           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2304                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2305             {
2306               snat_out2in_trace_t *t =
2307                  vlib_add_trace (vm, node, b0, sizeof (*t));
2308               t->sw_if_index = sw_if_index0;
2309               t->next_index = next0;
2310               t->session_index = ~0;
2311               if (ses0)
2312                 t->session_index = ses0 - dm0->sessions;
2313             }
2314
2315           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2316
2317           /* verify speculative enqueue, maybe switch current next frame */
2318           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2319                                            to_next, n_left_to_next,
2320                                            bi0, next0);
2321         }
2322
2323       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2324     }
2325
2326   vlib_node_increment_counter (vm, snat_det_out2in_node.index,
2327                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2328                                pkts_processed);
2329   return frame->n_vectors;
2330 }
2331
2332 VLIB_REGISTER_NODE (snat_det_out2in_node) = {
2333   .function = snat_det_out2in_node_fn,
2334   .name = "nat44-det-out2in",
2335   .vector_size = sizeof (u32),
2336   .format_trace = format_snat_out2in_trace,
2337   .type = VLIB_NODE_TYPE_INTERNAL,
2338
2339   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2340   .error_strings = snat_out2in_error_strings,
2341
2342   .runtime_data_bytes = sizeof (snat_runtime_t),
2343
2344   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
2345
2346   /* edit / add dispositions here */
2347   .next_nodes = {
2348     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
2349     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2350     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2351     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
2352   },
2353 };
2354 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn);
2355
2356 /**
2357  * Get address and port values to be used for ICMP packet translation
2358  * and create session if needed
2359  *
2360  * @param[in,out] sm             NAT main
2361  * @param[in,out] node           NAT node runtime
2362  * @param[in] thread_index       thread index
2363  * @param[in,out] b0             buffer containing packet to be translated
2364  * @param[out] p_proto           protocol used for matching
2365  * @param[out] p_value           address and port after NAT translation
2366  * @param[out] p_dont_translate  if packet should not be translated
2367  * @param d                      optional parameter
2368  * @param e                      optional parameter
2369  */
2370 u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node,
2371                           u32 thread_index, vlib_buffer_t *b0,
2372                           ip4_header_t *ip0, u8 *p_proto,
2373                           snat_session_key_t *p_value,
2374                           u8 *p_dont_translate, void *d, void *e)
2375 {
2376   icmp46_header_t *icmp0;
2377   u32 sw_if_index0;
2378   u8 protocol;
2379   snat_det_out_key_t key0;
2380   u8 dont_translate = 0;
2381   u32 next0 = ~0;
2382   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2383   ip4_header_t *inner_ip0;
2384   void *l4_header = 0;
2385   icmp46_header_t *inner_icmp0;
2386   snat_det_map_t * dm0 = 0;
2387   ip4_address_t new_addr0 = {{0}};
2388   snat_det_session_t * ses0 = 0;
2389   ip4_address_t out_addr;
2390
2391   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2392   echo0 = (icmp_echo_header_t *)(icmp0+1);
2393   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2394
2395   if (!icmp_is_error_message (icmp0))
2396     {
2397       protocol = SNAT_PROTOCOL_ICMP;
2398       key0.ext_host_addr = ip0->src_address;
2399       key0.ext_host_port = 0;
2400       key0.out_port = echo0->identifier;
2401       out_addr = ip0->dst_address;
2402     }
2403   else
2404     {
2405       inner_ip0 = (ip4_header_t *)(echo0+1);
2406       l4_header = ip4_next_header (inner_ip0);
2407       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2408       key0.ext_host_addr = inner_ip0->dst_address;
2409       out_addr = inner_ip0->src_address;
2410       switch (protocol)
2411         {
2412         case SNAT_PROTOCOL_ICMP:
2413           inner_icmp0 = (icmp46_header_t*)l4_header;
2414           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2415           key0.ext_host_port = 0;
2416           key0.out_port = inner_echo0->identifier;
2417           break;
2418         case SNAT_PROTOCOL_UDP:
2419         case SNAT_PROTOCOL_TCP:
2420           key0.ext_host_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2421           key0.out_port = ((tcp_udp_header_t*)l4_header)->src_port;
2422           break;
2423         default:
2424           b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
2425           next0 = SNAT_OUT2IN_NEXT_DROP;
2426           goto out;
2427         }
2428     }
2429
2430   dm0 = snat_det_map_by_out(sm, &out_addr);
2431   if (PREDICT_FALSE(!dm0))
2432     {
2433       /* Don't NAT packet aimed at the intfc address */
2434       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
2435                                           ip0->dst_address.as_u32)))
2436         {
2437           dont_translate = 1;
2438           goto out;
2439         }
2440       clib_warning("unknown dst address:  %U",
2441                    format_ip4_address, &ip0->dst_address);
2442       goto out;
2443     }
2444
2445   snat_det_reverse(dm0, &ip0->dst_address,
2446                    clib_net_to_host_u16(key0.out_port), &new_addr0);
2447
2448   ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
2449   if (PREDICT_FALSE(!ses0))
2450     {
2451       /* Don't NAT packet aimed at the intfc address */
2452       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
2453                                           ip0->dst_address.as_u32)))
2454         {
2455           dont_translate = 1;
2456           goto out;
2457         }
2458       clib_warning("no match src %U:%d dst %U:%d for user %U",
2459                    format_ip4_address, &key0.ext_host_addr,
2460                    clib_net_to_host_u16 (key0.ext_host_port),
2461                    format_ip4_address, &out_addr,
2462                    clib_net_to_host_u16 (key0.out_port),
2463                    format_ip4_address, &new_addr0);
2464       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2465       next0 = SNAT_OUT2IN_NEXT_DROP;
2466       goto out;
2467     }
2468
2469   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
2470                     !icmp_is_error_message (icmp0)))
2471     {
2472       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
2473       next0 = SNAT_OUT2IN_NEXT_DROP;
2474       goto out;
2475     }
2476
2477   goto out;
2478
2479 out:
2480   *p_proto = protocol;
2481   if (ses0)
2482     {
2483       p_value->addr = new_addr0;
2484       p_value->fib_index = sm->inside_fib_index;
2485       p_value->port = ses0->in_port;
2486     }
2487   *p_dont_translate = dont_translate;
2488   if (d)
2489     *(snat_det_session_t**)d = ses0;
2490   if (e)
2491     *(snat_det_map_t**)e = dm0;
2492   return next0;
2493 }
2494
2495 /**********************/
2496 /*** worker handoff ***/
2497 /**********************/
2498 static uword
2499 snat_out2in_worker_handoff_fn (vlib_main_t * vm,
2500                                vlib_node_runtime_t * node,
2501                                vlib_frame_t * frame)
2502 {
2503   snat_main_t *sm = &snat_main;
2504   vlib_thread_main_t *tm = vlib_get_thread_main ();
2505   u32 n_left_from, *from, *to_next = 0;
2506   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
2507   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
2508     = 0;
2509   vlib_frame_queue_elt_t *hf = 0;
2510   vlib_frame_t *f = 0;
2511   int i;
2512   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
2513   u32 next_worker_index = 0;
2514   u32 current_worker_index = ~0;
2515   u32 thread_index = vlib_get_thread_index ();
2516
2517   ASSERT (vec_len (sm->workers));
2518
2519   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
2520     {
2521       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
2522
2523       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
2524                                sm->first_worker_index + sm->num_workers - 1,
2525                                (vlib_frame_queue_t *) (~0));
2526     }
2527
2528   from = vlib_frame_vector_args (frame);
2529   n_left_from = frame->n_vectors;
2530
2531   while (n_left_from > 0)
2532     {
2533       u32 bi0;
2534       vlib_buffer_t *b0;
2535       u32 sw_if_index0;
2536       u32 rx_fib_index0;
2537       ip4_header_t * ip0;
2538       u8 do_handoff;
2539
2540       bi0 = from[0];
2541       from += 1;
2542       n_left_from -= 1;
2543
2544       b0 = vlib_get_buffer (vm, bi0);
2545
2546       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
2547       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2548
2549       ip0 = vlib_buffer_get_current (b0);
2550
2551       next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0);
2552
2553       if (PREDICT_FALSE (next_worker_index != thread_index))
2554         {
2555           do_handoff = 1;
2556
2557           if (next_worker_index != current_worker_index)
2558             {
2559               if (hf)
2560                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2561
2562               hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index,
2563                                                       next_worker_index,
2564                                                       handoff_queue_elt_by_worker_index);
2565
2566               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2567               to_next_worker = &hf->buffer_index[hf->n_vectors];
2568               current_worker_index = next_worker_index;
2569             }
2570
2571           /* enqueue to correct worker thread */
2572           to_next_worker[0] = bi0;
2573           to_next_worker++;
2574           n_left_to_next_worker--;
2575
2576           if (n_left_to_next_worker == 0)
2577             {
2578               hf->n_vectors = VLIB_FRAME_SIZE;
2579               vlib_put_frame_queue_elt (hf);
2580               current_worker_index = ~0;
2581               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2582               hf = 0;
2583             }
2584         }
2585       else
2586         {
2587           do_handoff = 0;
2588           /* if this is 1st frame */
2589           if (!f)
2590             {
2591               f = vlib_get_frame_to_node (vm, sm->out2in_node_index);
2592               to_next = vlib_frame_vector_args (f);
2593             }
2594
2595           to_next[0] = bi0;
2596           to_next += 1;
2597           f->n_vectors++;
2598         }
2599
2600       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2601                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2602         {
2603           snat_out2in_worker_handoff_trace_t *t =
2604             vlib_add_trace (vm, node, b0, sizeof (*t));
2605           t->next_worker_index = next_worker_index;
2606           t->do_handoff = do_handoff;
2607         }
2608     }
2609
2610   if (f)
2611     vlib_put_frame_to_node (vm, sm->out2in_node_index, f);
2612
2613   if (hf)
2614     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2615
2616   /* Ship frames to the worker nodes */
2617   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2618     {
2619       if (handoff_queue_elt_by_worker_index[i])
2620         {
2621           hf = handoff_queue_elt_by_worker_index[i];
2622           /*
2623            * It works better to let the handoff node
2624            * rate-adapt, always ship the handoff queue element.
2625            */
2626           if (1 || hf->n_vectors == hf->last_n_vectors)
2627             {
2628               vlib_put_frame_queue_elt (hf);
2629               handoff_queue_elt_by_worker_index[i] = 0;
2630             }
2631           else
2632             hf->last_n_vectors = hf->n_vectors;
2633         }
2634       congested_handoff_queue_by_worker_index[i] =
2635         (vlib_frame_queue_t *) (~0);
2636     }
2637   hf = 0;
2638   current_worker_index = ~0;
2639   return frame->n_vectors;
2640 }
2641
2642 VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = {
2643   .function = snat_out2in_worker_handoff_fn,
2644   .name = "nat44-out2in-worker-handoff",
2645   .vector_size = sizeof (u32),
2646   .format_trace = format_snat_out2in_worker_handoff_trace,
2647   .type = VLIB_NODE_TYPE_INTERNAL,
2648
2649   .n_next_nodes = 1,
2650
2651   .next_nodes = {
2652     [0] = "error-drop",
2653   },
2654 };
2655
2656 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn);
2657
2658 static uword
2659 snat_out2in_fast_node_fn (vlib_main_t * vm,
2660                           vlib_node_runtime_t * node,
2661                           vlib_frame_t * frame)
2662 {
2663   u32 n_left_from, * from, * to_next;
2664   snat_out2in_next_t next_index;
2665   u32 pkts_processed = 0;
2666   snat_main_t * sm = &snat_main;
2667
2668   from = vlib_frame_vector_args (frame);
2669   n_left_from = frame->n_vectors;
2670   next_index = node->cached_next_index;
2671
2672   while (n_left_from > 0)
2673     {
2674       u32 n_left_to_next;
2675
2676       vlib_get_next_frame (vm, node, next_index,
2677                            to_next, n_left_to_next);
2678
2679       while (n_left_from > 0 && n_left_to_next > 0)
2680         {
2681           u32 bi0;
2682           vlib_buffer_t * b0;
2683           u32 next0 = SNAT_OUT2IN_NEXT_DROP;
2684           u32 sw_if_index0;
2685           ip4_header_t * ip0;
2686           ip_csum_t sum0;
2687           u32 new_addr0, old_addr0;
2688           u16 new_port0, old_port0;
2689           udp_header_t * udp0;
2690           tcp_header_t * tcp0;
2691           icmp46_header_t * icmp0;
2692           snat_session_key_t key0, sm0;
2693           u32 proto0;
2694           u32 rx_fib_index0;
2695
2696           /* speculatively enqueue b0 to the current next frame */
2697           bi0 = from[0];
2698           to_next[0] = bi0;
2699           from += 1;
2700           to_next += 1;
2701           n_left_from -= 1;
2702           n_left_to_next -= 1;
2703
2704           b0 = vlib_get_buffer (vm, bi0);
2705
2706           ip0 = vlib_buffer_get_current (b0);
2707           udp0 = ip4_next_header (ip0);
2708           tcp0 = (tcp_header_t *) udp0;
2709           icmp0 = (icmp46_header_t *) udp0;
2710
2711           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2712           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2713
2714           vnet_feature_next (sw_if_index0, &next0, b0);
2715
2716           if (PREDICT_FALSE(ip0->ttl == 1))
2717             {
2718               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2719               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2720                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2721                                            0);
2722               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2723               goto trace00;
2724             }
2725
2726           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2727
2728           if (PREDICT_FALSE (proto0 == ~0))
2729               goto trace00;
2730
2731           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2732             {
2733               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2734                                   rx_fib_index0, node, next0, ~0, 0, 0);
2735               goto trace00;
2736             }
2737
2738           key0.addr = ip0->dst_address;
2739           key0.port = udp0->dst_port;
2740           key0.fib_index = rx_fib_index0;
2741
2742           if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
2743             {
2744               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2745               goto trace00;
2746             }
2747
2748           new_addr0 = sm0.addr.as_u32;
2749           new_port0 = sm0.port;
2750           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2751           old_addr0 = ip0->dst_address.as_u32;
2752           ip0->dst_address.as_u32 = new_addr0;
2753
2754           sum0 = ip0->checksum;
2755           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2756                                  ip4_header_t,
2757                                  dst_address /* changed member */);
2758           ip0->checksum = ip_csum_fold (sum0);
2759
2760           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
2761             {
2762                if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2763                 {
2764                   old_port0 = tcp0->dst_port;
2765                   tcp0->dst_port = new_port0;
2766
2767                   sum0 = tcp0->checksum;
2768                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2769                                          ip4_header_t,
2770                                          dst_address /* changed member */);
2771
2772                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2773                                          ip4_header_t /* cheat */,
2774                                          length /* changed member */);
2775                   tcp0->checksum = ip_csum_fold(sum0);
2776                 }
2777               else
2778                 {
2779                   old_port0 = udp0->dst_port;
2780                   udp0->dst_port = new_port0;
2781                   udp0->checksum = 0;
2782                 }
2783             }
2784           else
2785             {
2786               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2787                 {
2788                   sum0 = tcp0->checksum;
2789                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2790                                          ip4_header_t,
2791                                          dst_address /* changed member */);
2792
2793                   tcp0->checksum = ip_csum_fold(sum0);
2794                 }
2795             }
2796
2797         trace00:
2798
2799           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2800                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2801             {
2802               snat_out2in_trace_t *t =
2803                  vlib_add_trace (vm, node, b0, sizeof (*t));
2804               t->sw_if_index = sw_if_index0;
2805               t->next_index = next0;
2806             }
2807
2808           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2809
2810           /* verify speculative enqueue, maybe switch current next frame */
2811           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2812                                            to_next, n_left_to_next,
2813                                            bi0, next0);
2814         }
2815
2816       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2817     }
2818
2819   vlib_node_increment_counter (vm, snat_out2in_fast_node.index,
2820                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2821                                pkts_processed);
2822   return frame->n_vectors;
2823 }
2824
2825 VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
2826   .function = snat_out2in_fast_node_fn,
2827   .name = "nat44-out2in-fast",
2828   .vector_size = sizeof (u32),
2829   .format_trace = format_snat_out2in_fast_trace,
2830   .type = VLIB_NODE_TYPE_INTERNAL,
2831
2832   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2833   .error_strings = snat_out2in_error_strings,
2834
2835   .runtime_data_bytes = sizeof (snat_runtime_t),
2836
2837   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
2838
2839   /* edit / add dispositions here */
2840   .next_nodes = {
2841     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2842     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
2843     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2844     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
2845   },
2846 };
2847 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn);