SNAT: IP fragmentation (VPP-890)
[vpp.git] / src / plugins / nat / out2in.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/udp/udp.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <nat/nat.h>
26 #include <nat/nat_ipfix_logging.h>
27 #include <nat/nat_det.h>
28 #include <nat/nat_reass.h>
29
30 #include <vppinfra/hash.h>
31 #include <vppinfra/error.h>
32 #include <vppinfra/elog.h>
33
34 typedef struct {
35   u32 sw_if_index;
36   u32 next_index;
37   u32 session_index;
38 } snat_out2in_trace_t;
39
40 typedef struct {
41   u32 next_worker_index;
42   u8 do_handoff;
43 } snat_out2in_worker_handoff_trace_t;
44
45 /* packet trace format function */
46 static u8 * format_snat_out2in_trace (u8 * s, va_list * args)
47 {
48   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
49   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
50   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
51
52   s = format (s, "NAT44_OUT2IN: sw_if_index %d, next index %d, session index %d",
53               t->sw_if_index, t->next_index, t->session_index);
54   return s;
55 }
56
57 static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args)
58 {
59   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
60   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
61   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
62
63   s = format (s, "NAT44_OUT2IN_FAST: sw_if_index %d, next index %d",
64               t->sw_if_index, t->next_index);
65   return s;
66 }
67
68 static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args)
69 {
70   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
71   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
72   snat_out2in_worker_handoff_trace_t * t =
73     va_arg (*args, snat_out2in_worker_handoff_trace_t *);
74   char * m;
75
76   m = t->do_handoff ? "next worker" : "same worker";
77   s = format (s, "NAT44_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
78
79   return s;
80 }
81
82 typedef struct {
83   u32 sw_if_index;
84   u32 next_index;
85   u8 cached;
86 } nat44_out2in_reass_trace_t;
87
88 static u8 * format_nat44_out2in_reass_trace (u8 * s, va_list * args)
89 {
90   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
91   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
92   nat44_out2in_reass_trace_t * t = va_arg (*args, nat44_out2in_reass_trace_t *);
93
94   s = format (s, "NAT44_OUT2IN_REASS: sw_if_index %d, next index %d, status %s",
95               t->sw_if_index, t->next_index,
96               t->cached ? "cached" : "translated");
97
98   return s;
99 }
100
/* Registration objects for the out2in graph nodes declared by this file.
   NOTE(review): the initialised definitions are presumably provided further
   down the file; they are not visible from this section. */
101 vlib_node_registration_t snat_out2in_node;
102 vlib_node_registration_t snat_out2in_fast_node;
103 vlib_node_registration_t snat_out2in_worker_handoff_node;
104 vlib_node_registration_t snat_det_out2in_node;
105 vlib_node_registration_t nat44_out2in_reass_node;
106
/* Error counters of the out2in nodes, one _(symbol suffix, description)
   entry per counter. The enum and the string table below are both expanded
   from this list, so their order always matches. */
#define foreach_snat_out2in_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(OUT2IN_PACKETS, "Good out2in packets processed")      \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(NO_TRANSLATION, "No translation")                     \
_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")   \
_(DROP_FRAGMENT, "Drop fragment")                       \
_(MAX_REASS, "Maximum reassemblies exceeded")           \
_(MAX_FRAG, "Maximum fragments per reassembly exceeded")

/* SNAT_OUT2IN_ERROR_<symbol> index for every counter, plus the count. */
typedef enum
{
#define _(name, text) SNAT_OUT2IN_ERROR_##name,
  foreach_snat_out2in_error
#undef _
  SNAT_OUT2IN_N_ERROR,
} snat_out2in_error_t;

/* Descriptions, indexed by snat_out2in_error_t. */
static char *snat_out2in_error_strings[] = {
#define _(name, text) text,
  foreach_snat_out2in_error
#undef _
};
129
/* Next-node indices used by the out2in code; SNAT_OUT2IN_N_NEXT is the
   number of entries. */
typedef enum
{
  SNAT_OUT2IN_NEXT_DROP = 0,
  SNAT_OUT2IN_NEXT_LOOKUP = 1,
  SNAT_OUT2IN_NEXT_ICMP_ERROR = 2,
  SNAT_OUT2IN_NEXT_REASS = 3,
  SNAT_OUT2IN_N_NEXT = 4,
} snat_out2in_next_t;
137
138 /**
139  * @brief Create session for static mapping.
140  *
141  * Create NAT session initiated by host from external network with static
142  * mapping.
143  *
144  * @param sm     NAT main.
145  * @param b0     Vlib buffer.
146  * @param in2out In2out NAT44 session key.
147  * @param out2in Out2in NAT44 session key.
148  * @param node   Vlib node.
149  *
150  * @returns SNAT session if successfully created otherwise 0.
151  */
152 static inline snat_session_t *
153 create_session_for_static_mapping (snat_main_t *sm,
154                                    vlib_buffer_t *b0,
155                                    snat_session_key_t in2out,
156                                    snat_session_key_t out2in,
157                                    vlib_node_runtime_t * node,
158                                    u32 thread_index)
159 {
160   snat_user_t *u;
161   snat_user_key_t user_key;
162   snat_session_t *s;
163   clib_bihash_kv_8_8_t kv0, value0;
164   dlist_elt_t * per_user_translation_list_elt;
165   dlist_elt_t * per_user_list_head_elt;
166   ip4_header_t *ip0;
167   udp_header_t *udp0;
168
169   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
170     {
171       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
172       return 0;
173     }
174
175   ip0 = vlib_buffer_get_current (b0);
176   udp0 = ip4_next_header (ip0);
177
178   user_key.addr = in2out.addr;
179   user_key.fib_index = in2out.fib_index;
180   kv0.key = user_key.as_u64;
181
182   /* Ever heard of the "user" = inside ip4 address before? */
183   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].user_hash,
184                               &kv0, &value0))
185     {
186       /* no, make a new one */
187       pool_get (sm->per_thread_data[thread_index].users, u);
188       memset (u, 0, sizeof (*u));
189       u->addr = in2out.addr;
190       u->fib_index = in2out.fib_index;
191
192       pool_get (sm->per_thread_data[thread_index].list_pool,
193                 per_user_list_head_elt);
194
195       u->sessions_per_user_list_head_index = per_user_list_head_elt -
196         sm->per_thread_data[thread_index].list_pool;
197
198       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
199                        u->sessions_per_user_list_head_index);
200
201       kv0.value = u - sm->per_thread_data[thread_index].users;
202
203       /* add user */
204       clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].user_hash,
205                                &kv0, 1 /* is_add */);
206     }
207   else
208     {
209       u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
210                              value0.value);
211     }
212
213   pool_get (sm->per_thread_data[thread_index].sessions, s);
214   memset (s, 0, sizeof (*s));
215
216   s->outside_address_index = ~0;
217   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
218   s->ext_host_addr.as_u32 = ip0->src_address.as_u32;
219   s->ext_host_port = udp0->src_port;
220   u->nstaticsessions++;
221
222   /* Create list elts */
223   pool_get (sm->per_thread_data[thread_index].list_pool,
224             per_user_translation_list_elt);
225   clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
226                    per_user_translation_list_elt -
227                    sm->per_thread_data[thread_index].list_pool);
228
229   per_user_translation_list_elt->value =
230     s - sm->per_thread_data[thread_index].sessions;
231   s->per_user_index =
232     per_user_translation_list_elt - sm->per_thread_data[thread_index].list_pool;
233   s->per_user_list_head_index = u->sessions_per_user_list_head_index;
234
235   clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
236                       s->per_user_list_head_index,
237                       per_user_translation_list_elt -
238                       sm->per_thread_data[thread_index].list_pool);
239
240   s->in2out = in2out;
241   s->out2in = out2in;
242   s->in2out.protocol = out2in.protocol;
243
244   /* Add to translation hashes */
245   kv0.key = s->in2out.as_u64;
246   kv0.value = s - sm->per_thread_data[thread_index].sessions;
247   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
248                                1 /* is_add */))
249       clib_warning ("in2out key add failed");
250
251   kv0.key = s->out2in.as_u64;
252   kv0.value = s - sm->per_thread_data[thread_index].sessions;
253
254   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
255                                1 /* is_add */))
256       clib_warning ("out2in key add failed");
257
258   /* log NAT event */
259   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
260                                       s->out2in.addr.as_u32,
261                                       s->in2out.protocol,
262                                       s->in2out.port,
263                                       s->out2in.port,
264                                       s->in2out.fib_index);
265    return s;
266 }
267
268 static_always_inline
269 snat_out2in_error_t icmp_get_key(ip4_header_t *ip0,
270                                  snat_session_key_t *p_key0)
271 {
272   icmp46_header_t *icmp0;
273   snat_session_key_t key0;
274   icmp_echo_header_t *echo0, *inner_echo0 = 0;
275   ip4_header_t *inner_ip0;
276   void *l4_header = 0;
277   icmp46_header_t *inner_icmp0;
278
279   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
280   echo0 = (icmp_echo_header_t *)(icmp0+1);
281
282   if (!icmp_is_error_message (icmp0))
283     {
284       key0.protocol = SNAT_PROTOCOL_ICMP;
285       key0.addr = ip0->dst_address;
286       key0.port = echo0->identifier;
287     }
288   else
289     {
290       inner_ip0 = (ip4_header_t *)(echo0+1);
291       l4_header = ip4_next_header (inner_ip0);
292       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
293       key0.addr = inner_ip0->src_address;
294       switch (key0.protocol)
295         {
296         case SNAT_PROTOCOL_ICMP:
297           inner_icmp0 = (icmp46_header_t*)l4_header;
298           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
299           key0.port = inner_echo0->identifier;
300           break;
301         case SNAT_PROTOCOL_UDP:
302         case SNAT_PROTOCOL_TCP:
303           key0.port = ((tcp_udp_header_t*)l4_header)->src_port;
304           break;
305         default:
306           return SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL;
307         }
308     }
309   *p_key0 = key0;
310   return -1; /* success */
311 }
312
313 /**
314  * Get address and port values to be used for ICMP packet translation
315  * and create session if needed
316  *
317  * @param[in,out] sm             NAT main
318  * @param[in,out] node           NAT node runtime
319  * @param[in] thread_index       thread index
320  * @param[in,out] b0             buffer containing packet to be translated
321  * @param[out] p_proto           protocol used for matching
322  * @param[out] p_value           address and port after NAT translation
323  * @param[out] p_dont_translate  if packet should not be translated
324  * @param d                      optional parameter
325  * @param e                      optional parameter
326  */
327 u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
328                            u32 thread_index, vlib_buffer_t *b0,
329                            ip4_header_t *ip0, u8 *p_proto,
330                            snat_session_key_t *p_value,
331                            u8 *p_dont_translate, void *d, void *e)
332 {
333   icmp46_header_t *icmp0;
334   u32 sw_if_index0;
335   u32 rx_fib_index0;
336   snat_session_key_t key0;
337   snat_session_key_t sm0;
338   snat_session_t *s0 = 0;
339   u8 dont_translate = 0;
340   clib_bihash_kv_8_8_t kv0, value0;
341   u8 is_addr_only;
342   u32 next0 = ~0;
343   int err;
344
345   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
346   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
347   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
348
349   key0.protocol = 0;
350
351   err = icmp_get_key (ip0, &key0);
352   if (err != -1)
353     {
354       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
355       next0 = SNAT_OUT2IN_NEXT_DROP;
356       goto out;
357     }
358   key0.fib_index = rx_fib_index0;
359
360   kv0.key = key0.as_u64;
361
362   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
363                               &value0))
364     {
365       /* Try to match static mapping by external address and port,
366          destination address and port in packet */
367       if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only))
368         {
369           /* Don't NAT packet aimed at the intfc address */
370           if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
371                                               ip0->dst_address.as_u32)))
372             {
373               dont_translate = 1;
374               goto out;
375             }
376           b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
377           next0 = SNAT_OUT2IN_NEXT_DROP;
378           goto out;
379         }
380
381       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
382                         (icmp0->type != ICMP4_echo_request || !is_addr_only)))
383         {
384           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
385           next0 = SNAT_OUT2IN_NEXT_DROP;
386           goto out;
387         }
388
389       /* Create session initiated by host from external network */
390       s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
391                                              node, thread_index);
392
393       if (!s0)
394         {
395           next0 = SNAT_OUT2IN_NEXT_DROP;
396           goto out;
397         }
398     }
399   else
400     {
401       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
402                         icmp0->type != ICMP4_echo_request &&
403                         !icmp_is_error_message (icmp0)))
404         {
405           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
406           next0 = SNAT_OUT2IN_NEXT_DROP;
407           goto out;
408         }
409
410       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
411                               value0.value);
412     }
413
414 out:
415   *p_proto = key0.protocol;
416   if (s0)
417     *p_value = s0->in2out;
418   *p_dont_translate = dont_translate;
419   if (d)
420     *(snat_session_t**)d = s0;
421   return next0;
422 }
423
424 /**
425  * Get address and port values to be used for ICMP packet translation
426  *
427  * @param[in] sm                 NAT main
428  * @param[in,out] node           NAT node runtime
429  * @param[in] thread_index       thread index
430  * @param[in,out] b0             buffer containing packet to be translated
431  * @param[out] p_proto           protocol used for matching
432  * @param[out] p_value           address and port after NAT translation
433  * @param[out] p_dont_translate  if packet should not be translated
434  * @param d                      optional parameter
435  * @param e                      optional parameter
436  */
437 u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
438                            u32 thread_index, vlib_buffer_t *b0,
439                            ip4_header_t *ip0, u8 *p_proto,
440                            snat_session_key_t *p_value,
441                            u8 *p_dont_translate, void *d, void *e)
442 {
443   icmp46_header_t *icmp0;
444   u32 sw_if_index0;
445   u32 rx_fib_index0;
446   snat_session_key_t key0;
447   snat_session_key_t sm0;
448   u8 dont_translate = 0;
449   u8 is_addr_only;
450   u32 next0 = ~0;
451   int err;
452
453   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
454   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
455   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
456
457   err = icmp_get_key (ip0, &key0);
458   if (err != -1)
459     {
460       b0->error = node->errors[err];
461       next0 = SNAT_OUT2IN_NEXT_DROP;
462       goto out2;
463     }
464   key0.fib_index = rx_fib_index0;
465
466   if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only))
467     {
468       /* Don't NAT packet aimed at the intfc address */
469       if (is_interface_addr(sm, node, sw_if_index0, ip0->dst_address.as_u32))
470         {
471           dont_translate = 1;
472           goto out;
473         }
474       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
475       next0 = SNAT_OUT2IN_NEXT_DROP;
476       goto out;
477     }
478
479   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
480                     (icmp0->type != ICMP4_echo_request || !is_addr_only) &&
481                     !icmp_is_error_message (icmp0)))
482     {
483       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
484       next0 = SNAT_OUT2IN_NEXT_DROP;
485       goto out;
486     }
487
488 out:
489   *p_value = sm0;
490 out2:
491   *p_proto = key0.protocol;
492   *p_dont_translate = dont_translate;
493   return next0;
494 }
495
496 static inline u32 icmp_out2in (snat_main_t *sm,
497                                vlib_buffer_t * b0,
498                                ip4_header_t * ip0,
499                                icmp46_header_t * icmp0,
500                                u32 sw_if_index0,
501                                u32 rx_fib_index0,
502                                vlib_node_runtime_t * node,
503                                u32 next0,
504                                u32 thread_index,
505                                void *d,
506                                void *e)
507 {
508   snat_session_key_t sm0;
509   u8 protocol;
510   icmp_echo_header_t *echo0, *inner_echo0 = 0;
511   ip4_header_t *inner_ip0 = 0;
512   void *l4_header = 0;
513   icmp46_header_t *inner_icmp0;
514   u8 dont_translate;
515   u32 new_addr0, old_addr0;
516   u16 old_id0, new_id0;
517   ip_csum_t sum0;
518   u16 checksum0;
519   u32 next0_tmp;
520
521   echo0 = (icmp_echo_header_t *)(icmp0+1);
522
523   next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0, ip0,
524                                        &protocol, &sm0, &dont_translate, d, e);
525   if (next0_tmp != ~0)
526     next0 = next0_tmp;
527   if (next0 == SNAT_OUT2IN_NEXT_DROP || dont_translate)
528     goto out;
529
530   sum0 = ip_incremental_checksum (0, icmp0,
531                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
532   checksum0 = ~ip_csum_fold (sum0);
533   if (checksum0 != 0 && checksum0 != 0xffff)
534     {
535       next0 = SNAT_OUT2IN_NEXT_DROP;
536       goto out;
537     }
538
539   old_addr0 = ip0->dst_address.as_u32;
540   new_addr0 = ip0->dst_address.as_u32 = sm0.addr.as_u32;
541   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
542
543   sum0 = ip0->checksum;
544   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
545                          dst_address /* changed member */);
546   ip0->checksum = ip_csum_fold (sum0);
547
548   if (!icmp_is_error_message (icmp0))
549     {
550       new_id0 = sm0.port;
551       if (PREDICT_FALSE(new_id0 != echo0->identifier))
552         {
553           old_id0 = echo0->identifier;
554           new_id0 = sm0.port;
555           echo0->identifier = new_id0;
556
557           sum0 = icmp0->checksum;
558           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
559                                  identifier /* changed member */);
560           icmp0->checksum = ip_csum_fold (sum0);
561         }
562     }
563   else
564     {
565       inner_ip0 = (ip4_header_t *)(echo0+1);
566       l4_header = ip4_next_header (inner_ip0);
567
568       if (!ip4_header_checksum_is_valid (inner_ip0))
569         {
570           next0 = SNAT_OUT2IN_NEXT_DROP;
571           goto out;
572         }
573
574       old_addr0 = inner_ip0->src_address.as_u32;
575       inner_ip0->src_address = sm0.addr;
576       new_addr0 = inner_ip0->src_address.as_u32;
577
578       sum0 = icmp0->checksum;
579       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
580                              src_address /* changed member */);
581       icmp0->checksum = ip_csum_fold (sum0);
582
583       switch (protocol)
584         {
585         case SNAT_PROTOCOL_ICMP:
586           inner_icmp0 = (icmp46_header_t*)l4_header;
587           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
588
589           old_id0 = inner_echo0->identifier;
590           new_id0 = sm0.port;
591           inner_echo0->identifier = new_id0;
592
593           sum0 = icmp0->checksum;
594           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
595                                  identifier);
596           icmp0->checksum = ip_csum_fold (sum0);
597           break;
598         case SNAT_PROTOCOL_UDP:
599         case SNAT_PROTOCOL_TCP:
600           old_id0 = ((tcp_udp_header_t*)l4_header)->src_port;
601           new_id0 = sm0.port;
602           ((tcp_udp_header_t*)l4_header)->src_port = new_id0;
603
604           sum0 = icmp0->checksum;
605           sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
606                                  src_port);
607           icmp0->checksum = ip_csum_fold (sum0);
608           break;
609         default:
610           ASSERT(0);
611         }
612     }
613
614 out:
615   return next0;
616 }
617
618
619 static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
620                                          vlib_buffer_t * b0,
621                                          ip4_header_t * ip0,
622                                          icmp46_header_t * icmp0,
623                                          u32 sw_if_index0,
624                                          u32 rx_fib_index0,
625                                          vlib_node_runtime_t * node,
626                                          u32 next0, f64 now,
627                                          u32 thread_index,
628                                          snat_session_t ** p_s0)
629 {
630   next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
631                       next0, thread_index, p_s0, 0);
632   snat_session_t * s0 = *p_s0;
633   if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
634     {
635       /* Accounting */
636       s0->last_heard = now;
637       s0->total_pkts++;
638       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
639       /* Per-user LRU list maintenance for dynamic translation */
640       if (!snat_is_session_static (s0))
641         {
642           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
643                              s0->per_user_index);
644           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
645                               s0->per_user_list_head_index,
646                               s0->per_user_index);
647         }
648     }
649   return next0;
650 }
651
652 static snat_session_t *
653 snat_out2in_unknown_proto (snat_main_t *sm,
654                            vlib_buffer_t * b,
655                            ip4_header_t * ip,
656                            u32 rx_fib_index,
657                            u32 thread_index,
658                            f64 now,
659                            vlib_main_t * vm,
660                            vlib_node_runtime_t * node)
661 {
662   clib_bihash_kv_8_8_t kv, value;
663   clib_bihash_kv_16_8_t s_kv, s_value;
664   snat_static_mapping_t *m;
665   snat_session_key_t m_key;
666   u32 old_addr, new_addr;
667   ip_csum_t sum;
668   nat_ed_ses_key_t key;
669   snat_session_t * s;
670   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
671   snat_user_key_t u_key;
672   snat_user_t *u;
673   dlist_elt_t *head, *elt;
674
675   old_addr = ip->dst_address.as_u32;
676
677   key.l_addr = ip->dst_address;
678   key.r_addr = ip->src_address;
679   key.fib_index = rx_fib_index;
680   key.proto = ip->protocol;
681   key.rsvd = 0;
682   key.l_port = 0;
683   s_kv.key[0] = key.as_u64[0];
684   s_kv.key[1] = key.as_u64[1];
685
686   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
687     {
688       s = pool_elt_at_index (tsm->sessions, s_value.value);
689       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
690     }
691   else
692     {
693       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
694         {
695           b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
696           return 0;
697         }
698
699       m_key.addr = ip->dst_address;
700       m_key.port = 0;
701       m_key.protocol = 0;
702       m_key.fib_index = rx_fib_index;
703       kv.key = m_key.as_u64;
704       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
705         {
706           b->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
707           return 0;
708         }
709
710       m = pool_elt_at_index (sm->static_mappings, value.value);
711
712       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
713
714       u_key.addr = ip->src_address;
715       u_key.fib_index = m->fib_index;
716       kv.key = u_key.as_u64;
717
718       /* Ever heard of the "user" = src ip4 address before? */
719       if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
720         {
721           /* no, make a new one */
722           pool_get (tsm->users, u);
723           memset (u, 0, sizeof (*u));
724           u->addr = ip->src_address;
725           u->fib_index = rx_fib_index;
726
727           pool_get (tsm->list_pool, head);
728           u->sessions_per_user_list_head_index = head - tsm->list_pool;
729
730           clib_dlist_init (tsm->list_pool,
731                            u->sessions_per_user_list_head_index);
732
733           kv.value = u - tsm->users;
734
735           /* add user */
736           clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1);
737         }
738       else
739         {
740           u = pool_elt_at_index (tsm->users, value.value);
741         }
742
743       /* Create a new session */
744       pool_get (tsm->sessions, s);
745       memset (s, 0, sizeof (*s));
746
747       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
748       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
749       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
750       s->outside_address_index = ~0;
751       s->out2in.addr.as_u32 = old_addr;
752       s->out2in.fib_index = rx_fib_index;
753       s->in2out.addr.as_u32 = new_addr;
754       s->in2out.fib_index = m->fib_index;
755       s->in2out.port = s->out2in.port = ip->protocol;
756       u->nstaticsessions++;
757
758       /* Create list elts */
759       pool_get (tsm->list_pool, elt);
760       clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
761       elt->value = s - tsm->sessions;
762       s->per_user_index = elt - tsm->list_pool;
763       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
764       clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
765                           s->per_user_index);
766
767       /* Add to lookup tables */
768       s_kv.value = s - tsm->sessions;
769       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
770         clib_warning ("out2in key add failed");
771
772       key.l_addr = ip->dst_address;
773       key.fib_index = m->fib_index;
774       s_kv.key[0] = key.as_u64[0];
775       s_kv.key[1] = key.as_u64[1];
776       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
777         clib_warning ("in2out key add failed");
778    }
779
780   /* Update IP checksum */
781   sum = ip->checksum;
782   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
783   ip->checksum = ip_csum_fold (sum);
784
785   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
786
787   /* Accounting */
788   s->last_heard = now;
789   s->total_pkts++;
790   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
791   /* Per-user LRU list maintenance */
792   clib_dlist_remove (tsm->list_pool, s->per_user_index);
793   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
794                       s->per_user_index);
795
796   return s;
797 }
798
799 static snat_session_t *
800 snat_out2in_lb (snat_main_t *sm,
801                 vlib_buffer_t * b,
802                 ip4_header_t * ip,
803                 u32 rx_fib_index,
804                 u32 thread_index,
805                 f64 now,
806                 vlib_main_t * vm,
807                 vlib_node_runtime_t * node)
808 {
809   nat_ed_ses_key_t key;
810   clib_bihash_kv_16_8_t s_kv, s_value;
811   udp_header_t *udp = ip4_next_header (ip);
812   tcp_header_t *tcp = (tcp_header_t *) udp;
813   snat_session_t *s = 0;
814   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
815   snat_session_key_t e_key, l_key;
816   clib_bihash_kv_8_8_t kv, value;
817   u32 old_addr, new_addr;
818   u32 proto = ip_proto_to_snat_proto (ip->protocol);
819   u16 new_port, old_port;
820   ip_csum_t sum;
821   snat_user_key_t u_key;
822   snat_user_t *u;
823   dlist_elt_t *head, *elt;
824
825   old_addr = ip->dst_address.as_u32;
826
827   key.l_addr = ip->dst_address;
828   key.r_addr = ip->src_address;
829   key.fib_index = rx_fib_index;
830   key.proto = ip->protocol;
831   key.rsvd = 0;
832   key.l_port = udp->dst_port;
833   s_kv.key[0] = key.as_u64[0];
834   s_kv.key[1] = key.as_u64[1];
835
836   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
837     {
838       s = pool_elt_at_index (tsm->sessions, s_value.value);
839     }
840   else
841     {
842       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
843         {
844           b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
845           return 0;
846         }
847
848       e_key.addr = ip->dst_address;
849       e_key.port = udp->dst_port;
850       e_key.protocol = proto;
851       e_key.fib_index = rx_fib_index;
852       if (snat_static_mapping_match(sm, e_key, &l_key, 1, 0))
853         return 0;
854
855       u_key.addr = l_key.addr;
856       u_key.fib_index = l_key.fib_index;
857       kv.key = u_key.as_u64;
858
859       /* Ever heard of the "user" = src ip4 address before? */
860       if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
861         {
862           /* no, make a new one */
863           pool_get (tsm->users, u);
864           memset (u, 0, sizeof (*u));
865           u->addr = l_key.addr;
866           u->fib_index = l_key.fib_index;
867
868           pool_get (tsm->list_pool, head);
869           u->sessions_per_user_list_head_index = head - tsm->list_pool;
870
871           clib_dlist_init (tsm->list_pool,
872                            u->sessions_per_user_list_head_index);
873
874           kv.value = u - tsm->users;
875
876           /* add user */
877           if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1))
878             clib_warning ("user key add failed");
879         }
880       else
881         {
882           u = pool_elt_at_index (tsm->users, value.value);
883         }
884
885       /* Create a new session */
886       pool_get (tsm->sessions, s);
887       memset (s, 0, sizeof (*s));
888
889       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
890       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
891       s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
892       s->outside_address_index = ~0;
893       s->out2in = e_key;
894       s->in2out = l_key;
895       u->nstaticsessions++;
896
897       /* Create list elts */
898       pool_get (tsm->list_pool, elt);
899       clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
900       elt->value = s - tsm->sessions;
901       s->per_user_index = elt - tsm->list_pool;
902       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
903       clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
904                           s->per_user_index);
905
906       /* Add to lookup tables */
907       s_kv.value = s - tsm->sessions;
908       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
909         clib_warning ("out2in-ed key add failed");
910
911       key.l_addr = l_key.addr;
912       key.fib_index = l_key.fib_index;
913       key.l_port = l_key.port;
914       s_kv.key[0] = key.as_u64[0];
915       s_kv.key[1] = key.as_u64[1];
916       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
917         clib_warning ("in2out-ed key add failed");
918     }
919
920   new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
921
922   /* Update IP checksum */
923   sum = ip->checksum;
924   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
925   ip->checksum = ip_csum_fold (sum);
926
927   if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
928     {
929       old_port = tcp->dst_port;
930       tcp->dst_port = s->in2out.port;
931       new_port = tcp->dst_port;
932
933       sum = tcp->checksum;
934       sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
935       sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
936       tcp->checksum = ip_csum_fold(sum);
937     }
938   else
939     {
940       udp->dst_port = s->in2out.port;
941       udp->checksum = 0;
942     }
943
944   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
945
946   /* Accounting */
947   s->last_heard = now;
948   s->total_pkts++;
949   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
950   return s;
951 }
952
953 static uword
954 snat_out2in_node_fn (vlib_main_t * vm,
955                   vlib_node_runtime_t * node,
956                   vlib_frame_t * frame)
957 {
958   u32 n_left_from, * from, * to_next;
959   snat_out2in_next_t next_index;
960   u32 pkts_processed = 0;
961   snat_main_t * sm = &snat_main;
962   f64 now = vlib_time_now (vm);
963   u32 thread_index = vlib_get_thread_index ();
964
965   from = vlib_frame_vector_args (frame);
966   n_left_from = frame->n_vectors;
967   next_index = node->cached_next_index;
968
969   while (n_left_from > 0)
970     {
971       u32 n_left_to_next;
972
973       vlib_get_next_frame (vm, node, next_index,
974                            to_next, n_left_to_next);
975
976       while (n_left_from >= 4 && n_left_to_next >= 2)
977         {
978           u32 bi0, bi1;
979           vlib_buffer_t * b0, * b1;
980           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
981           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
982           u32 sw_if_index0, sw_if_index1;
983           ip4_header_t * ip0, *ip1;
984           ip_csum_t sum0, sum1;
985           u32 new_addr0, old_addr0;
986           u16 new_port0, old_port0;
987           u32 new_addr1, old_addr1;
988           u16 new_port1, old_port1;
989           udp_header_t * udp0, * udp1;
990           tcp_header_t * tcp0, * tcp1;
991           icmp46_header_t * icmp0, * icmp1;
992           snat_session_key_t key0, key1, sm0, sm1;
993           u32 rx_fib_index0, rx_fib_index1;
994           u32 proto0, proto1;
995           snat_session_t * s0 = 0, * s1 = 0;
996           clib_bihash_kv_8_8_t kv0, kv1, value0, value1;
997
998           /* Prefetch next iteration. */
999           {
1000             vlib_buffer_t * p2, * p3;
1001
1002             p2 = vlib_get_buffer (vm, from[2]);
1003             p3 = vlib_get_buffer (vm, from[3]);
1004
1005             vlib_prefetch_buffer_header (p2, LOAD);
1006             vlib_prefetch_buffer_header (p3, LOAD);
1007
1008             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1009             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1010           }
1011
1012           /* speculatively enqueue b0 and b1 to the current next frame */
1013           to_next[0] = bi0 = from[0];
1014           to_next[1] = bi1 = from[1];
1015           from += 2;
1016           to_next += 2;
1017           n_left_from -= 2;
1018           n_left_to_next -= 2;
1019
1020           b0 = vlib_get_buffer (vm, bi0);
1021           b1 = vlib_get_buffer (vm, bi1);
1022
1023           vnet_buffer (b0)->snat.flags = 0;
1024           vnet_buffer (b1)->snat.flags = 0;
1025
1026           ip0 = vlib_buffer_get_current (b0);
1027           udp0 = ip4_next_header (ip0);
1028           tcp0 = (tcp_header_t *) udp0;
1029           icmp0 = (icmp46_header_t *) udp0;
1030
1031           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1032           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1033                                    sw_if_index0);
1034
1035           if (PREDICT_FALSE(ip0->ttl == 1))
1036             {
1037               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1038               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1039                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1040                                            0);
1041               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1042               goto trace0;
1043             }
1044
1045           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1046
1047           if (PREDICT_FALSE (proto0 == ~0))
1048             {
1049               s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
1050                                              thread_index, now, vm, node);
1051               if (!s0)
1052                 next0 = SNAT_OUT2IN_NEXT_DROP;
1053               goto trace0;
1054             }
1055
1056           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1057             {
1058               next0 = icmp_out2in_slow_path
1059                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1060                  next0, now, thread_index, &s0);
1061               goto trace0;
1062             }
1063
1064           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
1065             {
1066               next0 = SNAT_OUT2IN_NEXT_REASS;
1067               goto trace0;
1068             }
1069
1070           key0.addr = ip0->dst_address;
1071           key0.port = udp0->dst_port;
1072           key0.protocol = proto0;
1073           key0.fib_index = rx_fib_index0;
1074
1075           kv0.key = key0.as_u64;
1076
1077           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1078                                       &kv0, &value0))
1079             {
1080               /* Try to match static mapping by external address and port,
1081                  destination address and port in packet */
1082               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
1083                 {
1084                   b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1085                   /*
1086                    * Send DHCP packets to the ipv4 stack, or we won't
1087                    * be able to use dhcp client on the outside interface
1088                    */
1089                   if (proto0 != SNAT_PROTOCOL_UDP
1090                       || (udp0->dst_port
1091                           != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1092                     next0 = SNAT_OUT2IN_NEXT_DROP;
1093                   goto trace0;
1094                 }
1095
1096               /* Create session initiated by host from external network */
1097               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1098                                                      thread_index);
1099               if (!s0)
1100                 {
1101                   next0 = SNAT_OUT2IN_NEXT_DROP;
1102                   goto trace0;
1103                 }
1104             }
1105           else
1106             {
1107               if (PREDICT_FALSE (value0.value == ~0ULL))
1108                 {
1109                   s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index,
1110                                       now, vm, node);
1111                   if (!s0)
1112                     next0 = SNAT_OUT2IN_NEXT_DROP;
1113                   goto trace0;
1114                 }
1115               else
1116                 {
1117                   s0 = pool_elt_at_index (
1118                     sm->per_thread_data[thread_index].sessions,
1119                     value0.value);
1120                 }
1121             }
1122
1123           old_addr0 = ip0->dst_address.as_u32;
1124           ip0->dst_address = s0->in2out.addr;
1125           new_addr0 = ip0->dst_address.as_u32;
1126           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1127
1128           sum0 = ip0->checksum;
1129           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1130                                  ip4_header_t,
1131                                  dst_address /* changed member */);
1132           ip0->checksum = ip_csum_fold (sum0);
1133
1134           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1135             {
1136               old_port0 = tcp0->dst_port;
1137               tcp0->dst_port = s0->in2out.port;
1138               new_port0 = tcp0->dst_port;
1139
1140               sum0 = tcp0->checksum;
1141               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1142                                      ip4_header_t,
1143                                      dst_address /* changed member */);
1144
1145               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1146                                      ip4_header_t /* cheat */,
1147                                      length /* changed member */);
1148               tcp0->checksum = ip_csum_fold(sum0);
1149             }
1150           else
1151             {
1152               old_port0 = udp0->dst_port;
1153               udp0->dst_port = s0->in2out.port;
1154               udp0->checksum = 0;
1155             }
1156
1157           /* Accounting */
1158           s0->last_heard = now;
1159           s0->total_pkts++;
1160           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1161           /* Per-user LRU list maintenance for dynamic translation */
1162           if (!snat_is_session_static (s0))
1163             {
1164               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1165                                  s0->per_user_index);
1166               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1167                                   s0->per_user_list_head_index,
1168                                   s0->per_user_index);
1169             }
1170         trace0:
1171
1172           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1173                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1174             {
1175               snat_out2in_trace_t *t =
1176                  vlib_add_trace (vm, node, b0, sizeof (*t));
1177               t->sw_if_index = sw_if_index0;
1178               t->next_index = next0;
1179               t->session_index = ~0;
1180               if (s0)
1181                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1182             }
1183
1184           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1185
1186
1187           ip1 = vlib_buffer_get_current (b1);
1188           udp1 = ip4_next_header (ip1);
1189           tcp1 = (tcp_header_t *) udp1;
1190           icmp1 = (icmp46_header_t *) udp1;
1191
1192           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1193           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1194                                    sw_if_index1);
1195
1196           if (PREDICT_FALSE(ip1->ttl == 1))
1197             {
1198               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1199               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1200                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1201                                            0);
1202               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1203               goto trace1;
1204             }
1205
1206           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1207
1208           if (PREDICT_FALSE (proto1 == ~0))
1209             {
1210               s1 = snat_out2in_unknown_proto(sm, b1, ip1, rx_fib_index1,
1211                                              thread_index, now, vm, node);
1212               if (!s1)
1213                 next1 = SNAT_OUT2IN_NEXT_DROP;
1214               goto trace1;
1215             }
1216
1217           if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1218             {
1219               next1 = icmp_out2in_slow_path
1220                 (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1221                  next1, now, thread_index, &s1);
1222               goto trace1;
1223             }
1224
1225           if (PREDICT_FALSE (ip4_is_fragment (ip1)))
1226             {
1227               next1 = SNAT_OUT2IN_NEXT_REASS;
1228               goto trace1;
1229             }
1230
1231           key1.addr = ip1->dst_address;
1232           key1.port = udp1->dst_port;
1233           key1.protocol = proto1;
1234           key1.fib_index = rx_fib_index1;
1235
1236           kv1.key = key1.as_u64;
1237
1238           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1239                                       &kv1, &value1))
1240             {
1241               /* Try to match static mapping by external address and port,
1242                  destination address and port in packet */
1243               if (snat_static_mapping_match(sm, key1, &sm1, 1, 0))
1244                 {
1245                   b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1246                   /*
1247                    * Send DHCP packets to the ipv4 stack, or we won't
1248                    * be able to use dhcp client on the outside interface
1249                    */
1250                   if (proto1 != SNAT_PROTOCOL_UDP
1251                       || (udp1->dst_port
1252                           != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1253                     next1 = SNAT_OUT2IN_NEXT_DROP;
1254                   goto trace1;
1255                 }
1256
1257               /* Create session initiated by host from external network */
1258               s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
1259                                                      thread_index);
1260               if (!s1)
1261                 {
1262                   next1 = SNAT_OUT2IN_NEXT_DROP;
1263                   goto trace1;
1264                 }
1265             }
1266           else
1267             {
1268               if (PREDICT_FALSE (value1.value == ~0ULL))
1269                 {
1270                   s1 = snat_out2in_lb(sm, b1, ip1, rx_fib_index1, thread_index,
1271                                       now, vm, node);
1272                   if (!s1)
1273                     next1 = SNAT_OUT2IN_NEXT_DROP;
1274                   goto trace1;
1275                 }
1276               else
1277                 {
1278                   s1 = pool_elt_at_index (
1279                     sm->per_thread_data[thread_index].sessions,
1280                     value1.value);
1281                 }
1282             }
1283
1284           old_addr1 = ip1->dst_address.as_u32;
1285           ip1->dst_address = s1->in2out.addr;
1286           new_addr1 = ip1->dst_address.as_u32;
1287           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index;
1288
1289           sum1 = ip1->checksum;
1290           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1291                                  ip4_header_t,
1292                                  dst_address /* changed member */);
1293           ip1->checksum = ip_csum_fold (sum1);
1294
1295           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1296             {
1297               old_port1 = tcp1->dst_port;
1298               tcp1->dst_port = s1->in2out.port;
1299               new_port1 = tcp1->dst_port;
1300
1301               sum1 = tcp1->checksum;
1302               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1303                                      ip4_header_t,
1304                                      dst_address /* changed member */);
1305
1306               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1307                                      ip4_header_t /* cheat */,
1308                                      length /* changed member */);
1309               tcp1->checksum = ip_csum_fold(sum1);
1310             }
1311           else
1312             {
1313               old_port1 = udp1->dst_port;
1314               udp1->dst_port = s1->in2out.port;
1315               udp1->checksum = 0;
1316             }
1317
1318           /* Accounting */
1319           s1->last_heard = now;
1320           s1->total_pkts++;
1321           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1322           /* Per-user LRU list maintenance for dynamic translation */
1323           if (!snat_is_session_static (s1))
1324             {
1325               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1326                                  s1->per_user_index);
1327               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1328                                   s1->per_user_list_head_index,
1329                                   s1->per_user_index);
1330             }
1331         trace1:
1332
1333           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1334                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1335             {
1336               snat_out2in_trace_t *t =
1337                  vlib_add_trace (vm, node, b1, sizeof (*t));
1338               t->sw_if_index = sw_if_index1;
1339               t->next_index = next1;
1340               t->session_index = ~0;
1341               if (s1)
1342                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1343             }
1344
1345           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
1346
1347           /* verify speculative enqueues, maybe switch current next frame */
1348           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1349                                            to_next, n_left_to_next,
1350                                            bi0, bi1, next0, next1);
1351         }
1352
1353       while (n_left_from > 0 && n_left_to_next > 0)
1354         {
1355           u32 bi0;
1356           vlib_buffer_t * b0;
1357           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1358           u32 sw_if_index0;
1359           ip4_header_t * ip0;
1360           ip_csum_t sum0;
1361           u32 new_addr0, old_addr0;
1362           u16 new_port0, old_port0;
1363           udp_header_t * udp0;
1364           tcp_header_t * tcp0;
1365           icmp46_header_t * icmp0;
1366           snat_session_key_t key0, sm0;
1367           u32 rx_fib_index0;
1368           u32 proto0;
1369           snat_session_t * s0 = 0;
1370           clib_bihash_kv_8_8_t kv0, value0;
1371
1372           /* speculatively enqueue b0 to the current next frame */
1373           bi0 = from[0];
1374           to_next[0] = bi0;
1375           from += 1;
1376           to_next += 1;
1377           n_left_from -= 1;
1378           n_left_to_next -= 1;
1379
1380           b0 = vlib_get_buffer (vm, bi0);
1381
1382           vnet_buffer (b0)->snat.flags = 0;
1383
1384           ip0 = vlib_buffer_get_current (b0);
1385           udp0 = ip4_next_header (ip0);
1386           tcp0 = (tcp_header_t *) udp0;
1387           icmp0 = (icmp46_header_t *) udp0;
1388
1389           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1390           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1391                                    sw_if_index0);
1392
1393           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1394
1395           if (PREDICT_FALSE (proto0 == ~0))
1396             {
1397               s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
1398                                              thread_index, now, vm, node);
1399               if (!s0)
1400                 next0 = SNAT_OUT2IN_NEXT_DROP;
1401               goto trace00;
1402             }
1403
1404           if (PREDICT_FALSE(ip0->ttl == 1))
1405             {
1406               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1407               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1408                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1409                                            0);
1410               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1411               goto trace00;
1412             }
1413
1414           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1415             {
1416               next0 = icmp_out2in_slow_path
1417                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1418                  next0, now, thread_index, &s0);
1419               goto trace00;
1420             }
1421
1422           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
1423             {
1424               next0 = SNAT_OUT2IN_NEXT_REASS;
1425               goto trace00;
1426             }
1427
1428           key0.addr = ip0->dst_address;
1429           key0.port = udp0->dst_port;
1430           key0.protocol = proto0;
1431           key0.fib_index = rx_fib_index0;
1432
1433           kv0.key = key0.as_u64;
1434
1435           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1436                                       &kv0, &value0))
1437             {
1438               /* Try to match static mapping by external address and port,
1439                  destination address and port in packet */
1440               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
1441                 {
1442                   b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1443                   /*
1444                    * Send DHCP packets to the ipv4 stack, or we won't
1445                    * be able to use dhcp client on the outside interface
1446                    */
1447                   if (proto0 != SNAT_PROTOCOL_UDP
1448                       || (udp0->dst_port
1449                           != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1450
1451                     next0 = SNAT_OUT2IN_NEXT_DROP;
1452                   goto trace00;
1453                 }
1454
1455               /* Create session initiated by host from external network */
1456               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1457                                                      thread_index);
1458               if (!s0)
1459                 {
1460                   next0 = SNAT_OUT2IN_NEXT_DROP;
1461                   goto trace00;
1462                 }
1463             }
1464           else
1465             {
1466               if (PREDICT_FALSE (value0.value == ~0ULL))
1467                 {
1468                   s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index,
1469                                       now, vm, node);
1470                   if (!s0)
1471                     next0 = SNAT_OUT2IN_NEXT_DROP;
1472                   goto trace00;
1473                 }
1474               else
1475                 {
1476                   s0 = pool_elt_at_index (
1477                     sm->per_thread_data[thread_index].sessions,
1478                     value0.value);
1479                 }
1480             }
1481
1482           old_addr0 = ip0->dst_address.as_u32;
1483           ip0->dst_address = s0->in2out.addr;
1484           new_addr0 = ip0->dst_address.as_u32;
1485           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1486
1487           sum0 = ip0->checksum;
1488           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1489                                  ip4_header_t,
1490                                  dst_address /* changed member */);
1491           ip0->checksum = ip_csum_fold (sum0);
1492
1493           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1494             {
1495               old_port0 = tcp0->dst_port;
1496               tcp0->dst_port = s0->in2out.port;
1497               new_port0 = tcp0->dst_port;
1498
1499               sum0 = tcp0->checksum;
1500               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1501                                      ip4_header_t,
1502                                      dst_address /* changed member */);
1503
1504               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1505                                      ip4_header_t /* cheat */,
1506                                      length /* changed member */);
1507               tcp0->checksum = ip_csum_fold(sum0);
1508             }
1509           else
1510             {
1511               old_port0 = udp0->dst_port;
1512               udp0->dst_port = s0->in2out.port;
1513               udp0->checksum = 0;
1514             }
1515
1516           /* Accounting */
1517           s0->last_heard = now;
1518           s0->total_pkts++;
1519           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1520           /* Per-user LRU list maintenance for dynamic translation */
1521           if (!snat_is_session_static (s0))
1522             {
1523               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1524                                  s0->per_user_index);
1525               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1526                                   s0->per_user_list_head_index,
1527                                   s0->per_user_index);
1528             }
1529         trace00:
1530
1531           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1532                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1533             {
1534               snat_out2in_trace_t *t =
1535                  vlib_add_trace (vm, node, b0, sizeof (*t));
1536               t->sw_if_index = sw_if_index0;
1537               t->next_index = next0;
1538               t->session_index = ~0;
1539               if (s0)
1540                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1541             }
1542
1543           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1544
1545           /* verify speculative enqueue, maybe switch current next frame */
1546           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1547                                            to_next, n_left_to_next,
1548                                            bi0, next0);
1549         }
1550
1551       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1552     }
1553
1554   vlib_node_increment_counter (vm, snat_out2in_node.index,
1555                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1556                                pkts_processed);
1557   return frame->n_vectors;
1558 }
1559
1560 VLIB_REGISTER_NODE (snat_out2in_node) = {
1561   .function = snat_out2in_node_fn,
1562   .name = "nat44-out2in",
1563   .vector_size = sizeof (u32),
1564   .format_trace = format_snat_out2in_trace,
1565   .type = VLIB_NODE_TYPE_INTERNAL,
1566
1567   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1568   .error_strings = snat_out2in_error_strings,
1569
1570   .runtime_data_bytes = sizeof (snat_runtime_t),
1571
1572   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1573
1574   /* edit / add dispositions here */
1575   .next_nodes = {
1576     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1577     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1578     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1579     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1580   },
1581 };
1582 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
1583
1584 static uword
1585 nat44_out2in_reass_node_fn (vlib_main_t * vm,
1586                             vlib_node_runtime_t * node,
1587                             vlib_frame_t * frame)
1588 {
1589   u32 n_left_from, *from, *to_next;
1590   snat_out2in_next_t next_index;
1591   u32 pkts_processed = 0;
1592   snat_main_t *sm = &snat_main;
1593   f64 now = vlib_time_now (vm);
1594   u32 thread_index = vlib_get_thread_index ();
1595   snat_main_per_thread_data_t *per_thread_data =
1596     &sm->per_thread_data[thread_index];
1597   u32 *fragments_to_drop = 0;
1598   u32 *fragments_to_loopback = 0;
1599
1600   from = vlib_frame_vector_args (frame);
1601   n_left_from = frame->n_vectors;
1602   next_index = node->cached_next_index;
1603
1604   while (n_left_from > 0)
1605     {
1606       u32 n_left_to_next;
1607
1608       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1609
1610       while (n_left_from > 0 && n_left_to_next > 0)
1611        {
1612           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
1613           vlib_buffer_t *b0;
1614           u32 next0;
1615           u8 cached0 = 0;
1616           ip4_header_t *ip0;
1617           nat_reass_ip4_t *reass0;
1618           udp_header_t * udp0;
1619           tcp_header_t * tcp0;
1620           snat_session_key_t key0, sm0;
1621           clib_bihash_kv_8_8_t kv0, value0;
1622           snat_session_t * s0 = 0;
1623           u16 old_port0, new_port0;
1624           ip_csum_t sum0;
1625
1626           /* speculatively enqueue b0 to the current next frame */
1627           bi0 = from[0];
1628           to_next[0] = bi0;
1629           from += 1;
1630           to_next += 1;
1631           n_left_from -= 1;
1632           n_left_to_next -= 1;
1633
1634           b0 = vlib_get_buffer (vm, bi0);
1635           next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1636
1637           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1638           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1639                                                                sw_if_index0);
1640
1641           if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
1642             {
1643               next0 = SNAT_OUT2IN_NEXT_DROP;
1644               b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
1645               goto trace0;
1646             }
1647
1648           ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
1649           udp0 = ip4_next_header (ip0);
1650           tcp0 = (tcp_header_t *) udp0;
1651           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1652
1653           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
1654                                                  ip0->dst_address,
1655                                                  ip0->fragment_id,
1656                                                  ip0->protocol,
1657                                                  1,
1658                                                  &fragments_to_drop);
1659
1660           if (PREDICT_FALSE (!reass0))
1661             {
1662               next0 = SNAT_OUT2IN_NEXT_DROP;
1663               b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_REASS];
1664               goto trace0;
1665             }
1666
1667           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1668             {
1669               key0.addr = ip0->dst_address;
1670               key0.port = udp0->dst_port;
1671               key0.protocol = proto0;
1672               key0.fib_index = rx_fib_index0;
1673               kv0.key = key0.as_u64;
1674
1675               if (clib_bihash_search_8_8 (&per_thread_data->out2in, &kv0, &value0))
1676                 {
1677                   /* Try to match static mapping by external address and port,
1678                      destination address and port in packet */
1679                   if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
1680                     {
1681                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1682                       /*
1683                        * Send DHCP packets to the ipv4 stack, or we won't
1684                        * be able to use dhcp client on the outside interface
1685                        */
1686                       if (proto0 != SNAT_PROTOCOL_UDP
1687                           || (udp0->dst_port
1688                               != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1689
1690                         next0 = SNAT_OUT2IN_NEXT_DROP;
1691                       goto trace0;
1692                     }
1693
1694                   /* Create session initiated by host from external network */
1695                   s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1696                                                          thread_index);
1697                   if (!s0)
1698                     {
1699                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1700                       next0 = SNAT_OUT2IN_NEXT_DROP;
1701                       goto trace0;
1702                     }
1703                   reass0->sess_index = s0 - per_thread_data->sessions;
1704                 }
1705               else
1706                 {
1707                   s0 = pool_elt_at_index (per_thread_data->sessions,
1708                                           value0.value);
1709                   reass0->sess_index = value0.value;
1710                 }
1711               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
1712             }
1713           else
1714             {
1715               if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
1716                 {
1717                   if (nat_ip4_reass_add_fragment (reass0, bi0))
1718                     {
1719                       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_FRAG];
1720                       next0 = SNAT_OUT2IN_NEXT_DROP;
1721                       goto trace0;
1722                     }
1723                   cached0 = 1;
1724                   goto trace0;
1725                 }
1726               s0 = pool_elt_at_index (per_thread_data->sessions,
1727                                       reass0->sess_index);
1728             }
1729
1730           old_addr0 = ip0->dst_address.as_u32;
1731           ip0->dst_address = s0->in2out.addr;
1732           new_addr0 = ip0->dst_address.as_u32;
1733           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1734
1735           sum0 = ip0->checksum;
1736           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1737                                  ip4_header_t,
1738                                  dst_address /* changed member */);
1739           ip0->checksum = ip_csum_fold (sum0);
1740
1741           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1742             {
1743               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1744                 {
1745                   old_port0 = tcp0->dst_port;
1746                   tcp0->dst_port = s0->in2out.port;
1747                   new_port0 = tcp0->dst_port;
1748
1749                   sum0 = tcp0->checksum;
1750                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1751                                          ip4_header_t,
1752                                          dst_address /* changed member */);
1753
1754                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
1755                                          ip4_header_t /* cheat */,
1756                                          length /* changed member */);
1757                   tcp0->checksum = ip_csum_fold(sum0);
1758                 }
1759               else
1760                 {
1761                   old_port0 = udp0->dst_port;
1762                   udp0->dst_port = s0->in2out.port;
1763                   udp0->checksum = 0;
1764                 }
1765             }
1766
1767           /* Accounting */
1768           s0->last_heard = now;
1769           s0->total_pkts++;
1770           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1771           /* Per-user LRU list maintenance for dynamic translation */
1772           if (!snat_is_session_static (s0))
1773             {
1774               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1775                                  s0->per_user_index);
1776               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1777                                   s0->per_user_list_head_index,
1778                                   s0->per_user_index);
1779             }
1780
1781         trace0:
1782           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1783                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1784             {
1785               nat44_out2in_reass_trace_t *t =
1786                  vlib_add_trace (vm, node, b0, sizeof (*t));
1787               t->cached = cached0;
1788               t->sw_if_index = sw_if_index0;
1789               t->next_index = next0;
1790             }
1791
1792           if (cached0)
1793             {
1794               n_left_to_next++;
1795               to_next--;
1796             }
1797           else
1798             {
1799               pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1800
1801               /* verify speculative enqueue, maybe switch current next frame */
1802               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1803                                                to_next, n_left_to_next,
1804                                                bi0, next0);
1805             }
1806
1807           if (n_left_from == 0 && vec_len (fragments_to_loopback))
1808             {
1809               from = vlib_frame_vector_args (frame);
1810               u32 len = vec_len (fragments_to_loopback);
1811               if (len <= VLIB_FRAME_SIZE)
1812                 {
1813                   clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
1814                   n_left_from = len;
1815                   vec_reset_length (fragments_to_loopback);
1816                 }
1817               else
1818                 {
1819                   clib_memcpy (from,
1820                                fragments_to_loopback + (len - VLIB_FRAME_SIZE),
1821                                sizeof (u32) * VLIB_FRAME_SIZE);
1822                   n_left_from = VLIB_FRAME_SIZE;
1823                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
1824                 }
1825             }
1826        }
1827
1828       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1829     }
1830
1831   vlib_node_increment_counter (vm, nat44_out2in_reass_node.index,
1832                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1833                                pkts_processed);
1834
1835   nat_send_all_to_node (vm, fragments_to_drop, node,
1836                         &node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT],
1837                         SNAT_OUT2IN_NEXT_DROP);
1838
1839   vec_free (fragments_to_drop);
1840   vec_free (fragments_to_loopback);
1841   return frame->n_vectors;
1842 }
1843
1844 VLIB_REGISTER_NODE (nat44_out2in_reass_node) = {
1845   .function = nat44_out2in_reass_node_fn,
1846   .name = "nat44-out2in-reass",
1847   .vector_size = sizeof (u32),
1848   .format_trace = format_nat44_out2in_reass_trace,
1849   .type = VLIB_NODE_TYPE_INTERNAL,
1850
1851   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1852   .error_strings = snat_out2in_error_strings,
1853
1854   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1855
1856   /* edit / add dispositions here */
1857   .next_nodes = {
1858     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1859     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1860     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1861     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1862   },
1863 };
1864 VLIB_NODE_FUNCTION_MULTIARCH (nat44_out2in_reass_node,
1865                               nat44_out2in_reass_node_fn);
1866
1867 /**************************/
1868 /*** deterministic mode ***/
1869 /**************************/
1870 static uword
1871 snat_det_out2in_node_fn (vlib_main_t * vm,
1872                          vlib_node_runtime_t * node,
1873                          vlib_frame_t * frame)
1874 {
1875   u32 n_left_from, * from, * to_next;
1876   snat_out2in_next_t next_index;
1877   u32 pkts_processed = 0;
1878   snat_main_t * sm = &snat_main;
1879   u32 thread_index = vlib_get_thread_index ();
1880
1881   from = vlib_frame_vector_args (frame);
1882   n_left_from = frame->n_vectors;
1883   next_index = node->cached_next_index;
1884
1885   while (n_left_from > 0)
1886     {
1887       u32 n_left_to_next;
1888
1889       vlib_get_next_frame (vm, node, next_index,
1890                            to_next, n_left_to_next);
1891
1892       while (n_left_from >= 4 && n_left_to_next >= 2)
1893         {
1894           u32 bi0, bi1;
1895           vlib_buffer_t * b0, * b1;
1896           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1897           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
1898           u32 sw_if_index0, sw_if_index1;
1899           ip4_header_t * ip0, * ip1;
1900           ip_csum_t sum0, sum1;
1901           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
1902           u16 new_port0, old_port0, old_port1, new_port1;
1903           udp_header_t * udp0, * udp1;
1904           tcp_header_t * tcp0, * tcp1;
1905           u32 proto0, proto1;
1906           snat_det_out_key_t key0, key1;
1907           snat_det_map_t * dm0, * dm1;
1908           snat_det_session_t * ses0 = 0, * ses1 = 0;
1909           u32 rx_fib_index0, rx_fib_index1;
1910           icmp46_header_t * icmp0, * icmp1;
1911
1912           /* Prefetch next iteration. */
1913           {
1914             vlib_buffer_t * p2, * p3;
1915
1916             p2 = vlib_get_buffer (vm, from[2]);
1917             p3 = vlib_get_buffer (vm, from[3]);
1918
1919             vlib_prefetch_buffer_header (p2, LOAD);
1920             vlib_prefetch_buffer_header (p3, LOAD);
1921
1922             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1923             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1924           }
1925
1926           /* speculatively enqueue b0 and b1 to the current next frame */
1927           to_next[0] = bi0 = from[0];
1928           to_next[1] = bi1 = from[1];
1929           from += 2;
1930           to_next += 2;
1931           n_left_from -= 2;
1932           n_left_to_next -= 2;
1933
1934           b0 = vlib_get_buffer (vm, bi0);
1935           b1 = vlib_get_buffer (vm, bi1);
1936
1937           ip0 = vlib_buffer_get_current (b0);
1938           udp0 = ip4_next_header (ip0);
1939           tcp0 = (tcp_header_t *) udp0;
1940
1941           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1942
1943           if (PREDICT_FALSE(ip0->ttl == 1))
1944             {
1945               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1946               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1947                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1948                                            0);
1949               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1950               goto trace0;
1951             }
1952
1953           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1954
1955           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1956             {
1957               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1958               icmp0 = (icmp46_header_t *) udp0;
1959
1960               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
1961                                   rx_fib_index0, node, next0, thread_index,
1962                                   &ses0, &dm0);
1963               goto trace0;
1964             }
1965
1966           key0.ext_host_addr = ip0->src_address;
1967           key0.ext_host_port = tcp0->src;
1968           key0.out_port = tcp0->dst;
1969
1970           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
1971           if (PREDICT_FALSE(!dm0))
1972             {
1973               clib_warning("unknown dst address:  %U",
1974                            format_ip4_address, &ip0->dst_address);
1975               next0 = SNAT_OUT2IN_NEXT_DROP;
1976               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1977               goto trace0;
1978             }
1979
1980           snat_det_reverse(dm0, &ip0->dst_address,
1981                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
1982
1983           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
1984           if (PREDICT_FALSE(!ses0))
1985             {
1986               clib_warning("no match src %U:%d dst %U:%d for user %U",
1987                            format_ip4_address, &ip0->src_address,
1988                            clib_net_to_host_u16 (tcp0->src),
1989                            format_ip4_address, &ip0->dst_address,
1990                            clib_net_to_host_u16 (tcp0->dst),
1991                            format_ip4_address, &new_addr0);
1992               next0 = SNAT_OUT2IN_NEXT_DROP;
1993               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1994               goto trace0;
1995             }
1996           new_port0 = ses0->in_port;
1997
1998           old_addr0 = ip0->dst_address;
1999           ip0->dst_address = new_addr0;
2000           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2001
2002           sum0 = ip0->checksum;
2003           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2004                                  ip4_header_t,
2005                                  dst_address /* changed member */);
2006           ip0->checksum = ip_csum_fold (sum0);
2007
2008           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2009             {
2010               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2011                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2012               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
2013                 snat_det_ses_close(dm0, ses0);
2014
2015               old_port0 = tcp0->dst;
2016               tcp0->dst = new_port0;
2017
2018               sum0 = tcp0->checksum;
2019               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2020                                      ip4_header_t,
2021                                      dst_address /* changed member */);
2022
2023               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2024                                      ip4_header_t /* cheat */,
2025                                      length /* changed member */);
2026               tcp0->checksum = ip_csum_fold(sum0);
2027             }
2028           else
2029             {
2030               old_port0 = udp0->dst_port;
2031               udp0->dst_port = new_port0;
2032               udp0->checksum = 0;
2033             }
2034
2035         trace0:
2036
2037           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2038                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2039             {
2040               snat_out2in_trace_t *t =
2041                  vlib_add_trace (vm, node, b0, sizeof (*t));
2042               t->sw_if_index = sw_if_index0;
2043               t->next_index = next0;
2044               t->session_index = ~0;
2045               if (ses0)
2046                 t->session_index = ses0 - dm0->sessions;
2047             }
2048
2049           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2050
2051           b1 = vlib_get_buffer (vm, bi1);
2052
2053           ip1 = vlib_buffer_get_current (b1);
2054           udp1 = ip4_next_header (ip1);
2055           tcp1 = (tcp_header_t *) udp1;
2056
2057           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2058
2059           if (PREDICT_FALSE(ip1->ttl == 1))
2060             {
2061               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2062               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2063                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2064                                            0);
2065               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2066               goto trace1;
2067             }
2068
2069           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2070
2071           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
2072             {
2073               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
2074               icmp1 = (icmp46_header_t *) udp1;
2075
2076               next1 = icmp_out2in(sm, b1, ip1, icmp1, sw_if_index1,
2077                                   rx_fib_index1, node, next1, thread_index,
2078                                   &ses1, &dm1);
2079               goto trace1;
2080             }
2081
2082           key1.ext_host_addr = ip1->src_address;
2083           key1.ext_host_port = tcp1->src;
2084           key1.out_port = tcp1->dst;
2085
2086           dm1 = snat_det_map_by_out(sm, &ip1->dst_address);
2087           if (PREDICT_FALSE(!dm1))
2088             {
2089               clib_warning("unknown dst address:  %U",
2090                            format_ip4_address, &ip1->dst_address);
2091               next1 = SNAT_OUT2IN_NEXT_DROP;
2092               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2093               goto trace1;
2094             }
2095
2096           snat_det_reverse(dm1, &ip1->dst_address,
2097                            clib_net_to_host_u16(tcp1->dst), &new_addr1);
2098
2099           ses1 = snat_det_get_ses_by_out (dm1, &new_addr1, key1.as_u64);
2100           if (PREDICT_FALSE(!ses1))
2101             {
2102               clib_warning("no match src %U:%d dst %U:%d for user %U",
2103                            format_ip4_address, &ip1->src_address,
2104                            clib_net_to_host_u16 (tcp1->src),
2105                            format_ip4_address, &ip1->dst_address,
2106                            clib_net_to_host_u16 (tcp1->dst),
2107                            format_ip4_address, &new_addr1);
2108               next1 = SNAT_OUT2IN_NEXT_DROP;
2109               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2110               goto trace1;
2111             }
2112           new_port1 = ses1->in_port;
2113
2114           old_addr1 = ip1->dst_address;
2115           ip1->dst_address = new_addr1;
2116           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2117
2118           sum1 = ip1->checksum;
2119           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2120                                  ip4_header_t,
2121                                  dst_address /* changed member */);
2122           ip1->checksum = ip_csum_fold (sum1);
2123
2124           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
2125             {
2126               if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
2127                 ses1->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2128               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_LAST_ACK)
2129                 snat_det_ses_close(dm1, ses1);
2130
2131               old_port1 = tcp1->dst;
2132               tcp1->dst = new_port1;
2133
2134               sum1 = tcp1->checksum;
2135               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2136                                      ip4_header_t,
2137                                      dst_address /* changed member */);
2138
2139               sum1 = ip_csum_update (sum1, old_port1, new_port1,
2140                                      ip4_header_t /* cheat */,
2141                                      length /* changed member */);
2142               tcp1->checksum = ip_csum_fold(sum1);
2143             }
2144           else
2145             {
2146               old_port1 = udp1->dst_port;
2147               udp1->dst_port = new_port1;
2148               udp1->checksum = 0;
2149             }
2150
2151         trace1:
2152
2153           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2154                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2155             {
2156               snat_out2in_trace_t *t =
2157                  vlib_add_trace (vm, node, b1, sizeof (*t));
2158               t->sw_if_index = sw_if_index1;
2159               t->next_index = next1;
2160               t->session_index = ~0;
2161               if (ses1)
2162                 t->session_index = ses1 - dm1->sessions;
2163             }
2164
2165           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
2166
2167           /* verify speculative enqueues, maybe switch current next frame */
2168           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2169                                            to_next, n_left_to_next,
2170                                            bi0, bi1, next0, next1);
2171          }
2172
2173       while (n_left_from > 0 && n_left_to_next > 0)
2174         {
2175           u32 bi0;
2176           vlib_buffer_t * b0;
2177           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
2178           u32 sw_if_index0;
2179           ip4_header_t * ip0;
2180           ip_csum_t sum0;
2181           ip4_address_t new_addr0, old_addr0;
2182           u16 new_port0, old_port0;
2183           udp_header_t * udp0;
2184           tcp_header_t * tcp0;
2185           u32 proto0;
2186           snat_det_out_key_t key0;
2187           snat_det_map_t * dm0;
2188           snat_det_session_t * ses0 = 0;
2189           u32 rx_fib_index0;
2190           icmp46_header_t * icmp0;
2191
2192           /* speculatively enqueue b0 to the current next frame */
2193           bi0 = from[0];
2194           to_next[0] = bi0;
2195           from += 1;
2196           to_next += 1;
2197           n_left_from -= 1;
2198           n_left_to_next -= 1;
2199
2200           b0 = vlib_get_buffer (vm, bi0);
2201
2202           ip0 = vlib_buffer_get_current (b0);
2203           udp0 = ip4_next_header (ip0);
2204           tcp0 = (tcp_header_t *) udp0;
2205
2206           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2207
2208           if (PREDICT_FALSE(ip0->ttl == 1))
2209             {
2210               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2211               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2212                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2213                                            0);
2214               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2215               goto trace00;
2216             }
2217
2218           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2219
2220           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2221             {
2222               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2223               icmp0 = (icmp46_header_t *) udp0;
2224
2225               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2226                                   rx_fib_index0, node, next0, thread_index,
2227                                   &ses0, &dm0);
2228               goto trace00;
2229             }
2230
2231           key0.ext_host_addr = ip0->src_address;
2232           key0.ext_host_port = tcp0->src;
2233           key0.out_port = tcp0->dst;
2234
2235           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
2236           if (PREDICT_FALSE(!dm0))
2237             {
2238               clib_warning("unknown dst address:  %U",
2239                            format_ip4_address, &ip0->dst_address);
2240               next0 = SNAT_OUT2IN_NEXT_DROP;
2241               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2242               goto trace00;
2243             }
2244
2245           snat_det_reverse(dm0, &ip0->dst_address,
2246                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
2247
2248           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
2249           if (PREDICT_FALSE(!ses0))
2250             {
2251               clib_warning("no match src %U:%d dst %U:%d for user %U",
2252                            format_ip4_address, &ip0->src_address,
2253                            clib_net_to_host_u16 (tcp0->src),
2254                            format_ip4_address, &ip0->dst_address,
2255                            clib_net_to_host_u16 (tcp0->dst),
2256                            format_ip4_address, &new_addr0);
2257               next0 = SNAT_OUT2IN_NEXT_DROP;
2258               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2259               goto trace00;
2260             }
2261           new_port0 = ses0->in_port;
2262
2263           old_addr0 = ip0->dst_address;
2264           ip0->dst_address = new_addr0;
2265           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2266
2267           sum0 = ip0->checksum;
2268           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2269                                  ip4_header_t,
2270                                  dst_address /* changed member */);
2271           ip0->checksum = ip_csum_fold (sum0);
2272
2273           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2274             {
2275               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2276                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2277               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
2278                 snat_det_ses_close(dm0, ses0);
2279
2280               old_port0 = tcp0->dst;
2281               tcp0->dst = new_port0;
2282
2283               sum0 = tcp0->checksum;
2284               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2285                                      ip4_header_t,
2286                                      dst_address /* changed member */);
2287
2288               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2289                                      ip4_header_t /* cheat */,
2290                                      length /* changed member */);
2291               tcp0->checksum = ip_csum_fold(sum0);
2292             }
2293           else
2294             {
2295               old_port0 = udp0->dst_port;
2296               udp0->dst_port = new_port0;
2297               udp0->checksum = 0;
2298             }
2299
2300         trace00:
2301
2302           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2303                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2304             {
2305               snat_out2in_trace_t *t =
2306                  vlib_add_trace (vm, node, b0, sizeof (*t));
2307               t->sw_if_index = sw_if_index0;
2308               t->next_index = next0;
2309               t->session_index = ~0;
2310               if (ses0)
2311                 t->session_index = ses0 - dm0->sessions;
2312             }
2313
2314           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2315
2316           /* verify speculative enqueue, maybe switch current next frame */
2317           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2318                                            to_next, n_left_to_next,
2319                                            bi0, next0);
2320         }
2321
2322       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2323     }
2324
2325   vlib_node_increment_counter (vm, snat_det_out2in_node.index,
2326                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2327                                pkts_processed);
2328   return frame->n_vectors;
2329 }
2330
2331 VLIB_REGISTER_NODE (snat_det_out2in_node) = {
2332   .function = snat_det_out2in_node_fn,
2333   .name = "nat44-det-out2in",
2334   .vector_size = sizeof (u32),
2335   .format_trace = format_snat_out2in_trace,
2336   .type = VLIB_NODE_TYPE_INTERNAL,
2337
2338   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2339   .error_strings = snat_out2in_error_strings,
2340
2341   .runtime_data_bytes = sizeof (snat_runtime_t),
2342
2343   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
2344
2345   /* edit / add dispositions here */
2346   .next_nodes = {
2347     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
2348     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2349     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2350     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
2351   },
2352 };
2353 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn);
2354
2355 /**
2356  * Get address and port values to be used for ICMP packet translation
2357  * and create session if needed
2358  *
2359  * @param[in,out] sm             NAT main
2360  * @param[in,out] node           NAT node runtime
2361  * @param[in] thread_index       thread index
2362  * @param[in,out] b0             buffer containing packet to be translated
2363  * @param[out] p_proto           protocol used for matching
2364  * @param[out] p_value           address and port after NAT translation
2365  * @param[out] p_dont_translate  if packet should not be translated
2366  * @param d                      optional parameter
2367  * @param e                      optional parameter
2368  */
2369 u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node,
2370                           u32 thread_index, vlib_buffer_t *b0,
2371                           ip4_header_t *ip0, u8 *p_proto,
2372                           snat_session_key_t *p_value,
2373                           u8 *p_dont_translate, void *d, void *e)
2374 {
2375   icmp46_header_t *icmp0;
2376   u32 sw_if_index0;
2377   u8 protocol;
2378   snat_det_out_key_t key0;
2379   u8 dont_translate = 0;
2380   u32 next0 = ~0;
2381   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2382   ip4_header_t *inner_ip0;
2383   void *l4_header = 0;
2384   icmp46_header_t *inner_icmp0;
2385   snat_det_map_t * dm0 = 0;
2386   ip4_address_t new_addr0 = {{0}};
2387   snat_det_session_t * ses0 = 0;
2388   ip4_address_t out_addr;
2389
2390   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2391   echo0 = (icmp_echo_header_t *)(icmp0+1);
2392   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2393
2394   if (!icmp_is_error_message (icmp0))
2395     {
2396       protocol = SNAT_PROTOCOL_ICMP;
2397       key0.ext_host_addr = ip0->src_address;
2398       key0.ext_host_port = 0;
2399       key0.out_port = echo0->identifier;
2400       out_addr = ip0->dst_address;
2401     }
2402   else
2403     {
2404       inner_ip0 = (ip4_header_t *)(echo0+1);
2405       l4_header = ip4_next_header (inner_ip0);
2406       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2407       key0.ext_host_addr = inner_ip0->dst_address;
2408       out_addr = inner_ip0->src_address;
2409       switch (protocol)
2410         {
2411         case SNAT_PROTOCOL_ICMP:
2412           inner_icmp0 = (icmp46_header_t*)l4_header;
2413           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2414           key0.ext_host_port = 0;
2415           key0.out_port = inner_echo0->identifier;
2416           break;
2417         case SNAT_PROTOCOL_UDP:
2418         case SNAT_PROTOCOL_TCP:
2419           key0.ext_host_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2420           key0.out_port = ((tcp_udp_header_t*)l4_header)->src_port;
2421           break;
2422         default:
2423           b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
2424           next0 = SNAT_OUT2IN_NEXT_DROP;
2425           goto out;
2426         }
2427     }
2428
2429   dm0 = snat_det_map_by_out(sm, &out_addr);
2430   if (PREDICT_FALSE(!dm0))
2431     {
2432       /* Don't NAT packet aimed at the intfc address */
2433       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
2434                                           ip0->dst_address.as_u32)))
2435         {
2436           dont_translate = 1;
2437           goto out;
2438         }
2439       clib_warning("unknown dst address:  %U",
2440                    format_ip4_address, &ip0->dst_address);
2441       goto out;
2442     }
2443
2444   snat_det_reverse(dm0, &ip0->dst_address,
2445                    clib_net_to_host_u16(key0.out_port), &new_addr0);
2446
2447   ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
2448   if (PREDICT_FALSE(!ses0))
2449     {
2450       /* Don't NAT packet aimed at the intfc address */
2451       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
2452                                           ip0->dst_address.as_u32)))
2453         {
2454           dont_translate = 1;
2455           goto out;
2456         }
2457       clib_warning("no match src %U:%d dst %U:%d for user %U",
2458                    format_ip4_address, &key0.ext_host_addr,
2459                    clib_net_to_host_u16 (key0.ext_host_port),
2460                    format_ip4_address, &out_addr,
2461                    clib_net_to_host_u16 (key0.out_port),
2462                    format_ip4_address, &new_addr0);
2463       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2464       next0 = SNAT_OUT2IN_NEXT_DROP;
2465       goto out;
2466     }
2467
2468   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
2469                     !icmp_is_error_message (icmp0)))
2470     {
2471       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
2472       next0 = SNAT_OUT2IN_NEXT_DROP;
2473       goto out;
2474     }
2475
2476   goto out;
2477
2478 out:
2479   *p_proto = protocol;
2480   if (ses0)
2481     {
2482       p_value->addr = new_addr0;
2483       p_value->fib_index = sm->inside_fib_index;
2484       p_value->port = ses0->in_port;
2485     }
2486   *p_dont_translate = dont_translate;
2487   if (d)
2488     *(snat_det_session_t**)d = ses0;
2489   if (e)
2490     *(snat_det_map_t**)e = dm0;
2491   return next0;
2492 }
2493
2494 /**********************/
2495 /*** worker handoff ***/
2496 /**********************/
2497 static uword
2498 snat_out2in_worker_handoff_fn (vlib_main_t * vm,
2499                                vlib_node_runtime_t * node,
2500                                vlib_frame_t * frame)
2501 {
2502   snat_main_t *sm = &snat_main;
2503   vlib_thread_main_t *tm = vlib_get_thread_main ();
2504   u32 n_left_from, *from, *to_next = 0;
2505   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
2506   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
2507     = 0;
2508   vlib_frame_queue_elt_t *hf = 0;
2509   vlib_frame_t *f = 0;
2510   int i;
2511   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
2512   u32 next_worker_index = 0;
2513   u32 current_worker_index = ~0;
2514   u32 thread_index = vlib_get_thread_index ();
2515
2516   ASSERT (vec_len (sm->workers));
2517
2518   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
2519     {
2520       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
2521
2522       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
2523                                sm->first_worker_index + sm->num_workers - 1,
2524                                (vlib_frame_queue_t *) (~0));
2525     }
2526
2527   from = vlib_frame_vector_args (frame);
2528   n_left_from = frame->n_vectors;
2529
2530   while (n_left_from > 0)
2531     {
2532       u32 bi0;
2533       vlib_buffer_t *b0;
2534       u32 sw_if_index0;
2535       u32 rx_fib_index0;
2536       ip4_header_t * ip0;
2537       u8 do_handoff;
2538
2539       bi0 = from[0];
2540       from += 1;
2541       n_left_from -= 1;
2542
2543       b0 = vlib_get_buffer (vm, bi0);
2544
2545       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
2546       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2547
2548       ip0 = vlib_buffer_get_current (b0);
2549
2550       next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0);
2551
2552       if (PREDICT_FALSE (next_worker_index != thread_index))
2553         {
2554           do_handoff = 1;
2555
2556           if (next_worker_index != current_worker_index)
2557             {
2558               if (hf)
2559                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2560
2561               hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index,
2562                                                       next_worker_index,
2563                                                       handoff_queue_elt_by_worker_index);
2564
2565               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2566               to_next_worker = &hf->buffer_index[hf->n_vectors];
2567               current_worker_index = next_worker_index;
2568             }
2569
2570           /* enqueue to correct worker thread */
2571           to_next_worker[0] = bi0;
2572           to_next_worker++;
2573           n_left_to_next_worker--;
2574
2575           if (n_left_to_next_worker == 0)
2576             {
2577               hf->n_vectors = VLIB_FRAME_SIZE;
2578               vlib_put_frame_queue_elt (hf);
2579               current_worker_index = ~0;
2580               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2581               hf = 0;
2582             }
2583         }
2584       else
2585         {
2586           do_handoff = 0;
2587           /* if this is 1st frame */
2588           if (!f)
2589             {
2590               f = vlib_get_frame_to_node (vm, sm->out2in_node_index);
2591               to_next = vlib_frame_vector_args (f);
2592             }
2593
2594           to_next[0] = bi0;
2595           to_next += 1;
2596           f->n_vectors++;
2597         }
2598
2599       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2600                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2601         {
2602           snat_out2in_worker_handoff_trace_t *t =
2603             vlib_add_trace (vm, node, b0, sizeof (*t));
2604           t->next_worker_index = next_worker_index;
2605           t->do_handoff = do_handoff;
2606         }
2607     }
2608
2609   if (f)
2610     vlib_put_frame_to_node (vm, sm->out2in_node_index, f);
2611
2612   if (hf)
2613     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2614
2615   /* Ship frames to the worker nodes */
2616   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2617     {
2618       if (handoff_queue_elt_by_worker_index[i])
2619         {
2620           hf = handoff_queue_elt_by_worker_index[i];
2621           /*
2622            * It works better to let the handoff node
2623            * rate-adapt, always ship the handoff queue element.
2624            */
2625           if (1 || hf->n_vectors == hf->last_n_vectors)
2626             {
2627               vlib_put_frame_queue_elt (hf);
2628               handoff_queue_elt_by_worker_index[i] = 0;
2629             }
2630           else
2631             hf->last_n_vectors = hf->n_vectors;
2632         }
2633       congested_handoff_queue_by_worker_index[i] =
2634         (vlib_frame_queue_t *) (~0);
2635     }
2636   hf = 0;
2637   current_worker_index = ~0;
2638   return frame->n_vectors;
2639 }
2640
2641 VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = {
2642   .function = snat_out2in_worker_handoff_fn,
2643   .name = "nat44-out2in-worker-handoff",
2644   .vector_size = sizeof (u32),
2645   .format_trace = format_snat_out2in_worker_handoff_trace,
2646   .type = VLIB_NODE_TYPE_INTERNAL,
2647
2648   .n_next_nodes = 1,
2649
2650   .next_nodes = {
2651     [0] = "error-drop",
2652   },
2653 };
2654
2655 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn);
2656
2657 static uword
2658 snat_out2in_fast_node_fn (vlib_main_t * vm,
2659                           vlib_node_runtime_t * node,
2660                           vlib_frame_t * frame)
2661 {
2662   u32 n_left_from, * from, * to_next;
2663   snat_out2in_next_t next_index;
2664   u32 pkts_processed = 0;
2665   snat_main_t * sm = &snat_main;
2666
2667   from = vlib_frame_vector_args (frame);
2668   n_left_from = frame->n_vectors;
2669   next_index = node->cached_next_index;
2670
2671   while (n_left_from > 0)
2672     {
2673       u32 n_left_to_next;
2674
2675       vlib_get_next_frame (vm, node, next_index,
2676                            to_next, n_left_to_next);
2677
2678       while (n_left_from > 0 && n_left_to_next > 0)
2679         {
2680           u32 bi0;
2681           vlib_buffer_t * b0;
2682           u32 next0 = SNAT_OUT2IN_NEXT_DROP;
2683           u32 sw_if_index0;
2684           ip4_header_t * ip0;
2685           ip_csum_t sum0;
2686           u32 new_addr0, old_addr0;
2687           u16 new_port0, old_port0;
2688           udp_header_t * udp0;
2689           tcp_header_t * tcp0;
2690           icmp46_header_t * icmp0;
2691           snat_session_key_t key0, sm0;
2692           u32 proto0;
2693           u32 rx_fib_index0;
2694
2695           /* speculatively enqueue b0 to the current next frame */
2696           bi0 = from[0];
2697           to_next[0] = bi0;
2698           from += 1;
2699           to_next += 1;
2700           n_left_from -= 1;
2701           n_left_to_next -= 1;
2702
2703           b0 = vlib_get_buffer (vm, bi0);
2704
2705           ip0 = vlib_buffer_get_current (b0);
2706           udp0 = ip4_next_header (ip0);
2707           tcp0 = (tcp_header_t *) udp0;
2708           icmp0 = (icmp46_header_t *) udp0;
2709
2710           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2711           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2712
2713           vnet_feature_next (sw_if_index0, &next0, b0);
2714
2715           if (PREDICT_FALSE(ip0->ttl == 1))
2716             {
2717               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2718               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2719                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2720                                            0);
2721               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2722               goto trace00;
2723             }
2724
2725           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2726
2727           if (PREDICT_FALSE (proto0 == ~0))
2728               goto trace00;
2729
2730           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2731             {
2732               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2733                                   rx_fib_index0, node, next0, ~0, 0, 0);
2734               goto trace00;
2735             }
2736
2737           key0.addr = ip0->dst_address;
2738           key0.port = udp0->dst_port;
2739           key0.fib_index = rx_fib_index0;
2740
2741           if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
2742             {
2743               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2744               goto trace00;
2745             }
2746
2747           new_addr0 = sm0.addr.as_u32;
2748           new_port0 = sm0.port;
2749           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2750           old_addr0 = ip0->dst_address.as_u32;
2751           ip0->dst_address.as_u32 = new_addr0;
2752
2753           sum0 = ip0->checksum;
2754           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2755                                  ip4_header_t,
2756                                  dst_address /* changed member */);
2757           ip0->checksum = ip_csum_fold (sum0);
2758
2759           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
2760             {
2761                if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2762                 {
2763                   old_port0 = tcp0->dst_port;
2764                   tcp0->dst_port = new_port0;
2765
2766                   sum0 = tcp0->checksum;
2767                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2768                                          ip4_header_t,
2769                                          dst_address /* changed member */);
2770
2771                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2772                                          ip4_header_t /* cheat */,
2773                                          length /* changed member */);
2774                   tcp0->checksum = ip_csum_fold(sum0);
2775                 }
2776               else
2777                 {
2778                   old_port0 = udp0->dst_port;
2779                   udp0->dst_port = new_port0;
2780                   udp0->checksum = 0;
2781                 }
2782             }
2783           else
2784             {
2785               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2786                 {
2787                   sum0 = tcp0->checksum;
2788                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2789                                          ip4_header_t,
2790                                          dst_address /* changed member */);
2791
2792                   tcp0->checksum = ip_csum_fold(sum0);
2793                 }
2794             }
2795
2796         trace00:
2797
2798           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2799                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2800             {
2801               snat_out2in_trace_t *t =
2802                  vlib_add_trace (vm, node, b0, sizeof (*t));
2803               t->sw_if_index = sw_if_index0;
2804               t->next_index = next0;
2805             }
2806
2807           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2808
2809           /* verify speculative enqueue, maybe switch current next frame */
2810           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2811                                            to_next, n_left_to_next,
2812                                            bi0, next0);
2813         }
2814
2815       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2816     }
2817
2818   vlib_node_increment_counter (vm, snat_out2in_fast_node.index,
2819                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2820                                pkts_processed);
2821   return frame->n_vectors;
2822 }
2823
2824 VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
2825   .function = snat_out2in_fast_node_fn,
2826   .name = "nat44-out2in-fast",
2827   .vector_size = sizeof (u32),
2828   .format_trace = format_snat_out2in_fast_trace,
2829   .type = VLIB_NODE_TYPE_INTERNAL,
2830
2831   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2832   .error_strings = snat_out2in_error_strings,
2833
2834   .runtime_data_bytes = sizeof (snat_runtime_t),
2835
2836   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
2837
2838   /* edit / add dispositions here */
2839   .next_nodes = {
2840     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2841     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
2842     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2843     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
2844   },
2845 };
2846 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn);