cba42465e912744156b78b5a536a84d0f43e447a
[vpp.git] / src / plugins / snat / out2in.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/udp/udp.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <snat/snat.h>
26 #include <snat/snat_ipfix_logging.h>
27 #include <snat/snat_det.h>
28
29 #include <vppinfra/hash.h>
30 #include <vppinfra/error.h>
31 #include <vppinfra/elog.h>
32
33 typedef struct {
34   u32 sw_if_index;
35   u32 next_index;
36   u32 session_index;
37 } snat_out2in_trace_t;
38
39 typedef struct {
40   u32 next_worker_index;
41   u8 do_handoff;
42 } snat_out2in_worker_handoff_trace_t;
43
44 /* packet trace format function */
45 static u8 * format_snat_out2in_trace (u8 * s, va_list * args)
46 {
47   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
50   
51   s = format (s, "SNAT_OUT2IN: sw_if_index %d, next index %d, session index %d",
52               t->sw_if_index, t->next_index, t->session_index);
53   return s;
54 }
55
56 static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args)
57 {
58   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
59   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
60   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
61   
62   s = format (s, "SNAT_OUT2IN_FAST: sw_if_index %d, next index %d",
63               t->sw_if_index, t->next_index);
64   return s;
65 }
66
67 static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args)
68 {
69   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
70   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
71   snat_out2in_worker_handoff_trace_t * t =
72     va_arg (*args, snat_out2in_worker_handoff_trace_t *);
73   char * m;
74
75   m = t->do_handoff ? "next worker" : "same worker";
76   s = format (s, "SNAT_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
77
78   return s;
79 }
80
81 vlib_node_registration_t snat_out2in_node;
82 vlib_node_registration_t snat_out2in_fast_node;
83 vlib_node_registration_t snat_out2in_worker_handoff_node;
84 vlib_node_registration_t snat_det_out2in_node;
85
/*
 * X-macro listing every out2in error as _(SYMBOL, "description").
 * It is expanded twice below: once to build snat_out2in_error_t and once
 * to build snat_out2in_error_strings, so the order here defines both.
 */
#define foreach_snat_out2in_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(OUT2IN_PACKETS, "Good out2in packets processed")      \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(NO_TRANSLATION, "No translation")
91   
92 typedef enum {
93 #define _(sym,str) SNAT_OUT2IN_ERROR_##sym,
94   foreach_snat_out2in_error
95 #undef _
96   SNAT_OUT2IN_N_ERROR,
97 } snat_out2in_error_t;
98
99 static char * snat_out2in_error_strings[] = {
100 #define _(sym,string) string,
101   foreach_snat_out2in_error
102 #undef _
103 };
104
/* Next-node indices used by the out2in code in this file. */
typedef enum
{
  /* chosen whenever a packet is rejected */
  SNAT_OUT2IN_NEXT_DROP,
  /* default next index for packets handled by snat_out2in_node_fn */
  SNAT_OUT2IN_NEXT_LOOKUP,
  /* chosen after icmp4_error_set_vnet_buffer() for packets with TTL 1 */
  SNAT_OUT2IN_NEXT_ICMP_ERROR,
  /* number of next indices */
  SNAT_OUT2IN_N_NEXT,
} snat_out2in_next_t;
111
112 /**
113  * @brief Create session for static mapping.
114  *
115  * Create NAT session initiated by host from external network with static
116  * mapping.
117  *
118  * @param sm     SNAT main.
119  * @param b0     Vlib buffer.
120  * @param in2out In2out SNAT session key.
121  * @param out2in Out2in SNAT session key.
122  * @param node   Vlib node.
123  *
124  * @returns SNAT session if successfully created otherwise 0.
125  */
126 static inline snat_session_t *
127 create_session_for_static_mapping (snat_main_t *sm,
128                                    vlib_buffer_t *b0,
129                                    snat_session_key_t in2out,
130                                    snat_session_key_t out2in,
131                                    vlib_node_runtime_t * node,
132                                    u32 thread_index)
133 {
134   snat_user_t *u;
135   snat_user_key_t user_key;
136   snat_session_t *s;
137   clib_bihash_kv_8_8_t kv0, value0;
138   dlist_elt_t * per_user_translation_list_elt;
139   dlist_elt_t * per_user_list_head_elt;
140   ip4_header_t *ip0;
141
142   ip0 = vlib_buffer_get_current (b0);
143
144   user_key.addr = in2out.addr;
145   user_key.fib_index = in2out.fib_index;
146   kv0.key = user_key.as_u64;
147
148   /* Ever heard of the "user" = inside ip4 address before? */
149   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
150     {
151       /* no, make a new one */
152       pool_get (sm->per_thread_data[thread_index].users, u);
153       memset (u, 0, sizeof (*u));
154       u->addr = in2out.addr;
155       u->fib_index = in2out.fib_index;
156
157       pool_get (sm->per_thread_data[thread_index].list_pool,
158                 per_user_list_head_elt);
159
160       u->sessions_per_user_list_head_index = per_user_list_head_elt -
161         sm->per_thread_data[thread_index].list_pool;
162
163       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
164                        u->sessions_per_user_list_head_index);
165
166       kv0.value = u - sm->per_thread_data[thread_index].users;
167
168       /* add user */
169       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
170
171       /* add non-traslated packets worker lookup */
172       kv0.value = thread_index;
173       clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
174     }
175   else
176     {
177       u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
178                              value0.value);
179     }
180
181   pool_get (sm->per_thread_data[thread_index].sessions, s);
182   memset (s, 0, sizeof (*s));
183
184   s->outside_address_index = ~0;
185   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
186   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
187   u->nstaticsessions++;
188
189   /* Create list elts */
190   pool_get (sm->per_thread_data[thread_index].list_pool,
191             per_user_translation_list_elt);
192   clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
193                    per_user_translation_list_elt -
194                    sm->per_thread_data[thread_index].list_pool);
195
196   per_user_translation_list_elt->value =
197     s - sm->per_thread_data[thread_index].sessions;
198   s->per_user_index =
199     per_user_translation_list_elt - sm->per_thread_data[thread_index].list_pool;
200   s->per_user_list_head_index = u->sessions_per_user_list_head_index;
201
202   clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
203                       s->per_user_list_head_index,
204                       per_user_translation_list_elt -
205                       sm->per_thread_data[thread_index].list_pool);
206
207   s->in2out = in2out;
208   s->out2in = out2in;
209   s->in2out.protocol = out2in.protocol;
210
211   /* Add to translation hashes */
212   kv0.key = s->in2out.as_u64;
213   kv0.value = s - sm->per_thread_data[thread_index].sessions;
214   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
215       clib_warning ("in2out key add failed");
216
217   kv0.key = s->out2in.as_u64;
218   kv0.value = s - sm->per_thread_data[thread_index].sessions;
219
220   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
221       clib_warning ("out2in key add failed");
222
223   /* log NAT event */
224   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
225                                       s->out2in.addr.as_u32,
226                                       s->in2out.protocol,
227                                       s->in2out.port,
228                                       s->out2in.port,
229                                       s->in2out.fib_index);
230    return s;
231 }
232
233 static_always_inline
234 snat_out2in_error_t icmp_get_key(ip4_header_t *ip0,
235                                  snat_session_key_t *p_key0)
236 {
237   icmp46_header_t *icmp0;
238   snat_session_key_t key0;
239   icmp_echo_header_t *echo0, *inner_echo0 = 0;
240   ip4_header_t *inner_ip0;
241   void *l4_header = 0;
242   icmp46_header_t *inner_icmp0;
243
244   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
245   echo0 = (icmp_echo_header_t *)(icmp0+1);
246
247   if (!icmp_is_error_message (icmp0))
248     {
249       key0.protocol = SNAT_PROTOCOL_ICMP;
250       key0.addr = ip0->dst_address;
251       key0.port = echo0->identifier;
252     }
253   else
254     {
255       inner_ip0 = (ip4_header_t *)(echo0+1);
256       l4_header = ip4_next_header (inner_ip0);
257       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
258       key0.addr = inner_ip0->src_address;
259       switch (key0.protocol)
260         {
261         case SNAT_PROTOCOL_ICMP:
262           inner_icmp0 = (icmp46_header_t*)l4_header;
263           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
264           key0.port = inner_echo0->identifier;
265           break;
266         case SNAT_PROTOCOL_UDP:
267         case SNAT_PROTOCOL_TCP:
268           key0.port = ((tcp_udp_header_t*)l4_header)->src_port;
269           break;
270         default:
271           return SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL;
272         }
273     }
274   *p_key0 = key0;
275   return -1; /* success */
276 }
277
278 /**
279  * Get address and port values to be used for packet SNAT translation
280  * and create session if needed
281  *
282  * @param[in,out] sm             SNAT main
283  * @param[in,out] node           SNAT node runtime
284  * @param[in] thread_index       thread index
285  * @param[in,out] b0             buffer containing packet to be translated
286  * @param[out] p_proto           protocol used for matching
287  * @param[out] p_value           address and port after NAT translation
288  * @param[out] p_dont_translate  if packet should not be translated
289  * @param d                      optional parameter
290  * @param e                      optional parameter
291  */
292 u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
293                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
294                            snat_session_key_t *p_value,
295                            u8 *p_dont_translate, void *d, void *e)
296 {
297   ip4_header_t *ip0;
298   icmp46_header_t *icmp0;
299   u32 sw_if_index0;
300   u32 rx_fib_index0;
301   snat_session_key_t key0;
302   snat_session_key_t sm0;
303   snat_session_t *s0 = 0;
304   u8 dont_translate = 0;
305   clib_bihash_kv_8_8_t kv0, value0;
306   u8 is_addr_only;
307   u32 next0 = ~0;
308   int err;
309
310   ip0 = vlib_buffer_get_current (b0);
311   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
312   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
313   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
314
315   key0.protocol = 0;
316
317   err = icmp_get_key (ip0, &key0);
318   if (err != -1)
319     {
320       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
321       next0 = SNAT_OUT2IN_NEXT_DROP;
322       goto out;
323     }
324   key0.fib_index = rx_fib_index0;
325
326   kv0.key = key0.as_u64;
327
328   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
329     {
330       /* Try to match static mapping by external address and port,
331          destination address and port in packet */
332       if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only))
333         {
334           /* Don't NAT packet aimed at the intfc address */
335           if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
336                                               ip0->dst_address.as_u32)))
337             {
338               dont_translate = 1;
339               goto out;
340             }
341           b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
342           next0 = SNAT_OUT2IN_NEXT_DROP;
343           goto out;
344         }
345
346       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
347                         (icmp0->type != ICMP4_echo_request || !is_addr_only)))
348         {
349           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
350           next0 = SNAT_OUT2IN_NEXT_DROP;
351           goto out;
352         }
353
354       /* Create session initiated by host from external network */
355       s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
356                                              node, thread_index);
357
358       if (!s0)
359         {
360           next0 = SNAT_OUT2IN_NEXT_DROP;
361           goto out;
362         }
363     }
364   else
365     {
366       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
367                         icmp0->type != ICMP4_echo_request &&
368                         !icmp_is_error_message (icmp0)))
369         {
370           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
371           next0 = SNAT_OUT2IN_NEXT_DROP;
372           goto out;
373         }
374
375       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
376                               value0.value);
377     }
378
379 out:
380   *p_proto = key0.protocol;
381   if (s0)
382     *p_value = s0->in2out;
383   *p_dont_translate = dont_translate;
384   if (d)
385     *(snat_session_t**)d = s0;
386   return next0;
387 }
388
389 /**
390  * Get address and port values to be used for packet SNAT translation
391  *
392  * @param[in] sm                 SNAT main
393  * @param[in,out] node           SNAT node runtime
394  * @param[in] thread_index       thread index
395  * @param[in,out] b0             buffer containing packet to be translated
396  * @param[out] p_proto           protocol used for matching
397  * @param[out] p_value           address and port after NAT translation
398  * @param[out] p_dont_translate  if packet should not be translated
399  * @param d                      optional parameter
400  * @param e                      optional parameter
401  */
402 u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
403                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
404                            snat_session_key_t *p_value,
405                            u8 *p_dont_translate, void *d, void *e)
406 {
407   ip4_header_t *ip0;
408   icmp46_header_t *icmp0;
409   u32 sw_if_index0;
410   u32 rx_fib_index0;
411   snat_session_key_t key0;
412   snat_session_key_t sm0;
413   u8 dont_translate = 0;
414   u8 is_addr_only;
415   u32 next0 = ~0;
416   int err;
417
418   ip0 = vlib_buffer_get_current (b0);
419   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
420   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
421   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
422
423   err = icmp_get_key (ip0, &key0);
424   if (err != -1)
425     {
426       b0->error = node->errors[err];
427       next0 = SNAT_OUT2IN_NEXT_DROP;
428       goto out2;
429     }
430   key0.fib_index = rx_fib_index0;
431
432   if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only))
433     {
434       /* Don't NAT packet aimed at the intfc address */
435       if (is_interface_addr(sm, node, sw_if_index0, ip0->dst_address.as_u32))
436         {
437           dont_translate = 1;
438           goto out;
439         }
440       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
441       next0 = SNAT_OUT2IN_NEXT_DROP;
442       goto out;
443     }
444
445   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
446                     (icmp0->type != ICMP4_echo_request || !is_addr_only) &&
447                     !icmp_is_error_message (icmp0)))
448     {
449       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
450       next0 = SNAT_OUT2IN_NEXT_DROP;
451       goto out;
452     }
453
454 out:
455   *p_value = sm0;
456 out2:
457   *p_proto = key0.protocol;
458   *p_dont_translate = dont_translate;
459   return next0;
460 }
461
462 static inline u32 icmp_out2in (snat_main_t *sm,
463                                vlib_buffer_t * b0,
464                                ip4_header_t * ip0,
465                                icmp46_header_t * icmp0,
466                                u32 sw_if_index0,
467                                u32 rx_fib_index0,
468                                vlib_node_runtime_t * node,
469                                u32 next0,
470                                u32 thread_index,
471                                void *d,
472                                void *e)
473 {
474   snat_session_key_t sm0;
475   u8 protocol;
476   icmp_echo_header_t *echo0, *inner_echo0 = 0;
477   ip4_header_t *inner_ip0 = 0;
478   void *l4_header = 0;
479   icmp46_header_t *inner_icmp0;
480   u8 dont_translate;
481   u32 new_addr0, old_addr0;
482   u16 old_id0, new_id0;
483   ip_csum_t sum0;
484   u16 checksum0;
485   u32 next0_tmp;
486
487   echo0 = (icmp_echo_header_t *)(icmp0+1);
488
489   next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0,
490                                        &protocol, &sm0, &dont_translate, d, e);
491   if (next0_tmp != ~0)
492     next0 = next0_tmp;
493   if (next0 == SNAT_OUT2IN_NEXT_DROP || dont_translate)
494     goto out;
495
496   sum0 = ip_incremental_checksum (0, icmp0,
497                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
498   checksum0 = ~ip_csum_fold (sum0);
499   if (checksum0 != 0 && checksum0 != 0xffff)
500     {
501       next0 = SNAT_OUT2IN_NEXT_DROP;
502       goto out;
503     }
504
505   old_addr0 = ip0->dst_address.as_u32;
506   new_addr0 = ip0->dst_address.as_u32 = sm0.addr.as_u32;
507   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
508
509   sum0 = ip0->checksum;
510   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
511                          dst_address /* changed member */);
512   ip0->checksum = ip_csum_fold (sum0);
513
514   if (!icmp_is_error_message (icmp0))
515     {
516       new_id0 = sm0.port;
517       if (PREDICT_FALSE(new_id0 != echo0->identifier))
518         {
519           old_id0 = echo0->identifier;
520           new_id0 = sm0.port;
521           echo0->identifier = new_id0;
522
523           sum0 = icmp0->checksum;
524           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
525                                  identifier /* changed member */);
526           icmp0->checksum = ip_csum_fold (sum0);
527         }
528     }
529   else
530     {
531       inner_ip0 = (ip4_header_t *)(echo0+1);
532       l4_header = ip4_next_header (inner_ip0);
533
534       if (!ip4_header_checksum_is_valid (inner_ip0))
535         {
536           next0 = SNAT_OUT2IN_NEXT_DROP;
537           goto out;
538         }
539
540       old_addr0 = inner_ip0->src_address.as_u32;
541       inner_ip0->src_address = sm0.addr;
542       new_addr0 = inner_ip0->src_address.as_u32;
543
544       sum0 = icmp0->checksum;
545       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
546                              src_address /* changed member */);
547       icmp0->checksum = ip_csum_fold (sum0);
548
549       switch (protocol)
550         {
551         case SNAT_PROTOCOL_ICMP:
552           inner_icmp0 = (icmp46_header_t*)l4_header;
553           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
554
555           old_id0 = inner_echo0->identifier;
556           new_id0 = sm0.port;
557           inner_echo0->identifier = new_id0;
558
559           sum0 = icmp0->checksum;
560           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
561                                  identifier);
562           icmp0->checksum = ip_csum_fold (sum0);
563           break;
564         case SNAT_PROTOCOL_UDP:
565         case SNAT_PROTOCOL_TCP:
566           old_id0 = ((tcp_udp_header_t*)l4_header)->src_port;
567           new_id0 = sm0.port;
568           ((tcp_udp_header_t*)l4_header)->src_port = new_id0;
569
570           sum0 = icmp0->checksum;
571           sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
572                                  src_port);
573           icmp0->checksum = ip_csum_fold (sum0);
574           break;
575         default:
576           ASSERT(0);
577         }
578     }
579
580 out:
581   return next0;
582 }
583
584
585 static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
586                                          vlib_buffer_t * b0,
587                                          ip4_header_t * ip0,
588                                          icmp46_header_t * icmp0,
589                                          u32 sw_if_index0,
590                                          u32 rx_fib_index0,
591                                          vlib_node_runtime_t * node,
592                                          u32 next0, f64 now,
593                                          u32 thread_index,
594                                          snat_session_t ** p_s0)
595 {
596   next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
597                       next0, thread_index, p_s0, 0);
598   snat_session_t * s0 = *p_s0;
599   if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
600     {
601       /* Accounting */
602       s0->last_heard = now;
603       s0->total_pkts++;
604       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
605       /* Per-user LRU list maintenance for dynamic translation */
606       if (!snat_is_session_static (s0))
607         {
608           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
609                              s0->per_user_index);
610           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
611                               s0->per_user_list_head_index,
612                               s0->per_user_index);
613         }
614     }
615   return next0;
616 }
617
618 static void
619 snat_out2in_unknown_proto (snat_main_t *sm,
620                            vlib_buffer_t * b,
621                            ip4_header_t * ip,
622                            u32 rx_fib_index,
623                            u32 thread_index,
624                            f64 now,
625                            vlib_main_t * vm)
626 {
627   clib_bihash_kv_8_8_t kv, value;
628   clib_bihash_kv_16_8_t s_kv, s_value;
629   snat_static_mapping_t *m;
630   snat_session_key_t m_key;
631   u32 old_addr, new_addr;
632   ip_csum_t sum;
633   snat_unk_proto_ses_key_t key;
634   snat_session_t * s;
635   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
636   snat_user_key_t u_key;
637   snat_user_t *u;
638   dlist_elt_t *head, *elt;
639
640   old_addr = ip->dst_address.as_u32;
641
642   key.l_addr = ip->dst_address;
643   key.r_addr = ip->src_address;
644   key.fib_index = rx_fib_index;
645   key.proto = ip->protocol;
646   key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0;
647   s_kv.key[0] = key.as_u64[0];
648   s_kv.key[1] = key.as_u64[1];
649
650   if (!clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value))
651     {
652       s = pool_elt_at_index (tsm->sessions, s_value.value);
653       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
654     }
655   else
656     {
657       m_key.addr = ip->dst_address;
658       m_key.port = 0;
659       m_key.protocol = 0;
660       m_key.fib_index = rx_fib_index;
661       kv.key = m_key.as_u64;
662       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
663         return;
664
665       m = pool_elt_at_index (sm->static_mappings, value.value);
666
667       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
668
669       u_key.addr = ip->src_address;
670       u_key.fib_index = m->fib_index;
671       kv.key = u_key.as_u64;
672
673       /* Ever heard of the "user" = src ip4 address before? */
674       if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
675         {
676           /* no, make a new one */
677           pool_get (tsm->users, u);
678           memset (u, 0, sizeof (*u));
679           u->addr = ip->src_address;
680           u->fib_index = rx_fib_index;
681
682           pool_get (tsm->list_pool, head);
683           u->sessions_per_user_list_head_index = head - tsm->list_pool;
684
685           clib_dlist_init (tsm->list_pool,
686                            u->sessions_per_user_list_head_index);
687
688           kv.value = u - tsm->users;
689
690           /* add user */
691           clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1);
692         }
693       else
694         {
695           u = pool_elt_at_index (tsm->users, value.value);
696         }
697
698       /* Create a new session */
699       pool_get (tsm->sessions, s);
700       memset (s, 0, sizeof (*s));
701
702       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
703       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
704       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
705       s->outside_address_index = ~0;
706       s->out2in.addr.as_u32 = old_addr;
707       s->out2in.fib_index = rx_fib_index;
708       s->in2out.addr.as_u32 = new_addr;
709       s->in2out.fib_index = m->fib_index;
710       s->in2out.port = s->out2in.port = ip->protocol;
711       u->nstaticsessions++;
712
713       /* Create list elts */
714       pool_get (tsm->list_pool, elt);
715       clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
716       elt->value = s - tsm->sessions;
717       s->per_user_index = elt - tsm->list_pool;
718       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
719       clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
720                           s->per_user_index);
721
722       /* Add to lookup tables */
723       s_kv.value = s - tsm->sessions;
724       if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 1))
725         clib_warning ("out2in key add failed");
726
727       key.l_addr = ip->dst_address;
728       key.fib_index = m->fib_index;
729       s_kv.key[0] = key.as_u64[0];
730       s_kv.key[1] = key.as_u64[1];
731       if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 1))
732         clib_warning ("in2out key add failed");
733    }
734
735   /* Update IP checksum */
736   sum = ip->checksum;
737   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
738   ip->checksum = ip_csum_fold (sum);
739
740   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
741
742   /* Accounting */
743   s->last_heard = now;
744   s->total_pkts++;
745   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
746   /* Per-user LRU list maintenance */
747   clib_dlist_remove (tsm->list_pool, s->per_user_index);
748   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
749                       s->per_user_index);
750 }
751
752 static uword
753 snat_out2in_node_fn (vlib_main_t * vm,
754                   vlib_node_runtime_t * node,
755                   vlib_frame_t * frame)
756 {
757   u32 n_left_from, * from, * to_next;
758   snat_out2in_next_t next_index;
759   u32 pkts_processed = 0;
760   snat_main_t * sm = &snat_main;
761   f64 now = vlib_time_now (vm);
762   u32 thread_index = vlib_get_thread_index ();
763
764   from = vlib_frame_vector_args (frame);
765   n_left_from = frame->n_vectors;
766   next_index = node->cached_next_index;
767
768   while (n_left_from > 0)
769     {
770       u32 n_left_to_next;
771
772       vlib_get_next_frame (vm, node, next_index,
773                            to_next, n_left_to_next);
774
775       while (n_left_from >= 4 && n_left_to_next >= 2)
776         {
777           u32 bi0, bi1;
778           vlib_buffer_t * b0, * b1;
779           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
780           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
781           u32 sw_if_index0, sw_if_index1;
782           ip4_header_t * ip0, *ip1;
783           ip_csum_t sum0, sum1;
784           u32 new_addr0, old_addr0;
785           u16 new_port0, old_port0;
786           u32 new_addr1, old_addr1;
787           u16 new_port1, old_port1;
788           udp_header_t * udp0, * udp1;
789           tcp_header_t * tcp0, * tcp1;
790           icmp46_header_t * icmp0, * icmp1;
791           snat_session_key_t key0, key1, sm0, sm1;
792           u32 rx_fib_index0, rx_fib_index1;
793           u32 proto0, proto1;
794           snat_session_t * s0 = 0, * s1 = 0;
795           clib_bihash_kv_8_8_t kv0, kv1, value0, value1;
796           
797           /* Prefetch next iteration. */
798           {
799             vlib_buffer_t * p2, * p3;
800             
801             p2 = vlib_get_buffer (vm, from[2]);
802             p3 = vlib_get_buffer (vm, from[3]);
803             
804             vlib_prefetch_buffer_header (p2, LOAD);
805             vlib_prefetch_buffer_header (p3, LOAD);
806
807             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
808             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
809           }
810
811           /* speculatively enqueue b0 and b1 to the current next frame */
812           to_next[0] = bi0 = from[0];
813           to_next[1] = bi1 = from[1];
814           from += 2;
815           to_next += 2;
816           n_left_from -= 2;
817           n_left_to_next -= 2;
818
819           b0 = vlib_get_buffer (vm, bi0);
820           b1 = vlib_get_buffer (vm, bi1);
821             
822           ip0 = vlib_buffer_get_current (b0);
823           udp0 = ip4_next_header (ip0);
824           tcp0 = (tcp_header_t *) udp0;
825           icmp0 = (icmp46_header_t *) udp0;
826
827           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
828           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
829                                    sw_if_index0);
830
831           if (PREDICT_FALSE(ip0->ttl == 1))
832             {
833               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
834               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
835                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
836                                            0);
837               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
838               goto trace0;
839             }
840
841           proto0 = ip_proto_to_snat_proto (ip0->protocol);
842
843           if (PREDICT_FALSE (proto0 == ~0))
844             {
845               snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
846                                         thread_index, now, vm);
847               goto trace0;
848             }
849
850           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
851             {
852               next0 = icmp_out2in_slow_path 
853                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, 
854                  next0, now, thread_index, &s0);
855               goto trace0;
856             }
857
858           key0.addr = ip0->dst_address;
859           key0.port = udp0->dst_port;
860           key0.protocol = proto0;
861           key0.fib_index = rx_fib_index0;
862           
863           kv0.key = key0.as_u64;
864
865           if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
866             {
867               /* Try to match static mapping by external address and port,
868                  destination address and port in packet */
869               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
870                 {
871                   b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
872                   /* 
873                    * Send DHCP packets to the ipv4 stack, or we won't
874                    * be able to use dhcp client on the outside interface
875                    */
876                   if (proto0 != SNAT_PROTOCOL_UDP 
877                       || (udp0->dst_port 
878                           != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
879                     next0 = SNAT_OUT2IN_NEXT_DROP;
880                   goto trace0;
881                 }
882
883               /* Create session initiated by host from external network */
884               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
885                                                      thread_index);
886               if (!s0)
887                 {
888                   b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
889                   next0 = SNAT_OUT2IN_NEXT_DROP;
890                   goto trace0;
891                 }
892             }
893           else
894             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
895                                     value0.value);
896
897           old_addr0 = ip0->dst_address.as_u32;
898           ip0->dst_address = s0->in2out.addr;
899           new_addr0 = ip0->dst_address.as_u32;
900           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
901
902           sum0 = ip0->checksum;
903           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
904                                  ip4_header_t,
905                                  dst_address /* changed member */);
906           ip0->checksum = ip_csum_fold (sum0);
907
908           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
909             {
910               old_port0 = tcp0->dst_port;
911               tcp0->dst_port = s0->in2out.port;
912               new_port0 = tcp0->dst_port;
913
914               sum0 = tcp0->checksum;
915               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
916                                      ip4_header_t,
917                                      dst_address /* changed member */);
918
919               sum0 = ip_csum_update (sum0, old_port0, new_port0,
920                                      ip4_header_t /* cheat */,
921                                      length /* changed member */);
922               tcp0->checksum = ip_csum_fold(sum0);
923             }
924           else
925             {
926               old_port0 = udp0->dst_port;
927               udp0->dst_port = s0->in2out.port;
928               udp0->checksum = 0;
929             }
930
931           /* Accounting */
932           s0->last_heard = now;
933           s0->total_pkts++;
934           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
935           /* Per-user LRU list maintenance for dynamic translation */
936           if (!snat_is_session_static (s0))
937             {
938               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
939                                  s0->per_user_index);
940               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
941                                   s0->per_user_list_head_index,
942                                   s0->per_user_index);
943             }
944         trace0:
945
946           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
947                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
948             {
949               snat_out2in_trace_t *t = 
950                  vlib_add_trace (vm, node, b0, sizeof (*t));
951               t->sw_if_index = sw_if_index0;
952               t->next_index = next0;
953               t->session_index = ~0;
954               if (s0)
955                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
956             }
957
958           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
959
960
961           ip1 = vlib_buffer_get_current (b1);
962           udp1 = ip4_next_header (ip1);
963           tcp1 = (tcp_header_t *) udp1;
964           icmp1 = (icmp46_header_t *) udp1;
965
966           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
967           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
968                                    sw_if_index1);
969
970           if (PREDICT_FALSE(ip1->ttl == 1))
971             {
972               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
973               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
974                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
975                                            0);
976               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
977               goto trace1;
978             }
979
980           proto1 = ip_proto_to_snat_proto (ip1->protocol);
981
982           if (PREDICT_FALSE (proto1 == ~0))
983             {
984               snat_out2in_unknown_proto(sm, b1, ip1, rx_fib_index1,
985                                         thread_index, now, vm);
986               goto trace1;
987             }
988
989           if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
990             {
991               next1 = icmp_out2in_slow_path 
992                 (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, 
993                  next1, now, thread_index, &s1);
994               goto trace1;
995             }
996
997           key1.addr = ip1->dst_address;
998           key1.port = udp1->dst_port;
999           key1.protocol = proto1;
1000           key1.fib_index = rx_fib_index1;
1001           
1002           kv1.key = key1.as_u64;
1003
1004           if (clib_bihash_search_8_8 (&sm->out2in, &kv1, &value1))
1005             {
1006               /* Try to match static mapping by external address and port,
1007                  destination address and port in packet */
1008               if (snat_static_mapping_match(sm, key1, &sm1, 1, 0))
1009                 {
1010                   b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1011                   /* 
1012                    * Send DHCP packets to the ipv4 stack, or we won't
1013                    * be able to use dhcp client on the outside interface
1014                    */
1015                   if (proto1 != SNAT_PROTOCOL_UDP 
1016                       || (udp1->dst_port 
1017                           != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1018                     next1 = SNAT_OUT2IN_NEXT_DROP;
1019                   goto trace1;
1020                 }
1021
1022               /* Create session initiated by host from external network */
1023               s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
1024                                                      thread_index);
1025               if (!s1)
1026                 {
1027                   b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1028                   next1 = SNAT_OUT2IN_NEXT_DROP;
1029                   goto trace1;
1030                 }
1031             }
1032           else
1033             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1034                                     value1.value);
1035
1036           old_addr1 = ip1->dst_address.as_u32;
1037           ip1->dst_address = s1->in2out.addr;
1038           new_addr1 = ip1->dst_address.as_u32;
1039           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index;
1040
1041           sum1 = ip1->checksum;
1042           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1043                                  ip4_header_t,
1044                                  dst_address /* changed member */);
1045           ip1->checksum = ip_csum_fold (sum1);
1046
1047           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1048             {
1049               old_port1 = tcp1->dst_port;
1050               tcp1->dst_port = s1->in2out.port;
1051               new_port1 = tcp1->dst_port;
1052
1053               sum1 = tcp1->checksum;
1054               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1055                                      ip4_header_t,
1056                                      dst_address /* changed member */);
1057
1058               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1059                                      ip4_header_t /* cheat */,
1060                                      length /* changed member */);
1061               tcp1->checksum = ip_csum_fold(sum1);
1062             }
1063           else
1064             {
1065               old_port1 = udp1->dst_port;
1066               udp1->dst_port = s1->in2out.port;
1067               udp1->checksum = 0;
1068             }
1069
1070           /* Accounting */
1071           s1->last_heard = now;
1072           s1->total_pkts++;
1073           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1074           /* Per-user LRU list maintenance for dynamic translation */
1075           if (!snat_is_session_static (s1))
1076             {
1077               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1078                                  s1->per_user_index);
1079               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1080                                   s1->per_user_list_head_index,
1081                                   s1->per_user_index);
1082             }
1083         trace1:
1084
1085           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1086                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
1087             {
1088               snat_out2in_trace_t *t = 
1089                  vlib_add_trace (vm, node, b1, sizeof (*t));
1090               t->sw_if_index = sw_if_index1;
1091               t->next_index = next1;
1092               t->session_index = ~0;
1093               if (s1)
1094                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1095             }
1096
1097           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
1098
1099           /* verify speculative enqueues, maybe switch current next frame */
1100           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1101                                            to_next, n_left_to_next,
1102                                            bi0, bi1, next0, next1);
1103         }
1104
1105       while (n_left_from > 0 && n_left_to_next > 0)
1106         {
1107           u32 bi0;
1108           vlib_buffer_t * b0;
1109           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1110           u32 sw_if_index0;
1111           ip4_header_t * ip0;
1112           ip_csum_t sum0;
1113           u32 new_addr0, old_addr0;
1114           u16 new_port0, old_port0;
1115           udp_header_t * udp0;
1116           tcp_header_t * tcp0;
1117           icmp46_header_t * icmp0;
1118           snat_session_key_t key0, sm0;
1119           u32 rx_fib_index0;
1120           u32 proto0;
1121           snat_session_t * s0 = 0;
1122           clib_bihash_kv_8_8_t kv0, value0;
1123           
1124           /* speculatively enqueue b0 to the current next frame */
1125           bi0 = from[0];
1126           to_next[0] = bi0;
1127           from += 1;
1128           to_next += 1;
1129           n_left_from -= 1;
1130           n_left_to_next -= 1;
1131
1132           b0 = vlib_get_buffer (vm, bi0);
1133
1134           ip0 = vlib_buffer_get_current (b0);
1135           udp0 = ip4_next_header (ip0);
1136           tcp0 = (tcp_header_t *) udp0;
1137           icmp0 = (icmp46_header_t *) udp0;
1138
1139           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1140           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
1141                                    sw_if_index0);
1142
1143           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1144
1145           if (PREDICT_FALSE (proto0 == ~0))
1146             {
1147               snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
1148                                         thread_index, now, vm);
1149               goto trace00;
1150             }
1151
1152           if (PREDICT_FALSE(ip0->ttl == 1))
1153             {
1154               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1155               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1156                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1157                                            0);
1158               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1159               goto trace00;
1160             }
1161
1162           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1163             {
1164               next0 = icmp_out2in_slow_path 
1165                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, 
1166                  next0, now, thread_index, &s0);
1167               goto trace00;
1168             }
1169
1170           key0.addr = ip0->dst_address;
1171           key0.port = udp0->dst_port;
1172           key0.protocol = proto0;
1173           key0.fib_index = rx_fib_index0;
1174           
1175           kv0.key = key0.as_u64;
1176
1177           if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
1178             {
1179               /* Try to match static mapping by external address and port,
1180                  destination address and port in packet */
1181               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
1182                 {
1183                   b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1184                   /* 
1185                    * Send DHCP packets to the ipv4 stack, or we won't
1186                    * be able to use dhcp client on the outside interface
1187                    */
1188                   if (proto0 != SNAT_PROTOCOL_UDP 
1189                       || (udp0->dst_port 
1190                           != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1191
1192                     next0 = SNAT_OUT2IN_NEXT_DROP;
1193                   goto trace00;
1194                 }
1195
1196               /* Create session initiated by host from external network */
1197               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1198                                                      thread_index);
1199               if (!s0)
1200                 {
1201                   b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1202                     next0 = SNAT_OUT2IN_NEXT_DROP;
1203                   goto trace00;
1204                 }
1205             }
1206           else
1207             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1208                                     value0.value);
1209
1210           old_addr0 = ip0->dst_address.as_u32;
1211           ip0->dst_address = s0->in2out.addr;
1212           new_addr0 = ip0->dst_address.as_u32;
1213           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1214
1215           sum0 = ip0->checksum;
1216           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1217                                  ip4_header_t,
1218                                  dst_address /* changed member */);
1219           ip0->checksum = ip_csum_fold (sum0);
1220
1221           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1222             {
1223               old_port0 = tcp0->dst_port;
1224               tcp0->dst_port = s0->in2out.port;
1225               new_port0 = tcp0->dst_port;
1226
1227               sum0 = tcp0->checksum;
1228               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1229                                      ip4_header_t,
1230                                      dst_address /* changed member */);
1231
1232               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1233                                      ip4_header_t /* cheat */,
1234                                      length /* changed member */);
1235               tcp0->checksum = ip_csum_fold(sum0);
1236             }
1237           else
1238             {
1239               old_port0 = udp0->dst_port;
1240               udp0->dst_port = s0->in2out.port;
1241               udp0->checksum = 0;
1242             }
1243
1244           /* Accounting */
1245           s0->last_heard = now;
1246           s0->total_pkts++;
1247           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1248           /* Per-user LRU list maintenance for dynamic translation */
1249           if (!snat_is_session_static (s0))
1250             {
1251               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1252                                  s0->per_user_index);
1253               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1254                                   s0->per_user_list_head_index,
1255                                   s0->per_user_index);
1256             }
1257         trace00:
1258
1259           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1260                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1261             {
1262               snat_out2in_trace_t *t = 
1263                  vlib_add_trace (vm, node, b0, sizeof (*t));
1264               t->sw_if_index = sw_if_index0;
1265               t->next_index = next0;
1266               t->session_index = ~0;
1267               if (s0)
1268                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1269             }
1270
1271           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1272
1273           /* verify speculative enqueue, maybe switch current next frame */
1274           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1275                                            to_next, n_left_to_next,
1276                                            bi0, next0);
1277         }
1278
1279       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1280     }
1281
1282   vlib_node_increment_counter (vm, snat_out2in_node.index, 
1283                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS, 
1284                                pkts_processed);
1285   return frame->n_vectors;
1286 }
1287
/*
 * Graph node registration for "snat-out2in"; the dispatch function is
 * snat_out2in_node_fn.
 *
 * Packet traces recorded by the dispatch function (snat_out2in_trace_t) are
 * rendered by format_snat_out2in_trace.  The error counters are described by
 * snat_out2in_error_strings: the dispatch function tags packets it cannot
 * translate with SNAT_OUT2IN_ERROR_NO_TRANSLATION and adds the number of
 * packets not sent to the drop node to SNAT_OUT2IN_ERROR_OUT2IN_PACKETS.
 *
 * NOTE(review): snat_runtime_t and the way the per-node runtime data is
 * consumed are not visible in this part of the file -- confirm before
 * changing runtime_data_bytes.
 */
1288 VLIB_REGISTER_NODE (snat_out2in_node) = {
1289   .function = snat_out2in_node_fn,
1290   .name = "snat-out2in",
1291   .vector_size = sizeof (u32), /* frame elements are u32 buffer indices */
1292   .format_trace = format_snat_out2in_trace,
1293   .type = VLIB_NODE_TYPE_INTERNAL,
1294   
1295   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1296   .error_strings = snat_out2in_error_strings,
1297
1298   .runtime_data_bytes = sizeof (snat_runtime_t),
1299   
1300   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1301
1302   /* Next-node dispositions, indexed by the SNAT_OUT2IN_NEXT_* values that
         the dispatch function stores in next0/next1; edit / add here. */
1303   .next_nodes = {
1304     [SNAT_OUT2IN_NEXT_DROP] = "error-drop", /* e.g. no translation found, or session creation failed */
1305     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup", /* initial value of next0/next1 in the dispatch function */
1306     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error", /* e.g. TTL == 1: ICMP time exceeded is prepared */
1307   },
1308 };
/* NOTE(review): VLIB_NODE_FUNCTION_MULTIARCH is defined outside this file;
   presumably it emits CPU-architecture-specific variants of
   snat_out2in_node_fn for this node -- confirm against the vlib headers. */
1309 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
1310
1311 /**************************/
1312 /*** deterministic mode ***/
1313 /**************************/
1314 static uword
1315 snat_det_out2in_node_fn (vlib_main_t * vm,
1316                          vlib_node_runtime_t * node,
1317                          vlib_frame_t * frame)
1318 {
1319   u32 n_left_from, * from, * to_next;
1320   snat_out2in_next_t next_index;
1321   u32 pkts_processed = 0;
1322   snat_main_t * sm = &snat_main;
1323   u32 thread_index = vlib_get_thread_index ();
1324
1325   from = vlib_frame_vector_args (frame);
1326   n_left_from = frame->n_vectors;
1327   next_index = node->cached_next_index;
1328
1329   while (n_left_from > 0)
1330     {
1331       u32 n_left_to_next;
1332
1333       vlib_get_next_frame (vm, node, next_index,
1334                            to_next, n_left_to_next);
1335
1336       while (n_left_from >= 4 && n_left_to_next >= 2)
1337         {
1338           u32 bi0, bi1;
1339           vlib_buffer_t * b0, * b1;
1340           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1341           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
1342           u32 sw_if_index0, sw_if_index1;
1343           ip4_header_t * ip0, * ip1;
1344           ip_csum_t sum0, sum1;
1345           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
1346           u16 new_port0, old_port0, old_port1, new_port1;
1347           udp_header_t * udp0, * udp1;
1348           tcp_header_t * tcp0, * tcp1;
1349           u32 proto0, proto1;
1350           snat_det_out_key_t key0, key1;
1351           snat_det_map_t * dm0, * dm1;
1352           snat_det_session_t * ses0 = 0, * ses1 = 0;
1353           u32 rx_fib_index0, rx_fib_index1;
1354           icmp46_header_t * icmp0, * icmp1;
1355
1356           /* Prefetch next iteration. */
1357           {
1358             vlib_buffer_t * p2, * p3;
1359
1360             p2 = vlib_get_buffer (vm, from[2]);
1361             p3 = vlib_get_buffer (vm, from[3]);
1362
1363             vlib_prefetch_buffer_header (p2, LOAD);
1364             vlib_prefetch_buffer_header (p3, LOAD);
1365
1366             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1367             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1368           }
1369
1370           /* speculatively enqueue b0 and b1 to the current next frame */
1371           to_next[0] = bi0 = from[0];
1372           to_next[1] = bi1 = from[1];
1373           from += 2;
1374           to_next += 2;
1375           n_left_from -= 2;
1376           n_left_to_next -= 2;
1377
1378           b0 = vlib_get_buffer (vm, bi0);
1379           b1 = vlib_get_buffer (vm, bi1);
1380
1381           ip0 = vlib_buffer_get_current (b0);
1382           udp0 = ip4_next_header (ip0);
1383           tcp0 = (tcp_header_t *) udp0;
1384
1385           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1386
1387           if (PREDICT_FALSE(ip0->ttl == 1))
1388             {
1389               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1390               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1391                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1392                                            0);
1393               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1394               goto trace0;
1395             }
1396
1397           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1398
1399           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1400             {
1401               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1402               icmp0 = (icmp46_header_t *) udp0;
1403
1404               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
1405                                   rx_fib_index0, node, next0, thread_index,
1406                                   &ses0, &dm0);
1407               goto trace0;
1408             }
1409
1410           key0.ext_host_addr = ip0->src_address;
1411           key0.ext_host_port = tcp0->src;
1412           key0.out_port = tcp0->dst;
1413
1414           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
1415           if (PREDICT_FALSE(!dm0))
1416             {
1417               clib_warning("unknown dst address:  %U",
1418                            format_ip4_address, &ip0->dst_address);
1419               next0 = SNAT_OUT2IN_NEXT_DROP;
1420               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1421               goto trace0;
1422             }
1423
1424           snat_det_reverse(dm0, &ip0->dst_address,
1425                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
1426
1427           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
1428           if (PREDICT_FALSE(!ses0))
1429             {
1430               clib_warning("no match src %U:%d dst %U:%d for user %U",
1431                            format_ip4_address, &ip0->src_address,
1432                            clib_net_to_host_u16 (tcp0->src),
1433                            format_ip4_address, &ip0->dst_address,
1434                            clib_net_to_host_u16 (tcp0->dst),
1435                            format_ip4_address, &new_addr0);
1436               next0 = SNAT_OUT2IN_NEXT_DROP;
1437               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1438               goto trace0;
1439             }
1440           new_port0 = ses0->in_port;
1441
1442           old_addr0 = ip0->dst_address;
1443           ip0->dst_address = new_addr0;
1444           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
1445
1446           sum0 = ip0->checksum;
1447           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1448                                  ip4_header_t,
1449                                  dst_address /* changed member */);
1450           ip0->checksum = ip_csum_fold (sum0);
1451
1452           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1453             {
1454               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1455                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
1456               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
1457                 snat_det_ses_close(dm0, ses0);
1458
1459               old_port0 = tcp0->dst;
1460               tcp0->dst = new_port0;
1461
1462               sum0 = tcp0->checksum;
1463               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1464                                      ip4_header_t,
1465                                      dst_address /* changed member */);
1466
1467               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1468                                      ip4_header_t /* cheat */,
1469                                      length /* changed member */);
1470               tcp0->checksum = ip_csum_fold(sum0);
1471             }
1472           else
1473             {
1474               old_port0 = udp0->dst_port;
1475               udp0->dst_port = new_port0;
1476               udp0->checksum = 0;
1477             }
1478
1479         trace0:
1480
1481           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1482                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1483             {
1484               snat_out2in_trace_t *t =
1485                  vlib_add_trace (vm, node, b0, sizeof (*t));
1486               t->sw_if_index = sw_if_index0;
1487               t->next_index = next0;
1488               t->session_index = ~0;
1489               if (ses0)
1490                 t->session_index = ses0 - dm0->sessions;
1491             }
1492
1493           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1494
1495           b1 = vlib_get_buffer (vm, bi1);
1496
1497           ip1 = vlib_buffer_get_current (b1);
1498           udp1 = ip4_next_header (ip1);
1499           tcp1 = (tcp_header_t *) udp1;
1500
1501           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1502
1503           if (PREDICT_FALSE(ip1->ttl == 1))
1504             {
1505               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1506               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1507                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1508                                            0);
1509               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1510               goto trace1;
1511             }
1512
1513           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1514
1515           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
1516             {
1517               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
1518               icmp1 = (icmp46_header_t *) udp1;
1519
1520               next1 = icmp_out2in(sm, b1, ip1, icmp1, sw_if_index1,
1521                                   rx_fib_index1, node, next1, thread_index,
1522                                   &ses1, &dm1);
1523               goto trace1;
1524             }
1525
1526           key1.ext_host_addr = ip1->src_address;
1527           key1.ext_host_port = tcp1->src;
1528           key1.out_port = tcp1->dst;
1529
1530           dm1 = snat_det_map_by_out(sm, &ip1->dst_address);
1531           if (PREDICT_FALSE(!dm1))
1532             {
1533               clib_warning("unknown dst address:  %U",
1534                            format_ip4_address, &ip1->dst_address);
1535               next1 = SNAT_OUT2IN_NEXT_DROP;
1536               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1537               goto trace1;
1538             }
1539
1540           snat_det_reverse(dm1, &ip1->dst_address,
1541                            clib_net_to_host_u16(tcp1->dst), &new_addr1);
1542
1543           ses1 = snat_det_get_ses_by_out (dm1, &new_addr1, key1.as_u64);
1544           if (PREDICT_FALSE(!ses1))
1545             {
1546               clib_warning("no match src %U:%d dst %U:%d for user %U",
1547                            format_ip4_address, &ip1->src_address,
1548                            clib_net_to_host_u16 (tcp1->src),
1549                            format_ip4_address, &ip1->dst_address,
1550                            clib_net_to_host_u16 (tcp1->dst),
1551                            format_ip4_address, &new_addr1);
1552               next1 = SNAT_OUT2IN_NEXT_DROP;
1553               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1554               goto trace1;
1555             }
1556           new_port1 = ses1->in_port;
1557
1558           old_addr1 = ip1->dst_address;
1559           ip1->dst_address = new_addr1;
1560           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
1561
1562           sum1 = ip1->checksum;
1563           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1564                                  ip4_header_t,
1565                                  dst_address /* changed member */);
1566           ip1->checksum = ip_csum_fold (sum1);
1567
1568           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1569             {
1570               if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
1571                 ses1->state = SNAT_SESSION_TCP_CLOSE_WAIT;
1572               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_LAST_ACK)
1573                 snat_det_ses_close(dm1, ses1);
1574
1575               old_port1 = tcp1->dst;
1576               tcp1->dst = new_port1;
1577
1578               sum1 = tcp1->checksum;
1579               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1580                                      ip4_header_t,
1581                                      dst_address /* changed member */);
1582
1583               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1584                                      ip4_header_t /* cheat */,
1585                                      length /* changed member */);
1586               tcp1->checksum = ip_csum_fold(sum1);
1587             }
1588           else
1589             {
1590               old_port1 = udp1->dst_port;
1591               udp1->dst_port = new_port1;
1592               udp1->checksum = 0;
1593             }
1594
1595         trace1:
1596
1597           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1598                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1599             {
1600               snat_out2in_trace_t *t =
1601                  vlib_add_trace (vm, node, b1, sizeof (*t));
1602               t->sw_if_index = sw_if_index1;
1603               t->next_index = next1;
1604               t->session_index = ~0;
1605               if (ses1)
1606                 t->session_index = ses1 - dm1->sessions;
1607             }
1608
1609           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
1610
1611           /* verify speculative enqueues, maybe switch current next frame */
1612           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1613                                            to_next, n_left_to_next,
1614                                            bi0, bi1, next0, next1);
1615          }
1616
1617       while (n_left_from > 0 && n_left_to_next > 0)
1618         {
1619           u32 bi0;
1620           vlib_buffer_t * b0;
1621           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1622           u32 sw_if_index0;
1623           ip4_header_t * ip0;
1624           ip_csum_t sum0;
1625           ip4_address_t new_addr0, old_addr0;
1626           u16 new_port0, old_port0;
1627           udp_header_t * udp0;
1628           tcp_header_t * tcp0;
1629           u32 proto0;
1630           snat_det_out_key_t key0;
1631           snat_det_map_t * dm0;
1632           snat_det_session_t * ses0 = 0;
1633           u32 rx_fib_index0;
1634           icmp46_header_t * icmp0;
1635
1636           /* speculatively enqueue b0 to the current next frame */
1637           bi0 = from[0];
1638           to_next[0] = bi0;
1639           from += 1;
1640           to_next += 1;
1641           n_left_from -= 1;
1642           n_left_to_next -= 1;
1643
1644           b0 = vlib_get_buffer (vm, bi0);
1645
1646           ip0 = vlib_buffer_get_current (b0);
1647           udp0 = ip4_next_header (ip0);
1648           tcp0 = (tcp_header_t *) udp0;
1649
1650           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1651
1652           if (PREDICT_FALSE(ip0->ttl == 1))
1653             {
1654               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1655               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1656                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1657                                            0);
1658               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1659               goto trace00;
1660             }
1661
1662           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1663
1664           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1665             {
1666               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1667               icmp0 = (icmp46_header_t *) udp0;
1668
1669               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
1670                                   rx_fib_index0, node, next0, thread_index,
1671                                   &ses0, &dm0);
1672               goto trace00;
1673             }
1674
1675           key0.ext_host_addr = ip0->src_address;
1676           key0.ext_host_port = tcp0->src;
1677           key0.out_port = tcp0->dst;
1678
1679           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
1680           if (PREDICT_FALSE(!dm0))
1681             {
1682               clib_warning("unknown dst address:  %U",
1683                            format_ip4_address, &ip0->dst_address);
1684               next0 = SNAT_OUT2IN_NEXT_DROP;
1685               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1686               goto trace00;
1687             }
1688
1689           snat_det_reverse(dm0, &ip0->dst_address,
1690                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
1691
1692           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
1693           if (PREDICT_FALSE(!ses0))
1694             {
1695               clib_warning("no match src %U:%d dst %U:%d for user %U",
1696                            format_ip4_address, &ip0->src_address,
1697                            clib_net_to_host_u16 (tcp0->src),
1698                            format_ip4_address, &ip0->dst_address,
1699                            clib_net_to_host_u16 (tcp0->dst),
1700                            format_ip4_address, &new_addr0);
1701               next0 = SNAT_OUT2IN_NEXT_DROP;
1702               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1703               goto trace00;
1704             }
1705           new_port0 = ses0->in_port;
1706
1707           old_addr0 = ip0->dst_address;
1708           ip0->dst_address = new_addr0;
1709           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
1710
1711           sum0 = ip0->checksum;
1712           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1713                                  ip4_header_t,
1714                                  dst_address /* changed member */);
1715           ip0->checksum = ip_csum_fold (sum0);
1716
1717           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1718             {
1719               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1720                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
1721               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
1722                 snat_det_ses_close(dm0, ses0);
1723
1724               old_port0 = tcp0->dst;
1725               tcp0->dst = new_port0;
1726
1727               sum0 = tcp0->checksum;
1728               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1729                                      ip4_header_t,
1730                                      dst_address /* changed member */);
1731
1732               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1733                                      ip4_header_t /* cheat */,
1734                                      length /* changed member */);
1735               tcp0->checksum = ip_csum_fold(sum0);
1736             }
1737           else
1738             {
1739               old_port0 = udp0->dst_port;
1740               udp0->dst_port = new_port0;
1741               udp0->checksum = 0;
1742             }
1743
1744         trace00:
1745
1746           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1747                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1748             {
1749               snat_out2in_trace_t *t =
1750                  vlib_add_trace (vm, node, b0, sizeof (*t));
1751               t->sw_if_index = sw_if_index0;
1752               t->next_index = next0;
1753               t->session_index = ~0;
1754               if (ses0)
1755                 t->session_index = ses0 - dm0->sessions;
1756             }
1757
1758           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1759
1760           /* verify speculative enqueue, maybe switch current next frame */
1761           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1762                                            to_next, n_left_to_next,
1763                                            bi0, next0);
1764         }
1765
1766       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1767     }
1768
1769   vlib_node_increment_counter (vm, snat_det_out2in_node.index,
1770                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1771                                pkts_processed);
1772   return frame->n_vectors;
1773 }
1774
1775 VLIB_REGISTER_NODE (snat_det_out2in_node) = {
1776   .function = snat_det_out2in_node_fn,
1777   .name = "snat-det-out2in",
1778   .vector_size = sizeof (u32),
1779   .format_trace = format_snat_out2in_trace,
1780   .type = VLIB_NODE_TYPE_INTERNAL,
1781
1782   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1783   .error_strings = snat_out2in_error_strings,
1784
1785   .runtime_data_bytes = sizeof (snat_runtime_t),
1786
1787   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1788
1789   /* edit / add dispositions here */
1790   .next_nodes = {
1791     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1792     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1793     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1794   },
1795 };
1796 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn);
1797
1798 /**
1799  * Get address and port values to be used for packet SNAT translation
1800  * and create session if needed
1801  *
1802  * @param[in,out] sm             SNAT main
1803  * @param[in,out] node           SNAT node runtime
1804  * @param[in] thread_index       thread index
1805  * @param[in,out] b0             buffer containing packet to be translated
1806  * @param[out] p_proto           protocol used for matching
1807  * @param[out] p_value           address and port after NAT translation
1808  * @param[out] p_dont_translate  if packet should not be translated
1809  * @param d                      optional parameter
1810  * @param e                      optional parameter
1811  */
1812 u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node,
1813                           u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
1814                           snat_session_key_t *p_value,
1815                           u8 *p_dont_translate, void *d, void *e)
1816 {
1817   ip4_header_t *ip0;
1818   icmp46_header_t *icmp0;
1819   u32 sw_if_index0;
1820   u8 protocol;
1821   snat_det_out_key_t key0;
1822   u8 dont_translate = 0;
1823   u32 next0 = ~0;
1824   icmp_echo_header_t *echo0, *inner_echo0 = 0;
1825   ip4_header_t *inner_ip0;
1826   void *l4_header = 0;
1827   icmp46_header_t *inner_icmp0;
1828   snat_det_map_t * dm0 = 0;
1829   ip4_address_t new_addr0 = {{0}};
1830   snat_det_session_t * ses0 = 0;
1831   ip4_address_t out_addr;
1832
1833   ip0 = vlib_buffer_get_current (b0);
1834   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
1835   echo0 = (icmp_echo_header_t *)(icmp0+1);
1836   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1837
1838   if (!icmp_is_error_message (icmp0))
1839     {
1840       protocol = SNAT_PROTOCOL_ICMP;
1841       key0.ext_host_addr = ip0->src_address;
1842       key0.ext_host_port = 0;
1843       key0.out_port = echo0->identifier;
1844       out_addr = ip0->dst_address;
1845     }
1846   else
1847     {
1848       inner_ip0 = (ip4_header_t *)(echo0+1);
1849       l4_header = ip4_next_header (inner_ip0);
1850       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
1851       key0.ext_host_addr = inner_ip0->dst_address;
1852       out_addr = inner_ip0->src_address;
1853       switch (protocol)
1854         {
1855         case SNAT_PROTOCOL_ICMP:
1856           inner_icmp0 = (icmp46_header_t*)l4_header;
1857           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
1858           key0.ext_host_port = 0;
1859           key0.out_port = inner_echo0->identifier;
1860           break;
1861         case SNAT_PROTOCOL_UDP:
1862         case SNAT_PROTOCOL_TCP:
1863           key0.ext_host_port = ((tcp_udp_header_t*)l4_header)->dst_port;
1864           key0.out_port = ((tcp_udp_header_t*)l4_header)->src_port;
1865           break;
1866         default:
1867           b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
1868           next0 = SNAT_OUT2IN_NEXT_DROP;
1869           goto out;
1870         }
1871     }
1872
1873   dm0 = snat_det_map_by_out(sm, &out_addr);
1874   if (PREDICT_FALSE(!dm0))
1875     {
1876       /* Don't NAT packet aimed at the intfc address */
1877       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
1878                                           ip0->dst_address.as_u32)))
1879         {
1880           dont_translate = 1;
1881           goto out;
1882         }
1883       clib_warning("unknown dst address:  %U",
1884                    format_ip4_address, &ip0->dst_address);
1885       goto out;
1886     }
1887
1888   snat_det_reverse(dm0, &ip0->dst_address,
1889                    clib_net_to_host_u16(key0.out_port), &new_addr0);
1890
1891   ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
1892   if (PREDICT_FALSE(!ses0))
1893     {
1894       /* Don't NAT packet aimed at the intfc address */
1895       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
1896                                           ip0->dst_address.as_u32)))
1897         {
1898           dont_translate = 1;
1899           goto out;
1900         }
1901       clib_warning("no match src %U:%d dst %U:%d for user %U",
1902                    format_ip4_address, &key0.ext_host_addr,
1903                    clib_net_to_host_u16 (key0.ext_host_port),
1904                    format_ip4_address, &out_addr,
1905                    clib_net_to_host_u16 (key0.out_port),
1906                    format_ip4_address, &new_addr0);
1907       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1908       next0 = SNAT_OUT2IN_NEXT_DROP;
1909       goto out;
1910     }
1911
1912   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
1913                     !icmp_is_error_message (icmp0)))
1914     {
1915       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
1916       next0 = SNAT_OUT2IN_NEXT_DROP;
1917       goto out;
1918     }
1919
1920   goto out;
1921
1922 out:
1923   *p_proto = protocol;
1924   if (ses0)
1925     {
1926       p_value->addr = new_addr0;
1927       p_value->fib_index = sm->inside_fib_index;
1928       p_value->port = ses0->in_port;
1929     }
1930   *p_dont_translate = dont_translate;
1931   if (d)
1932     *(snat_det_session_t**)d = ses0;
1933   if (e)
1934     *(snat_det_map_t**)e = dm0;
1935   return next0;
1936 }
1937
1938 /**********************/
1939 /*** worker handoff ***/
1940 /**********************/
1941 static uword
1942 snat_out2in_worker_handoff_fn (vlib_main_t * vm,
1943                                vlib_node_runtime_t * node,
1944                                vlib_frame_t * frame)
1945 {
1946   snat_main_t *sm = &snat_main;
1947   vlib_thread_main_t *tm = vlib_get_thread_main ();
1948   u32 n_left_from, *from, *to_next = 0;
1949   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
1950   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
1951     = 0;
1952   vlib_frame_queue_elt_t *hf = 0;
1953   vlib_frame_t *f = 0;
1954   int i;
1955   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
1956   u32 next_worker_index = 0;
1957   u32 current_worker_index = ~0;
1958   u32 thread_index = vlib_get_thread_index ();
1959
1960   ASSERT (vec_len (sm->workers));
1961
1962   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
1963     {
1964       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
1965
1966       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
1967                                sm->first_worker_index + sm->num_workers - 1,
1968                                (vlib_frame_queue_t *) (~0));
1969     }
1970
1971   from = vlib_frame_vector_args (frame);
1972   n_left_from = frame->n_vectors;
1973
1974   while (n_left_from > 0)
1975     {
1976       u32 bi0;
1977       vlib_buffer_t *b0;
1978       u32 sw_if_index0;
1979       u32 rx_fib_index0;
1980       ip4_header_t * ip0;
1981       u8 do_handoff;
1982
1983       bi0 = from[0];
1984       from += 1;
1985       n_left_from -= 1;
1986
1987       b0 = vlib_get_buffer (vm, bi0);
1988
1989       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1990       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1991
1992       ip0 = vlib_buffer_get_current (b0);
1993
1994       next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0);
1995
1996       if (PREDICT_FALSE (next_worker_index != thread_index))
1997         {
1998           do_handoff = 1;
1999
2000           if (next_worker_index != current_worker_index)
2001             {
2002               if (hf)
2003                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2004
2005               hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index,
2006                                                       next_worker_index,
2007                                                       handoff_queue_elt_by_worker_index);
2008
2009               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2010               to_next_worker = &hf->buffer_index[hf->n_vectors];
2011               current_worker_index = next_worker_index;
2012             }
2013
2014           /* enqueue to correct worker thread */
2015           to_next_worker[0] = bi0;
2016           to_next_worker++;
2017           n_left_to_next_worker--;
2018
2019           if (n_left_to_next_worker == 0)
2020             {
2021               hf->n_vectors = VLIB_FRAME_SIZE;
2022               vlib_put_frame_queue_elt (hf);
2023               current_worker_index = ~0;
2024               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2025               hf = 0;
2026             }
2027         }
2028       else
2029         {
2030           do_handoff = 0;
2031           /* if this is 1st frame */
2032           if (!f)
2033             {
2034               f = vlib_get_frame_to_node (vm, sm->out2in_node_index);
2035               to_next = vlib_frame_vector_args (f);
2036             }
2037
2038           to_next[0] = bi0;
2039           to_next += 1;
2040           f->n_vectors++;
2041         }
2042
2043       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2044                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2045         {
2046           snat_out2in_worker_handoff_trace_t *t =
2047             vlib_add_trace (vm, node, b0, sizeof (*t));
2048           t->next_worker_index = next_worker_index;
2049           t->do_handoff = do_handoff;
2050         }
2051     }
2052
2053   if (f)
2054     vlib_put_frame_to_node (vm, sm->out2in_node_index, f);
2055
2056   if (hf)
2057     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2058
2059   /* Ship frames to the worker nodes */
2060   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2061     {
2062       if (handoff_queue_elt_by_worker_index[i])
2063         {
2064           hf = handoff_queue_elt_by_worker_index[i];
2065           /*
2066            * It works better to let the handoff node
2067            * rate-adapt, always ship the handoff queue element.
2068            */
2069           if (1 || hf->n_vectors == hf->last_n_vectors)
2070             {
2071               vlib_put_frame_queue_elt (hf);
2072               handoff_queue_elt_by_worker_index[i] = 0;
2073             }
2074           else
2075             hf->last_n_vectors = hf->n_vectors;
2076         }
2077       congested_handoff_queue_by_worker_index[i] =
2078         (vlib_frame_queue_t *) (~0);
2079     }
2080   hf = 0;
2081   current_worker_index = ~0;
2082   return frame->n_vectors;
2083 }
2084
2085 VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = {
2086   .function = snat_out2in_worker_handoff_fn,
2087   .name = "snat-out2in-worker-handoff",
2088   .vector_size = sizeof (u32),
2089   .format_trace = format_snat_out2in_worker_handoff_trace,
2090   .type = VLIB_NODE_TYPE_INTERNAL,
2091   
2092   .n_next_nodes = 1,
2093
2094   .next_nodes = {
2095     [0] = "error-drop",
2096   },
2097 };
2098
2099 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn);
2100
2101 static uword
2102 snat_out2in_fast_node_fn (vlib_main_t * vm,
2103                           vlib_node_runtime_t * node,
2104                           vlib_frame_t * frame)
2105 {
2106   u32 n_left_from, * from, * to_next;
2107   snat_out2in_next_t next_index;
2108   u32 pkts_processed = 0;
2109   snat_main_t * sm = &snat_main;
2110
2111   from = vlib_frame_vector_args (frame);
2112   n_left_from = frame->n_vectors;
2113   next_index = node->cached_next_index;
2114
2115   while (n_left_from > 0)
2116     {
2117       u32 n_left_to_next;
2118
2119       vlib_get_next_frame (vm, node, next_index,
2120                            to_next, n_left_to_next);
2121
2122       while (n_left_from > 0 && n_left_to_next > 0)
2123         {
2124           u32 bi0;
2125           vlib_buffer_t * b0;
2126           u32 next0 = SNAT_OUT2IN_NEXT_DROP;
2127           u32 sw_if_index0;
2128           ip4_header_t * ip0;
2129           ip_csum_t sum0;
2130           u32 new_addr0, old_addr0;
2131           u16 new_port0, old_port0;
2132           udp_header_t * udp0;
2133           tcp_header_t * tcp0;
2134           icmp46_header_t * icmp0;
2135           snat_session_key_t key0, sm0;
2136           u32 proto0;
2137           u32 rx_fib_index0;
2138
2139           /* speculatively enqueue b0 to the current next frame */
2140           bi0 = from[0];
2141           to_next[0] = bi0;
2142           from += 1;
2143           to_next += 1;
2144           n_left_from -= 1;
2145           n_left_to_next -= 1;
2146
2147           b0 = vlib_get_buffer (vm, bi0);
2148
2149           ip0 = vlib_buffer_get_current (b0);
2150           udp0 = ip4_next_header (ip0);
2151           tcp0 = (tcp_header_t *) udp0;
2152           icmp0 = (icmp46_header_t *) udp0;
2153
2154           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2155           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2156
2157           vnet_feature_next (sw_if_index0, &next0, b0);
2158
2159           if (PREDICT_FALSE(ip0->ttl == 1))
2160             {
2161               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2162               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2163                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2164                                            0);
2165               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2166               goto trace00;
2167             }
2168
2169           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2170
2171           if (PREDICT_FALSE (proto0 == ~0))
2172               goto trace00;
2173
2174           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2175             {
2176               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2177                                   rx_fib_index0, node, next0, ~0, 0, 0);
2178               goto trace00;
2179             }
2180
2181           key0.addr = ip0->dst_address;
2182           key0.port = udp0->dst_port;
2183           key0.fib_index = rx_fib_index0;
2184
2185           if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
2186             {
2187               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2188               goto trace00;
2189             }
2190
2191           new_addr0 = sm0.addr.as_u32;
2192           new_port0 = sm0.port;
2193           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2194           old_addr0 = ip0->dst_address.as_u32;
2195           ip0->dst_address.as_u32 = new_addr0;
2196
2197           sum0 = ip0->checksum;
2198           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2199                                  ip4_header_t,
2200                                  dst_address /* changed member */);
2201           ip0->checksum = ip_csum_fold (sum0);
2202
2203           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
2204             {
2205                if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2206                 {
2207                   old_port0 = tcp0->dst_port;
2208                   tcp0->dst_port = new_port0;
2209
2210                   sum0 = tcp0->checksum;
2211                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2212                                          ip4_header_t,
2213                                          dst_address /* changed member */);
2214
2215                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2216                                          ip4_header_t /* cheat */,
2217                                          length /* changed member */);
2218                   tcp0->checksum = ip_csum_fold(sum0);
2219                 }
2220               else
2221                 {
2222                   old_port0 = udp0->dst_port;
2223                   udp0->dst_port = new_port0;
2224                   udp0->checksum = 0;
2225                 }
2226             }
2227           else
2228             {
2229               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2230                 {
2231                   sum0 = tcp0->checksum;
2232                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2233                                          ip4_header_t,
2234                                          dst_address /* changed member */);
2235
2236                   tcp0->checksum = ip_csum_fold(sum0);
2237                 }
2238             }
2239
2240         trace00:
2241
2242           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2243                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2244             {
2245               snat_out2in_trace_t *t =
2246                  vlib_add_trace (vm, node, b0, sizeof (*t));
2247               t->sw_if_index = sw_if_index0;
2248               t->next_index = next0;
2249             }
2250
2251           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2252
2253           /* verify speculative enqueue, maybe switch current next frame */
2254           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2255                                            to_next, n_left_to_next,
2256                                            bi0, next0);
2257         }
2258
2259       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2260     }
2261
2262   vlib_node_increment_counter (vm, snat_out2in_fast_node.index,
2263                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2264                                pkts_processed);
2265   return frame->n_vectors;
2266 }
2267
2268 VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
2269   .function = snat_out2in_fast_node_fn,
2270   .name = "snat-out2in-fast",
2271   .vector_size = sizeof (u32),
2272   .format_trace = format_snat_out2in_fast_trace,
2273   .type = VLIB_NODE_TYPE_INTERNAL,
2274   
2275   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2276   .error_strings = snat_out2in_error_strings,
2277
2278   .runtime_data_bytes = sizeof (snat_runtime_t),
2279   
2280   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
2281
2282   /* edit / add dispositions here */
2283   .next_nodes = {
2284     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2285     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
2286     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2287   },
2288 };
2289 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn);