NAT: Rename snat plugin to nat (VPP-955)
[vpp.git] / src / plugins / nat / out2in.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/udp/udp.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <nat/nat.h>
26 #include <nat/nat_ipfix_logging.h>
27 #include <nat/nat_det.h>
28
29 #include <vppinfra/hash.h>
30 #include <vppinfra/error.h>
31 #include <vppinfra/elog.h>
32
33 typedef struct {
34   u32 sw_if_index;
35   u32 next_index;
36   u32 session_index;
37 } snat_out2in_trace_t;
38
39 typedef struct {
40   u32 next_worker_index;
41   u8 do_handoff;
42 } snat_out2in_worker_handoff_trace_t;
43
44 /* packet trace format function */
45 static u8 * format_snat_out2in_trace (u8 * s, va_list * args)
46 {
47   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
50
51   s = format (s, "NAT44_OUT2IN: sw_if_index %d, next index %d, session index %d",
52               t->sw_if_index, t->next_index, t->session_index);
53   return s;
54 }
55
56 static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args)
57 {
58   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
59   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
60   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
61
62   s = format (s, "NAT44_OUT2IN_FAST: sw_if_index %d, next index %d",
63               t->sw_if_index, t->next_index);
64   return s;
65 }
66
67 static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args)
68 {
69   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
70   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
71   snat_out2in_worker_handoff_trace_t * t =
72     va_arg (*args, snat_out2in_worker_handoff_trace_t *);
73   char * m;
74
75   m = t->do_handoff ? "next worker" : "same worker";
76   s = format (s, "NAT44_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
77
78   return s;
79 }
80
81 vlib_node_registration_t snat_out2in_node;
82 vlib_node_registration_t snat_out2in_fast_node;
83 vlib_node_registration_t snat_out2in_worker_handoff_node;
84 vlib_node_registration_t snat_det_out2in_node;
85
/*
 * Error counters of the out2in nodes, kept as an X-macro list: every
 * _(symbol, text) entry expands to one SNAT_OUT2IN_ERROR_<symbol>
 * enumerator and to the matching entry of snat_out2in_error_strings[].
 */
#define foreach_snat_out2in_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(OUT2IN_PACKETS, "Good out2in packets processed")      \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(NO_TRANSLATION, "No translation")

/* Error codes; SNAT_OUT2IN_N_ERROR is the number of entries */
typedef enum
{
#define _(name, text) SNAT_OUT2IN_ERROR_##name,
  foreach_snat_out2in_error
#undef _
  SNAT_OUT2IN_N_ERROR,
} snat_out2in_error_t;

/* Human readable strings, indexed by snat_out2in_error_t */
static char *snat_out2in_error_strings[] = {
#define _(name, text) text,
  foreach_snat_out2in_error
#undef _
};
104
/* Next-node dispositions of the out2in node */
typedef enum
{
  /* discard the packet */
  SNAT_OUT2IN_NEXT_DROP,
  /* default disposition of a packet in the out2in node */
  SNAT_OUT2IN_NEXT_LOOKUP,
  /* used when an ICMP error is to be generated (e.g. TTL expired) */
  SNAT_OUT2IN_NEXT_ICMP_ERROR,
  /* number of dispositions */
  SNAT_OUT2IN_N_NEXT,
} snat_out2in_next_t;
111
112 /**
113  * @brief Create session for static mapping.
114  *
115  * Create NAT session initiated by host from external network with static
116  * mapping.
117  *
118  * @param sm     NAT main.
119  * @param b0     Vlib buffer.
120  * @param in2out In2out NAT44 session key.
121  * @param out2in Out2in NAT44 session key.
122  * @param node   Vlib node.
123  *
124  * @returns SNAT session if successfully created otherwise 0.
125  */
126 static inline snat_session_t *
127 create_session_for_static_mapping (snat_main_t *sm,
128                                    vlib_buffer_t *b0,
129                                    snat_session_key_t in2out,
130                                    snat_session_key_t out2in,
131                                    vlib_node_runtime_t * node,
132                                    u32 thread_index)
133 {
134   snat_user_t *u;
135   snat_user_key_t user_key;
136   snat_session_t *s;
137   clib_bihash_kv_8_8_t kv0, value0;
138   dlist_elt_t * per_user_translation_list_elt;
139   dlist_elt_t * per_user_list_head_elt;
140   ip4_header_t *ip0;
141
142   ip0 = vlib_buffer_get_current (b0);
143
144   user_key.addr = in2out.addr;
145   user_key.fib_index = in2out.fib_index;
146   kv0.key = user_key.as_u64;
147
148   /* Ever heard of the "user" = inside ip4 address before? */
149   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
150     {
151       /* no, make a new one */
152       pool_get (sm->per_thread_data[thread_index].users, u);
153       memset (u, 0, sizeof (*u));
154       u->addr = in2out.addr;
155       u->fib_index = in2out.fib_index;
156
157       pool_get (sm->per_thread_data[thread_index].list_pool,
158                 per_user_list_head_elt);
159
160       u->sessions_per_user_list_head_index = per_user_list_head_elt -
161         sm->per_thread_data[thread_index].list_pool;
162
163       clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
164                        u->sessions_per_user_list_head_index);
165
166       kv0.value = u - sm->per_thread_data[thread_index].users;
167
168       /* add user */
169       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
170
171       /* add non-traslated packets worker lookup */
172       kv0.value = thread_index;
173       clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
174     }
175   else
176     {
177       u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
178                              value0.value);
179     }
180
181   pool_get (sm->per_thread_data[thread_index].sessions, s);
182   memset (s, 0, sizeof (*s));
183
184   s->outside_address_index = ~0;
185   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
186   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
187   u->nstaticsessions++;
188
189   /* Create list elts */
190   pool_get (sm->per_thread_data[thread_index].list_pool,
191             per_user_translation_list_elt);
192   clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
193                    per_user_translation_list_elt -
194                    sm->per_thread_data[thread_index].list_pool);
195
196   per_user_translation_list_elt->value =
197     s - sm->per_thread_data[thread_index].sessions;
198   s->per_user_index =
199     per_user_translation_list_elt - sm->per_thread_data[thread_index].list_pool;
200   s->per_user_list_head_index = u->sessions_per_user_list_head_index;
201
202   clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
203                       s->per_user_list_head_index,
204                       per_user_translation_list_elt -
205                       sm->per_thread_data[thread_index].list_pool);
206
207   s->in2out = in2out;
208   s->out2in = out2in;
209   s->in2out.protocol = out2in.protocol;
210
211   /* Add to translation hashes */
212   kv0.key = s->in2out.as_u64;
213   kv0.value = s - sm->per_thread_data[thread_index].sessions;
214   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
215       clib_warning ("in2out key add failed");
216
217   kv0.key = s->out2in.as_u64;
218   kv0.value = s - sm->per_thread_data[thread_index].sessions;
219
220   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
221       clib_warning ("out2in key add failed");
222
223   /* log NAT event */
224   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
225                                       s->out2in.addr.as_u32,
226                                       s->in2out.protocol,
227                                       s->in2out.port,
228                                       s->out2in.port,
229                                       s->in2out.fib_index);
230    return s;
231 }
232
233 static_always_inline
234 snat_out2in_error_t icmp_get_key(ip4_header_t *ip0,
235                                  snat_session_key_t *p_key0)
236 {
237   icmp46_header_t *icmp0;
238   snat_session_key_t key0;
239   icmp_echo_header_t *echo0, *inner_echo0 = 0;
240   ip4_header_t *inner_ip0;
241   void *l4_header = 0;
242   icmp46_header_t *inner_icmp0;
243
244   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
245   echo0 = (icmp_echo_header_t *)(icmp0+1);
246
247   if (!icmp_is_error_message (icmp0))
248     {
249       key0.protocol = SNAT_PROTOCOL_ICMP;
250       key0.addr = ip0->dst_address;
251       key0.port = echo0->identifier;
252     }
253   else
254     {
255       inner_ip0 = (ip4_header_t *)(echo0+1);
256       l4_header = ip4_next_header (inner_ip0);
257       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
258       key0.addr = inner_ip0->src_address;
259       switch (key0.protocol)
260         {
261         case SNAT_PROTOCOL_ICMP:
262           inner_icmp0 = (icmp46_header_t*)l4_header;
263           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
264           key0.port = inner_echo0->identifier;
265           break;
266         case SNAT_PROTOCOL_UDP:
267         case SNAT_PROTOCOL_TCP:
268           key0.port = ((tcp_udp_header_t*)l4_header)->src_port;
269           break;
270         default:
271           return SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL;
272         }
273     }
274   *p_key0 = key0;
275   return -1; /* success */
276 }
277
278 /**
279  * Get address and port values to be used for ICMP packet translation
280  * and create session if needed
281  *
282  * @param[in,out] sm             NAT main
283  * @param[in,out] node           NAT node runtime
284  * @param[in] thread_index       thread index
285  * @param[in,out] b0             buffer containing packet to be translated
286  * @param[out] p_proto           protocol used for matching
287  * @param[out] p_value           address and port after NAT translation
288  * @param[out] p_dont_translate  if packet should not be translated
289  * @param d                      optional parameter
290  * @param e                      optional parameter
291  */
292 u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
293                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
294                            snat_session_key_t *p_value,
295                            u8 *p_dont_translate, void *d, void *e)
296 {
297   ip4_header_t *ip0;
298   icmp46_header_t *icmp0;
299   u32 sw_if_index0;
300   u32 rx_fib_index0;
301   snat_session_key_t key0;
302   snat_session_key_t sm0;
303   snat_session_t *s0 = 0;
304   u8 dont_translate = 0;
305   clib_bihash_kv_8_8_t kv0, value0;
306   u8 is_addr_only;
307   u32 next0 = ~0;
308   int err;
309
310   ip0 = vlib_buffer_get_current (b0);
311   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
312   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
313   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
314
315   key0.protocol = 0;
316
317   err = icmp_get_key (ip0, &key0);
318   if (err != -1)
319     {
320       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
321       next0 = SNAT_OUT2IN_NEXT_DROP;
322       goto out;
323     }
324   key0.fib_index = rx_fib_index0;
325
326   kv0.key = key0.as_u64;
327
328   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
329     {
330       /* Try to match static mapping by external address and port,
331          destination address and port in packet */
332       if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only))
333         {
334           /* Don't NAT packet aimed at the intfc address */
335           if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
336                                               ip0->dst_address.as_u32)))
337             {
338               dont_translate = 1;
339               goto out;
340             }
341           b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
342           next0 = SNAT_OUT2IN_NEXT_DROP;
343           goto out;
344         }
345
346       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
347                         (icmp0->type != ICMP4_echo_request || !is_addr_only)))
348         {
349           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
350           next0 = SNAT_OUT2IN_NEXT_DROP;
351           goto out;
352         }
353
354       /* Create session initiated by host from external network */
355       s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
356                                              node, thread_index);
357
358       if (!s0)
359         {
360           next0 = SNAT_OUT2IN_NEXT_DROP;
361           goto out;
362         }
363     }
364   else
365     {
366       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
367                         icmp0->type != ICMP4_echo_request &&
368                         !icmp_is_error_message (icmp0)))
369         {
370           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
371           next0 = SNAT_OUT2IN_NEXT_DROP;
372           goto out;
373         }
374
375       s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
376                               value0.value);
377     }
378
379 out:
380   *p_proto = key0.protocol;
381   if (s0)
382     *p_value = s0->in2out;
383   *p_dont_translate = dont_translate;
384   if (d)
385     *(snat_session_t**)d = s0;
386   return next0;
387 }
388
389 /**
390  * Get address and port values to be used for ICMP packet translation
391  *
392  * @param[in] sm                 NAT main
393  * @param[in,out] node           NAT node runtime
394  * @param[in] thread_index       thread index
395  * @param[in,out] b0             buffer containing packet to be translated
396  * @param[out] p_proto           protocol used for matching
397  * @param[out] p_value           address and port after NAT translation
398  * @param[out] p_dont_translate  if packet should not be translated
399  * @param d                      optional parameter
400  * @param e                      optional parameter
401  */
402 u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
403                            u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
404                            snat_session_key_t *p_value,
405                            u8 *p_dont_translate, void *d, void *e)
406 {
407   ip4_header_t *ip0;
408   icmp46_header_t *icmp0;
409   u32 sw_if_index0;
410   u32 rx_fib_index0;
411   snat_session_key_t key0;
412   snat_session_key_t sm0;
413   u8 dont_translate = 0;
414   u8 is_addr_only;
415   u32 next0 = ~0;
416   int err;
417
418   ip0 = vlib_buffer_get_current (b0);
419   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
420   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
421   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
422
423   err = icmp_get_key (ip0, &key0);
424   if (err != -1)
425     {
426       b0->error = node->errors[err];
427       next0 = SNAT_OUT2IN_NEXT_DROP;
428       goto out2;
429     }
430   key0.fib_index = rx_fib_index0;
431
432   if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only))
433     {
434       /* Don't NAT packet aimed at the intfc address */
435       if (is_interface_addr(sm, node, sw_if_index0, ip0->dst_address.as_u32))
436         {
437           dont_translate = 1;
438           goto out;
439         }
440       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
441       next0 = SNAT_OUT2IN_NEXT_DROP;
442       goto out;
443     }
444
445   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
446                     (icmp0->type != ICMP4_echo_request || !is_addr_only) &&
447                     !icmp_is_error_message (icmp0)))
448     {
449       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
450       next0 = SNAT_OUT2IN_NEXT_DROP;
451       goto out;
452     }
453
454 out:
455   *p_value = sm0;
456 out2:
457   *p_proto = key0.protocol;
458   *p_dont_translate = dont_translate;
459   return next0;
460 }
461
462 static inline u32 icmp_out2in (snat_main_t *sm,
463                                vlib_buffer_t * b0,
464                                ip4_header_t * ip0,
465                                icmp46_header_t * icmp0,
466                                u32 sw_if_index0,
467                                u32 rx_fib_index0,
468                                vlib_node_runtime_t * node,
469                                u32 next0,
470                                u32 thread_index,
471                                void *d,
472                                void *e)
473 {
474   snat_session_key_t sm0;
475   u8 protocol;
476   icmp_echo_header_t *echo0, *inner_echo0 = 0;
477   ip4_header_t *inner_ip0 = 0;
478   void *l4_header = 0;
479   icmp46_header_t *inner_icmp0;
480   u8 dont_translate;
481   u32 new_addr0, old_addr0;
482   u16 old_id0, new_id0;
483   ip_csum_t sum0;
484   u16 checksum0;
485   u32 next0_tmp;
486
487   echo0 = (icmp_echo_header_t *)(icmp0+1);
488
489   next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0,
490                                        &protocol, &sm0, &dont_translate, d, e);
491   if (next0_tmp != ~0)
492     next0 = next0_tmp;
493   if (next0 == SNAT_OUT2IN_NEXT_DROP || dont_translate)
494     goto out;
495
496   sum0 = ip_incremental_checksum (0, icmp0,
497                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
498   checksum0 = ~ip_csum_fold (sum0);
499   if (checksum0 != 0 && checksum0 != 0xffff)
500     {
501       next0 = SNAT_OUT2IN_NEXT_DROP;
502       goto out;
503     }
504
505   old_addr0 = ip0->dst_address.as_u32;
506   new_addr0 = ip0->dst_address.as_u32 = sm0.addr.as_u32;
507   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
508
509   sum0 = ip0->checksum;
510   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
511                          dst_address /* changed member */);
512   ip0->checksum = ip_csum_fold (sum0);
513
514   if (!icmp_is_error_message (icmp0))
515     {
516       new_id0 = sm0.port;
517       if (PREDICT_FALSE(new_id0 != echo0->identifier))
518         {
519           old_id0 = echo0->identifier;
520           new_id0 = sm0.port;
521           echo0->identifier = new_id0;
522
523           sum0 = icmp0->checksum;
524           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
525                                  identifier /* changed member */);
526           icmp0->checksum = ip_csum_fold (sum0);
527         }
528     }
529   else
530     {
531       inner_ip0 = (ip4_header_t *)(echo0+1);
532       l4_header = ip4_next_header (inner_ip0);
533
534       if (!ip4_header_checksum_is_valid (inner_ip0))
535         {
536           next0 = SNAT_OUT2IN_NEXT_DROP;
537           goto out;
538         }
539
540       old_addr0 = inner_ip0->src_address.as_u32;
541       inner_ip0->src_address = sm0.addr;
542       new_addr0 = inner_ip0->src_address.as_u32;
543
544       sum0 = icmp0->checksum;
545       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
546                              src_address /* changed member */);
547       icmp0->checksum = ip_csum_fold (sum0);
548
549       switch (protocol)
550         {
551         case SNAT_PROTOCOL_ICMP:
552           inner_icmp0 = (icmp46_header_t*)l4_header;
553           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
554
555           old_id0 = inner_echo0->identifier;
556           new_id0 = sm0.port;
557           inner_echo0->identifier = new_id0;
558
559           sum0 = icmp0->checksum;
560           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
561                                  identifier);
562           icmp0->checksum = ip_csum_fold (sum0);
563           break;
564         case SNAT_PROTOCOL_UDP:
565         case SNAT_PROTOCOL_TCP:
566           old_id0 = ((tcp_udp_header_t*)l4_header)->src_port;
567           new_id0 = sm0.port;
568           ((tcp_udp_header_t*)l4_header)->src_port = new_id0;
569
570           sum0 = icmp0->checksum;
571           sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
572                                  src_port);
573           icmp0->checksum = ip_csum_fold (sum0);
574           break;
575         default:
576           ASSERT(0);
577         }
578     }
579
580 out:
581   return next0;
582 }
583
584
585 static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
586                                          vlib_buffer_t * b0,
587                                          ip4_header_t * ip0,
588                                          icmp46_header_t * icmp0,
589                                          u32 sw_if_index0,
590                                          u32 rx_fib_index0,
591                                          vlib_node_runtime_t * node,
592                                          u32 next0, f64 now,
593                                          u32 thread_index,
594                                          snat_session_t ** p_s0)
595 {
596   next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
597                       next0, thread_index, p_s0, 0);
598   snat_session_t * s0 = *p_s0;
599   if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
600     {
601       /* Accounting */
602       s0->last_heard = now;
603       s0->total_pkts++;
604       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
605       /* Per-user LRU list maintenance for dynamic translation */
606       if (!snat_is_session_static (s0))
607         {
608           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
609                              s0->per_user_index);
610           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
611                               s0->per_user_list_head_index,
612                               s0->per_user_index);
613         }
614     }
615   return next0;
616 }
617
618 static void
619 snat_out2in_unknown_proto (snat_main_t *sm,
620                            vlib_buffer_t * b,
621                            ip4_header_t * ip,
622                            u32 rx_fib_index,
623                            u32 thread_index,
624                            f64 now,
625                            vlib_main_t * vm)
626 {
627   clib_bihash_kv_8_8_t kv, value;
628   clib_bihash_kv_16_8_t s_kv, s_value;
629   snat_static_mapping_t *m;
630   snat_session_key_t m_key;
631   u32 old_addr, new_addr;
632   ip_csum_t sum;
633   snat_unk_proto_ses_key_t key;
634   snat_session_t * s;
635   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
636   snat_user_key_t u_key;
637   snat_user_t *u;
638   dlist_elt_t *head, *elt;
639
640   old_addr = ip->dst_address.as_u32;
641
642   key.l_addr = ip->dst_address;
643   key.r_addr = ip->src_address;
644   key.fib_index = rx_fib_index;
645   key.proto = ip->protocol;
646   key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0;
647   s_kv.key[0] = key.as_u64[0];
648   s_kv.key[1] = key.as_u64[1];
649
650   if (!clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value))
651     {
652       s = pool_elt_at_index (tsm->sessions, s_value.value);
653       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
654     }
655   else
656     {
657       m_key.addr = ip->dst_address;
658       m_key.port = 0;
659       m_key.protocol = 0;
660       m_key.fib_index = rx_fib_index;
661       kv.key = m_key.as_u64;
662       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
663         return;
664
665       m = pool_elt_at_index (sm->static_mappings, value.value);
666
667       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
668
669       u_key.addr = ip->src_address;
670       u_key.fib_index = m->fib_index;
671       kv.key = u_key.as_u64;
672
673       /* Ever heard of the "user" = src ip4 address before? */
674       if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
675         {
676           /* no, make a new one */
677           pool_get (tsm->users, u);
678           memset (u, 0, sizeof (*u));
679           u->addr = ip->src_address;
680           u->fib_index = rx_fib_index;
681
682           pool_get (tsm->list_pool, head);
683           u->sessions_per_user_list_head_index = head - tsm->list_pool;
684
685           clib_dlist_init (tsm->list_pool,
686                            u->sessions_per_user_list_head_index);
687
688           kv.value = u - tsm->users;
689
690           /* add user */
691           clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1);
692         }
693       else
694         {
695           u = pool_elt_at_index (tsm->users, value.value);
696         }
697
698       /* Create a new session */
699       pool_get (tsm->sessions, s);
700       memset (s, 0, sizeof (*s));
701
702       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
703       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
704       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
705       s->outside_address_index = ~0;
706       s->out2in.addr.as_u32 = old_addr;
707       s->out2in.fib_index = rx_fib_index;
708       s->in2out.addr.as_u32 = new_addr;
709       s->in2out.fib_index = m->fib_index;
710       s->in2out.port = s->out2in.port = ip->protocol;
711       u->nstaticsessions++;
712
713       /* Create list elts */
714       pool_get (tsm->list_pool, elt);
715       clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
716       elt->value = s - tsm->sessions;
717       s->per_user_index = elt - tsm->list_pool;
718       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
719       clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
720                           s->per_user_index);
721
722       /* Add to lookup tables */
723       s_kv.value = s - tsm->sessions;
724       if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 1))
725         clib_warning ("out2in key add failed");
726
727       key.l_addr = ip->dst_address;
728       key.fib_index = m->fib_index;
729       s_kv.key[0] = key.as_u64[0];
730       s_kv.key[1] = key.as_u64[1];
731       if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 1))
732         clib_warning ("in2out key add failed");
733    }
734
735   /* Update IP checksum */
736   sum = ip->checksum;
737   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
738   ip->checksum = ip_csum_fold (sum);
739
740   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
741
742   /* Accounting */
743   s->last_heard = now;
744   s->total_pkts++;
745   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
746   /* Per-user LRU list maintenance */
747   clib_dlist_remove (tsm->list_pool, s->per_user_index);
748   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
749                       s->per_user_index);
750 }
751
752 static uword
753 snat_out2in_node_fn (vlib_main_t * vm,
754                   vlib_node_runtime_t * node,
755                   vlib_frame_t * frame)
756 {
757   u32 n_left_from, * from, * to_next;
758   snat_out2in_next_t next_index;
759   u32 pkts_processed = 0;
760   snat_main_t * sm = &snat_main;
761   f64 now = vlib_time_now (vm);
762   u32 thread_index = vlib_get_thread_index ();
763
764   from = vlib_frame_vector_args (frame);
765   n_left_from = frame->n_vectors;
766   next_index = node->cached_next_index;
767
768   while (n_left_from > 0)
769     {
770       u32 n_left_to_next;
771
772       vlib_get_next_frame (vm, node, next_index,
773                            to_next, n_left_to_next);
774
775       while (n_left_from >= 4 && n_left_to_next >= 2)
776         {
777           u32 bi0, bi1;
778           vlib_buffer_t * b0, * b1;
779           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
780           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
781           u32 sw_if_index0, sw_if_index1;
782           ip4_header_t * ip0, *ip1;
783           ip_csum_t sum0, sum1;
784           u32 new_addr0, old_addr0;
785           u16 new_port0, old_port0;
786           u32 new_addr1, old_addr1;
787           u16 new_port1, old_port1;
788           udp_header_t * udp0, * udp1;
789           tcp_header_t * tcp0, * tcp1;
790           icmp46_header_t * icmp0, * icmp1;
791           snat_session_key_t key0, key1, sm0, sm1;
792           u32 rx_fib_index0, rx_fib_index1;
793           u32 proto0, proto1;
794           snat_session_t * s0 = 0, * s1 = 0;
795           clib_bihash_kv_8_8_t kv0, kv1, value0, value1;
796
797           /* Prefetch next iteration. */
798           {
799             vlib_buffer_t * p2, * p3;
800
801             p2 = vlib_get_buffer (vm, from[2]);
802             p3 = vlib_get_buffer (vm, from[3]);
803
804             vlib_prefetch_buffer_header (p2, LOAD);
805             vlib_prefetch_buffer_header (p3, LOAD);
806
807             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
808             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
809           }
810
811           /* speculatively enqueue b0 and b1 to the current next frame */
812           to_next[0] = bi0 = from[0];
813           to_next[1] = bi1 = from[1];
814           from += 2;
815           to_next += 2;
816           n_left_from -= 2;
817           n_left_to_next -= 2;
818
819           b0 = vlib_get_buffer (vm, bi0);
820           b1 = vlib_get_buffer (vm, bi1);
821
822           vnet_buffer (b0)->snat.flags = 0;
823           vnet_buffer (b1)->snat.flags = 0;
824
825           ip0 = vlib_buffer_get_current (b0);
826           udp0 = ip4_next_header (ip0);
827           tcp0 = (tcp_header_t *) udp0;
828           icmp0 = (icmp46_header_t *) udp0;
829
830           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
831           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
832                                    sw_if_index0);
833
834           if (PREDICT_FALSE(ip0->ttl == 1))
835             {
836               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
837               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
838                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
839                                            0);
840               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
841               goto trace0;
842             }
843
844           proto0 = ip_proto_to_snat_proto (ip0->protocol);
845
846           if (PREDICT_FALSE (proto0 == ~0))
847             {
848               snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
849                                         thread_index, now, vm);
850               goto trace0;
851             }
852
853           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
854             {
855               next0 = icmp_out2in_slow_path
856                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
857                  next0, now, thread_index, &s0);
858               goto trace0;
859             }
860
861           key0.addr = ip0->dst_address;
862           key0.port = udp0->dst_port;
863           key0.protocol = proto0;
864           key0.fib_index = rx_fib_index0;
865
866           kv0.key = key0.as_u64;
867
868           if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
869             {
870               /* Try to match static mapping by external address and port,
871                  destination address and port in packet */
872               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
873                 {
874                   b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
875                   /*
876                    * Send DHCP packets to the ipv4 stack, or we won't
877                    * be able to use dhcp client on the outside interface
878                    */
879                   if (proto0 != SNAT_PROTOCOL_UDP
880                       || (udp0->dst_port
881                           != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
882                     next0 = SNAT_OUT2IN_NEXT_DROP;
883                   goto trace0;
884                 }
885
886               /* Create session initiated by host from external network */
887               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
888                                                      thread_index);
889               if (!s0)
890                 {
891                   b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
892                   next0 = SNAT_OUT2IN_NEXT_DROP;
893                   goto trace0;
894                 }
895             }
896           else
897             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
898                                     value0.value);
899
900           old_addr0 = ip0->dst_address.as_u32;
901           ip0->dst_address = s0->in2out.addr;
902           new_addr0 = ip0->dst_address.as_u32;
903           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
904
905           sum0 = ip0->checksum;
906           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
907                                  ip4_header_t,
908                                  dst_address /* changed member */);
909           ip0->checksum = ip_csum_fold (sum0);
910
911           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
912             {
913               old_port0 = tcp0->dst_port;
914               tcp0->dst_port = s0->in2out.port;
915               new_port0 = tcp0->dst_port;
916
917               sum0 = tcp0->checksum;
918               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
919                                      ip4_header_t,
920                                      dst_address /* changed member */);
921
922               sum0 = ip_csum_update (sum0, old_port0, new_port0,
923                                      ip4_header_t /* cheat */,
924                                      length /* changed member */);
925               tcp0->checksum = ip_csum_fold(sum0);
926             }
927           else
928             {
929               old_port0 = udp0->dst_port;
930               udp0->dst_port = s0->in2out.port;
931               udp0->checksum = 0;
932             }
933
934           /* Accounting */
935           s0->last_heard = now;
936           s0->total_pkts++;
937           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
938           /* Per-user LRU list maintenance for dynamic translation */
939           if (!snat_is_session_static (s0))
940             {
941               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
942                                  s0->per_user_index);
943               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
944                                   s0->per_user_list_head_index,
945                                   s0->per_user_index);
946             }
947         trace0:
948
949           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
950                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
951             {
952               snat_out2in_trace_t *t =
953                  vlib_add_trace (vm, node, b0, sizeof (*t));
954               t->sw_if_index = sw_if_index0;
955               t->next_index = next0;
956               t->session_index = ~0;
957               if (s0)
958                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
959             }
960
961           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
962
963
964           ip1 = vlib_buffer_get_current (b1);
965           udp1 = ip4_next_header (ip1);
966           tcp1 = (tcp_header_t *) udp1;
967           icmp1 = (icmp46_header_t *) udp1;
968
969           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
970           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
971                                    sw_if_index1);
972
973           if (PREDICT_FALSE(ip1->ttl == 1))
974             {
975               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
976               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
977                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
978                                            0);
979               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
980               goto trace1;
981             }
982
983           proto1 = ip_proto_to_snat_proto (ip1->protocol);
984
985           if (PREDICT_FALSE (proto1 == ~0))
986             {
987               snat_out2in_unknown_proto(sm, b1, ip1, rx_fib_index1,
988                                         thread_index, now, vm);
989               goto trace1;
990             }
991
992           if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
993             {
994               next1 = icmp_out2in_slow_path
995                 (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
996                  next1, now, thread_index, &s1);
997               goto trace1;
998             }
999
1000           key1.addr = ip1->dst_address;
1001           key1.port = udp1->dst_port;
1002           key1.protocol = proto1;
1003           key1.fib_index = rx_fib_index1;
1004
1005           kv1.key = key1.as_u64;
1006
1007           if (clib_bihash_search_8_8 (&sm->out2in, &kv1, &value1))
1008             {
1009               /* Try to match static mapping by external address and port,
1010                  destination address and port in packet */
1011               if (snat_static_mapping_match(sm, key1, &sm1, 1, 0))
1012                 {
1013                   b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1014                   /*
1015                    * Send DHCP packets to the ipv4 stack, or we won't
1016                    * be able to use dhcp client on the outside interface
1017                    */
1018                   if (proto1 != SNAT_PROTOCOL_UDP
1019                       || (udp1->dst_port
1020                           != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1021                     next1 = SNAT_OUT2IN_NEXT_DROP;
1022                   goto trace1;
1023                 }
1024
1025               /* Create session initiated by host from external network */
1026               s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
1027                                                      thread_index);
1028               if (!s1)
1029                 {
1030                   b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1031                   next1 = SNAT_OUT2IN_NEXT_DROP;
1032                   goto trace1;
1033                 }
1034             }
1035           else
1036             s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1037                                     value1.value);
1038
1039           old_addr1 = ip1->dst_address.as_u32;
1040           ip1->dst_address = s1->in2out.addr;
1041           new_addr1 = ip1->dst_address.as_u32;
1042           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index;
1043
1044           sum1 = ip1->checksum;
1045           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1046                                  ip4_header_t,
1047                                  dst_address /* changed member */);
1048           ip1->checksum = ip_csum_fold (sum1);
1049
1050           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1051             {
1052               old_port1 = tcp1->dst_port;
1053               tcp1->dst_port = s1->in2out.port;
1054               new_port1 = tcp1->dst_port;
1055
1056               sum1 = tcp1->checksum;
1057               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1058                                      ip4_header_t,
1059                                      dst_address /* changed member */);
1060
1061               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1062                                      ip4_header_t /* cheat */,
1063                                      length /* changed member */);
1064               tcp1->checksum = ip_csum_fold(sum1);
1065             }
1066           else
1067             {
1068               old_port1 = udp1->dst_port;
1069               udp1->dst_port = s1->in2out.port;
1070               udp1->checksum = 0;
1071             }
1072
1073           /* Accounting */
1074           s1->last_heard = now;
1075           s1->total_pkts++;
1076           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1077           /* Per-user LRU list maintenance for dynamic translation */
1078           if (!snat_is_session_static (s1))
1079             {
1080               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1081                                  s1->per_user_index);
1082               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1083                                   s1->per_user_list_head_index,
1084                                   s1->per_user_index);
1085             }
1086         trace1:
1087
1088           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1089                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1090             {
1091               snat_out2in_trace_t *t =
1092                  vlib_add_trace (vm, node, b1, sizeof (*t));
1093               t->sw_if_index = sw_if_index1;
1094               t->next_index = next1;
1095               t->session_index = ~0;
1096               if (s1)
1097                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1098             }
1099
1100           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
1101
1102           /* verify speculative enqueues, maybe switch current next frame */
1103           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1104                                            to_next, n_left_to_next,
1105                                            bi0, bi1, next0, next1);
1106         }
1107
1108       while (n_left_from > 0 && n_left_to_next > 0)
1109         {
1110           u32 bi0;
1111           vlib_buffer_t * b0;
1112           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1113           u32 sw_if_index0;
1114           ip4_header_t * ip0;
1115           ip_csum_t sum0;
1116           u32 new_addr0, old_addr0;
1117           u16 new_port0, old_port0;
1118           udp_header_t * udp0;
1119           tcp_header_t * tcp0;
1120           icmp46_header_t * icmp0;
1121           snat_session_key_t key0, sm0;
1122           u32 rx_fib_index0;
1123           u32 proto0;
1124           snat_session_t * s0 = 0;
1125           clib_bihash_kv_8_8_t kv0, value0;
1126
1127           /* speculatively enqueue b0 to the current next frame */
1128           bi0 = from[0];
1129           to_next[0] = bi0;
1130           from += 1;
1131           to_next += 1;
1132           n_left_from -= 1;
1133           n_left_to_next -= 1;
1134
1135           b0 = vlib_get_buffer (vm, bi0);
1136
1137           vnet_buffer (b0)->snat.flags = 0;
1138
1139           ip0 = vlib_buffer_get_current (b0);
1140           udp0 = ip4_next_header (ip0);
1141           tcp0 = (tcp_header_t *) udp0;
1142           icmp0 = (icmp46_header_t *) udp0;
1143
1144           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1145           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1146                                    sw_if_index0);
1147
1148           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1149
1150           if (PREDICT_FALSE (proto0 == ~0))
1151             {
1152               snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
1153                                         thread_index, now, vm);
1154               goto trace00;
1155             }
1156
1157           if (PREDICT_FALSE(ip0->ttl == 1))
1158             {
1159               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1160               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1161                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1162                                            0);
1163               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1164               goto trace00;
1165             }
1166
1167           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1168             {
1169               next0 = icmp_out2in_slow_path
1170                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1171                  next0, now, thread_index, &s0);
1172               goto trace00;
1173             }
1174
1175           key0.addr = ip0->dst_address;
1176           key0.port = udp0->dst_port;
1177           key0.protocol = proto0;
1178           key0.fib_index = rx_fib_index0;
1179
1180           kv0.key = key0.as_u64;
1181
1182           if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
1183             {
1184               /* Try to match static mapping by external address and port,
1185                  destination address and port in packet */
1186               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
1187                 {
1188                   b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1189                   /*
1190                    * Send DHCP packets to the ipv4 stack, or we won't
1191                    * be able to use dhcp client on the outside interface
1192                    */
1193                   if (proto0 != SNAT_PROTOCOL_UDP
1194                       || (udp0->dst_port
1195                           != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1196
1197                     next0 = SNAT_OUT2IN_NEXT_DROP;
1198                   goto trace00;
1199                 }
1200
1201               /* Create session initiated by host from external network */
1202               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1203                                                      thread_index);
1204               if (!s0)
1205                 {
1206                   b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1207                     next0 = SNAT_OUT2IN_NEXT_DROP;
1208                   goto trace00;
1209                 }
1210             }
1211           else
1212             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
1213                                     value0.value);
1214
1215           old_addr0 = ip0->dst_address.as_u32;
1216           ip0->dst_address = s0->in2out.addr;
1217           new_addr0 = ip0->dst_address.as_u32;
1218           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1219
1220           sum0 = ip0->checksum;
1221           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1222                                  ip4_header_t,
1223                                  dst_address /* changed member */);
1224           ip0->checksum = ip_csum_fold (sum0);
1225
1226           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1227             {
1228               old_port0 = tcp0->dst_port;
1229               tcp0->dst_port = s0->in2out.port;
1230               new_port0 = tcp0->dst_port;
1231
1232               sum0 = tcp0->checksum;
1233               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1234                                      ip4_header_t,
1235                                      dst_address /* changed member */);
1236
1237               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1238                                      ip4_header_t /* cheat */,
1239                                      length /* changed member */);
1240               tcp0->checksum = ip_csum_fold(sum0);
1241             }
1242           else
1243             {
1244               old_port0 = udp0->dst_port;
1245               udp0->dst_port = s0->in2out.port;
1246               udp0->checksum = 0;
1247             }
1248
1249           /* Accounting */
1250           s0->last_heard = now;
1251           s0->total_pkts++;
1252           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1253           /* Per-user LRU list maintenance for dynamic translation */
1254           if (!snat_is_session_static (s0))
1255             {
1256               clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1257                                  s0->per_user_index);
1258               clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1259                                   s0->per_user_list_head_index,
1260                                   s0->per_user_index);
1261             }
1262         trace00:
1263
1264           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1265                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1266             {
1267               snat_out2in_trace_t *t =
1268                  vlib_add_trace (vm, node, b0, sizeof (*t));
1269               t->sw_if_index = sw_if_index0;
1270               t->next_index = next0;
1271               t->session_index = ~0;
1272               if (s0)
1273                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1274             }
1275
1276           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1277
1278           /* verify speculative enqueue, maybe switch current next frame */
1279           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1280                                            to_next, n_left_to_next,
1281                                            bi0, next0);
1282         }
1283
1284       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1285     }
1286
1287   vlib_node_increment_counter (vm, snat_out2in_node.index,
1288                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1289                                pkts_processed);
1290   return frame->n_vectors;
1291 }
1292
/*
 * Graph-node registration for the NAT44 outside-to-inside node
 * ("nat44-out2in").
 *
 * Ties the node name to its dispatch function, trace formatter and error
 * counter strings, and maps each SNAT_OUT2IN_NEXT_* disposition chosen by
 * the dispatch function to the name of the graph node that receives the
 * packet next.
 */
1293 VLIB_REGISTER_NODE (snat_out2in_node) = {
1294   .function = snat_out2in_node_fn,               /* node dispatch function */
1295   .name = "nat44-out2in",
1296   .vector_size = sizeof (u32),                   /* frame elements are u32 buffer indices */
1297   .format_trace = format_snat_out2in_trace,      /* prints snat_out2in_trace_t records */
1298   .type = VLIB_NODE_TYPE_INTERNAL,
1299
1300   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1301   .error_strings = snat_out2in_error_strings,    /* strings for the SNAT_OUT2IN_ERROR_* counters; declared outside this excerpt */
1302
1303   .runtime_data_bytes = sizeof (snat_runtime_t), /* NOTE(review): how this runtime data is used is not visible here - confirm */
1304
1305   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1306
1307   /* Next-node dispositions, indexed by SNAT_OUT2IN_NEXT_*; edit / add here */
1308   .next_nodes = {
1309     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",            /* e.g. no translation found */
1310     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",          /* initial value of next0/next1 in the dispatch loops */
1311     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",  /* chosen when the packet arrives with TTL == 1 */
1312   },
1313 };
/* NOTE(review): presumably emits CPU-architecture-specific variants of the
 * node function - confirm against the macro definition in vlib. */
1314 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
1315
1316 /**************************/
1317 /*** deterministic mode ***/
1318 /**************************/
1319 static uword
1320 snat_det_out2in_node_fn (vlib_main_t * vm,
1321                          vlib_node_runtime_t * node,
1322                          vlib_frame_t * frame)
1323 {
1324   u32 n_left_from, * from, * to_next;
1325   snat_out2in_next_t next_index;
1326   u32 pkts_processed = 0;
1327   snat_main_t * sm = &snat_main;
1328   u32 thread_index = vlib_get_thread_index ();
1329
1330   from = vlib_frame_vector_args (frame);
1331   n_left_from = frame->n_vectors;
1332   next_index = node->cached_next_index;
1333
1334   while (n_left_from > 0)
1335     {
1336       u32 n_left_to_next;
1337
1338       vlib_get_next_frame (vm, node, next_index,
1339                            to_next, n_left_to_next);
1340
1341       while (n_left_from >= 4 && n_left_to_next >= 2)
1342         {
1343           u32 bi0, bi1;
1344           vlib_buffer_t * b0, * b1;
1345           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1346           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
1347           u32 sw_if_index0, sw_if_index1;
1348           ip4_header_t * ip0, * ip1;
1349           ip_csum_t sum0, sum1;
1350           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
1351           u16 new_port0, old_port0, old_port1, new_port1;
1352           udp_header_t * udp0, * udp1;
1353           tcp_header_t * tcp0, * tcp1;
1354           u32 proto0, proto1;
1355           snat_det_out_key_t key0, key1;
1356           snat_det_map_t * dm0, * dm1;
1357           snat_det_session_t * ses0 = 0, * ses1 = 0;
1358           u32 rx_fib_index0, rx_fib_index1;
1359           icmp46_header_t * icmp0, * icmp1;
1360
1361           /* Prefetch next iteration. */
1362           {
1363             vlib_buffer_t * p2, * p3;
1364
1365             p2 = vlib_get_buffer (vm, from[2]);
1366             p3 = vlib_get_buffer (vm, from[3]);
1367
1368             vlib_prefetch_buffer_header (p2, LOAD);
1369             vlib_prefetch_buffer_header (p3, LOAD);
1370
1371             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1372             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1373           }
1374
1375           /* speculatively enqueue b0 and b1 to the current next frame */
1376           to_next[0] = bi0 = from[0];
1377           to_next[1] = bi1 = from[1];
1378           from += 2;
1379           to_next += 2;
1380           n_left_from -= 2;
1381           n_left_to_next -= 2;
1382
1383           b0 = vlib_get_buffer (vm, bi0);
1384           b1 = vlib_get_buffer (vm, bi1);
1385
1386           ip0 = vlib_buffer_get_current (b0);
1387           udp0 = ip4_next_header (ip0);
1388           tcp0 = (tcp_header_t *) udp0;
1389
1390           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1391
1392           if (PREDICT_FALSE(ip0->ttl == 1))
1393             {
1394               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1395               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1396                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1397                                            0);
1398               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1399               goto trace0;
1400             }
1401
1402           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1403
1404           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1405             {
1406               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1407               icmp0 = (icmp46_header_t *) udp0;
1408
1409               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
1410                                   rx_fib_index0, node, next0, thread_index,
1411                                   &ses0, &dm0);
1412               goto trace0;
1413             }
1414
1415           key0.ext_host_addr = ip0->src_address;
1416           key0.ext_host_port = tcp0->src;
1417           key0.out_port = tcp0->dst;
1418
1419           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
1420           if (PREDICT_FALSE(!dm0))
1421             {
1422               clib_warning("unknown dst address:  %U",
1423                            format_ip4_address, &ip0->dst_address);
1424               next0 = SNAT_OUT2IN_NEXT_DROP;
1425               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1426               goto trace0;
1427             }
1428
1429           snat_det_reverse(dm0, &ip0->dst_address,
1430                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
1431
1432           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
1433           if (PREDICT_FALSE(!ses0))
1434             {
1435               clib_warning("no match src %U:%d dst %U:%d for user %U",
1436                            format_ip4_address, &ip0->src_address,
1437                            clib_net_to_host_u16 (tcp0->src),
1438                            format_ip4_address, &ip0->dst_address,
1439                            clib_net_to_host_u16 (tcp0->dst),
1440                            format_ip4_address, &new_addr0);
1441               next0 = SNAT_OUT2IN_NEXT_DROP;
1442               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1443               goto trace0;
1444             }
1445           new_port0 = ses0->in_port;
1446
1447           old_addr0 = ip0->dst_address;
1448           ip0->dst_address = new_addr0;
1449           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
1450
1451           sum0 = ip0->checksum;
1452           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1453                                  ip4_header_t,
1454                                  dst_address /* changed member */);
1455           ip0->checksum = ip_csum_fold (sum0);
1456
1457           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1458             {
1459               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1460                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
1461               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
1462                 snat_det_ses_close(dm0, ses0);
1463
1464               old_port0 = tcp0->dst;
1465               tcp0->dst = new_port0;
1466
1467               sum0 = tcp0->checksum;
1468               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1469                                      ip4_header_t,
1470                                      dst_address /* changed member */);
1471
1472               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1473                                      ip4_header_t /* cheat */,
1474                                      length /* changed member */);
1475               tcp0->checksum = ip_csum_fold(sum0);
1476             }
1477           else
1478             {
1479               old_port0 = udp0->dst_port;
1480               udp0->dst_port = new_port0;
1481               udp0->checksum = 0;
1482             }
1483
1484         trace0:
1485
1486           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1487                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1488             {
1489               snat_out2in_trace_t *t =
1490                  vlib_add_trace (vm, node, b0, sizeof (*t));
1491               t->sw_if_index = sw_if_index0;
1492               t->next_index = next0;
1493               t->session_index = ~0;
1494               if (ses0)
1495                 t->session_index = ses0 - dm0->sessions;
1496             }
1497
1498           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1499
1500           b1 = vlib_get_buffer (vm, bi1);
1501
1502           ip1 = vlib_buffer_get_current (b1);
1503           udp1 = ip4_next_header (ip1);
1504           tcp1 = (tcp_header_t *) udp1;
1505
1506           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1507
1508           if (PREDICT_FALSE(ip1->ttl == 1))
1509             {
1510               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1511               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1512                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1513                                            0);
1514               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1515               goto trace1;
1516             }
1517
1518           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1519
1520           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
1521             {
1522               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
1523               icmp1 = (icmp46_header_t *) udp1;
1524
1525               next1 = icmp_out2in(sm, b1, ip1, icmp1, sw_if_index1,
1526                                   rx_fib_index1, node, next1, thread_index,
1527                                   &ses1, &dm1);
1528               goto trace1;
1529             }
1530
1531           key1.ext_host_addr = ip1->src_address;
1532           key1.ext_host_port = tcp1->src;
1533           key1.out_port = tcp1->dst;
1534
1535           dm1 = snat_det_map_by_out(sm, &ip1->dst_address);
1536           if (PREDICT_FALSE(!dm1))
1537             {
1538               clib_warning("unknown dst address:  %U",
1539                            format_ip4_address, &ip1->dst_address);
1540               next1 = SNAT_OUT2IN_NEXT_DROP;
1541               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1542               goto trace1;
1543             }
1544
1545           snat_det_reverse(dm1, &ip1->dst_address,
1546                            clib_net_to_host_u16(tcp1->dst), &new_addr1);
1547
1548           ses1 = snat_det_get_ses_by_out (dm1, &new_addr1, key1.as_u64);
1549           if (PREDICT_FALSE(!ses1))
1550             {
1551               clib_warning("no match src %U:%d dst %U:%d for user %U",
1552                            format_ip4_address, &ip1->src_address,
1553                            clib_net_to_host_u16 (tcp1->src),
1554                            format_ip4_address, &ip1->dst_address,
1555                            clib_net_to_host_u16 (tcp1->dst),
1556                            format_ip4_address, &new_addr1);
1557               next1 = SNAT_OUT2IN_NEXT_DROP;
1558               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1559               goto trace1;
1560             }
1561           new_port1 = ses1->in_port;
1562
1563           old_addr1 = ip1->dst_address;
1564           ip1->dst_address = new_addr1;
1565           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
1566
1567           sum1 = ip1->checksum;
1568           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1569                                  ip4_header_t,
1570                                  dst_address /* changed member */);
1571           ip1->checksum = ip_csum_fold (sum1);
1572
1573           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1574             {
1575               if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
1576                 ses1->state = SNAT_SESSION_TCP_CLOSE_WAIT;
1577               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_LAST_ACK)
1578                 snat_det_ses_close(dm1, ses1);
1579
1580               old_port1 = tcp1->dst;
1581               tcp1->dst = new_port1;
1582
1583               sum1 = tcp1->checksum;
1584               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
1585                                      ip4_header_t,
1586                                      dst_address /* changed member */);
1587
1588               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1589                                      ip4_header_t /* cheat */,
1590                                      length /* changed member */);
1591               tcp1->checksum = ip_csum_fold(sum1);
1592             }
1593           else
1594             {
1595               old_port1 = udp1->dst_port;
1596               udp1->dst_port = new_port1;
1597               udp1->checksum = 0;
1598             }
1599
1600         trace1:
1601
1602           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1603                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1604             {
1605               snat_out2in_trace_t *t =
1606                  vlib_add_trace (vm, node, b1, sizeof (*t));
1607               t->sw_if_index = sw_if_index1;
1608               t->next_index = next1;
1609               t->session_index = ~0;
1610               if (ses1)
1611                 t->session_index = ses1 - dm1->sessions;
1612             }
1613
1614           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
1615
1616           /* verify speculative enqueues, maybe switch current next frame */
1617           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1618                                            to_next, n_left_to_next,
1619                                            bi0, bi1, next0, next1);
1620          }
1621
1622       while (n_left_from > 0 && n_left_to_next > 0)
1623         {
1624           u32 bi0;
1625           vlib_buffer_t * b0;
1626           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1627           u32 sw_if_index0;
1628           ip4_header_t * ip0;
1629           ip_csum_t sum0;
1630           ip4_address_t new_addr0, old_addr0;
1631           u16 new_port0, old_port0;
1632           udp_header_t * udp0;
1633           tcp_header_t * tcp0;
1634           u32 proto0;
1635           snat_det_out_key_t key0;
1636           snat_det_map_t * dm0;
1637           snat_det_session_t * ses0 = 0;
1638           u32 rx_fib_index0;
1639           icmp46_header_t * icmp0;
1640
1641           /* speculatively enqueue b0 to the current next frame */
1642           bi0 = from[0];
1643           to_next[0] = bi0;
1644           from += 1;
1645           to_next += 1;
1646           n_left_from -= 1;
1647           n_left_to_next -= 1;
1648
1649           b0 = vlib_get_buffer (vm, bi0);
1650
1651           ip0 = vlib_buffer_get_current (b0);
1652           udp0 = ip4_next_header (ip0);
1653           tcp0 = (tcp_header_t *) udp0;
1654
1655           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1656
1657           if (PREDICT_FALSE(ip0->ttl == 1))
1658             {
1659               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1660               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1661                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1662                                            0);
1663               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1664               goto trace00;
1665             }
1666
1667           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1668
1669           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1670             {
1671               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1672               icmp0 = (icmp46_header_t *) udp0;
1673
1674               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
1675                                   rx_fib_index0, node, next0, thread_index,
1676                                   &ses0, &dm0);
1677               goto trace00;
1678             }
1679
1680           key0.ext_host_addr = ip0->src_address;
1681           key0.ext_host_port = tcp0->src;
1682           key0.out_port = tcp0->dst;
1683
1684           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
1685           if (PREDICT_FALSE(!dm0))
1686             {
1687               clib_warning("unknown dst address:  %U",
1688                            format_ip4_address, &ip0->dst_address);
1689               next0 = SNAT_OUT2IN_NEXT_DROP;
1690               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1691               goto trace00;
1692             }
1693
1694           snat_det_reverse(dm0, &ip0->dst_address,
1695                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
1696
1697           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
1698           if (PREDICT_FALSE(!ses0))
1699             {
1700               clib_warning("no match src %U:%d dst %U:%d for user %U",
1701                            format_ip4_address, &ip0->src_address,
1702                            clib_net_to_host_u16 (tcp0->src),
1703                            format_ip4_address, &ip0->dst_address,
1704                            clib_net_to_host_u16 (tcp0->dst),
1705                            format_ip4_address, &new_addr0);
1706               next0 = SNAT_OUT2IN_NEXT_DROP;
1707               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1708               goto trace00;
1709             }
1710           new_port0 = ses0->in_port;
1711
1712           old_addr0 = ip0->dst_address;
1713           ip0->dst_address = new_addr0;
1714           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
1715
1716           sum0 = ip0->checksum;
1717           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1718                                  ip4_header_t,
1719                                  dst_address /* changed member */);
1720           ip0->checksum = ip_csum_fold (sum0);
1721
1722           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1723             {
1724               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
1725                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
1726               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
1727                 snat_det_ses_close(dm0, ses0);
1728
1729               old_port0 = tcp0->dst;
1730               tcp0->dst = new_port0;
1731
1732               sum0 = tcp0->checksum;
1733               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
1734                                      ip4_header_t,
1735                                      dst_address /* changed member */);
1736
1737               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1738                                      ip4_header_t /* cheat */,
1739                                      length /* changed member */);
1740               tcp0->checksum = ip_csum_fold(sum0);
1741             }
1742           else
1743             {
1744               old_port0 = udp0->dst_port;
1745               udp0->dst_port = new_port0;
1746               udp0->checksum = 0;
1747             }
1748
1749         trace00:
1750
1751           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1752                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1753             {
1754               snat_out2in_trace_t *t =
1755                  vlib_add_trace (vm, node, b0, sizeof (*t));
1756               t->sw_if_index = sw_if_index0;
1757               t->next_index = next0;
1758               t->session_index = ~0;
1759               if (ses0)
1760                 t->session_index = ses0 - dm0->sessions;
1761             }
1762
1763           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1764
1765           /* verify speculative enqueue, maybe switch current next frame */
1766           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1767                                            to_next, n_left_to_next,
1768                                            bi0, next0);
1769         }
1770
1771       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1772     }
1773
1774   vlib_node_increment_counter (vm, snat_det_out2in_node.index,
1775                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1776                                pkts_processed);
1777   return frame->n_vectors;
1778 }
1779
1780 VLIB_REGISTER_NODE (snat_det_out2in_node) = {
1781   .function = snat_det_out2in_node_fn,
1782   .name = "nat44-det-out2in",
1783   .vector_size = sizeof (u32),
1784   .format_trace = format_snat_out2in_trace,
1785   .type = VLIB_NODE_TYPE_INTERNAL,
1786
1787   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1788   .error_strings = snat_out2in_error_strings,
1789
1790   .runtime_data_bytes = sizeof (snat_runtime_t),
1791
1792   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1793
1794   /* edit / add dispositions here */
1795   .next_nodes = {
1796     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1797     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1798     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1799   },
1800 };
1801 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn);
1802
1803 /**
1804  * Get address and port values to be used for ICMP packet translation
1805  * and create session if needed
1806  *
1807  * @param[in,out] sm             NAT main
1808  * @param[in,out] node           NAT node runtime
1809  * @param[in] thread_index       thread index
1810  * @param[in,out] b0             buffer containing packet to be translated
1811  * @param[out] p_proto           protocol used for matching
1812  * @param[out] p_value           address and port after NAT translation
1813  * @param[out] p_dont_translate  if packet should not be translated
1814  * @param d                      optional parameter
1815  * @param e                      optional parameter
1816  */
1817 u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node,
1818                           u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
1819                           snat_session_key_t *p_value,
1820                           u8 *p_dont_translate, void *d, void *e)
1821 {
1822   ip4_header_t *ip0;
1823   icmp46_header_t *icmp0;
1824   u32 sw_if_index0;
1825   u8 protocol;
1826   snat_det_out_key_t key0;
1827   u8 dont_translate = 0;
1828   u32 next0 = ~0;
1829   icmp_echo_header_t *echo0, *inner_echo0 = 0;
1830   ip4_header_t *inner_ip0;
1831   void *l4_header = 0;
1832   icmp46_header_t *inner_icmp0;
1833   snat_det_map_t * dm0 = 0;
1834   ip4_address_t new_addr0 = {{0}};
1835   snat_det_session_t * ses0 = 0;
1836   ip4_address_t out_addr;
1837
1838   ip0 = vlib_buffer_get_current (b0);
1839   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
1840   echo0 = (icmp_echo_header_t *)(icmp0+1);
1841   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1842
1843   if (!icmp_is_error_message (icmp0))
1844     {
1845       protocol = SNAT_PROTOCOL_ICMP;
1846       key0.ext_host_addr = ip0->src_address;
1847       key0.ext_host_port = 0;
1848       key0.out_port = echo0->identifier;
1849       out_addr = ip0->dst_address;
1850     }
1851   else
1852     {
1853       inner_ip0 = (ip4_header_t *)(echo0+1);
1854       l4_header = ip4_next_header (inner_ip0);
1855       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
1856       key0.ext_host_addr = inner_ip0->dst_address;
1857       out_addr = inner_ip0->src_address;
1858       switch (protocol)
1859         {
1860         case SNAT_PROTOCOL_ICMP:
1861           inner_icmp0 = (icmp46_header_t*)l4_header;
1862           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
1863           key0.ext_host_port = 0;
1864           key0.out_port = inner_echo0->identifier;
1865           break;
1866         case SNAT_PROTOCOL_UDP:
1867         case SNAT_PROTOCOL_TCP:
1868           key0.ext_host_port = ((tcp_udp_header_t*)l4_header)->dst_port;
1869           key0.out_port = ((tcp_udp_header_t*)l4_header)->src_port;
1870           break;
1871         default:
1872           b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
1873           next0 = SNAT_OUT2IN_NEXT_DROP;
1874           goto out;
1875         }
1876     }
1877
1878   dm0 = snat_det_map_by_out(sm, &out_addr);
1879   if (PREDICT_FALSE(!dm0))
1880     {
1881       /* Don't NAT packet aimed at the intfc address */
1882       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
1883                                           ip0->dst_address.as_u32)))
1884         {
1885           dont_translate = 1;
1886           goto out;
1887         }
1888       clib_warning("unknown dst address:  %U",
1889                    format_ip4_address, &ip0->dst_address);
1890       goto out;
1891     }
1892
1893   snat_det_reverse(dm0, &ip0->dst_address,
1894                    clib_net_to_host_u16(key0.out_port), &new_addr0);
1895
1896   ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
1897   if (PREDICT_FALSE(!ses0))
1898     {
1899       /* Don't NAT packet aimed at the intfc address */
1900       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
1901                                           ip0->dst_address.as_u32)))
1902         {
1903           dont_translate = 1;
1904           goto out;
1905         }
1906       clib_warning("no match src %U:%d dst %U:%d for user %U",
1907                    format_ip4_address, &key0.ext_host_addr,
1908                    clib_net_to_host_u16 (key0.ext_host_port),
1909                    format_ip4_address, &out_addr,
1910                    clib_net_to_host_u16 (key0.out_port),
1911                    format_ip4_address, &new_addr0);
1912       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1913       next0 = SNAT_OUT2IN_NEXT_DROP;
1914       goto out;
1915     }
1916
1917   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
1918                     !icmp_is_error_message (icmp0)))
1919     {
1920       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
1921       next0 = SNAT_OUT2IN_NEXT_DROP;
1922       goto out;
1923     }
1924
1925   goto out;
1926
1927 out:
1928   *p_proto = protocol;
1929   if (ses0)
1930     {
1931       p_value->addr = new_addr0;
1932       p_value->fib_index = sm->inside_fib_index;
1933       p_value->port = ses0->in_port;
1934     }
1935   *p_dont_translate = dont_translate;
1936   if (d)
1937     *(snat_det_session_t**)d = ses0;
1938   if (e)
1939     *(snat_det_map_t**)e = dm0;
1940   return next0;
1941 }
1942
1943 /**********************/
1944 /*** worker handoff ***/
1945 /**********************/
1946 static uword
1947 snat_out2in_worker_handoff_fn (vlib_main_t * vm,
1948                                vlib_node_runtime_t * node,
1949                                vlib_frame_t * frame)
1950 {
1951   snat_main_t *sm = &snat_main;
1952   vlib_thread_main_t *tm = vlib_get_thread_main ();
1953   u32 n_left_from, *from, *to_next = 0;
1954   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
1955   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
1956     = 0;
1957   vlib_frame_queue_elt_t *hf = 0;
1958   vlib_frame_t *f = 0;
1959   int i;
1960   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
1961   u32 next_worker_index = 0;
1962   u32 current_worker_index = ~0;
1963   u32 thread_index = vlib_get_thread_index ();
1964
1965   ASSERT (vec_len (sm->workers));
1966
1967   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
1968     {
1969       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
1970
1971       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
1972                                sm->first_worker_index + sm->num_workers - 1,
1973                                (vlib_frame_queue_t *) (~0));
1974     }
1975
1976   from = vlib_frame_vector_args (frame);
1977   n_left_from = frame->n_vectors;
1978
1979   while (n_left_from > 0)
1980     {
1981       u32 bi0;
1982       vlib_buffer_t *b0;
1983       u32 sw_if_index0;
1984       u32 rx_fib_index0;
1985       ip4_header_t * ip0;
1986       u8 do_handoff;
1987
1988       bi0 = from[0];
1989       from += 1;
1990       n_left_from -= 1;
1991
1992       b0 = vlib_get_buffer (vm, bi0);
1993
1994       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1995       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1996
1997       ip0 = vlib_buffer_get_current (b0);
1998
1999       next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0);
2000
2001       if (PREDICT_FALSE (next_worker_index != thread_index))
2002         {
2003           do_handoff = 1;
2004
2005           if (next_worker_index != current_worker_index)
2006             {
2007               if (hf)
2008                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2009
2010               hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index,
2011                                                       next_worker_index,
2012                                                       handoff_queue_elt_by_worker_index);
2013
2014               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2015               to_next_worker = &hf->buffer_index[hf->n_vectors];
2016               current_worker_index = next_worker_index;
2017             }
2018
2019           /* enqueue to correct worker thread */
2020           to_next_worker[0] = bi0;
2021           to_next_worker++;
2022           n_left_to_next_worker--;
2023
2024           if (n_left_to_next_worker == 0)
2025             {
2026               hf->n_vectors = VLIB_FRAME_SIZE;
2027               vlib_put_frame_queue_elt (hf);
2028               current_worker_index = ~0;
2029               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2030               hf = 0;
2031             }
2032         }
2033       else
2034         {
2035           do_handoff = 0;
2036           /* if this is 1st frame */
2037           if (!f)
2038             {
2039               f = vlib_get_frame_to_node (vm, sm->out2in_node_index);
2040               to_next = vlib_frame_vector_args (f);
2041             }
2042
2043           to_next[0] = bi0;
2044           to_next += 1;
2045           f->n_vectors++;
2046         }
2047
2048       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2049                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2050         {
2051           snat_out2in_worker_handoff_trace_t *t =
2052             vlib_add_trace (vm, node, b0, sizeof (*t));
2053           t->next_worker_index = next_worker_index;
2054           t->do_handoff = do_handoff;
2055         }
2056     }
2057
2058   if (f)
2059     vlib_put_frame_to_node (vm, sm->out2in_node_index, f);
2060
2061   if (hf)
2062     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2063
2064   /* Ship frames to the worker nodes */
2065   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2066     {
2067       if (handoff_queue_elt_by_worker_index[i])
2068         {
2069           hf = handoff_queue_elt_by_worker_index[i];
2070           /*
2071            * It works better to let the handoff node
2072            * rate-adapt, always ship the handoff queue element.
2073            */
2074           if (1 || hf->n_vectors == hf->last_n_vectors)
2075             {
2076               vlib_put_frame_queue_elt (hf);
2077               handoff_queue_elt_by_worker_index[i] = 0;
2078             }
2079           else
2080             hf->last_n_vectors = hf->n_vectors;
2081         }
2082       congested_handoff_queue_by_worker_index[i] =
2083         (vlib_frame_queue_t *) (~0);
2084     }
2085   hf = 0;
2086   current_worker_index = ~0;
2087   return frame->n_vectors;
2088 }
2089
2090 VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = {
2091   .function = snat_out2in_worker_handoff_fn,
2092   .name = "nat44-out2in-worker-handoff",
2093   .vector_size = sizeof (u32),
2094   .format_trace = format_snat_out2in_worker_handoff_trace,
2095   .type = VLIB_NODE_TYPE_INTERNAL,
2096
2097   .n_next_nodes = 1,
2098
2099   .next_nodes = {
2100     [0] = "error-drop",
2101   },
2102 };
2103
2104 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn);
2105
2106 static uword
2107 snat_out2in_fast_node_fn (vlib_main_t * vm,
2108                           vlib_node_runtime_t * node,
2109                           vlib_frame_t * frame)
2110 {
2111   u32 n_left_from, * from, * to_next;
2112   snat_out2in_next_t next_index;
2113   u32 pkts_processed = 0;
2114   snat_main_t * sm = &snat_main;
2115
2116   from = vlib_frame_vector_args (frame);
2117   n_left_from = frame->n_vectors;
2118   next_index = node->cached_next_index;
2119
2120   while (n_left_from > 0)
2121     {
2122       u32 n_left_to_next;
2123
2124       vlib_get_next_frame (vm, node, next_index,
2125                            to_next, n_left_to_next);
2126
2127       while (n_left_from > 0 && n_left_to_next > 0)
2128         {
2129           u32 bi0;
2130           vlib_buffer_t * b0;
2131           u32 next0 = SNAT_OUT2IN_NEXT_DROP;
2132           u32 sw_if_index0;
2133           ip4_header_t * ip0;
2134           ip_csum_t sum0;
2135           u32 new_addr0, old_addr0;
2136           u16 new_port0, old_port0;
2137           udp_header_t * udp0;
2138           tcp_header_t * tcp0;
2139           icmp46_header_t * icmp0;
2140           snat_session_key_t key0, sm0;
2141           u32 proto0;
2142           u32 rx_fib_index0;
2143
2144           /* speculatively enqueue b0 to the current next frame */
2145           bi0 = from[0];
2146           to_next[0] = bi0;
2147           from += 1;
2148           to_next += 1;
2149           n_left_from -= 1;
2150           n_left_to_next -= 1;
2151
2152           b0 = vlib_get_buffer (vm, bi0);
2153
2154           ip0 = vlib_buffer_get_current (b0);
2155           udp0 = ip4_next_header (ip0);
2156           tcp0 = (tcp_header_t *) udp0;
2157           icmp0 = (icmp46_header_t *) udp0;
2158
2159           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2160           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2161
2162           vnet_feature_next (sw_if_index0, &next0, b0);
2163
2164           if (PREDICT_FALSE(ip0->ttl == 1))
2165             {
2166               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2167               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2168                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2169                                            0);
2170               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2171               goto trace00;
2172             }
2173
2174           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2175
2176           if (PREDICT_FALSE (proto0 == ~0))
2177               goto trace00;
2178
2179           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2180             {
2181               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2182                                   rx_fib_index0, node, next0, ~0, 0, 0);
2183               goto trace00;
2184             }
2185
2186           key0.addr = ip0->dst_address;
2187           key0.port = udp0->dst_port;
2188           key0.fib_index = rx_fib_index0;
2189
2190           if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
2191             {
2192               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2193               goto trace00;
2194             }
2195
2196           new_addr0 = sm0.addr.as_u32;
2197           new_port0 = sm0.port;
2198           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2199           old_addr0 = ip0->dst_address.as_u32;
2200           ip0->dst_address.as_u32 = new_addr0;
2201
2202           sum0 = ip0->checksum;
2203           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2204                                  ip4_header_t,
2205                                  dst_address /* changed member */);
2206           ip0->checksum = ip_csum_fold (sum0);
2207
2208           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
2209             {
2210                if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2211                 {
2212                   old_port0 = tcp0->dst_port;
2213                   tcp0->dst_port = new_port0;
2214
2215                   sum0 = tcp0->checksum;
2216                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2217                                          ip4_header_t,
2218                                          dst_address /* changed member */);
2219
2220                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2221                                          ip4_header_t /* cheat */,
2222                                          length /* changed member */);
2223                   tcp0->checksum = ip_csum_fold(sum0);
2224                 }
2225               else
2226                 {
2227                   old_port0 = udp0->dst_port;
2228                   udp0->dst_port = new_port0;
2229                   udp0->checksum = 0;
2230                 }
2231             }
2232           else
2233             {
2234               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2235                 {
2236                   sum0 = tcp0->checksum;
2237                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2238                                          ip4_header_t,
2239                                          dst_address /* changed member */);
2240
2241                   tcp0->checksum = ip_csum_fold(sum0);
2242                 }
2243             }
2244
2245         trace00:
2246
2247           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2248                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2249             {
2250               snat_out2in_trace_t *t =
2251                  vlib_add_trace (vm, node, b0, sizeof (*t));
2252               t->sw_if_index = sw_if_index0;
2253               t->next_index = next0;
2254             }
2255
2256           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2257
2258           /* verify speculative enqueue, maybe switch current next frame */
2259           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2260                                            to_next, n_left_to_next,
2261                                            bi0, next0);
2262         }
2263
2264       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2265     }
2266
2267   vlib_node_increment_counter (vm, snat_out2in_fast_node.index,
2268                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2269                                pkts_processed);
2270   return frame->n_vectors;
2271 }
2272
2273 VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
2274   .function = snat_out2in_fast_node_fn,
2275   .name = "nat44-out2in-fast",
2276   .vector_size = sizeof (u32),
2277   .format_trace = format_snat_out2in_fast_trace,
2278   .type = VLIB_NODE_TYPE_INTERNAL,
2279
2280   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2281   .error_strings = snat_out2in_error_strings,
2282
2283   .runtime_data_bytes = sizeof (snat_runtime_t),
2284
2285   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
2286
2287   /* edit / add dispositions here */
2288   .next_nodes = {
2289     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2290     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
2291     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2292   },
2293 };
2294 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn);