NAT44: fix ICMP error translation for endpoint dependent sessions (VPP-1150)
[vpp.git] / src / plugins / nat / out2in.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/udp/udp.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <nat/nat.h>
26 #include <nat/nat_ipfix_logging.h>
27 #include <nat/nat_det.h>
28 #include <nat/nat_reass.h>
29
30 #include <vppinfra/hash.h>
31 #include <vppinfra/error.h>
32 #include <vppinfra/elog.h>
33
34 typedef struct {
35   u32 sw_if_index;
36   u32 next_index;
37   u32 session_index;
38 } snat_out2in_trace_t;
39
40 typedef struct {
41   u32 next_worker_index;
42   u8 do_handoff;
43 } snat_out2in_worker_handoff_trace_t;
44
45 /* packet trace format function */
46 static u8 * format_snat_out2in_trace (u8 * s, va_list * args)
47 {
48   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
49   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
50   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
51
52   s = format (s, "NAT44_OUT2IN: sw_if_index %d, next index %d, session index %d",
53               t->sw_if_index, t->next_index, t->session_index);
54   return s;
55 }
56
57 static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args)
58 {
59   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
60   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
61   snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
62
63   s = format (s, "NAT44_OUT2IN_FAST: sw_if_index %d, next index %d",
64               t->sw_if_index, t->next_index);
65   return s;
66 }
67
68 static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args)
69 {
70   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
71   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
72   snat_out2in_worker_handoff_trace_t * t =
73     va_arg (*args, snat_out2in_worker_handoff_trace_t *);
74   char * m;
75
76   m = t->do_handoff ? "next worker" : "same worker";
77   s = format (s, "NAT44_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
78
79   return s;
80 }
81
82 typedef struct {
83   u32 sw_if_index;
84   u32 next_index;
85   u8 cached;
86 } nat44_out2in_reass_trace_t;
87
88 static u8 * format_nat44_out2in_reass_trace (u8 * s, va_list * args)
89 {
90   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
91   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
92   nat44_out2in_reass_trace_t * t = va_arg (*args, nat44_out2in_reass_trace_t *);
93
94   s = format (s, "NAT44_OUT2IN_REASS: sw_if_index %d, next index %d, status %s",
95               t->sw_if_index, t->next_index,
96               t->cached ? "cached" : "translated");
97
98   return s;
99 }
100
101 vlib_node_registration_t snat_out2in_node;
102 vlib_node_registration_t snat_out2in_fast_node;
103 vlib_node_registration_t snat_out2in_worker_handoff_node;
104 vlib_node_registration_t snat_det_out2in_node;
105 vlib_node_registration_t nat44_out2in_reass_node;
106
/* X-macro table of out2in errors: each entry is _(SYMBOL, "description").
 * It is expanded once to build snat_out2in_error_t and once to build
 * snat_out2in_error_strings[], which keeps the two in the same order. */
#define foreach_snat_out2in_error                               \
  _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")               \
  _(OUT2IN_PACKETS, "Good out2in packets processed")            \
  _(OUT_OF_PORTS, "Out of ports")                               \
  _(BAD_ICMP_TYPE, "unsupported ICMP type")                     \
  _(NO_TRANSLATION, "No translation")                           \
  _(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")         \
  _(DROP_FRAGMENT, "Drop fragment")                             \
  _(MAX_REASS, "Maximum reassemblies exceeded")                 \
  _(MAX_FRAG, "Maximum fragments per reassembly exceeded")
117
118 typedef enum {
119 #define _(sym,str) SNAT_OUT2IN_ERROR_##sym,
120   foreach_snat_out2in_error
121 #undef _
122   SNAT_OUT2IN_N_ERROR,
123 } snat_out2in_error_t;
124
125 static char * snat_out2in_error_strings[] = {
126 #define _(sym,string) string,
127   foreach_snat_out2in_error
128 #undef _
129 };
130
/* Next-node dispositions returned by the out2in processing functions. */
typedef enum
{
  SNAT_OUT2IN_NEXT_DROP,        /* packet is dropped */
  SNAT_OUT2IN_NEXT_LOOKUP,
  SNAT_OUT2IN_NEXT_ICMP_ERROR,
  SNAT_OUT2IN_NEXT_REASS,
  SNAT_OUT2IN_N_NEXT,           /* number of next indices */
} snat_out2in_next_t;
138
139 /**
140  * @brief Create session for static mapping.
141  *
142  * Create NAT session initiated by host from external network with static
143  * mapping.
144  *
145  * @param sm     NAT main.
146  * @param b0     Vlib buffer.
147  * @param in2out In2out NAT44 session key.
148  * @param out2in Out2in NAT44 session key.
149  * @param node   Vlib node.
150  *
151  * @returns SNAT session if successfully created otherwise 0.
152  */
153 static inline snat_session_t *
154 create_session_for_static_mapping (snat_main_t *sm,
155                                    vlib_buffer_t *b0,
156                                    snat_session_key_t in2out,
157                                    snat_session_key_t out2in,
158                                    vlib_node_runtime_t * node,
159                                    u32 thread_index)
160 {
161   snat_user_t *u;
162   snat_session_t *s;
163   clib_bihash_kv_8_8_t kv0;
164   ip4_header_t *ip0;
165   udp_header_t *udp0;
166
167   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
168     {
169       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
170       return 0;
171     }
172
173   ip0 = vlib_buffer_get_current (b0);
174   udp0 = ip4_next_header (ip0);
175
176   u = nat_user_get_or_create (sm, &in2out.addr, in2out.fib_index, thread_index);
177   if (!u)
178     {
179       clib_warning ("create NAT user failed");
180       return 0;
181     }
182
183   s = nat_session_alloc_or_recycle (sm, u, thread_index);
184   if (!s)
185     {
186       clib_warning ("create NAT session failed");
187       return 0;
188     }
189
190   s->outside_address_index = ~0;
191   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
192   s->ext_host_addr.as_u32 = ip0->src_address.as_u32;
193   s->ext_host_port = udp0->src_port;
194   u->nstaticsessions++;
195   s->in2out = in2out;
196   s->out2in = out2in;
197   s->in2out.protocol = out2in.protocol;
198
199   /* Add to translation hashes */
200   kv0.key = s->in2out.as_u64;
201   kv0.value = s - sm->per_thread_data[thread_index].sessions;
202   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
203                                1 /* is_add */))
204       clib_warning ("in2out key add failed");
205
206   kv0.key = s->out2in.as_u64;
207
208   if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
209                                1 /* is_add */))
210       clib_warning ("out2in key add failed");
211
212   /* log NAT event */
213   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
214                                       s->out2in.addr.as_u32,
215                                       s->in2out.protocol,
216                                       s->in2out.port,
217                                       s->out2in.port,
218                                       s->in2out.fib_index);
219    return s;
220 }
221
222 static_always_inline
223 snat_out2in_error_t icmp_get_key(ip4_header_t *ip0,
224                                  snat_session_key_t *p_key0)
225 {
226   icmp46_header_t *icmp0;
227   snat_session_key_t key0;
228   icmp_echo_header_t *echo0, *inner_echo0 = 0;
229   ip4_header_t *inner_ip0;
230   void *l4_header = 0;
231   icmp46_header_t *inner_icmp0;
232
233   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
234   echo0 = (icmp_echo_header_t *)(icmp0+1);
235
236   if (!icmp_is_error_message (icmp0))
237     {
238       key0.protocol = SNAT_PROTOCOL_ICMP;
239       key0.addr = ip0->dst_address;
240       key0.port = echo0->identifier;
241     }
242   else
243     {
244       inner_ip0 = (ip4_header_t *)(echo0+1);
245       l4_header = ip4_next_header (inner_ip0);
246       key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
247       key0.addr = inner_ip0->src_address;
248       switch (key0.protocol)
249         {
250         case SNAT_PROTOCOL_ICMP:
251           inner_icmp0 = (icmp46_header_t*)l4_header;
252           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
253           key0.port = inner_echo0->identifier;
254           break;
255         case SNAT_PROTOCOL_UDP:
256         case SNAT_PROTOCOL_TCP:
257           key0.port = ((tcp_udp_header_t*)l4_header)->src_port;
258           break;
259         default:
260           return SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL;
261         }
262     }
263   *p_key0 = key0;
264   return -1; /* success */
265 }
266
267 static_always_inline int
268 icmp_get_ed_key(ip4_header_t *ip0, nat_ed_ses_key_t *p_key0)
269 {
270   icmp46_header_t *icmp0;
271   nat_ed_ses_key_t key0;
272   icmp_echo_header_t *echo0, *inner_echo0 = 0;
273   ip4_header_t *inner_ip0;
274   void *l4_header = 0;
275   icmp46_header_t *inner_icmp0;
276
277   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
278   echo0 = (icmp_echo_header_t *)(icmp0+1);
279
280   if (!icmp_is_error_message (icmp0))
281     {
282       key0.proto = IP_PROTOCOL_ICMP;
283       key0.l_addr = ip0->dst_address;
284       key0.r_addr = ip0->src_address;
285       key0.l_port = key0.r_port = echo0->identifier;
286     }
287   else
288     {
289       inner_ip0 = (ip4_header_t *)(echo0+1);
290       l4_header = ip4_next_header (inner_ip0);
291       key0.proto = inner_ip0->protocol;
292       key0.l_addr = inner_ip0->src_address;
293       key0.r_addr = inner_ip0->dst_address;
294       switch (ip_proto_to_snat_proto (inner_ip0->protocol))
295         {
296         case SNAT_PROTOCOL_ICMP:
297           inner_icmp0 = (icmp46_header_t*)l4_header;
298           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
299           key0.l_port = key0.r_port = inner_echo0->identifier;
300           break;
301         case SNAT_PROTOCOL_UDP:
302         case SNAT_PROTOCOL_TCP:
303           key0.l_port = ((tcp_udp_header_t*)l4_header)->src_port;
304           key0.r_port = ((tcp_udp_header_t*)l4_header)->dst_port;
305           break;
306         default:
307           return -1;
308         }
309     }
310   *p_key0 = key0;
311   return 0;
312 }
313
314 /**
315  * Get address and port values to be used for ICMP packet translation
316  * and create session if needed
317  *
318  * @param[in,out] sm             NAT main
319  * @param[in,out] node           NAT node runtime
320  * @param[in] thread_index       thread index
321  * @param[in,out] b0             buffer containing packet to be translated
322  * @param[out] p_proto           protocol used for matching
323  * @param[out] p_value           address and port after NAT translation
324  * @param[out] p_dont_translate  if packet should not be translated
325  * @param d                      optional parameter
326  * @param e                      optional parameter
327  */
328 u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
329                            u32 thread_index, vlib_buffer_t *b0,
330                            ip4_header_t *ip0, u8 *p_proto,
331                            snat_session_key_t *p_value,
332                            u8 *p_dont_translate, void *d, void *e)
333 {
334   icmp46_header_t *icmp0;
335   u32 sw_if_index0;
336   u32 rx_fib_index0;
337   snat_session_key_t key0;
338   snat_session_key_t sm0;
339   snat_session_t *s0 = 0;
340   u8 dont_translate = 0;
341   clib_bihash_kv_8_8_t kv0, value0;
342   u8 is_addr_only;
343   u32 next0 = ~0;
344   int err;
345
346   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
347   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
348   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
349
350   key0.protocol = 0;
351
352   err = icmp_get_key (ip0, &key0);
353   if (err != -1)
354     {
355       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
356       next0 = SNAT_OUT2IN_NEXT_DROP;
357       goto out;
358     }
359   key0.fib_index = rx_fib_index0;
360
361   kv0.key = key0.as_u64;
362
363   if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
364                               &value0))
365     {
366       /* Try to match static mapping by external address and port,
367          destination address and port in packet */
368       if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0))
369         {
370           if (!sm->forwarding_enabled)
371             {
372               /* Don't NAT packet aimed at the intfc address */
373               if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
374                                                   ip0->dst_address.as_u32)))
375                 {
376                   dont_translate = 1;
377                   goto out;
378                 }
379               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
380               next0 = SNAT_OUT2IN_NEXT_DROP;
381               goto out;
382             }
383           else
384             {
385               dont_translate = 1;
386               goto out;
387             }
388         }
389
390       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
391                         (icmp0->type != ICMP4_echo_request || !is_addr_only)))
392         {
393           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
394           next0 = SNAT_OUT2IN_NEXT_DROP;
395           goto out;
396         }
397
398       /* Create session initiated by host from external network */
399       s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
400                                              node, thread_index);
401
402       if (!s0)
403         {
404           next0 = SNAT_OUT2IN_NEXT_DROP;
405           goto out;
406         }
407     }
408   else
409     {
410       if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
411                         icmp0->type != ICMP4_echo_request &&
412                         !icmp_is_error_message (icmp0)))
413         {
414           b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
415           next0 = SNAT_OUT2IN_NEXT_DROP;
416           goto out;
417         }
418
419       if (PREDICT_FALSE (value0.value == ~0ULL))
420         {
421           nat_ed_ses_key_t key;
422           clib_bihash_kv_16_8_t s_kv, s_value;
423
424           key.as_u64[0] = 0;
425           key.as_u64[1] = 0;
426           if (icmp_get_ed_key (ip0, &key))
427             {
428               b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
429               next0 = SNAT_OUT2IN_NEXT_DROP;
430               goto out;
431             }
432           key.fib_index = rx_fib_index0;
433           s_kv.key[0] = key.as_u64[0];
434           s_kv.key[1] = key.as_u64[1];
435           if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
436             s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
437                                     s_value.value);
438           else
439            {
440               next0 = SNAT_OUT2IN_NEXT_DROP;
441               goto out;
442            }
443         }
444       else
445         s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
446                                 value0.value);
447     }
448
449 out:
450   *p_proto = key0.protocol;
451   if (s0)
452     *p_value = s0->in2out;
453   *p_dont_translate = dont_translate;
454   if (d)
455     *(snat_session_t**)d = s0;
456   return next0;
457 }
458
459 /**
460  * Get address and port values to be used for ICMP packet translation
461  *
462  * @param[in] sm                 NAT main
463  * @param[in,out] node           NAT node runtime
464  * @param[in] thread_index       thread index
465  * @param[in,out] b0             buffer containing packet to be translated
466  * @param[out] p_proto           protocol used for matching
467  * @param[out] p_value           address and port after NAT translation
468  * @param[out] p_dont_translate  if packet should not be translated
469  * @param d                      optional parameter
470  * @param e                      optional parameter
471  */
472 u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
473                            u32 thread_index, vlib_buffer_t *b0,
474                            ip4_header_t *ip0, u8 *p_proto,
475                            snat_session_key_t *p_value,
476                            u8 *p_dont_translate, void *d, void *e)
477 {
478   icmp46_header_t *icmp0;
479   u32 sw_if_index0;
480   u32 rx_fib_index0;
481   snat_session_key_t key0;
482   snat_session_key_t sm0;
483   u8 dont_translate = 0;
484   u8 is_addr_only;
485   u32 next0 = ~0;
486   int err;
487
488   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
489   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
490   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
491
492   err = icmp_get_key (ip0, &key0);
493   if (err != -1)
494     {
495       b0->error = node->errors[err];
496       next0 = SNAT_OUT2IN_NEXT_DROP;
497       goto out2;
498     }
499   key0.fib_index = rx_fib_index0;
500
501   if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0))
502     {
503       /* Don't NAT packet aimed at the intfc address */
504       if (is_interface_addr(sm, node, sw_if_index0, ip0->dst_address.as_u32))
505         {
506           dont_translate = 1;
507           goto out;
508         }
509       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
510       next0 = SNAT_OUT2IN_NEXT_DROP;
511       goto out;
512     }
513
514   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
515                     (icmp0->type != ICMP4_echo_request || !is_addr_only) &&
516                     !icmp_is_error_message (icmp0)))
517     {
518       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
519       next0 = SNAT_OUT2IN_NEXT_DROP;
520       goto out;
521     }
522
523 out:
524   *p_value = sm0;
525 out2:
526   *p_proto = key0.protocol;
527   *p_dont_translate = dont_translate;
528   return next0;
529 }
530
531 static inline u32 icmp_out2in (snat_main_t *sm,
532                                vlib_buffer_t * b0,
533                                ip4_header_t * ip0,
534                                icmp46_header_t * icmp0,
535                                u32 sw_if_index0,
536                                u32 rx_fib_index0,
537                                vlib_node_runtime_t * node,
538                                u32 next0,
539                                u32 thread_index,
540                                void *d,
541                                void *e)
542 {
543   snat_session_key_t sm0;
544   u8 protocol;
545   icmp_echo_header_t *echo0, *inner_echo0 = 0;
546   ip4_header_t *inner_ip0 = 0;
547   void *l4_header = 0;
548   icmp46_header_t *inner_icmp0;
549   u8 dont_translate;
550   u32 new_addr0, old_addr0;
551   u16 old_id0, new_id0;
552   ip_csum_t sum0;
553   u16 checksum0;
554   u32 next0_tmp;
555
556   echo0 = (icmp_echo_header_t *)(icmp0+1);
557
558   next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0, ip0,
559                                        &protocol, &sm0, &dont_translate, d, e);
560   if (next0_tmp != ~0)
561     next0 = next0_tmp;
562   if (next0 == SNAT_OUT2IN_NEXT_DROP || dont_translate)
563     goto out;
564
565   sum0 = ip_incremental_checksum (0, icmp0,
566                                   ntohs(ip0->length) - ip4_header_bytes (ip0));
567   checksum0 = ~ip_csum_fold (sum0);
568   if (checksum0 != 0 && checksum0 != 0xffff)
569     {
570       next0 = SNAT_OUT2IN_NEXT_DROP;
571       goto out;
572     }
573
574   old_addr0 = ip0->dst_address.as_u32;
575   new_addr0 = ip0->dst_address.as_u32 = sm0.addr.as_u32;
576   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
577
578   sum0 = ip0->checksum;
579   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
580                          dst_address /* changed member */);
581   ip0->checksum = ip_csum_fold (sum0);
582
583   if (!icmp_is_error_message (icmp0))
584     {
585       new_id0 = sm0.port;
586       if (PREDICT_FALSE(new_id0 != echo0->identifier))
587         {
588           old_id0 = echo0->identifier;
589           new_id0 = sm0.port;
590           echo0->identifier = new_id0;
591
592           sum0 = icmp0->checksum;
593           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
594                                  identifier /* changed member */);
595           icmp0->checksum = ip_csum_fold (sum0);
596         }
597     }
598   else
599     {
600       inner_ip0 = (ip4_header_t *)(echo0+1);
601       l4_header = ip4_next_header (inner_ip0);
602
603       if (!ip4_header_checksum_is_valid (inner_ip0))
604         {
605           next0 = SNAT_OUT2IN_NEXT_DROP;
606           goto out;
607         }
608
609       old_addr0 = inner_ip0->src_address.as_u32;
610       inner_ip0->src_address = sm0.addr;
611       new_addr0 = inner_ip0->src_address.as_u32;
612
613       sum0 = icmp0->checksum;
614       sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
615                              src_address /* changed member */);
616       icmp0->checksum = ip_csum_fold (sum0);
617
618       switch (protocol)
619         {
620         case SNAT_PROTOCOL_ICMP:
621           inner_icmp0 = (icmp46_header_t*)l4_header;
622           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
623
624           old_id0 = inner_echo0->identifier;
625           new_id0 = sm0.port;
626           inner_echo0->identifier = new_id0;
627
628           sum0 = icmp0->checksum;
629           sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
630                                  identifier);
631           icmp0->checksum = ip_csum_fold (sum0);
632           break;
633         case SNAT_PROTOCOL_UDP:
634         case SNAT_PROTOCOL_TCP:
635           old_id0 = ((tcp_udp_header_t*)l4_header)->src_port;
636           new_id0 = sm0.port;
637           ((tcp_udp_header_t*)l4_header)->src_port = new_id0;
638
639           sum0 = icmp0->checksum;
640           sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
641                                  src_port);
642           icmp0->checksum = ip_csum_fold (sum0);
643           break;
644         default:
645           ASSERT(0);
646         }
647     }
648
649 out:
650   return next0;
651 }
652
653
654 static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
655                                          vlib_buffer_t * b0,
656                                          ip4_header_t * ip0,
657                                          icmp46_header_t * icmp0,
658                                          u32 sw_if_index0,
659                                          u32 rx_fib_index0,
660                                          vlib_node_runtime_t * node,
661                                          u32 next0, f64 now,
662                                          u32 thread_index,
663                                          snat_session_t ** p_s0)
664 {
665   next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
666                       next0, thread_index, p_s0, 0);
667   snat_session_t * s0 = *p_s0;
668   if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
669     {
670       /* Accounting */
671       s0->last_heard = now;
672       s0->total_pkts++;
673       s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
674       /* Per-user LRU list maintenance */
675       clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
676                          s0->per_user_index);
677       clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
678                           s0->per_user_list_head_index,
679                           s0->per_user_index);
680     }
681   return next0;
682 }
683
684 static snat_session_t *
685 snat_out2in_unknown_proto (snat_main_t *sm,
686                            vlib_buffer_t * b,
687                            ip4_header_t * ip,
688                            u32 rx_fib_index,
689                            u32 thread_index,
690                            f64 now,
691                            vlib_main_t * vm,
692                            vlib_node_runtime_t * node)
693 {
694   clib_bihash_kv_8_8_t kv, value;
695   clib_bihash_kv_16_8_t s_kv, s_value;
696   snat_static_mapping_t *m;
697   snat_session_key_t m_key;
698   u32 old_addr, new_addr;
699   ip_csum_t sum;
700   nat_ed_ses_key_t key;
701   snat_session_t * s;
702   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
703   snat_user_t *u;
704
705   old_addr = ip->dst_address.as_u32;
706
707   key.l_addr = ip->dst_address;
708   key.r_addr = ip->src_address;
709   key.fib_index = rx_fib_index;
710   key.proto = ip->protocol;
711   key.r_port = 0;
712   key.l_port = 0;
713   s_kv.key[0] = key.as_u64[0];
714   s_kv.key[1] = key.as_u64[1];
715
716   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
717     {
718       s = pool_elt_at_index (tsm->sessions, s_value.value);
719       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
720     }
721   else
722     {
723       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
724         {
725           b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
726           return 0;
727         }
728
729       m_key.addr = ip->dst_address;
730       m_key.port = 0;
731       m_key.protocol = 0;
732       m_key.fib_index = rx_fib_index;
733       kv.key = m_key.as_u64;
734       if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
735         {
736           b->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
737           return 0;
738         }
739
740       m = pool_elt_at_index (sm->static_mappings, value.value);
741
742       new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
743
744       u = nat_user_get_or_create (sm, &ip->src_address, m->fib_index,
745                                   thread_index);
746       if (!u)
747         {
748           clib_warning ("create NAT user failed");
749           return 0;
750         }
751
752       /* Create a new session */
753       s = nat_session_alloc_or_recycle (sm, u, thread_index);
754       if (!s)
755         {
756           clib_warning ("create NAT session failed");
757           return 0;
758         }
759
760       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
761       s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
762       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
763       s->outside_address_index = ~0;
764       s->out2in.addr.as_u32 = old_addr;
765       s->out2in.fib_index = rx_fib_index;
766       s->in2out.addr.as_u32 = new_addr;
767       s->in2out.fib_index = m->fib_index;
768       s->in2out.port = s->out2in.port = ip->protocol;
769       u->nstaticsessions++;
770
771       /* Add to lookup tables */
772       s_kv.value = s - tsm->sessions;
773       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
774         clib_warning ("out2in key add failed");
775
776       key.l_addr = ip->dst_address;
777       key.fib_index = m->fib_index;
778       s_kv.key[0] = key.as_u64[0];
779       s_kv.key[1] = key.as_u64[1];
780       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
781         clib_warning ("in2out key add failed");
782    }
783
784   /* Update IP checksum */
785   sum = ip->checksum;
786   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
787   ip->checksum = ip_csum_fold (sum);
788
789   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
790
791   /* Accounting */
792   s->last_heard = now;
793   s->total_pkts++;
794   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
795   /* Per-user LRU list maintenance */
796   clib_dlist_remove (tsm->list_pool, s->per_user_index);
797   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
798                       s->per_user_index);
799
800   return s;
801 }
802
803 static snat_session_t *
804 snat_out2in_lb (snat_main_t *sm,
805                 vlib_buffer_t * b,
806                 ip4_header_t * ip,
807                 u32 rx_fib_index,
808                 u32 thread_index,
809                 f64 now,
810                 vlib_main_t * vm,
811                 vlib_node_runtime_t * node)
812 {
813   nat_ed_ses_key_t key;
814   clib_bihash_kv_16_8_t s_kv, s_value;
815   udp_header_t *udp = ip4_next_header (ip);
816   tcp_header_t *tcp = (tcp_header_t *) udp;
817   snat_session_t *s = 0;
818   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
819   snat_session_key_t e_key, l_key;
820   u32 old_addr, new_addr;
821   u32 proto = ip_proto_to_snat_proto (ip->protocol);
822   u16 new_port, old_port;
823   ip_csum_t sum;
824   snat_user_t *u;
825   u32 address_index;
826   snat_session_key_t eh_key;
827   u8 twice_nat;
828
829   old_addr = ip->dst_address.as_u32;
830
831   key.l_addr = ip->dst_address;
832   key.r_addr = ip->src_address;
833   key.fib_index = rx_fib_index;
834   key.proto = ip->protocol;
835   key.r_port = udp->src_port;
836   key.l_port = udp->dst_port;
837   s_kv.key[0] = key.as_u64[0];
838   s_kv.key[1] = key.as_u64[1];
839
840   if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
841     {
842       s = pool_elt_at_index (tsm->sessions, s_value.value);
843     }
844   else
845     {
846       if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
847         {
848           b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
849           return 0;
850         }
851
852       e_key.addr = ip->dst_address;
853       e_key.port = udp->dst_port;
854       e_key.protocol = proto;
855       e_key.fib_index = rx_fib_index;
856       if (snat_static_mapping_match(sm, e_key, &l_key, 1, 0, &twice_nat))
857         return 0;
858
859       u = nat_user_get_or_create (sm, &l_key.addr, l_key.fib_index,
860                                   thread_index);
861       if (!u)
862       {
863         clib_warning ("create NAT user failed");
864         return 0;
865       }
866
867       s = nat_session_alloc_or_recycle (sm, u, thread_index);
868       if (!s)
869         {
870           clib_warning ("create NAT session failed");
871           return 0;
872         }
873
874       s->ext_host_addr.as_u32 = ip->src_address.as_u32;
875       s->ext_host_port = udp->src_port;
876       s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
877       s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
878       s->outside_address_index = ~0;
879       s->out2in = e_key;
880       s->in2out = l_key;
881       u->nstaticsessions++;
882
883       /* Add to lookup tables */
884       s_kv.value = s - tsm->sessions;
885       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
886         clib_warning ("out2in-ed key add failed");
887
888       if (twice_nat)
889         {
890           eh_key.protocol = proto;
891           if (snat_alloc_outside_address_and_port (sm->twice_nat_addresses, 0,
892                                                    thread_index, &eh_key,
893                                                    &address_index,
894                                                    sm->port_per_thread,
895                                                    sm->per_thread_data[thread_index].snat_thread_index))
896             {
897               b->error = node->errors[SNAT_OUT2IN_ERROR_OUT_OF_PORTS];
898               return 0;
899             }
900           key.r_addr.as_u32 = s->ext_host_nat_addr.as_u32 = eh_key.addr.as_u32;
901           key.r_port = s->ext_host_nat_port = eh_key.port;
902           s->flags |= SNAT_SESSION_FLAG_TWICE_NAT;
903         }
904       key.l_addr = l_key.addr;
905       key.fib_index = l_key.fib_index;
906       key.l_port = l_key.port;
907       s_kv.key[0] = key.as_u64[0];
908       s_kv.key[1] = key.as_u64[1];
909       if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
910         clib_warning ("in2out-ed key add failed");
911     }
912
913   new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
914
915   /* Update IP checksum */
916   sum = ip->checksum;
917   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
918   if (is_twice_nat_session (s))
919     sum = ip_csum_update (sum, ip->src_address.as_u32,
920                           s->ext_host_nat_addr.as_u32, ip4_header_t,
921                           src_address);
922   ip->checksum = ip_csum_fold (sum);
923
924   if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
925     {
926       old_port = tcp->dst_port;
927       tcp->dst_port = s->in2out.port;
928       new_port = tcp->dst_port;
929
930       sum = tcp->checksum;
931       sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
932       sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
933       if (is_twice_nat_session (s))
934         {
935           sum = ip_csum_update (sum, ip->src_address.as_u32,
936                                 s->ext_host_nat_addr.as_u32, ip4_header_t,
937                                 dst_address);
938           sum = ip_csum_update (sum, tcp->src_port, s->ext_host_nat_port,
939                                 ip4_header_t, length);
940           tcp->src_port = s->ext_host_nat_port;
941           ip->src_address.as_u32 = s->ext_host_nat_addr.as_u32;
942         }
943       tcp->checksum = ip_csum_fold(sum);
944     }
945   else
946     {
947       udp->dst_port = s->in2out.port;
948       if (is_twice_nat_session (s))
949         {
950           udp->src_port = s->ext_host_nat_port;
951           ip->src_address.as_u32 = s->ext_host_nat_addr.as_u32;
952         }
953       udp->checksum = 0;
954     }
955
956   vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
957
958   /* Accounting */
959   s->last_heard = now;
960   s->total_pkts++;
961   s->total_bytes += vlib_buffer_length_in_chain (vm, b);
962   /* Per-user LRU list maintenance */
963   clib_dlist_remove (tsm->list_pool, s->per_user_index);
964   clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
965                       s->per_user_index);
966
967   return s;
968 }
969
/*
 * snat_out2in_node_fn - packet function of the "nat44-out2in" graph node.
 *
 * Walks the buffer indices of `frame` and, for each packet, decides the next
 * node (default SNAT_OUT2IN_NEXT_LOOKUP) and, when a session applies, rewrites
 * the IPv4 destination address and L4 destination port to the session's
 * in2out address/port and patches the checksums.
 *
 * Per-packet decision sequence (dual loop order):
 *   - ip->ttl == 1         : ICMP time-exceeded, SNAT_OUT2IN_NEXT_ICMP_ERROR
 *   - protocol maps to ~0  : snat_out2in_unknown_proto(); drop if it returns 0
 *   - ICMP                 : icmp_out2in_slow_path() picks the next node
 *   - IP fragment          : SNAT_OUT2IN_NEXT_REASS
 *   - otherwise            : per-thread out2in hash lookup, falling back to
 *                            static mapping match / session creation
 *
 * The per-packet logic appears three times: for b0 and b1 in the dual loop
 * and for b0 in the single loop. Any change has to be applied to all three.
 *
 * vm    - vlib main, passed through to the vlib helpers used below
 * node  - runtime of this node (error counters, trace flag, cached next)
 * frame - frame whose vector holds the buffer indices to process
 *
 * Returns frame->n_vectors. As a side effect the
 * SNAT_OUT2IN_ERROR_OUT2IN_PACKETS counter is incremented by the number of
 * packets that were not sent to SNAT_OUT2IN_NEXT_DROP.
 */
970 static uword
971 snat_out2in_node_fn (vlib_main_t * vm,
972                   vlib_node_runtime_t * node,
973                   vlib_frame_t * frame)
974 {
975   u32 n_left_from, * from, * to_next;
976   snat_out2in_next_t next_index;
977   u32 pkts_processed = 0;
978   snat_main_t * sm = &snat_main;
      /* Sampled once per frame; every session touched below gets this same
         value as last_heard. */
979   f64 now = vlib_time_now (vm);
980   u32 thread_index = vlib_get_thread_index ();
981
982   from = vlib_frame_vector_args (frame);
983   n_left_from = frame->n_vectors;
984   next_index = node->cached_next_index;
985
986   while (n_left_from > 0)
987     {
988       u32 n_left_to_next;
989
990       vlib_get_next_frame (vm, node, next_index,
991                            to_next, n_left_to_next);
992
          /* Dual loop: two packets per iteration. At least 4 inputs must
             remain because from[2] and from[3] are prefetched below. */
993       while (n_left_from >= 4 && n_left_to_next >= 2)
994         {
995           u32 bi0, bi1;
996           vlib_buffer_t * b0, * b1;
997           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
998           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
999           u32 sw_if_index0, sw_if_index1;
1000           ip4_header_t * ip0, *ip1;
1001           ip_csum_t sum0, sum1;
1002           u32 new_addr0, old_addr0;
1003           u16 new_port0, old_port0;
1004           u32 new_addr1, old_addr1;
1005           u16 new_port1, old_port1;
1006           udp_header_t * udp0, * udp1;
1007           tcp_header_t * tcp0, * tcp1;
1008           icmp46_header_t * icmp0, * icmp1;
1009           snat_session_key_t key0, key1, sm0, sm1;
1010           u32 rx_fib_index0, rx_fib_index1;
1011           u32 proto0, proto1;
               /* Stay 0 unless a session is found or created; the trace code
                  relies on that to report session_index ~0. */
1012           snat_session_t * s0 = 0, * s1 = 0;
1013           clib_bihash_kv_8_8_t kv0, kv1, value0, value1;
1014
1015           /* Prefetch next iteration. */
1016           {
1017             vlib_buffer_t * p2, * p3;
1018
1019             p2 = vlib_get_buffer (vm, from[2]);
1020             p3 = vlib_get_buffer (vm, from[3]);
1021
1022             vlib_prefetch_buffer_header (p2, LOAD);
1023             vlib_prefetch_buffer_header (p3, LOAD);
1024
1025             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1026             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1027           }
1028
1029           /* speculatively enqueue b0 and b1 to the current next frame */
1030           to_next[0] = bi0 = from[0];
1031           to_next[1] = bi1 = from[1];
1032           from += 2;
1033           to_next += 2;
1034           n_left_from -= 2;
1035           n_left_to_next -= 2;
1036
1037           b0 = vlib_get_buffer (vm, bi0);
1038           b1 = vlib_get_buffer (vm, bi1);
1039
1040           vnet_buffer (b0)->snat.flags = 0;
1041           vnet_buffer (b1)->snat.flags = 0;
1042
               /* ---- first packet of the pair (b0) ---- */
               /* udp0, tcp0 and icmp0 are three typed views of the same L4
                  header; which one is meaningful depends on ip0->protocol. */
1043           ip0 = vlib_buffer_get_current (b0);
1044           udp0 = ip4_next_header (ip0);
1045           tcp0 = (tcp_header_t *) udp0;
1046           icmp0 = (icmp46_header_t *) udp0;
1047
1048           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1049           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1050                                    sw_if_index0);
1051
               /* TTL of 1: no translation; set up an ICMP time-exceeded error
                  and send the buffer to the ICMP error next node. */
1052           if (PREDICT_FALSE(ip0->ttl == 1))
1053             {
1054               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1055               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1056                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1057                                            0);
1058               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1059               goto trace0;
1060             }
1061
1062           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1063
               /* ~0 is treated as "unknown protocol": the packet is delegated
                  to snat_out2in_unknown_proto() and dropped when that returns
                  no session. The generic rewrite below is skipped either way. */
1064           if (PREDICT_FALSE (proto0 == ~0))
1065             {
1066               s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
1067                                              thread_index, now, vm, node);
1068               if (!s0)
1069                 next0 = SNAT_OUT2IN_NEXT_DROP;
1070               goto trace0;
1071             }
1072
               /* ICMP: icmp_out2in_slow_path() returns the next index; it is
                  given &s0, presumably so it can report the session it used
                  (confirm against its definition). */
1073           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1074             {
1075               next0 = icmp_out2in_slow_path
1076                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1077                  next0, now, thread_index, &s0);
1078               goto trace0;
1079             }
1080
               /* Fragments are not translated in this node; they are handed
                  to the SNAT_OUT2IN_NEXT_REASS next node. */
1081           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
1082             {
1083               next0 = SNAT_OUT2IN_NEXT_REASS;
1084               goto trace0;
1085             }
1086
               /* Session key: destination address and port as received,
                  protocol, and the FIB index of the RX interface. */
1087           key0.addr = ip0->dst_address;
1088           key0.port = udp0->dst_port;
1089           key0.protocol = proto0;
1090           key0.fib_index = rx_fib_index0;
1091
1092           kv0.key = key0.as_u64;
1093
               /* Non-zero return is handled as "no session for this key". */
1094           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1095                                       &kv0, &value0))
1096             {
1097               /* Try to match static mapping by external address and port,
1098                  destination address and port in packet */
                   /* Non-zero return is handled as "no static mapping": the
                      packet cannot be translated. */
1099               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
1100                 {
1101                   if (!sm->forwarding_enabled)
1102                     {
1103                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1104                       /*
1105                        * Send DHCP packets to the ipv4 stack, or we won't
1106                        * be able to use dhcp client on the outside interface
1107                        */
1108                       if (proto0 != SNAT_PROTOCOL_UDP
1109                           || (udp0->dst_port
1110                               != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1111                         next0 = SNAT_OUT2IN_NEXT_DROP;
1112                       goto trace0;
1113                     }
1114                   else
                         /* Forwarding enabled: next0 is still
                            SNAT_OUT2IN_NEXT_LOOKUP, so the packet goes on
                            untranslated. */
1115                     goto trace0;
1116                 }
1117
1118               /* Create session initiated by host from external network */
1119               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1120                                                      thread_index);
1121               if (!s0)
1122                 {
1123                   next0 = SNAT_OUT2IN_NEXT_DROP;
1124                   goto trace0;
1125                 }
1126             }
1127           else
1128             {
                   /* A stored value of ~0 is a marker rather than a pool
                      index: the packet is handed to snat_out2in_lb() and the
                      generic rewrite below is skipped. */
1129               if (PREDICT_FALSE (value0.value == ~0ULL))
1130                 {
1131                   s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index,
1132                                       now, vm, node);
1133                   if (!s0)
1134                     next0 = SNAT_OUT2IN_NEXT_DROP;
1135                   goto trace0;
1136                 }
1137               else
1138                 {
                       /* Otherwise the value is the session's index in this
                          thread's session pool. */
1139                   s0 = pool_elt_at_index (
1140                     sm->per_thread_data[thread_index].sessions,
1141                     value0.value);
1142                 }
1143             }
1144
               /* Rewrite destination address to the session's in2out address
                  and load sw_if_index[VLIB_TX] with the in2out FIB index. */
1145           old_addr0 = ip0->dst_address.as_u32;
1146           ip0->dst_address = s0->in2out.addr;
1147           new_addr0 = ip0->dst_address.as_u32;
1148           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1149
               /* IP header checksum is patched for the address change, starting
                  from the existing checksum value. */
1150           sum0 = ip0->checksum;
1151           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1152                                  ip4_header_t,
1153                                  dst_address /* changed member */);
1154           ip0->checksum = ip_csum_fold (sum0);
1155
1156           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1157             {
1158               old_port0 = tcp0->dst_port;
1159               tcp0->dst_port = s0->in2out.port;
1160               new_port0 = tcp0->dst_port;
1161
                   /* The address change is applied to the TCP checksum as well
                      (TCP's checksum covers the IP addresses through the
                      pseudo-header), followed by the port change. */
1162               sum0 = tcp0->checksum;
1163               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1164                                      ip4_header_t,
1165                                      dst_address /* changed member */);
1166
1167               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1168                                      ip4_header_t /* cheat */,
1169                                      length /* changed member */);
1170               tcp0->checksum = ip_csum_fold(sum0);
1171             }
1172           else
1173             {
                   /* Non-TCP (ICMP and unknown protocols already left above):
                      the port is rewritten and the checksum field is set to 0
                      instead of being patched.
                      NOTE(review): old_port0 is assigned here but not read
                      again on this path. */
1174               old_port0 = udp0->dst_port;
1175               udp0->dst_port = s0->in2out.port;
1176               udp0->checksum = 0;
1177             }
1178
1179           /* Accounting */
1180           s0->last_heard = now;
1181           s0->total_pkts++;
1182           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1183           /* Per-user LRU list maintenance */
               /* The session is unlinked and re-appended at the tail of its
                  user's list. */
1184           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1185                              s0->per_user_index);
1186           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1187                               s0->per_user_list_head_index,
1188                               s0->per_user_index);
             /* Every exit of the b0 path converges here. */
1189         trace0:
1190
1191           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1192                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1193             {
1194               snat_out2in_trace_t *t =
1195                  vlib_add_trace (vm, node, b0, sizeof (*t));
1196               t->sw_if_index = sw_if_index0;
1197               t->next_index = next0;
1198               t->session_index = ~0;
1199               if (s0)
1200                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1201             }
1202
               /* Counts every packet not sent to the drop node, including
                  those going to the ICMP-error and reassembly next nodes. */
1203           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1204
1205
               /* ---- second packet of the pair (b1): same steps as for b0,
                  using the *1 variables and the trace1 label ---- */
1206           ip1 = vlib_buffer_get_current (b1);
1207           udp1 = ip4_next_header (ip1);
1208           tcp1 = (tcp_header_t *) udp1;
1209           icmp1 = (icmp46_header_t *) udp1;
1210
1211           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1212           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1213                                    sw_if_index1);
1214
1215           if (PREDICT_FALSE(ip1->ttl == 1))
1216             {
1217               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1218               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1219                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1220                                            0);
1221               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1222               goto trace1;
1223             }
1224
1225           proto1 = ip_proto_to_snat_proto (ip1->protocol);
1226
1227           if (PREDICT_FALSE (proto1 == ~0))
1228             {
1229               s1 = snat_out2in_unknown_proto(sm, b1, ip1, rx_fib_index1,
1230                                              thread_index, now, vm, node);
1231               if (!s1)
1232                 next1 = SNAT_OUT2IN_NEXT_DROP;
1233               goto trace1;
1234             }
1235
1236           if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1237             {
1238               next1 = icmp_out2in_slow_path
1239                 (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1240                  next1, now, thread_index, &s1);
1241               goto trace1;
1242             }
1243
1244           if (PREDICT_FALSE (ip4_is_fragment (ip1)))
1245             {
1246               next1 = SNAT_OUT2IN_NEXT_REASS;
1247               goto trace1;
1248             }
1249
1250           key1.addr = ip1->dst_address;
1251           key1.port = udp1->dst_port;
1252           key1.protocol = proto1;
1253           key1.fib_index = rx_fib_index1;
1254
1255           kv1.key = key1.as_u64;
1256
1257           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1258                                       &kv1, &value1))
1259             {
1260               /* Try to match static mapping by external address and port,
1261                  destination address and port in packet */
1262               if (snat_static_mapping_match(sm, key1, &sm1, 1, 0, 0))
1263                 {
1264                   if (!sm->forwarding_enabled)
1265                     {
1266                       b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1267                       /*
1268                        * Send DHCP packets to the ipv4 stack, or we won't
1269                        * be able to use dhcp client on the outside interface
1270                        */
1271                       if (proto1 != SNAT_PROTOCOL_UDP
1272                           || (udp1->dst_port
1273                               != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1274                         next1 = SNAT_OUT2IN_NEXT_DROP;
1275                       goto trace1;
1276                     }
1277                   else
1278                     goto trace1;
1279                 }
1280
1281               /* Create session initiated by host from external network */
1282               s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
1283                                                      thread_index);
1284               if (!s1)
1285                 {
1286                   next1 = SNAT_OUT2IN_NEXT_DROP;
1287                   goto trace1;
1288                 }
1289             }
1290           else
1291             {
1292               if (PREDICT_FALSE (value1.value == ~0ULL))
1293                 {
1294                   s1 = snat_out2in_lb(sm, b1, ip1, rx_fib_index1, thread_index,
1295                                       now, vm, node);
1296                   if (!s1)
1297                     next1 = SNAT_OUT2IN_NEXT_DROP;
1298                   goto trace1;
1299                 }
1300               else
1301                 {
1302                   s1 = pool_elt_at_index (
1303                     sm->per_thread_data[thread_index].sessions,
1304                     value1.value);
1305                 }
1306             }
1307
1308           old_addr1 = ip1->dst_address.as_u32;
1309           ip1->dst_address = s1->in2out.addr;
1310           new_addr1 = ip1->dst_address.as_u32;
1311           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index;
1312
1313           sum1 = ip1->checksum;
1314           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1315                                  ip4_header_t,
1316                                  dst_address /* changed member */);
1317           ip1->checksum = ip_csum_fold (sum1);
1318
1319           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1320             {
1321               old_port1 = tcp1->dst_port;
1322               tcp1->dst_port = s1->in2out.port;
1323               new_port1 = tcp1->dst_port;
1324
1325               sum1 = tcp1->checksum;
1326               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1327                                      ip4_header_t,
1328                                      dst_address /* changed member */);
1329
1330               sum1 = ip_csum_update (sum1, old_port1, new_port1,
1331                                      ip4_header_t /* cheat */,
1332                                      length /* changed member */);
1333               tcp1->checksum = ip_csum_fold(sum1);
1334             }
1335           else
1336             {
                   /* NOTE(review): as for b0, old_port1 is assigned but not
                      read again on this path. */
1337               old_port1 = udp1->dst_port;
1338               udp1->dst_port = s1->in2out.port;
1339               udp1->checksum = 0;
1340             }
1341
1342           /* Accounting */
1343           s1->last_heard = now;
1344           s1->total_pkts++;
1345           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1346           /* Per-user LRU list maintenance */
1347           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1348                              s1->per_user_index);
1349           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1350                               s1->per_user_list_head_index,
1351                               s1->per_user_index);
1352         trace1:
1353
1354           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1355                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
1356             {
1357               snat_out2in_trace_t *t =
1358                  vlib_add_trace (vm, node, b1, sizeof (*t));
1359               t->sw_if_index = sw_if_index1;
1360               t->next_index = next1;
1361               t->session_index = ~0;
1362               if (s1)
1363                 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1364             }
1365
1366           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
1367
1368           /* verify speculative enqueues, maybe switch current next frame */
1369           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1370                                            to_next, n_left_to_next,
1371                                            bi0, bi1, next0, next1);
1372         }
1373
           /* Single loop: handles the packets the dual loop did not take,
              one at a time and without prefetching. */
1374       while (n_left_from > 0 && n_left_to_next > 0)
1375         {
1376           u32 bi0;
1377           vlib_buffer_t * b0;
1378           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1379           u32 sw_if_index0;
1380           ip4_header_t * ip0;
1381           ip_csum_t sum0;
1382           u32 new_addr0, old_addr0;
1383           u16 new_port0, old_port0;
1384           udp_header_t * udp0;
1385           tcp_header_t * tcp0;
1386           icmp46_header_t * icmp0;
1387           snat_session_key_t key0, sm0;
1388           u32 rx_fib_index0;
1389           u32 proto0;
1390           snat_session_t * s0 = 0;
1391           clib_bihash_kv_8_8_t kv0, value0;
1392
1393           /* speculatively enqueue b0 to the current next frame */
1394           bi0 = from[0];
1395           to_next[0] = bi0;
1396           from += 1;
1397           to_next += 1;
1398           n_left_from -= 1;
1399           n_left_to_next -= 1;
1400
1401           b0 = vlib_get_buffer (vm, bi0);
1402
1403           vnet_buffer (b0)->snat.flags = 0;
1404
1405           ip0 = vlib_buffer_get_current (b0);
1406           udp0 = ip4_next_header (ip0);
1407           tcp0 = (tcp_header_t *) udp0;
1408           icmp0 = (icmp46_header_t *) udp0;
1409
1410           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1411           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1412                                    sw_if_index0);
1413
1414           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1415
               /* NOTE(review): here the unknown-protocol check runs before the
                  TTL check, whereas the dual loop checks TTL first. A packet
                  with TTL 1 and an unknown protocol is therefore handled
                  differently depending on which loop picks it up; confirm
                  which order is intended. */
1416           if (PREDICT_FALSE (proto0 == ~0))
1417             {
1418               s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
1419                                              thread_index, now, vm, node);
1420               if (!s0)
1421                 next0 = SNAT_OUT2IN_NEXT_DROP;
1422               goto trace00;
1423             }
1424
1425           if (PREDICT_FALSE(ip0->ttl == 1))
1426             {
1427               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1428               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1429                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1430                                            0);
1431               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1432               goto trace00;
1433             }
1434
1435           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1436             {
1437               next0 = icmp_out2in_slow_path
1438                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1439                  next0, now, thread_index, &s0);
1440               goto trace00;
1441             }
1442
1443           if (PREDICT_FALSE (ip4_is_fragment (ip0)))
1444             {
1445               next0 = SNAT_OUT2IN_NEXT_REASS;
1446               goto trace00;
1447             }
1448
1449           key0.addr = ip0->dst_address;
1450           key0.port = udp0->dst_port;
1451           key0.protocol = proto0;
1452           key0.fib_index = rx_fib_index0;
1453
1454           kv0.key = key0.as_u64;
1455
1456           if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
1457                                       &kv0, &value0))
1458             {
1459               /* Try to match static mapping by external address and port,
1460                  destination address and port in packet */
1461               if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
1462                 {
1463                   if (!sm->forwarding_enabled)
1464                     {
1465                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1466                       /*
1467                        * Send DHCP packets to the ipv4 stack, or we won't
1468                        * be able to use dhcp client on the outside interface
1469                        */
1470                       if (proto0 != SNAT_PROTOCOL_UDP
1471                           || (udp0->dst_port
1472                               != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1473                         next0 = SNAT_OUT2IN_NEXT_DROP;
1474                       goto trace00;
1475                     }
1476                   else
1477                     goto trace00;
1478                 }
1479
1480               /* Create session initiated by host from external network */
1481               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1482                                                      thread_index);
1483               if (!s0)
1484                 {
1485                   next0 = SNAT_OUT2IN_NEXT_DROP;
1486                   goto trace00;
1487                 }
1488             }
1489           else
1490             {
1491               if (PREDICT_FALSE (value0.value == ~0ULL))
1492                 {
1493                   s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index,
1494                                       now, vm, node);
1495                   if (!s0)
1496                     next0 = SNAT_OUT2IN_NEXT_DROP;
1497                   goto trace00;
1498                 }
1499               else
1500                 {
1501                   s0 = pool_elt_at_index (
1502                     sm->per_thread_data[thread_index].sessions,
1503                     value0.value);
1504                 }
1505             }
1506
1507           old_addr0 = ip0->dst_address.as_u32;
1508           ip0->dst_address = s0->in2out.addr;
1509           new_addr0 = ip0->dst_address.as_u32;
1510           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1511
1512           sum0 = ip0->checksum;
1513           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1514                                  ip4_header_t,
1515                                  dst_address /* changed member */);
1516           ip0->checksum = ip_csum_fold (sum0);
1517
1518           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1519             {
1520               old_port0 = tcp0->dst_port;
1521               tcp0->dst_port = s0->in2out.port;
1522               new_port0 = tcp0->dst_port;
1523
1524               sum0 = tcp0->checksum;
1525               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1526                                      ip4_header_t,
1527                                      dst_address /* changed member */);
1528
1529               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1530                                      ip4_header_t /* cheat */,
1531                                      length /* changed member */);
1532               tcp0->checksum = ip_csum_fold(sum0);
1533             }
1534           else
1535             {
                   /* NOTE(review): old_port0 is assigned but not read again on
                      this path. */
1536               old_port0 = udp0->dst_port;
1537               udp0->dst_port = s0->in2out.port;
1538               udp0->checksum = 0;
1539             }
1540
1541           /* Accounting */
1542           s0->last_heard = now;
1543           s0->total_pkts++;
1544           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1545           /* Per-user LRU list maintenance */
1546           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1547                              s0->per_user_index);
1548           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1549                               s0->per_user_list_head_index,
1550                               s0->per_user_index);
1551         trace00:
1552
1553           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1554                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1555             {
1556               snat_out2in_trace_t *t =
1557                  vlib_add_trace (vm, node, b0, sizeof (*t));
1558               t->sw_if_index = sw_if_index0;
1559               t->next_index = next0;
1560               t->session_index = ~0;
1561               if (s0)
1562                 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
1563             }
1564
1565           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1566
1567           /* verify speculative enqueue, maybe switch current next frame */
1568           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1569                                            to_next, n_left_to_next,
1570                                            bi0, next0);
1571         }
1572
1573       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1574     }
1575
       /* One counter update per frame, accumulated in pkts_processed above. */
1576   vlib_node_increment_counter (vm, snat_out2in_node.index,
1577                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1578                                pkts_processed);
1579   return frame->n_vectors;
1580 }
1581
/*
 * Registration of the "nat44-out2in" graph node: binds snat_out2in_node_fn
 * as its packet function, format_snat_out2in_trace as its trace formatter,
 * the snat_out2in_error_strings table as its error counters, and maps each
 * SNAT_OUT2IN_NEXT_* disposition to the name of the node that receives it.
 */
1582 VLIB_REGISTER_NODE (snat_out2in_node) = {
1583   .function = snat_out2in_node_fn,
1584   .name = "nat44-out2in",
       /* Frame elements are u32 values (the node function reads them as
          buffer indices). */
1585   .vector_size = sizeof (u32),
1586   .format_trace = format_snat_out2in_trace,
1587   .type = VLIB_NODE_TYPE_INTERNAL,
1588
1589   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1590   .error_strings = snat_out2in_error_strings,
1591
1592   .runtime_data_bytes = sizeof (snat_runtime_t),
1593
1594   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1595
1596   /* edit / add dispositions here */
1597   .next_nodes = {
1598     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1599     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1600     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1601     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1602   },
1603 };
     /* NOTE(review): presumably emits CPU-architecture-specific variants of
        the node function; confirm against the macro definition. */
1604 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
1605
1606 static uword
1607 nat44_out2in_reass_node_fn (vlib_main_t * vm,
1608                             vlib_node_runtime_t * node,
1609                             vlib_frame_t * frame)
1610 {
1611   u32 n_left_from, *from, *to_next;
1612   snat_out2in_next_t next_index;
1613   u32 pkts_processed = 0;
1614   snat_main_t *sm = &snat_main;
1615   f64 now = vlib_time_now (vm);
1616   u32 thread_index = vlib_get_thread_index ();
1617   snat_main_per_thread_data_t *per_thread_data =
1618     &sm->per_thread_data[thread_index];
1619   u32 *fragments_to_drop = 0;
1620   u32 *fragments_to_loopback = 0;
1621
1622   from = vlib_frame_vector_args (frame);
1623   n_left_from = frame->n_vectors;
1624   next_index = node->cached_next_index;
1625
1626   while (n_left_from > 0)
1627     {
1628       u32 n_left_to_next;
1629
1630       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1631
1632       while (n_left_from > 0 && n_left_to_next > 0)
1633        {
1634           u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
1635           vlib_buffer_t *b0;
1636           u32 next0;
1637           u8 cached0 = 0;
1638           ip4_header_t *ip0;
1639           nat_reass_ip4_t *reass0;
1640           udp_header_t * udp0;
1641           tcp_header_t * tcp0;
1642           snat_session_key_t key0, sm0;
1643           clib_bihash_kv_8_8_t kv0, value0;
1644           snat_session_t * s0 = 0;
1645           u16 old_port0, new_port0;
1646           ip_csum_t sum0;
1647
1648           /* speculatively enqueue b0 to the current next frame */
1649           bi0 = from[0];
1650           to_next[0] = bi0;
1651           from += 1;
1652           to_next += 1;
1653           n_left_from -= 1;
1654           n_left_to_next -= 1;
1655
1656           b0 = vlib_get_buffer (vm, bi0);
1657           next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1658
1659           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1660           rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1661                                                                sw_if_index0);
1662
1663           if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
1664             {
1665               next0 = SNAT_OUT2IN_NEXT_DROP;
1666               b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
1667               goto trace0;
1668             }
1669
1670           ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
1671           udp0 = ip4_next_header (ip0);
1672           tcp0 = (tcp_header_t *) udp0;
1673           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1674
1675           reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
1676                                                  ip0->dst_address,
1677                                                  ip0->fragment_id,
1678                                                  ip0->protocol,
1679                                                  1,
1680                                                  &fragments_to_drop);
1681
1682           if (PREDICT_FALSE (!reass0))
1683             {
1684               next0 = SNAT_OUT2IN_NEXT_DROP;
1685               b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_REASS];
1686               goto trace0;
1687             }
1688
1689           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1690             {
1691               key0.addr = ip0->dst_address;
1692               key0.port = udp0->dst_port;
1693               key0.protocol = proto0;
1694               key0.fib_index = rx_fib_index0;
1695               kv0.key = key0.as_u64;
1696
1697               if (clib_bihash_search_8_8 (&per_thread_data->out2in, &kv0, &value0))
1698                 {
1699                   /* Try to match static mapping by external address and port,
1700                      destination address and port in packet */
1701                   if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
1702                     {
1703                       if (!sm->forwarding_enabled)
1704                         {
1705                           b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1706                           /*
1707                            * Send DHCP packets to the ipv4 stack, or we won't
1708                            * be able to use dhcp client on the outside interface
1709                            */
1710                           if (proto0 != SNAT_PROTOCOL_UDP
1711                               || (udp0->dst_port
1712                                   != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
1713                             next0 = SNAT_OUT2IN_NEXT_DROP;
1714                           goto trace0;
1715                         }
1716                       else
1717                         goto trace0;
1718                     }
1719
1720                   /* Create session initiated by host from external network */
1721                   s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
1722                                                          thread_index);
1723                   if (!s0)
1724                     {
1725                       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1726                       next0 = SNAT_OUT2IN_NEXT_DROP;
1727                       goto trace0;
1728                     }
1729                   reass0->sess_index = s0 - per_thread_data->sessions;
1730                   reass0->thread_index = thread_index;
1731                 }
1732               else
1733                 {
1734                   s0 = pool_elt_at_index (per_thread_data->sessions,
1735                                           value0.value);
1736                   reass0->sess_index = value0.value;
1737                 }
1738               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
1739             }
1740           else
1741             {
1742               if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
1743                 {
1744                   if (nat_ip4_reass_add_fragment (reass0, bi0))
1745                     {
1746                       b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_FRAG];
1747                       next0 = SNAT_OUT2IN_NEXT_DROP;
1748                       goto trace0;
1749                     }
1750                   cached0 = 1;
1751                   goto trace0;
1752                 }
1753               s0 = pool_elt_at_index (per_thread_data->sessions,
1754                                       reass0->sess_index);
1755             }
1756
1757           old_addr0 = ip0->dst_address.as_u32;
1758           ip0->dst_address = s0->in2out.addr;
1759           new_addr0 = ip0->dst_address.as_u32;
1760           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
1761
1762           sum0 = ip0->checksum;
1763           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1764                                  ip4_header_t,
1765                                  dst_address /* changed member */);
1766           ip0->checksum = ip_csum_fold (sum0);
1767
1768           if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
1769             {
1770               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1771                 {
1772                   old_port0 = tcp0->dst_port;
1773                   tcp0->dst_port = s0->in2out.port;
1774                   new_port0 = tcp0->dst_port;
1775
1776                   sum0 = tcp0->checksum;
1777                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1778                                          ip4_header_t,
1779                                          dst_address /* changed member */);
1780
1781                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
1782                                          ip4_header_t /* cheat */,
1783                                          length /* changed member */);
1784                   tcp0->checksum = ip_csum_fold(sum0);
1785                 }
1786               else
1787                 {
1788                   old_port0 = udp0->dst_port;
1789                   udp0->dst_port = s0->in2out.port;
1790                   udp0->checksum = 0;
1791                 }
1792             }
1793
1794           /* Accounting */
1795           s0->last_heard = now;
1796           s0->total_pkts++;
1797           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1798           /* Per-user LRU list maintenance */
1799           clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1800                              s0->per_user_index);
1801           clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1802                               s0->per_user_list_head_index,
1803                               s0->per_user_index);
1804
1805         trace0:
1806           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1807                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1808             {
1809               nat44_out2in_reass_trace_t *t =
1810                  vlib_add_trace (vm, node, b0, sizeof (*t));
1811               t->cached = cached0;
1812               t->sw_if_index = sw_if_index0;
1813               t->next_index = next0;
1814             }
1815
1816           if (cached0)
1817             {
1818               n_left_to_next++;
1819               to_next--;
1820             }
1821           else
1822             {
1823               pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1824
1825               /* verify speculative enqueue, maybe switch current next frame */
1826               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1827                                                to_next, n_left_to_next,
1828                                                bi0, next0);
1829             }
1830
1831           if (n_left_from == 0 && vec_len (fragments_to_loopback))
1832             {
1833               from = vlib_frame_vector_args (frame);
1834               u32 len = vec_len (fragments_to_loopback);
1835               if (len <= VLIB_FRAME_SIZE)
1836                 {
1837                   clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
1838                   n_left_from = len;
1839                   vec_reset_length (fragments_to_loopback);
1840                 }
1841               else
1842                 {
1843                   clib_memcpy (from,
1844                                fragments_to_loopback + (len - VLIB_FRAME_SIZE),
1845                                sizeof (u32) * VLIB_FRAME_SIZE);
1846                   n_left_from = VLIB_FRAME_SIZE;
1847                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
1848                 }
1849             }
1850        }
1851
1852       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1853     }
1854
1855   vlib_node_increment_counter (vm, nat44_out2in_reass_node.index,
1856                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1857                                pkts_processed);
1858
1859   nat_send_all_to_node (vm, fragments_to_drop, node,
1860                         &node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT],
1861                         SNAT_OUT2IN_NEXT_DROP);
1862
1863   vec_free (fragments_to_drop);
1864   vec_free (fragments_to_loopback);
1865   return frame->n_vectors;
1866 }
1867
/*
 * Graph-node registration for "nat44-out2in-reass".
 *
 * Binds the node to nat44_out2in_reass_node_fn, uses
 * format_nat44_out2in_reass_trace to print packet traces, and reuses the
 * snat_out2in_error_strings table for its error counters.  The node has
 * SNAT_OUT2IN_N_NEXT dispositions; the SNAT_OUT2IN_NEXT_REASS slot names this
 * very node ("nat44-out2in-reass").
 */
1868 VLIB_REGISTER_NODE (nat44_out2in_reass_node) = {
1869   .function = nat44_out2in_reass_node_fn,
1870   .name = "nat44-out2in-reass",
1871   .vector_size = sizeof (u32),
1872   .format_trace = format_nat44_out2in_reass_trace,
1873   .type = VLIB_NODE_TYPE_INTERNAL,
1874
       /* Error counters are shared with the other out2in nodes in this file. */
1875   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1876   .error_strings = snat_out2in_error_strings,
1877
1878   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1879
1880   /* edit / add dispositions here */
1881   .next_nodes = {
1882     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1883     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1884     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1885     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
1886   },
1887 };
/* NOTE(review): presumably emits per-CPU-architecture variants of the node
   function -- confirm against the macro definition. */
1888 VLIB_NODE_FUNCTION_MULTIARCH (nat44_out2in_reass_node,
1889                               nat44_out2in_reass_node_fn);
1890
1891 /**************************/
1892 /*** deterministic mode ***/
1893 /**************************/
/*
 * Node function for deterministic-mode NAT44 outside-to-inside translation.
 *
 * Walks the buffer indices in 'frame' (two per iteration while at least four
 * remain, then one per iteration) and treats each buffer's current data as an
 * IPv4 header:
 *   - TTL == 1: prepares an ICMP time-exceeded error and selects
 *     SNAT_OUT2IN_NEXT_ICMP_ERROR;
 *   - ICMP: hands the packet to icmp_out2in() and uses the next index it
 *     returns;
 *   - otherwise: finds the deterministic mapping for the destination address
 *     (snat_det_map_by_out), derives the inside address (snat_det_reverse),
 *     looks up the session keyed by source address, source port and
 *     destination port, then rewrites destination address and port and
 *     adjusts the checksums.
 * Packets with no mapping or no session go to SNAT_OUT2IN_NEXT_DROP with
 * SNAT_OUT2IN_ERROR_NO_TRANSLATION.  Every packet whose next index is not
 * SNAT_OUT2IN_NEXT_DROP is counted in SNAT_OUT2IN_ERROR_OUT2IN_PACKETS.
 *
 * @param vm     vlib main, passed to the buffer / frame helpers
 * @param node   node runtime (cached next index, error table, trace flag)
 * @param frame  input frame holding the buffer indices to process
 * @return       frame->n_vectors
 */
1894 static uword
1895 snat_det_out2in_node_fn (vlib_main_t * vm,
1896                          vlib_node_runtime_t * node,
1897                          vlib_frame_t * frame)
1898 {
1899   u32 n_left_from, * from, * to_next;
1900   snat_out2in_next_t next_index;
1901   u32 pkts_processed = 0;
1902   snat_main_t * sm = &snat_main;
1903   u32 thread_index = vlib_get_thread_index ();
1904
1905   from = vlib_frame_vector_args (frame);
1906   n_left_from = frame->n_vectors;
1907   next_index = node->cached_next_index;
1908
1909   while (n_left_from > 0)
1910     {
1911       u32 n_left_to_next;
1912
1913       vlib_get_next_frame (vm, node, next_index,
1914                            to_next, n_left_to_next);
1915
           /* Dual loop: two buffers per iteration.  Four must remain because
              from[2] and from[3] are prefetched for the following iteration. */
1916       while (n_left_from >= 4 && n_left_to_next >= 2)
1917         {
1918           u32 bi0, bi1;
1919           vlib_buffer_t * b0, * b1;
1920           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
1921           u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
1922           u32 sw_if_index0, sw_if_index1;
1923           ip4_header_t * ip0, * ip1;
1924           ip_csum_t sum0, sum1;
1925           ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
1926           u16 new_port0, old_port0, old_port1, new_port1;
1927           udp_header_t * udp0, * udp1;
1928           tcp_header_t * tcp0, * tcp1;
1929           u32 proto0, proto1;
1930           snat_det_out_key_t key0, key1;
1931           snat_det_map_t * dm0, * dm1;
1932           snat_det_session_t * ses0 = 0, * ses1 = 0;
1933           u32 rx_fib_index0, rx_fib_index1;
1934           icmp46_header_t * icmp0, * icmp1;
1935
1936           /* Prefetch next iteration. */
1937           {
1938             vlib_buffer_t * p2, * p3;
1939
1940             p2 = vlib_get_buffer (vm, from[2]);
1941             p3 = vlib_get_buffer (vm, from[3]);
1942
1943             vlib_prefetch_buffer_header (p2, LOAD);
1944             vlib_prefetch_buffer_header (p3, LOAD);
1945
1946             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1947             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1948           }
1949
1950           /* speculatively enqueue b0 and b1 to the current next frame */
1951           to_next[0] = bi0 = from[0];
1952           to_next[1] = bi1 = from[1];
1953           from += 2;
1954           to_next += 2;
1955           n_left_from -= 2;
1956           n_left_to_next -= 2;
1957
1958           b0 = vlib_get_buffer (vm, bi0);
1959           b1 = vlib_get_buffer (vm, bi1);
1960
               /* udp0 and tcp0 are two typed views of the same pointer: the
                  header that follows the IPv4 header. */
1961           ip0 = vlib_buffer_get_current (b0);
1962           udp0 = ip4_next_header (ip0);
1963           tcp0 = (tcp_header_t *) udp0;
1964
1965           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1966
               /* TTL of 1: do not translate, hand the packet to the ICMP
                  error node as "time exceeded in transit". */
1967           if (PREDICT_FALSE(ip0->ttl == 1))
1968             {
1969               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1970               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1971                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
1972                                            0);
1973               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
1974               goto trace0;
1975             }
1976
1977           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1978
               /* ICMP is translated entirely by icmp_out2in(); ses0 and dm0
                  are passed by address so the trace below can report the
                  session (presumably filled in by the callee -- verify). */
1979           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
1980             {
1981               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1982               icmp0 = (icmp46_header_t *) udp0;
1983
1984               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
1985                                   rx_fib_index0, node, next0, thread_index,
1986                                   &ses0, &dm0);
1987               goto trace0;
1988             }
1989
               /* Session key: external host address and port (packet source)
                  plus the outside port (packet destination port). */
1990           key0.ext_host_addr = ip0->src_address;
1991           key0.ext_host_port = tcp0->src;
1992           key0.out_port = tcp0->dst;
1993
1994           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
1995           if (PREDICT_FALSE(!dm0))
1996             {
1997               clib_warning("unknown dst address:  %U",
1998                            format_ip4_address, &ip0->dst_address);
1999               next0 = SNAT_OUT2IN_NEXT_DROP;
2000               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2001               goto trace0;
2002             }
2003
               /* Derive the inside address from the outside address and the
                  host-order destination port. */
2004           snat_det_reverse(dm0, &ip0->dst_address,
2005                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
2006
2007           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
2008           if (PREDICT_FALSE(!ses0))
2009             {
2010               clib_warning("no match src %U:%d dst %U:%d for user %U",
2011                            format_ip4_address, &ip0->src_address,
2012                            clib_net_to_host_u16 (tcp0->src),
2013                            format_ip4_address, &ip0->dst_address,
2014                            clib_net_to_host_u16 (tcp0->dst),
2015                            format_ip4_address, &new_addr0);
2016               next0 = SNAT_OUT2IN_NEXT_DROP;
2017               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2018               goto trace0;
2019             }
2020           new_port0 = ses0->in_port;
2021
               /* Rewrite the destination address and patch the IPv4 header
                  checksum for that single changed field. */
2022           old_addr0 = ip0->dst_address;
2023           ip0->dst_address = new_addr0;
2024           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2025
2026           sum0 = ip0->checksum;
2027           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2028                                  ip4_header_t,
2029                                  dst_address /* changed member */);
2030           ip0->checksum = ip_csum_fold (sum0);
2031
2032           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2033             {
                   /* Session state tracking: FIN on an established session
                      moves it to CLOSE_WAIT; ACK in LAST_ACK closes it. */
2034               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2035                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2036               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
2037                 snat_det_ses_close(dm0, ses0);
2038
2039               old_port0 = tcp0->dst;
2040               tcp0->dst = new_port0;
2041
                   /* The TCP checksum is patched for both the address and
                      the port change. */
2042               sum0 = tcp0->checksum;
2043               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2044                                      ip4_header_t,
2045                                      dst_address /* changed member */);
2046
2047               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2048                                      ip4_header_t /* cheat */,
2049                                      length /* changed member */);
2050               tcp0->checksum = ip_csum_fold(sum0);
2051             }
2052           else
2053             {
                   /* Every non-TCP protocol that reaches this point takes
                      this branch: the port is rewritten through the UDP view
                      and the checksum field is set to 0 rather than updated.
                      NOTE(review): old_port0 is stored here but not read
                      again in this function. */
2054               old_port0 = udp0->dst_port;
2055               udp0->dst_port = new_port0;
2056               udp0->checksum = 0;
2057             }
2058
2059         trace0:
2060
2061           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2062                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2063             {
2064               snat_out2in_trace_t *t =
2065                  vlib_add_trace (vm, node, b0, sizeof (*t));
2066               t->sw_if_index = sw_if_index0;
2067               t->next_index = next0;
2068               t->session_index = ~0;
                   /* dm0 is only dereferenced when a session was found. */
2069               if (ses0)
2070                 t->session_index = ses0 - dm0->sessions;
2071             }
2072
2073           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2074
               /* Second packet of the pair: same steps as above on b1.
                  NOTE(review): b1 was already fetched from bi1 before the
                  first packet was handled; this call repeats that lookup. */
2075           b1 = vlib_get_buffer (vm, bi1);
2076
2077           ip1 = vlib_buffer_get_current (b1);
2078           udp1 = ip4_next_header (ip1);
2079           tcp1 = (tcp_header_t *) udp1;
2080
2081           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2082
2083           if (PREDICT_FALSE(ip1->ttl == 1))
2084             {
2085               vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2086               icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2087                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2088                                            0);
2089               next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2090               goto trace1;
2091             }
2092
2093           proto1 = ip_proto_to_snat_proto (ip1->protocol);
2094
2095           if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
2096             {
2097               rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
2098               icmp1 = (icmp46_header_t *) udp1;
2099
2100               next1 = icmp_out2in(sm, b1, ip1, icmp1, sw_if_index1,
2101                                   rx_fib_index1, node, next1, thread_index,
2102                                   &ses1, &dm1);
2103               goto trace1;
2104             }
2105
2106           key1.ext_host_addr = ip1->src_address;
2107           key1.ext_host_port = tcp1->src;
2108           key1.out_port = tcp1->dst;
2109
2110           dm1 = snat_det_map_by_out(sm, &ip1->dst_address);
2111           if (PREDICT_FALSE(!dm1))
2112             {
2113               clib_warning("unknown dst address:  %U",
2114                            format_ip4_address, &ip1->dst_address);
2115               next1 = SNAT_OUT2IN_NEXT_DROP;
2116               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2117               goto trace1;
2118             }
2119
2120           snat_det_reverse(dm1, &ip1->dst_address,
2121                            clib_net_to_host_u16(tcp1->dst), &new_addr1);
2122
2123           ses1 = snat_det_get_ses_by_out (dm1, &new_addr1, key1.as_u64);
2124           if (PREDICT_FALSE(!ses1))
2125             {
2126               clib_warning("no match src %U:%d dst %U:%d for user %U",
2127                            format_ip4_address, &ip1->src_address,
2128                            clib_net_to_host_u16 (tcp1->src),
2129                            format_ip4_address, &ip1->dst_address,
2130                            clib_net_to_host_u16 (tcp1->dst),
2131                            format_ip4_address, &new_addr1);
2132               next1 = SNAT_OUT2IN_NEXT_DROP;
2133               b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2134               goto trace1;
2135             }
2136           new_port1 = ses1->in_port;
2137
2138           old_addr1 = ip1->dst_address;
2139           ip1->dst_address = new_addr1;
2140           vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2141
2142           sum1 = ip1->checksum;
2143           sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2144                                  ip4_header_t,
2145                                  dst_address /* changed member */);
2146           ip1->checksum = ip_csum_fold (sum1);
2147
2148           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
2149             {
2150               if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
2151                 ses1->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2152               else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_LAST_ACK)
2153                 snat_det_ses_close(dm1, ses1);
2154
2155               old_port1 = tcp1->dst;
2156               tcp1->dst = new_port1;
2157
2158               sum1 = tcp1->checksum;
2159               sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
2160                                      ip4_header_t,
2161                                      dst_address /* changed member */);
2162
2163               sum1 = ip_csum_update (sum1, old_port1, new_port1,
2164                                      ip4_header_t /* cheat */,
2165                                      length /* changed member */);
2166               tcp1->checksum = ip_csum_fold(sum1);
2167             }
2168           else
2169             {
2170               old_port1 = udp1->dst_port;
2171               udp1->dst_port = new_port1;
2172               udp1->checksum = 0;
2173             }
2174
2175         trace1:
2176
2177           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2178                             && (b1->flags & VLIB_BUFFER_IS_TRACED)))
2179             {
2180               snat_out2in_trace_t *t =
2181                  vlib_add_trace (vm, node, b1, sizeof (*t));
2182               t->sw_if_index = sw_if_index1;
2183               t->next_index = next1;
2184               t->session_index = ~0;
2185               if (ses1)
2186                 t->session_index = ses1 - dm1->sessions;
2187             }
2188
2189           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
2190
2191           /* verify speculative enqueues, maybe switch current next frame */
2192           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2193                                            to_next, n_left_to_next,
2194                                            bi0, bi1, next0, next1);
2195          }
2196
           /* Single loop: handles whatever the dual loop left over, one
              buffer per iteration, with the same per-packet steps. */
2197       while (n_left_from > 0 && n_left_to_next > 0)
2198         {
2199           u32 bi0;
2200           vlib_buffer_t * b0;
2201           u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
2202           u32 sw_if_index0;
2203           ip4_header_t * ip0;
2204           ip_csum_t sum0;
2205           ip4_address_t new_addr0, old_addr0;
2206           u16 new_port0, old_port0;
2207           udp_header_t * udp0;
2208           tcp_header_t * tcp0;
2209           u32 proto0;
2210           snat_det_out_key_t key0;
2211           snat_det_map_t * dm0;
2212           snat_det_session_t * ses0 = 0;
2213           u32 rx_fib_index0;
2214           icmp46_header_t * icmp0;
2215
2216           /* speculatively enqueue b0 to the current next frame */
2217           bi0 = from[0];
2218           to_next[0] = bi0;
2219           from += 1;
2220           to_next += 1;
2221           n_left_from -= 1;
2222           n_left_to_next -= 1;
2223
2224           b0 = vlib_get_buffer (vm, bi0);
2225
2226           ip0 = vlib_buffer_get_current (b0);
2227           udp0 = ip4_next_header (ip0);
2228           tcp0 = (tcp_header_t *) udp0;
2229
2230           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2231
2232           if (PREDICT_FALSE(ip0->ttl == 1))
2233             {
2234               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2235               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2236                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2237                                            0);
2238               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2239               goto trace00;
2240             }
2241
2242           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2243
2244           if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2245             {
2246               rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2247               icmp0 = (icmp46_header_t *) udp0;
2248
2249               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2250                                   rx_fib_index0, node, next0, thread_index,
2251                                   &ses0, &dm0);
2252               goto trace00;
2253             }
2254
2255           key0.ext_host_addr = ip0->src_address;
2256           key0.ext_host_port = tcp0->src;
2257           key0.out_port = tcp0->dst;
2258
2259           dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
2260           if (PREDICT_FALSE(!dm0))
2261             {
2262               clib_warning("unknown dst address:  %U",
2263                            format_ip4_address, &ip0->dst_address);
2264               next0 = SNAT_OUT2IN_NEXT_DROP;
2265               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2266               goto trace00;
2267             }
2268
2269           snat_det_reverse(dm0, &ip0->dst_address,
2270                            clib_net_to_host_u16(tcp0->dst), &new_addr0);
2271
2272           ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
2273           if (PREDICT_FALSE(!ses0))
2274             {
2275               clib_warning("no match src %U:%d dst %U:%d for user %U",
2276                            format_ip4_address, &ip0->src_address,
2277                            clib_net_to_host_u16 (tcp0->src),
2278                            format_ip4_address, &ip0->dst_address,
2279                            clib_net_to_host_u16 (tcp0->dst),
2280                            format_ip4_address, &new_addr0);
2281               next0 = SNAT_OUT2IN_NEXT_DROP;
2282               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2283               goto trace00;
2284             }
2285           new_port0 = ses0->in_port;
2286
2287           old_addr0 = ip0->dst_address;
2288           ip0->dst_address = new_addr0;
2289           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
2290
2291           sum0 = ip0->checksum;
2292           sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2293                                  ip4_header_t,
2294                                  dst_address /* changed member */);
2295           ip0->checksum = ip_csum_fold (sum0);
2296
2297           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2298             {
2299               if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2300                 ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
2301               else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
2302                 snat_det_ses_close(dm0, ses0);
2303
2304               old_port0 = tcp0->dst;
2305               tcp0->dst = new_port0;
2306
2307               sum0 = tcp0->checksum;
2308               sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2309                                      ip4_header_t,
2310                                      dst_address /* changed member */);
2311
2312               sum0 = ip_csum_update (sum0, old_port0, new_port0,
2313                                      ip4_header_t /* cheat */,
2314                                      length /* changed member */);
2315               tcp0->checksum = ip_csum_fold(sum0);
2316             }
2317           else
2318             {
2319               old_port0 = udp0->dst_port;
2320               udp0->dst_port = new_port0;
2321               udp0->checksum = 0;
2322             }
2323
2324         trace00:
2325
2326           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2327                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2328             {
2329               snat_out2in_trace_t *t =
2330                  vlib_add_trace (vm, node, b0, sizeof (*t));
2331               t->sw_if_index = sw_if_index0;
2332               t->next_index = next0;
2333               t->session_index = ~0;
2334               if (ses0)
2335                 t->session_index = ses0 - dm0->sessions;
2336             }
2337
2338           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2339
2340           /* verify speculative enqueue, maybe switch current next frame */
2341           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2342                                            to_next, n_left_to_next,
2343                                            bi0, next0);
2344         }
2345
2346       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2347     }
2348
       /* Report how many packets were not sent to the drop disposition. */
2349   vlib_node_increment_counter (vm, snat_det_out2in_node.index,
2350                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2351                                pkts_processed);
2352   return frame->n_vectors;
2353 }
2354
/*
 * Graph-node registration for "nat44-det-out2in".
 *
 * Binds the node to snat_det_out2in_node_fn, prints traces with
 * format_snat_out2in_trace, and reuses the snat_out2in_error_strings table
 * for its error counters.  It reserves sizeof (snat_runtime_t) bytes of
 * per-node runtime data and declares SNAT_OUT2IN_N_NEXT dispositions.
 */
2355 VLIB_REGISTER_NODE (snat_det_out2in_node) = {
2356   .function = snat_det_out2in_node_fn,
2357   .name = "nat44-det-out2in",
2358   .vector_size = sizeof (u32),
2359   .format_trace = format_snat_out2in_trace,
2360   .type = VLIB_NODE_TYPE_INTERNAL,
2361
       /* Error counters are shared with the other out2in nodes in this file. */
2362   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2363   .error_strings = snat_out2in_error_strings,
2364
2365   .runtime_data_bytes = sizeof (snat_runtime_t),
2366
2367   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
2368
2369   /* edit / add dispositions here */
2370   .next_nodes = {
2371     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
2372     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2373     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2374     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
2375   },
2376 };
/* NOTE(review): presumably emits per-CPU-architecture variants of the node
   function -- confirm against the macro definition. */
2377 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn);
2378
2379 /**
2380  * Get address and port values to be used for ICMP packet translation
2381  * and create session if needed
2382  *
2383  * @param[in,out] sm             NAT main
2384  * @param[in,out] node           NAT node runtime
2385  * @param[in] thread_index       thread index
2386  * @param[in,out] b0             buffer containing packet to be translated
2387  * @param[out] p_proto           protocol used for matching
2388  * @param[out] p_value           address and port after NAT translation
2389  * @param[out] p_dont_translate  if packet should not be translated
2390  * @param d                      optional parameter
2391  * @param e                      optional parameter
2392  */
2393 u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node,
2394                           u32 thread_index, vlib_buffer_t *b0,
2395                           ip4_header_t *ip0, u8 *p_proto,
2396                           snat_session_key_t *p_value,
2397                           u8 *p_dont_translate, void *d, void *e)
2398 {
2399   icmp46_header_t *icmp0;
2400   u32 sw_if_index0;
2401   u8 protocol;
2402   snat_det_out_key_t key0;
2403   u8 dont_translate = 0;
2404   u32 next0 = ~0;
2405   icmp_echo_header_t *echo0, *inner_echo0 = 0;
2406   ip4_header_t *inner_ip0;
2407   void *l4_header = 0;
2408   icmp46_header_t *inner_icmp0;
2409   snat_det_map_t * dm0 = 0;
2410   ip4_address_t new_addr0 = {{0}};
2411   snat_det_session_t * ses0 = 0;
2412   ip4_address_t out_addr;
2413
2414   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
2415   echo0 = (icmp_echo_header_t *)(icmp0+1);
2416   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2417
2418   if (!icmp_is_error_message (icmp0))
2419     {
2420       protocol = SNAT_PROTOCOL_ICMP;
2421       key0.ext_host_addr = ip0->src_address;
2422       key0.ext_host_port = 0;
2423       key0.out_port = echo0->identifier;
2424       out_addr = ip0->dst_address;
2425     }
2426   else
2427     {
2428       inner_ip0 = (ip4_header_t *)(echo0+1);
2429       l4_header = ip4_next_header (inner_ip0);
2430       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
2431       key0.ext_host_addr = inner_ip0->dst_address;
2432       out_addr = inner_ip0->src_address;
2433       switch (protocol)
2434         {
2435         case SNAT_PROTOCOL_ICMP:
2436           inner_icmp0 = (icmp46_header_t*)l4_header;
2437           inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
2438           key0.ext_host_port = 0;
2439           key0.out_port = inner_echo0->identifier;
2440           break;
2441         case SNAT_PROTOCOL_UDP:
2442         case SNAT_PROTOCOL_TCP:
2443           key0.ext_host_port = ((tcp_udp_header_t*)l4_header)->dst_port;
2444           key0.out_port = ((tcp_udp_header_t*)l4_header)->src_port;
2445           break;
2446         default:
2447           b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
2448           next0 = SNAT_OUT2IN_NEXT_DROP;
2449           goto out;
2450         }
2451     }
2452
2453   dm0 = snat_det_map_by_out(sm, &out_addr);
2454   if (PREDICT_FALSE(!dm0))
2455     {
2456       /* Don't NAT packet aimed at the intfc address */
2457       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
2458                                           ip0->dst_address.as_u32)))
2459         {
2460           dont_translate = 1;
2461           goto out;
2462         }
2463       clib_warning("unknown dst address:  %U",
2464                    format_ip4_address, &ip0->dst_address);
2465       goto out;
2466     }
2467
2468   snat_det_reverse(dm0, &ip0->dst_address,
2469                    clib_net_to_host_u16(key0.out_port), &new_addr0);
2470
2471   ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
2472   if (PREDICT_FALSE(!ses0))
2473     {
2474       /* Don't NAT packet aimed at the intfc address */
2475       if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
2476                                           ip0->dst_address.as_u32)))
2477         {
2478           dont_translate = 1;
2479           goto out;
2480         }
2481       clib_warning("no match src %U:%d dst %U:%d for user %U",
2482                    format_ip4_address, &key0.ext_host_addr,
2483                    clib_net_to_host_u16 (key0.ext_host_port),
2484                    format_ip4_address, &out_addr,
2485                    clib_net_to_host_u16 (key0.out_port),
2486                    format_ip4_address, &new_addr0);
2487       b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2488       next0 = SNAT_OUT2IN_NEXT_DROP;
2489       goto out;
2490     }
2491
2492   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
2493                     !icmp_is_error_message (icmp0)))
2494     {
2495       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
2496       next0 = SNAT_OUT2IN_NEXT_DROP;
2497       goto out;
2498     }
2499
2500   goto out;
2501
2502 out:
2503   *p_proto = protocol;
2504   if (ses0)
2505     {
2506       p_value->addr = new_addr0;
2507       p_value->fib_index = sm->inside_fib_index;
2508       p_value->port = ses0->in_port;
2509     }
2510   *p_dont_translate = dont_translate;
2511   if (d)
2512     *(snat_det_session_t**)d = ses0;
2513   if (e)
2514     *(snat_det_map_t**)e = dm0;
2515   return next0;
2516 }
2517
2518 /**********************/
2519 /*** worker handoff ***/
2520 /**********************/
2521 static uword
2522 snat_out2in_worker_handoff_fn (vlib_main_t * vm,
2523                                vlib_node_runtime_t * node,
2524                                vlib_frame_t * frame)
2525 {
2526   snat_main_t *sm = &snat_main;
2527   vlib_thread_main_t *tm = vlib_get_thread_main ();
2528   u32 n_left_from, *from, *to_next = 0;
2529   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
2530   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
2531     = 0;
2532   vlib_frame_queue_elt_t *hf = 0;
2533   vlib_frame_t *f = 0;
2534   int i;
2535   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
2536   u32 next_worker_index = 0;
2537   u32 current_worker_index = ~0;
2538   u32 thread_index = vlib_get_thread_index ();
2539
2540   ASSERT (vec_len (sm->workers));
2541
2542   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
2543     {
2544       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
2545
2546       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
2547                                sm->first_worker_index + sm->num_workers - 1,
2548                                (vlib_frame_queue_t *) (~0));
2549     }
2550
2551   from = vlib_frame_vector_args (frame);
2552   n_left_from = frame->n_vectors;
2553
2554   while (n_left_from > 0)
2555     {
2556       u32 bi0;
2557       vlib_buffer_t *b0;
2558       u32 sw_if_index0;
2559       u32 rx_fib_index0;
2560       ip4_header_t * ip0;
2561       u8 do_handoff;
2562
2563       bi0 = from[0];
2564       from += 1;
2565       n_left_from -= 1;
2566
2567       b0 = vlib_get_buffer (vm, bi0);
2568
2569       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
2570       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2571
2572       ip0 = vlib_buffer_get_current (b0);
2573
2574       next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0);
2575
2576       if (PREDICT_FALSE (next_worker_index != thread_index))
2577         {
2578           do_handoff = 1;
2579
2580           if (next_worker_index != current_worker_index)
2581             {
2582               if (hf)
2583                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2584
2585               hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index,
2586                                                       next_worker_index,
2587                                                       handoff_queue_elt_by_worker_index);
2588
2589               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
2590               to_next_worker = &hf->buffer_index[hf->n_vectors];
2591               current_worker_index = next_worker_index;
2592             }
2593
2594           /* enqueue to correct worker thread */
2595           to_next_worker[0] = bi0;
2596           to_next_worker++;
2597           n_left_to_next_worker--;
2598
2599           if (n_left_to_next_worker == 0)
2600             {
2601               hf->n_vectors = VLIB_FRAME_SIZE;
2602               vlib_put_frame_queue_elt (hf);
2603               current_worker_index = ~0;
2604               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
2605               hf = 0;
2606             }
2607         }
2608       else
2609         {
2610           do_handoff = 0;
2611           /* if this is 1st frame */
2612           if (!f)
2613             {
2614               f = vlib_get_frame_to_node (vm, sm->out2in_node_index);
2615               to_next = vlib_frame_vector_args (f);
2616             }
2617
2618           to_next[0] = bi0;
2619           to_next += 1;
2620           f->n_vectors++;
2621         }
2622
2623       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
2624                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2625         {
2626           snat_out2in_worker_handoff_trace_t *t =
2627             vlib_add_trace (vm, node, b0, sizeof (*t));
2628           t->next_worker_index = next_worker_index;
2629           t->do_handoff = do_handoff;
2630         }
2631     }
2632
2633   if (f)
2634     vlib_put_frame_to_node (vm, sm->out2in_node_index, f);
2635
2636   if (hf)
2637     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
2638
2639   /* Ship frames to the worker nodes */
2640   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
2641     {
2642       if (handoff_queue_elt_by_worker_index[i])
2643         {
2644           hf = handoff_queue_elt_by_worker_index[i];
2645           /*
2646            * It works better to let the handoff node
2647            * rate-adapt, always ship the handoff queue element.
2648            */
2649           if (1 || hf->n_vectors == hf->last_n_vectors)
2650             {
2651               vlib_put_frame_queue_elt (hf);
2652               handoff_queue_elt_by_worker_index[i] = 0;
2653             }
2654           else
2655             hf->last_n_vectors = hf->n_vectors;
2656         }
2657       congested_handoff_queue_by_worker_index[i] =
2658         (vlib_frame_queue_t *) (~0);
2659     }
2660   hf = 0;
2661   current_worker_index = ~0;
2662   return frame->n_vectors;
2663 }
2664
2665 VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = {
2666   .function = snat_out2in_worker_handoff_fn,
2667   .name = "nat44-out2in-worker-handoff",
2668   .vector_size = sizeof (u32),
2669   .format_trace = format_snat_out2in_worker_handoff_trace,
2670   .type = VLIB_NODE_TYPE_INTERNAL,
2671
2672   .n_next_nodes = 1,
2673
2674   .next_nodes = {
2675     [0] = "error-drop",
2676   },
2677 };
2678
2679 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn);
2680
2681 static uword
2682 snat_out2in_fast_node_fn (vlib_main_t * vm,
2683                           vlib_node_runtime_t * node,
2684                           vlib_frame_t * frame)
2685 {
2686   u32 n_left_from, * from, * to_next;
2687   snat_out2in_next_t next_index;
2688   u32 pkts_processed = 0;
2689   snat_main_t * sm = &snat_main;
2690
2691   from = vlib_frame_vector_args (frame);
2692   n_left_from = frame->n_vectors;
2693   next_index = node->cached_next_index;
2694
2695   while (n_left_from > 0)
2696     {
2697       u32 n_left_to_next;
2698
2699       vlib_get_next_frame (vm, node, next_index,
2700                            to_next, n_left_to_next);
2701
2702       while (n_left_from > 0 && n_left_to_next > 0)
2703         {
2704           u32 bi0;
2705           vlib_buffer_t * b0;
2706           u32 next0 = SNAT_OUT2IN_NEXT_DROP;
2707           u32 sw_if_index0;
2708           ip4_header_t * ip0;
2709           ip_csum_t sum0;
2710           u32 new_addr0, old_addr0;
2711           u16 new_port0, old_port0;
2712           udp_header_t * udp0;
2713           tcp_header_t * tcp0;
2714           icmp46_header_t * icmp0;
2715           snat_session_key_t key0, sm0;
2716           u32 proto0;
2717           u32 rx_fib_index0;
2718
2719           /* speculatively enqueue b0 to the current next frame */
2720           bi0 = from[0];
2721           to_next[0] = bi0;
2722           from += 1;
2723           to_next += 1;
2724           n_left_from -= 1;
2725           n_left_to_next -= 1;
2726
2727           b0 = vlib_get_buffer (vm, bi0);
2728
2729           ip0 = vlib_buffer_get_current (b0);
2730           udp0 = ip4_next_header (ip0);
2731           tcp0 = (tcp_header_t *) udp0;
2732           icmp0 = (icmp46_header_t *) udp0;
2733
2734           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2735           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2736
2737           vnet_feature_next (sw_if_index0, &next0, b0);
2738
2739           if (PREDICT_FALSE(ip0->ttl == 1))
2740             {
2741               vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2742               icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2743                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
2744                                            0);
2745               next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
2746               goto trace00;
2747             }
2748
2749           proto0 = ip_proto_to_snat_proto (ip0->protocol);
2750
2751           if (PREDICT_FALSE (proto0 == ~0))
2752               goto trace00;
2753
2754           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
2755             {
2756               next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
2757                                   rx_fib_index0, node, next0, ~0, 0, 0);
2758               goto trace00;
2759             }
2760
2761           key0.addr = ip0->dst_address;
2762           key0.port = udp0->dst_port;
2763           key0.fib_index = rx_fib_index0;
2764
2765           if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
2766             {
2767               b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
2768               goto trace00;
2769             }
2770
2771           new_addr0 = sm0.addr.as_u32;
2772           new_port0 = sm0.port;
2773           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2774           old_addr0 = ip0->dst_address.as_u32;
2775           ip0->dst_address.as_u32 = new_addr0;
2776
2777           sum0 = ip0->checksum;
2778           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2779                                  ip4_header_t,
2780                                  dst_address /* changed member */);
2781           ip0->checksum = ip_csum_fold (sum0);
2782
2783           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
2784             {
2785                if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2786                 {
2787                   old_port0 = tcp0->dst_port;
2788                   tcp0->dst_port = new_port0;
2789
2790                   sum0 = tcp0->checksum;
2791                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2792                                          ip4_header_t,
2793                                          dst_address /* changed member */);
2794
2795                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
2796                                          ip4_header_t /* cheat */,
2797                                          length /* changed member */);
2798                   tcp0->checksum = ip_csum_fold(sum0);
2799                 }
2800               else
2801                 {
2802                   old_port0 = udp0->dst_port;
2803                   udp0->dst_port = new_port0;
2804                   udp0->checksum = 0;
2805                 }
2806             }
2807           else
2808             {
2809               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2810                 {
2811                   sum0 = tcp0->checksum;
2812                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2813                                          ip4_header_t,
2814                                          dst_address /* changed member */);
2815
2816                   tcp0->checksum = ip_csum_fold(sum0);
2817                 }
2818             }
2819
2820         trace00:
2821
2822           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2823                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2824             {
2825               snat_out2in_trace_t *t =
2826                  vlib_add_trace (vm, node, b0, sizeof (*t));
2827               t->sw_if_index = sw_if_index0;
2828               t->next_index = next0;
2829             }
2830
2831           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
2832
2833           /* verify speculative enqueue, maybe switch current next frame */
2834           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2835                                            to_next, n_left_to_next,
2836                                            bi0, next0);
2837         }
2838
2839       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2840     }
2841
2842   vlib_node_increment_counter (vm, snat_out2in_fast_node.index,
2843                                SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
2844                                pkts_processed);
2845   return frame->n_vectors;
2846 }
2847
2848 VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
2849   .function = snat_out2in_fast_node_fn,
2850   .name = "nat44-out2in-fast",
2851   .vector_size = sizeof (u32),
2852   .format_trace = format_snat_out2in_fast_trace,
2853   .type = VLIB_NODE_TYPE_INTERNAL,
2854
2855   .n_errors = ARRAY_LEN(snat_out2in_error_strings),
2856   .error_strings = snat_out2in_error_strings,
2857
2858   .runtime_data_bytes = sizeof (snat_runtime_t),
2859
2860   .n_next_nodes = SNAT_OUT2IN_N_NEXT,
2861
2862   /* edit / add dispositions here */
2863   .next_nodes = {
2864     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
2865     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
2866     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2867     [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
2868   },
2869 };
2870 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn);