Fix coverity warnings, VPP-608
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <snat/snat.h>
25 #include <snat/snat_ipfix_logging.h>
26
27 #include <vppinfra/hash.h>
28 #include <vppinfra/error.h>
29 #include <vppinfra/elog.h>
30
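/* Per-packet trace data recorded by the snat-in2out nodes */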
31 typedef struct {
32   u32 sw_if_index;
33   u32 next_index;
34   u32 session_index;
35   u32 is_slow_path;
36 } snat_in2out_trace_t;
37
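/* Per-packet trace data recorded by the worker handoff node */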
38 typedef struct {
39   u32 next_worker_index;
40   u8 do_handoff;
41 } snat_in2out_worker_handoff_trace_t;
42
43 /* packet trace format function */
44 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
45 {
46   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
47   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
48   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
49   char * tag;
50
51   tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
52   
53   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
54               t->sw_if_index, t->next_index, t->session_index);
55
56   return s;
57 }
58
59 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
60 {
61   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
62   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
63   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
64
65   s = format (s, "SNAT_IN2OUT_FAST: sw_if_index %d, next index %d",
66               t->sw_if_index, t->next_index);
67
68   return s;
69 }
70
71 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
72 {
73   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
74   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
75   snat_in2out_worker_handoff_trace_t * t =
76     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
77   char * m;
78
79   m = t->do_handoff ? "next worker" : "same worker";
80   s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
81
82   return s;
83 }
84
85 vlib_node_registration_t snat_in2out_node;
86 vlib_node_registration_t snat_in2out_slowpath_node;
87 vlib_node_registration_t snat_in2out_fast_node;
88 vlib_node_registration_t snat_in2out_worker_handoff_node;
89
90 #define foreach_snat_in2out_error                       \
91 _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
92 _(IN2OUT_PACKETS, "Good in2out packets processed")      \
93 _(OUT_OF_PORTS, "Out of ports")                         \
94 _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
95 _(BAD_ICMP_TYPE, "icmp type not echo-request")          \
96 _(NO_TRANSLATION, "No translation")
97   
98 typedef enum {
99 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
100   foreach_snat_in2out_error
101 #undef _
102   SNAT_IN2OUT_N_ERROR,
103 } snat_in2out_error_t;
104
105 static char * snat_in2out_error_strings[] = {
106 #define _(sym,string) string,
107   foreach_snat_in2out_error
108 #undef _
109 };
110
111 typedef enum {
112   SNAT_IN2OUT_NEXT_LOOKUP,
113   SNAT_IN2OUT_NEXT_DROP,
114   SNAT_IN2OUT_NEXT_SLOW_PATH,
115   SNAT_IN2OUT_N_NEXT,
116 } snat_in2out_next_t;
117
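/**
 * @brief Create a NAT session for a new in2out flow (slow path)
 *
 * Looks up or creates the per-source-address "user", recycles the user's
 * least recently used dynamic session when the per-user quota is reached,
 * otherwise allocates an outside address and port (a matching static
 * mapping is preferred over dynamic allocation). The new session is added
 * to the in2out, out2in and worker-by-out hashes and an IPFIX
 * session-create record is emitted.
 *
 * @param sm            SNAT main.
 * @param b0            Vlib buffer.
 * @param ip0           IP header.
 * @param rx_fib_index0 RX FIB index.
 * @param key0          In2out session key.
 * @param sessionp      Returns the session to use for translation.
 * @param node          Vlib node runtime.
 * @param next0         Current next index.
 * @param cpu_index     Index of the handling thread.
 *
 * @returns next0, or SNAT_IN2OUT_NEXT_DROP on error.
 */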
118 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
119                       ip4_header_t * ip0,
120                       u32 rx_fib_index0,
121                       snat_session_key_t * key0,
122                       snat_session_t ** sessionp,
123                       vlib_node_runtime_t * node,
124                       u32 next0,
125                       u32 cpu_index)
126 {
127   snat_user_t *u;
128   snat_user_key_t user_key;
129   snat_session_t *s;
130   clib_bihash_kv_8_8_t kv0, value0;
131   u32 oldest_per_user_translation_list_index;
132   dlist_elt_t * oldest_per_user_translation_list_elt;
133   dlist_elt_t * per_user_translation_list_elt;
134   dlist_elt_t * per_user_list_head_elt;
135   u32 session_index;
136   snat_session_key_t key1;
137   u32 address_index = ~0;
138   u32 outside_fib_index;
139   uword * p;
140   snat_static_mapping_key_t worker_by_out_key;
141
142   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
143   if (! p)
144     {
145       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
146       return SNAT_IN2OUT_NEXT_DROP;
147     }
148   outside_fib_index = p[0];
149
150   user_key.addr = ip0->src_address;
151   user_key.fib_index = rx_fib_index0;
152   kv0.key = user_key.as_u64;
153   
154   /* Ever heard of the "user" = src ip4 address before? */
155   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
156     {
157       /* no, make a new one */
158       pool_get (sm->per_thread_data[cpu_index].users, u);
159       memset (u, 0, sizeof (*u));
160       u->addr = ip0->src_address;
161
162       pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt);
163
164       u->sessions_per_user_list_head_index = per_user_list_head_elt -
165         sm->per_thread_data[cpu_index].list_pool;
166
167       clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
168                        u->sessions_per_user_list_head_index);
169
170       kv0.value = u - sm->per_thread_data[cpu_index].users;
171
172       /* add user */
173       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
174     }
175   else
176     {
177       u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
178                              value0.value);
179     }
180
181   /* Over quota? Recycle the least recently used dynamic translation */
182   if (u->nsessions >= sm->max_translations_per_user)
183     {
184       /* Remove the oldest dynamic translation */
185       do {
186           oldest_per_user_translation_list_index =
187             clib_dlist_remove_head (sm->per_thread_data[cpu_index].list_pool,
188                                     u->sessions_per_user_list_head_index);
189
190           ASSERT (oldest_per_user_translation_list_index != ~0);
191
192           /* add it back to the end of the LRU list */
193           clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
194                               u->sessions_per_user_list_head_index,
195                               oldest_per_user_translation_list_index);
196           /* Get the list element */
197           oldest_per_user_translation_list_elt =
198             pool_elt_at_index (sm->per_thread_data[cpu_index].list_pool,
199                                oldest_per_user_translation_list_index);
200
201           /* Get the session index from the list element */
202           session_index = oldest_per_user_translation_list_elt->value;
203
204           /* Get the session */
205           s = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
206                                  session_index);
207       } while (snat_is_session_static (s));
208
209       /* Remove in2out, out2in keys */
210       kv0.key = s->in2out.as_u64;
211       if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
212           clib_warning ("in2out key delete failed");
213       kv0.key = s->out2in.as_u64;
214       if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
215           clib_warning ("out2in key delete failed");
216
217       /* log NAT event */
218       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
219                                           s->out2in.addr.as_u32,
220                                           s->in2out.protocol,
221                                           s->in2out.port,
222                                           s->out2in.port,
223                                           s->in2out.fib_index);
224
225       snat_free_outside_address_and_port 
226         (sm, &s->out2in, s->outside_address_index);
227       s->outside_address_index = ~0;
228
229       if (snat_alloc_outside_address_and_port (sm, &key1, &address_index))
230         {
231           ASSERT(0);
232
233           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
234           return SNAT_IN2OUT_NEXT_DROP;
235         }
236       s->outside_address_index = address_index;
237     }
238   else
239     {
240       u8 static_mapping = 1;
241
242       /* First try to match static mapping by local address and port */
243       if (snat_static_mapping_match (sm, *key0, &key1, 0))
244         {
245           static_mapping = 0;
246           /* Try to create dynamic translation */
247           if (snat_alloc_outside_address_and_port (sm, &key1, &address_index))
248             {
249               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
250               return SNAT_IN2OUT_NEXT_DROP;
251             }
252         }
253
254       /* Create a new session */
255       pool_get (sm->per_thread_data[cpu_index].sessions, s);
256       memset (s, 0, sizeof (*s));
257       
258       s->outside_address_index = address_index;
259
260       if (static_mapping)
261         {
262           u->nstaticsessions++;
263           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
264         }
265       else
266         {
267           u->nsessions++;
268         }
269
270       /* Create list elts */
271       pool_get (sm->per_thread_data[cpu_index].list_pool,
272                 per_user_translation_list_elt);
273       clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
274                        per_user_translation_list_elt -
275                        sm->per_thread_data[cpu_index].list_pool);
276
277       per_user_translation_list_elt->value =
278         s - sm->per_thread_data[cpu_index].sessions;
279       s->per_user_index = per_user_translation_list_elt -
280                           sm->per_thread_data[cpu_index].list_pool;
281       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
282
283       clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
284                           s->per_user_list_head_index,
285                           per_user_translation_list_elt -
286                           sm->per_thread_data[cpu_index].list_pool);
287     }
288   
289   s->in2out = *key0;
290   s->out2in = key1;
291   s->out2in.protocol = key0->protocol;
292   s->out2in.fib_index = outside_fib_index;
293   *sessionp = s;
294
295   /* Add to translation hashes */
296   kv0.key = s->in2out.as_u64;
297   kv0.value = s - sm->per_thread_data[cpu_index].sessions;
298   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
299       clib_warning ("in2out key add failed");
300   
301   kv0.key = s->out2in.as_u64;
302   kv0.value = s - sm->per_thread_data[cpu_index].sessions;
303   
304   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
305       clib_warning ("out2in key add failed");
306
307   /* Add to translated packets worker lookup */
308   worker_by_out_key.addr = s->out2in.addr;
309   worker_by_out_key.port = s->out2in.port;
310   worker_by_out_key.fib_index = s->out2in.fib_index;
311   kv0.key = worker_by_out_key.as_u64;
312   kv0.value = cpu_index;
313   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
314
315   /* log NAT event */
316   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
317                                       s->out2in.addr.as_u32,
318                                       s->in2out.protocol,
319                                       s->in2out.port,
320                                       s->out2in.port,
321                                       s->in2out.fib_index);
322   return next0;
323 }
324                       
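/**
 * @brief Translate ICMP packets on the slow path
 *
 * Only echo-requests are handled; other ICMP types are dropped with the
 * BAD_ICMP_TYPE error. The echo identifier serves as the "port" in the
 * session key, a session is created via slow_path() when none exists
 * (unless the packet is addressed to the interface address), and the IP
 * and ICMP checksums are updated incrementally.
 */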
325 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
326                                          vlib_buffer_t * b0,
327                                          ip4_header_t * ip0,
328                                          icmp46_header_t * icmp0,
329                                          u32 sw_if_index0,
330                                          u32 rx_fib_index0,
331                                          vlib_node_runtime_t * node,
332                                          u32 next0,
333                                          f64 now,
334                                          u32 cpu_index)
335 {
336   snat_session_key_t key0;
337   icmp_echo_header_t *echo0;
338   clib_bihash_kv_8_8_t kv0, value0;
339   snat_session_t * s0;
340   u32 new_addr0, old_addr0;
341   u16 old_id0, new_id0;
342   ip_csum_t sum0;
343   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
344
345   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request))
346     {
347       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
348       return SNAT_IN2OUT_NEXT_DROP;
349     }
350   
351   echo0 = (icmp_echo_header_t *)(icmp0+1);
352
353   key0.addr = ip0->src_address;
354   key0.port = echo0->identifier;
355   key0.protocol = SNAT_PROTOCOL_ICMP;
356   key0.fib_index = rx_fib_index0;
357   
358   kv0.key = key0.as_u64;
359   
360   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
361     {
362       ip4_address_t * first_int_addr;
363
364       if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
365         {
366           first_int_addr = 
367             ip4_interface_first_address (sm->ip4_main, sw_if_index0,
368                                          0 /* just want the address */);
369           rt->cached_sw_if_index = sw_if_index0;
370           if (first_int_addr)
371             rt->cached_ip4_address = first_int_addr->as_u32;
372           else
373             rt->cached_ip4_address = 0;
374         }
375       
376       /* Don't NAT packet aimed at the intfc address */
377       if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
378                                 rt->cached_ip4_address))
379         return next0;
380       
381       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
382                          &s0, node, next0, cpu_index);
383       
384       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
385         return next0;
386     }
387   else
388     s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
389                             value0.value);
390
391   old_addr0 = ip0->src_address.as_u32;
392   ip0->src_address = s0->out2in.addr;
393   new_addr0 = ip0->src_address.as_u32;
394   vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
395   
396   sum0 = ip0->checksum;
397   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
398                          ip4_header_t,
399                          src_address /* changed member */);
400   ip0->checksum = ip_csum_fold (sum0);
401   
402   old_id0 = echo0->identifier;
403   new_id0 = s0->out2in.port;
404   echo0->identifier = new_id0;
405
406   sum0 = icmp0->checksum;
407   sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
408                          identifier);
409   icmp0->checksum = ip_csum_fold (sum0);
410
411   /* Accounting */
412   s0->last_heard = now;
413   s0->total_pkts++;
414   s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
415   /* Per-user LRU list maintenance for dynamic translations */
416   if (!snat_is_session_static (s0))
417     {
418       clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
419                          s0->per_user_index);
420       clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
421                           s0->per_user_list_head_index,
422                           s0->per_user_index);
423     }
424
425   return next0;
426 }
427
428 /**
429  * @brief Hairpinning
430  *
431  * Hairpinning allows two endpoints on the internal side of the NAT to
432  * communicate even if they only use each other's external IP addresses
433  * and ports.
434  *
435  * @param sm     SNAT main.
436  * @param b0     Vlib buffer.
437  * @param ip0    IP header.
438  * @param udp0   UDP header.
439  * @param tcp0   TCP header.
440  * @param proto0 SNAT protocol.
441  */
442 static inline void
443 snat_hairpinning (snat_main_t *sm,
444                   vlib_buffer_t * b0,
445                   ip4_header_t * ip0,
446                   udp_header_t * udp0,
447                   tcp_header_t * tcp0,
448                   u32 proto0)
449 {
450   snat_session_key_t key0, sm0;
451   snat_static_mapping_key_t k0;
452   snat_session_t * s0;
453   clib_bihash_kv_8_8_t kv0, value0;
454   ip_csum_t sum0;
455   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
456   u16 new_dst_port0, old_dst_port0;
457
458   key0.addr = ip0->dst_address;
459   key0.port = udp0->dst_port;
460   key0.protocol = proto0;
461   key0.fib_index = sm->outside_fib_index;
462   kv0.key = key0.as_u64;
463
464   /* Check if destination is in active sessions */
465   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
466     {
467       /* or static mappings */
468       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
469         {
470           new_dst_addr0 = sm0.addr.as_u32;
471           new_dst_port0 = sm0.port;
472           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
473         }
474     }
475   else
476     {
477       si = value0.value;
478       if (sm->num_workers > 1)
479         {
480           k0.addr = ip0->dst_address;
481           k0.port = udp0->dst_port;
482           k0.fib_index = sm->outside_fib_index;
483           kv0.key = k0.as_u64;
484           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
485             ASSERT(0);
486           else
487             ti = value0.value;
488         }
489       else
490         ti = sm->num_workers;
491
492       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
493       new_dst_addr0 = s0->in2out.addr.as_u32;
494       new_dst_port0 = s0->in2out.port;
495       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
496     }
497
498   /* Destination is behind the same NAT, use internal address and port */
499   if (new_dst_addr0)
500     {
501       old_dst_addr0 = ip0->dst_address.as_u32;
502       ip0->dst_address.as_u32 = new_dst_addr0;
503       sum0 = ip0->checksum;
504       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
505                              ip4_header_t, dst_address);
506       ip0->checksum = ip_csum_fold (sum0);
507
508       old_dst_port0 = tcp0->ports.dst;
509       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
510         {
511           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
512             {
513               tcp0->ports.dst = new_dst_port0;
514               sum0 = tcp0->checksum;
515               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
516                                      ip4_header_t, dst_address);
517               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
518                                      ip4_header_t /* cheat */, length);
519               tcp0->checksum = ip_csum_fold(sum0);
520             }
521           else
522             {
523               udp0->dst_port = new_dst_port0;
524               udp0->checksum = 0;
525             }
526         }
      else
        {
          /* Port unchanged: the L4 checksum still covers the rewritten
             destination address */
          if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
            {
              sum0 = tcp0->checksum;
              sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
                                     ip4_header_t, dst_address);
              tcp0->checksum = ip_csum_fold(sum0);
            }
          else
            udp0->checksum = 0;
        }
527     }
528 }
529
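/**
 * @brief In2out node function shared by the fast and slow path nodes
 *
 * Processes packets two at a time where possible. On the fast path,
 * unsupported protocols, ICMP and packets without an existing session are
 * punted to snat-in2out-slowpath; on the slow path new sessions are
 * created inline. Source address and port are rewritten, checksums are
 * updated incrementally, hairpinning is applied and per-session
 * accounting and per-user LRU state are refreshed.
 */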
530 static inline uword
531 snat_in2out_node_fn_inline (vlib_main_t * vm,
532                             vlib_node_runtime_t * node,
533                             vlib_frame_t * frame, int is_slow_path)
534 {
535   u32 n_left_from, * from, * to_next;
536   snat_in2out_next_t next_index;
537   u32 pkts_processed = 0;
538   snat_main_t * sm = &snat_main;
539   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
540   f64 now = vlib_time_now (vm);
541   u32 stats_node_index;
542   u32 cpu_index = os_get_cpu_number ();
543
544   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
545     snat_in2out_node.index;
546
547   from = vlib_frame_vector_args (frame);
548   n_left_from = frame->n_vectors;
549   next_index = node->cached_next_index;
550
551   while (n_left_from > 0)
552     {
553       u32 n_left_to_next;
554
555       vlib_get_next_frame (vm, node, next_index,
556                            to_next, n_left_to_next);
557
558       while (n_left_from >= 4 && n_left_to_next >= 2)
559         {
560           u32 bi0, bi1;
561           vlib_buffer_t * b0, * b1;
562           u32 next0, next1;
563           u32 sw_if_index0, sw_if_index1;
564           ip4_header_t * ip0, * ip1;
565           ip_csum_t sum0, sum1;
566           u32 new_addr0, old_addr0, new_addr1, old_addr1;
567           u16 old_port0, new_port0, old_port1, new_port1;
568           udp_header_t * udp0, * udp1;
569           tcp_header_t * tcp0, * tcp1;
570           icmp46_header_t * icmp0, * icmp1;
571           snat_session_key_t key0, key1;
572           u32 rx_fib_index0, rx_fib_index1;
573           u32 proto0, proto1;
574           snat_session_t * s0 = 0, * s1 = 0;
575           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
576           
577           /* Prefetch next iteration. */
578           {
579             vlib_buffer_t * p2, * p3;
580             
581             p2 = vlib_get_buffer (vm, from[2]);
582             p3 = vlib_get_buffer (vm, from[3]);
583             
584             vlib_prefetch_buffer_header (p2, LOAD);
585             vlib_prefetch_buffer_header (p3, LOAD);
586
587             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
588             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
589           }
590
591           /* speculatively enqueue b0 and b1 to the current next frame */
592           to_next[0] = bi0 = from[0];
593           to_next[1] = bi1 = from[1];
594           from += 2;
595           to_next += 2;
596           n_left_from -= 2;
597           n_left_to_next -= 2;
598           
599           b0 = vlib_get_buffer (vm, bi0);
600           b1 = vlib_get_buffer (vm, bi1);
601
602           ip0 = vlib_buffer_get_current (b0);
603           udp0 = ip4_next_header (ip0);
604           tcp0 = (tcp_header_t *) udp0;
605           icmp0 = (icmp46_header_t *) udp0;
606
607           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
608           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
609                                    sw_if_index0);
610
611           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
612
613           proto0 = ~0;
614           proto0 = (ip0->protocol == IP_PROTOCOL_UDP) 
615             ? SNAT_PROTOCOL_UDP : proto0;
616           proto0 = (ip0->protocol == IP_PROTOCOL_TCP) 
617             ? SNAT_PROTOCOL_TCP : proto0;
618           proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) 
619             ? SNAT_PROTOCOL_ICMP : proto0;
620
621           /* Next configured feature, probably ip4-lookup */
622           if (is_slow_path)
623             {
624               if (PREDICT_FALSE (proto0 == ~0))
625                 goto trace00;
626               
627               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
628                 {
629                   next0 = icmp_in2out_slow_path 
630                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
631                      node, next0, now, cpu_index);
632                   goto trace00;
633                 }
634             }
635           else
636             {
637               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
638                 {
639                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
640                   goto trace00;
641                 }
642             }
643
644           key0.addr = ip0->src_address;
645           key0.port = udp0->src_port;
646           key0.protocol = proto0;
647           key0.fib_index = rx_fib_index0;
648           
649           kv0.key = key0.as_u64;
650
651           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
652             {
653               if (is_slow_path)
654                 {
655                   ip4_address_t * first_int_addr;
656                   
657                   if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
658                     {
659                       first_int_addr = 
660                         ip4_interface_first_address (sm->ip4_main, sw_if_index0,
661                                                      0 /* just want the address */);
662                       rt->cached_sw_if_index = sw_if_index0;
663                       if (first_int_addr)
664                         rt->cached_ip4_address = first_int_addr->as_u32;
665                       else
666                         rt->cached_ip4_address = 0;
667                     }
668                   
669                   /* Don't NAT packet aimed at the intfc address */
670                   if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
671                                     rt->cached_ip4_address))
672                     goto trace00;
673                   
674                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
675                                      &s0, node, next0, cpu_index);
676                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
677                     goto trace00;
678                 }
679               else
680                 {
681                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
682                   goto trace00;
683                 }
684             }
685           else
686             s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
687                                     value0.value);
688
689           old_addr0 = ip0->src_address.as_u32;
690           ip0->src_address = s0->out2in.addr;
691           new_addr0 = ip0->src_address.as_u32;
692           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
693
694           sum0 = ip0->checksum;
695           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
696                                  ip4_header_t,
697                                  src_address /* changed member */);
698           ip0->checksum = ip_csum_fold (sum0);
699
700           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
701             {
702               old_port0 = tcp0->ports.src;
703               tcp0->ports.src = s0->out2in.port;
704               new_port0 = tcp0->ports.src;
705
706               sum0 = tcp0->checksum;
707               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
708                                      ip4_header_t,
709                                      dst_address /* changed member */);
710               sum0 = ip_csum_update (sum0, old_port0, new_port0,
711                                      ip4_header_t /* cheat */,
712                                      length /* changed member */);
713               tcp0->checksum = ip_csum_fold(sum0);
714             }
715           else
716             {
717               old_port0 = udp0->src_port;
718               udp0->src_port = s0->out2in.port;
719               udp0->checksum = 0;
720             }
721
722           /* Hairpinning */
723           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
724
725           /* Accounting */
726           s0->last_heard = now;
727           s0->total_pkts++;
728           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
729           /* Per-user LRU list maintenance for dynamic translation */
730           if (!snat_is_session_static (s0))
731             {
732               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
733                                  s0->per_user_index);
734               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
735                                   s0->per_user_list_head_index,
736                                   s0->per_user_index);
737             }
738         trace00:
739
740           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
741                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
742             {
743               snat_in2out_trace_t *t = 
744                  vlib_add_trace (vm, node, b0, sizeof (*t));
745               t->is_slow_path = is_slow_path;
746               t->sw_if_index = sw_if_index0;
747               t->next_index = next0;
748               t->session_index = ~0;
749               if (s0)
750                 t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
751             }
752
753           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
754
755           ip1 = vlib_buffer_get_current (b1);
756           udp1 = ip4_next_header (ip1);
757           tcp1 = (tcp_header_t *) udp1;
758           icmp1 = (icmp46_header_t *) udp1;
759
760           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
761           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
762                                    sw_if_index1);
763
764           proto1 = ~0;
765           proto1 = (ip1->protocol == IP_PROTOCOL_UDP) 
766             ? SNAT_PROTOCOL_UDP : proto1;
767           proto1 = (ip1->protocol == IP_PROTOCOL_TCP) 
768             ? SNAT_PROTOCOL_TCP : proto1;
769           proto1 = (ip1->protocol == IP_PROTOCOL_ICMP) 
770             ? SNAT_PROTOCOL_ICMP : proto1;
771
772           /* Next configured feature, probably ip4-lookup */
773           if (is_slow_path)
774             {
775               if (PREDICT_FALSE (proto1 == ~0))
776                 goto trace01;
777               
778               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
779                 {
780                   next1 = icmp_in2out_slow_path 
781                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
782                      next1, now, cpu_index);
783                   goto trace01;
784                 }
785             }
786           else
787             {
788               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
789                 {
790                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
791                   goto trace01;
792                 }
793             }
794
795           key1.addr = ip1->src_address;
796           key1.port = udp1->src_port;
797           key1.protocol = proto1;
798           key1.fib_index = rx_fib_index1;
799           
800           kv1.key = key1.as_u64;
801
802           if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
803             {
804               if (is_slow_path)
805                 {
806                   ip4_address_t * first_int_addr;
807                   
808                   if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index1))
809                     {
810                       first_int_addr = 
811                         ip4_interface_first_address (sm->ip4_main, sw_if_index1,
812                                                      0 /* just want the address */);
813                       rt->cached_sw_if_index = sw_if_index1;
814                       if (first_int_addr)
815                         rt->cached_ip4_address = first_int_addr->as_u32;
816                       else
817                         rt->cached_ip4_address = 0;
818                     }
819                   
820                   /* Don't NAT packet aimed at the intfc address */
821                   if (PREDICT_FALSE(ip1->dst_address.as_u32 ==
822                                     rt->cached_ip4_address))
823                     goto trace01;
824                   
825                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
826                                      &s1, node, next1, cpu_index);
827                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
828                     goto trace01;
829                 }
830               else
831                 {
832                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
833                   goto trace01;
834                 }
835             }
836           else
837             s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
838                                     value1.value);
839
840           old_addr1 = ip1->src_address.as_u32;
841           ip1->src_address = s1->out2in.addr;
842           new_addr1 = ip1->src_address.as_u32;
843           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
844
845           sum1 = ip1->checksum;
846           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
847                                  ip4_header_t,
848                                  src_address /* changed member */);
849           ip1->checksum = ip_csum_fold (sum1);
850
851           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
852             {
853               old_port1 = tcp1->ports.src;
854               tcp1->ports.src = s1->out2in.port;
855               new_port1 = tcp1->ports.src;
856
857               sum1 = tcp1->checksum;
858               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
859                                      ip4_header_t,
860                                      dst_address /* changed member */);
861               sum1 = ip_csum_update (sum1, old_port1, new_port1,
862                                      ip4_header_t /* cheat */,
863                                      length /* changed member */);
864               tcp1->checksum = ip_csum_fold(sum1);
865             }
866           else
867             {
868               old_port1 = udp1->src_port;
869               udp1->src_port = s1->out2in.port;
870               udp1->checksum = 0;
871             }
872
873           /* Hairpinning */
874           snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
875
876           /* Accounting */
877           s1->last_heard = now;
878           s1->total_pkts++;
879           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
880           /* Per-user LRU list maintenance for dynamic translation */
881           if (!snat_is_session_static (s1))
882             {
883               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
884                                  s1->per_user_index);
885               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
886                                   s1->per_user_list_head_index,
887                                   s1->per_user_index);
888             }
889         trace01:
890
891           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
892                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
893             {
894               snat_in2out_trace_t *t = 
895                  vlib_add_trace (vm, node, b1, sizeof (*t));
              t->is_slow_path = is_slow_path;
896               t->sw_if_index = sw_if_index1;
897               t->next_index = next1;
898               t->session_index = ~0;
899               if (s1)
900                 t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
901             }
902
903           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
904
905           /* verify speculative enqueues, maybe switch current next frame */
906           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
907                                            to_next, n_left_to_next,
908                                            bi0, bi1, next0, next1);
909         }
910
911       while (n_left_from > 0 && n_left_to_next > 0)
912         {
913           u32 bi0;
914           vlib_buffer_t * b0;
915           u32 next0;
916           u32 sw_if_index0;
917           ip4_header_t * ip0;
918           ip_csum_t sum0;
919           u32 new_addr0, old_addr0;
920           u16 old_port0, new_port0;
921           udp_header_t * udp0;
922           tcp_header_t * tcp0;
923           icmp46_header_t * icmp0;
924           snat_session_key_t key0;
925           u32 rx_fib_index0;
926           u32 proto0;
927           snat_session_t * s0 = 0;
928           clib_bihash_kv_8_8_t kv0, value0;
929           
930           /* speculatively enqueue b0 to the current next frame */
931           bi0 = from[0];
932           to_next[0] = bi0;
933           from += 1;
934           to_next += 1;
935           n_left_from -= 1;
936           n_left_to_next -= 1;
937
938           b0 = vlib_get_buffer (vm, bi0);
939           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
940
941           ip0 = vlib_buffer_get_current (b0);
942           udp0 = ip4_next_header (ip0);
943           tcp0 = (tcp_header_t *) udp0;
944           icmp0 = (icmp46_header_t *) udp0;
945
946           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
947           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
948                                    sw_if_index0);
949
950           proto0 = ~0;
951           proto0 = (ip0->protocol == IP_PROTOCOL_UDP) 
952             ? SNAT_PROTOCOL_UDP : proto0;
953           proto0 = (ip0->protocol == IP_PROTOCOL_TCP) 
954             ? SNAT_PROTOCOL_TCP : proto0;
955           proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) 
956             ? SNAT_PROTOCOL_ICMP : proto0;
957
958           /* Next configured feature, probably ip4-lookup */
959           if (is_slow_path)
960             {
961               if (PREDICT_FALSE (proto0 == ~0))
962                 goto trace0;
963               
964               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
965                 {
966                   next0 = icmp_in2out_slow_path 
967                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
968                      next0, now, cpu_index);
969                   goto trace0;
970                 }
971             }
972           else
973             {
974               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
975                 {
976                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
977                   goto trace0;
978                 }
979             }
980
981           key0.addr = ip0->src_address;
982           key0.port = udp0->src_port;
983           key0.protocol = proto0;
984           key0.fib_index = rx_fib_index0;
985           
986           kv0.key = key0.as_u64;
987
988           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
989             {
990               if (is_slow_path)
991                 {
992                   ip4_address_t * first_int_addr;
993                   
994                   if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
995                     {
996                       first_int_addr = 
997                         ip4_interface_first_address (sm->ip4_main, sw_if_index0,
998                                                      0 /* just want the address */);
999                       rt->cached_sw_if_index = sw_if_index0;
1000                       if (first_int_addr)
1001                         rt->cached_ip4_address = first_int_addr->as_u32;
1002                       else
1003                         rt->cached_ip4_address = 0;
1004                     }
1005                   
1006                   /* Don't NAT packet aimed at the intfc address */
1007                   if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
1008                                     rt->cached_ip4_address))
1009                     goto trace0;
1010                   
1011                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1012                                      &s0, node, next0, cpu_index);
1013                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1014                     goto trace0;
1015                 }
1016               else
1017                 {
1018                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1019                   goto trace0;
1020                 }
1021             }
1022           else
1023             s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
1024                                     value0.value);
1025
1026           old_addr0 = ip0->src_address.as_u32;
1027           ip0->src_address = s0->out2in.addr;
1028           new_addr0 = ip0->src_address.as_u32;
1029           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1030
1031           sum0 = ip0->checksum;
1032           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1033                                  ip4_header_t,
1034                                  src_address /* changed member */);
1035           ip0->checksum = ip_csum_fold (sum0);
1036
1037           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1038             {
1039               old_port0 = tcp0->ports.src;
1040               tcp0->ports.src = s0->out2in.port;
1041               new_port0 = tcp0->ports.src;
1042
1043               sum0 = tcp0->checksum;
1044               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1045                                      ip4_header_t,
1046                                      dst_address /* changed member */);
1047               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1048                                      ip4_header_t /* cheat */,
1049                                      length /* changed member */);
1050               tcp0->checksum = ip_csum_fold(sum0);
1051             }
1052           else
1053             {
1054               old_port0 = udp0->src_port;
1055               udp0->src_port = s0->out2in.port;
1056               udp0->checksum = 0;
1057             }
1058
1059           /* Hairpinning */
1060           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1061
1062           /* Accounting */
1063           s0->last_heard = now;
1064           s0->total_pkts++;
1065           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1066           /* Per-user LRU list maintenance for dynamic translation */
1067           if (!snat_is_session_static (s0))
1068             {
1069               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
1070                                  s0->per_user_index);
1071               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
1072                                   s0->per_user_list_head_index,
1073                                   s0->per_user_index);
1074             }
1075
1076         trace0:
1077           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1078                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1079             {
1080               snat_in2out_trace_t *t = 
1081                  vlib_add_trace (vm, node, b0, sizeof (*t));
1082               t->is_slow_path = is_slow_path;
1083               t->sw_if_index = sw_if_index0;
1084               t->next_index = next0;
1085               t->session_index = ~0;
1086               if (s0)
1087                 t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
1088             }
1089
1090           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1091
1092           /* verify speculative enqueue, maybe switch current next frame */
1093           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1094                                            to_next, n_left_to_next,
1095                                            bi0, next0);
1096         }
1097
1098       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1099     }
1100
1101   vlib_node_increment_counter (vm, stats_node_index, 
1102                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, 
1103                                pkts_processed);
1104   return frame->n_vectors;
1105 }
1106
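/** @brief snat-in2out node function: fast path, translate using existing sessions */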
1107 static uword
1108 snat_in2out_fast_path_fn (vlib_main_t * vm,
1109                           vlib_node_runtime_t * node,
1110                           vlib_frame_t * frame)
1111 {
1112   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */);
1113 }
1114
1115 VLIB_REGISTER_NODE (snat_in2out_node) = {
1116   .function = snat_in2out_fast_path_fn,
1117   .name = "snat-in2out",
1118   .vector_size = sizeof (u32),
1119   .format_trace = format_snat_in2out_trace,
1120   .type = VLIB_NODE_TYPE_INTERNAL,
1121   
1122   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1123   .error_strings = snat_in2out_error_strings,
1124
1125   .runtime_data_bytes = sizeof (snat_runtime_t),
1126   
1127   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1128
1129   /* edit / add dispositions here */
1130   .next_nodes = {
1131     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1132     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1133     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1134   },
1135 };
1136
1137 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1138
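/** @brief snat-in2out-slowpath node function: creates sessions for new flows */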
1139 static uword
1140 snat_in2out_slow_path_fn (vlib_main_t * vm,
1141                           vlib_node_runtime_t * node,
1142                           vlib_frame_t * frame)
1143 {
1144   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */);
1145 }
1146
1147 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1148   .function = snat_in2out_slow_path_fn,
1149   .name = "snat-in2out-slowpath",
1150   .vector_size = sizeof (u32),
1151   .format_trace = format_snat_in2out_trace,
1152   .type = VLIB_NODE_TYPE_INTERNAL,
1153   
1154   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1155   .error_strings = snat_in2out_error_strings,
1156
1157   .runtime_data_bytes = sizeof (snat_runtime_t),
1158   
1159   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1160
1161   /* edit / add dispositions here */
1162   .next_nodes = {
1163     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1164     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1165     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1166   },
1167 };
1168
1169 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
1170
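/**
 * @brief Hand off in2out packets to the worker thread owning the flow
 *
 * The worker is chosen per "user" (source address and RX FIB index) from
 * the worker_by_in hash; unknown users are assigned round-robin and
 * recorded. Packets already on the right thread are sent straight to
 * snat-in2out, all others are enqueued on the per-worker handoff frame
 * queue.
 */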
1171 static uword
1172 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
1173                                vlib_node_runtime_t * node,
1174                                vlib_frame_t * frame)
1175 {
1176   snat_main_t *sm = &snat_main;
1177   vlib_thread_main_t *tm = vlib_get_thread_main ();
1178   u32 n_left_from, *from, *to_next = 0;
1179   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
1180   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
1181     = 0;
1182   vlib_frame_queue_elt_t *hf = 0;
1183   vlib_frame_t *f = 0;
1184   int i;
1185   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
1186   u32 next_worker_index = 0;
1187   u32 current_worker_index = ~0;
1188   u32 cpu_index = os_get_cpu_number ();
1189
1190   ASSERT (vec_len (sm->workers));
1191
1192   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
1193     {
1194       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
1195
1196       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
1197                                sm->first_worker_index + sm->num_workers - 1,
1198                                (vlib_frame_queue_t *) (~0));
1199     }
1200
1201   from = vlib_frame_vector_args (frame);
1202   n_left_from = frame->n_vectors;
1203
1204   while (n_left_from > 0)
1205     {
1206       u32 bi0;
1207       vlib_buffer_t *b0;
1208       u32 sw_if_index0;
1209       u32 rx_fib_index0;
1210       ip4_header_t * ip0;
1211       snat_user_key_t key0;
1212       clib_bihash_kv_8_8_t kv0, value0;
1213       u8 do_handoff;
1214
1215       bi0 = from[0];
1216       from += 1;
1217       n_left_from -= 1;
1218
1219       b0 = vlib_get_buffer (vm, bi0);
1220
1221       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1222       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1223
1224       ip0 = vlib_buffer_get_current (b0);
1225
1226       key0.addr = ip0->src_address;
1227       key0.fib_index = rx_fib_index0;
1228
1229       kv0.key = key0.as_u64;
1230
1231       /* Ever heard of the "user" before? */
1232       if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0))
1233         {
1234           /* No, assign next available worker (RR) */
1235           next_worker_index = sm->first_worker_index;
1236           if (vec_len (sm->workers))
1237             {
1238               next_worker_index += 
1239                 sm->workers[sm->next_worker++ % _vec_len (sm->workers)];
1240             }
1241
1242           /* add non-translated packets worker lookup */
1243           kv0.value = next_worker_index;
1244           clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
1245         }
1246       else
1247         next_worker_index = value0.value;
1248
1249       if (PREDICT_FALSE (next_worker_index != cpu_index))
1250         {
1251           do_handoff = 1;
1252
1253           if (next_worker_index != current_worker_index)
1254             {
1255               if (hf)
1256                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1257
1258               hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
1259                                                       next_worker_index,
1260                                                       handoff_queue_elt_by_worker_index);
1261
1262               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
1263               to_next_worker = &hf->buffer_index[hf->n_vectors];
1264               current_worker_index = next_worker_index;
1265             }
1266
1267           /* enqueue to correct worker thread */
1268           to_next_worker[0] = bi0;
1269           to_next_worker++;
1270           n_left_to_next_worker--;
1271
1272           if (n_left_to_next_worker == 0)
1273             {
1274               hf->n_vectors = VLIB_FRAME_SIZE;
1275               vlib_put_frame_queue_elt (hf);
1276               current_worker_index = ~0;
1277               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
1278               hf = 0;
1279             }
1280         }
1281       else
1282         {
1283           do_handoff = 0;
1284           /* if this is 1st frame */
1285           if (!f)
1286             {
1287               f = vlib_get_frame_to_node (vm, snat_in2out_node.index);
1288               to_next = vlib_frame_vector_args (f);
1289             }
1290
1291           to_next[0] = bi0;
1292           to_next += 1;
1293           f->n_vectors++;
1294         }
1295
1296       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1297                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1298         {
1299           snat_in2out_worker_handoff_trace_t *t =
1300             vlib_add_trace (vm, node, b0, sizeof (*t));
1301           t->next_worker_index = next_worker_index;
1302           t->do_handoff = do_handoff;
1303         }
1304     }
1305
1306   if (f)
1307     vlib_put_frame_to_node (vm, snat_in2out_node.index, f);
1308
1309   if (hf)
1310     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1311
1312   /* Ship frames to the worker nodes */
1313   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
1314     {
1315       if (handoff_queue_elt_by_worker_index[i])
1316         {
1317           hf = handoff_queue_elt_by_worker_index[i];
1318           /*
1319            * It works better to let the handoff node
1320            * rate-adapt, always ship the handoff queue element.
1321            */
1322           if (1 || hf->n_vectors == hf->last_n_vectors)
1323             {
1324               vlib_put_frame_queue_elt (hf);
1325               handoff_queue_elt_by_worker_index[i] = 0;
1326             }
1327           else
1328             hf->last_n_vectors = hf->n_vectors;
1329         }
1330       congested_handoff_queue_by_worker_index[i] =
1331         (vlib_frame_queue_t *) (~0);
1332     }
1333   hf = 0;
1334   current_worker_index = ~0;
1335   return frame->n_vectors;
1336 }
1337
1338 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
1339   .function = snat_in2out_worker_handoff_fn,
1340   .name = "snat-in2out-worker-handoff",
1341   .vector_size = sizeof (u32),
1342   .format_trace = format_snat_in2out_worker_handoff_trace,
1343   .type = VLIB_NODE_TYPE_INTERNAL,
1344   
1345   .n_next_nodes = 1,
1346
1347   .next_nodes = {
1348     [0] = "error-drop",
1349   },
1350 };
1351
1352 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
1353
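/**
 * @brief Translate ICMP packets using static mappings only
 *
 * The echo identifier is used as the local port when matching static
 * mappings; no session state is consulted. When no mapping matches,
 * packets addressed to the interface address pass through untranslated
 * and everything else is dropped with the NO_TRANSLATION error.
 */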
1354 static inline u32 icmp_in2out_static_map (snat_main_t *sm,
1355                                           vlib_buffer_t * b0,
1356                                           ip4_header_t * ip0,
1357                                           icmp46_header_t * icmp0,
1358                                           u32 sw_if_index0,
1359                                           vlib_node_runtime_t * node,
1360                                           u32 next0,
1361                                           u32 rx_fib_index0)
1362 {
1363   snat_session_key_t key0, sm0;
1364   icmp_echo_header_t *echo0;
1365   u32 new_addr0, old_addr0;
1366   u16 old_id0, new_id0;
1367   ip_csum_t sum0;
1368   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
1369
1370   echo0 = (icmp_echo_header_t *)(icmp0+1);
1371
1372   key0.addr = ip0->src_address;
1373   key0.port = echo0->identifier;
1374   key0.fib_index = rx_fib_index0;
1375   
1376   if (snat_static_mapping_match(sm, key0, &sm0, 0))
1377     {
1378       ip4_address_t * first_int_addr;
1379
1380       if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
1381         {
1382           first_int_addr =
1383             ip4_interface_first_address (sm->ip4_main, sw_if_index0,
1384                                          0 /* just want the address */);
1385           rt->cached_sw_if_index = sw_if_index0;
1386           if (first_int_addr)
1387             rt->cached_ip4_address = first_int_addr->as_u32;
1388           else
1389             rt->cached_ip4_address = 0;
1390         }
1391
1392       /* Don't NAT packet aimed at the intfc address */
1393       if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
1394                                 rt->cached_ip4_address))
1395         return next0;
1396
1397       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1398       return SNAT_IN2OUT_NEXT_DROP;
1399     }
1400
1401   new_addr0 = sm0.addr.as_u32;
1402   new_id0 = sm0.port;
1403   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1404   old_addr0 = ip0->src_address.as_u32;
1405   ip0->src_address.as_u32 = new_addr0;
1406   
1407   sum0 = ip0->checksum;
1408   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1409                          ip4_header_t,
1410                          src_address /* changed member */);
1411   ip0->checksum = ip_csum_fold (sum0);
1412   
1413   if (PREDICT_FALSE(new_id0 != echo0->identifier))
1414     {
1415       old_id0 = echo0->identifier;
1416       echo0->identifier = new_id0;
1417
1418       sum0 = icmp0->checksum;
1419       sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
1420                              identifier);
1421       icmp0->checksum = ip_csum_fold (sum0);
1422     }
1423
1424   return next0;
1425 }
1426
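/**
 * @brief snat-in2out-fast node function: stateless translation via static mappings
 *
 * Rewrites the source address (and port, when the mapping differs)
 * directly from the matching static mapping without creating sessions.
 * TCP/UDP packets with no matching mapping are dropped with the
 * NO_TRANSLATION error; unsupported protocols pass through unchanged.
 */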
1427 static uword
1428 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
1429                                 vlib_node_runtime_t * node,
1430                                 vlib_frame_t * frame)
1431 {
1432   u32 n_left_from, * from, * to_next;
1433   snat_in2out_next_t next_index;
1434   u32 pkts_processed = 0;
1435   snat_main_t * sm = &snat_main;
1436   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
1437   u32 stats_node_index;
1438
1439   stats_node_index = snat_in2out_fast_node.index;
1440
1441   from = vlib_frame_vector_args (frame);
1442   n_left_from = frame->n_vectors;
1443   next_index = node->cached_next_index;
1444
1445   while (n_left_from > 0)
1446     {
1447       u32 n_left_to_next;
1448
1449       vlib_get_next_frame (vm, node, next_index,
1450                            to_next, n_left_to_next);
1451
1452       while (n_left_from > 0 && n_left_to_next > 0)
1453         {
1454           u32 bi0;
1455           vlib_buffer_t * b0;
1456           u32 next0;
1457           u32 sw_if_index0;
1458           ip4_header_t * ip0;
1459           ip_csum_t sum0;
1460           u32 new_addr0, old_addr0;
1461           u16 old_port0, new_port0;
1462           udp_header_t * udp0;
1463           tcp_header_t * tcp0;
1464           icmp46_header_t * icmp0;
1465           snat_session_key_t key0, sm0;
1466           u32 proto0;
1467           u32 rx_fib_index0;
1468
1469           /* speculatively enqueue b0 to the current next frame */
1470           bi0 = from[0];
1471           to_next[0] = bi0;
1472           from += 1;
1473           to_next += 1;
1474           n_left_from -= 1;
1475           n_left_to_next -= 1;
1476
1477           b0 = vlib_get_buffer (vm, bi0);
1478           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1479
1480           ip0 = vlib_buffer_get_current (b0);
1481           udp0 = ip4_next_header (ip0);
1482           tcp0 = (tcp_header_t *) udp0;
1483           icmp0 = (icmp46_header_t *) udp0;
1484
1485           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1486           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1487
1488           proto0 = ~0;
1489           proto0 = (ip0->protocol == IP_PROTOCOL_UDP)
1490             ? SNAT_PROTOCOL_UDP : proto0;
1491           proto0 = (ip0->protocol == IP_PROTOCOL_TCP)
1492             ? SNAT_PROTOCOL_TCP : proto0;
1493           proto0 = (ip0->protocol == IP_PROTOCOL_ICMP)
1494             ? SNAT_PROTOCOL_ICMP : proto0;
1495
1496           if (PREDICT_FALSE (proto0 == ~0))
1497               goto trace0;
1498
1499           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1500             {
1501               ip4_address_t * first_int_addr;
1502               
1503               if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
1504                 {
1505                   first_int_addr = 
1506                     ip4_interface_first_address (sm->ip4_main, sw_if_index0,
1507                                                  0 /* just want the address */);
1508                   rt->cached_sw_if_index = sw_if_index0;
                  if (first_int_addr)
1509                     rt->cached_ip4_address = first_int_addr->as_u32;
                  else
                    rt->cached_ip4_address = 0;
1510                 }
1511               
1512               /* Don't NAT packet aimed at the intfc address */
1513               if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
1514                                 rt->cached_ip4_address))
1515                 goto trace0;
1516
1517               next0 = icmp_in2out_static_map
1518                 (sm, b0, ip0, icmp0, sw_if_index0, node, next0, rx_fib_index0);
1519               goto trace0;
1520             }
1521
1522           key0.addr = ip0->src_address;
1523           key0.port = udp0->src_port;
1524           key0.fib_index = rx_fib_index0;
1525
1526           if (snat_static_mapping_match(sm, key0, &sm0, 0))
1527             {
1528               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1529               next0 = SNAT_IN2OUT_NEXT_DROP;
1530               goto trace0;
1531             }
1532
1533           new_addr0 = sm0.addr.as_u32;
1534           new_port0 = sm0.port;
1535           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1536           old_addr0 = ip0->src_address.as_u32;
1537           ip0->src_address.as_u32 = new_addr0;
1538
1539           sum0 = ip0->checksum;
1540           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1541                                  ip4_header_t,
1542                                  src_address /* changed member */);
1543           ip0->checksum = ip_csum_fold (sum0);
1544
1545           if (PREDICT_FALSE(new_port0 != udp0->src_port))
1546             {
1547               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1548                 {
1549                   old_port0 = tcp0->ports.src;
1550                   tcp0->ports.src = new_port0;
1551
1552                   sum0 = tcp0->checksum;
1553                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1554                                          ip4_header_t,
1555                                          dst_address /* changed member */);
1556                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
1557                                          ip4_header_t /* cheat */,
1558                                          length /* changed member */);
1559                   tcp0->checksum = ip_csum_fold(sum0);
1560                 }
1561               else
1562                 {
1563                   old_port0 = udp0->src_port;
1564                   udp0->src_port = new_port0;
1565                   udp0->checksum = 0;
1566                 }
1567             }
1568           else
1569             {
1570               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1571                 {
1572                   sum0 = tcp0->checksum;
1573                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1574                                          ip4_header_t,
1575                                          dst_address /* changed member */);
1576                   tcp0->checksum = ip_csum_fold(sum0);
1577                 }
1578             }
1579
1580           /* Hairpinning */
1581           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1582
1583         trace0:
1584           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1585                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1586             {
1587               snat_in2out_trace_t *t =
1588                  vlib_add_trace (vm, node, b0, sizeof (*t));
1589               t->sw_if_index = sw_if_index0;
1590               t->next_index = next0;
1591             }
1592
1593           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1594
1595           /* verify speculative enqueue, maybe switch current next frame */
1596           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1597                                            to_next, n_left_to_next,
1598                                            bi0, next0);
1599         }
1600
1601       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1602     }
1603
1604   vlib_node_increment_counter (vm, stats_node_index,
1605                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1606                                pkts_processed);
1607   return frame->n_vectors;
1608 }
1609
1610
1611 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
1612   .function = snat_in2out_fast_static_map_fn,
1613   .name = "snat-in2out-fast",
1614   .vector_size = sizeof (u32),
1615   .format_trace = format_snat_in2out_fast_trace,
1616   .type = VLIB_NODE_TYPE_INTERNAL,
1617   
1618   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1619   .error_strings = snat_in2out_error_strings,
1620
1621   .runtime_data_bytes = sizeof (snat_runtime_t),
1622   
1623   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1624
1625   /* edit / add dispositions here */
1626   .next_nodes = {
1627     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1628     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1629     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1630   },
1631 };
1632
1633 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);