Repair Doxygen build infrastructure
[vpp.git] / plugins / snat-plugin / snat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <snat/snat.h>
25
26 #include <vppinfra/hash.h>
27 #include <vppinfra/error.h>
28 #include <vppinfra/elog.h>
29
30 typedef struct {
31   u32 sw_if_index;
32   u32 next_index;
33   u32 session_index;
34   u32 is_slow_path;
35 } snat_in2out_trace_t;
36
37 typedef struct {
38   u32 next_worker_index;
39   u8 do_handoff;
40 } snat_in2out_worker_handoff_trace_t;
41
42 /* packet trace format function */
43 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
44 {
45   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
46   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
47   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
48   char * tag;
49
50   tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
51   
52   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
53               t->sw_if_index, t->next_index, t->session_index);
54
55   return s;
56 }
57
58 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
59 {
60   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
61   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
62   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
63
64   s = format (s, "SANT_IN2OUT_FAST: sw_if_index %d, next index %d", 
65               t->sw_if_index, t->next_index);
66
67   return s;
68 }
69
70 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
71 {
72   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
73   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
74   snat_in2out_worker_handoff_trace_t * t =
75     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
76   char * m;
77
78   m = t->do_handoff ? "next worker" : "same worker";
79   s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
80
81   return s;
82 }
83
84 vlib_node_registration_t snat_in2out_node;
85 vlib_node_registration_t snat_in2out_slowpath_node;
86 vlib_node_registration_t snat_in2out_fast_node;
87 vlib_node_registration_t snat_in2out_worker_handoff_node;
88
/*
 * X-macro listing the in2out error counters as _(SYMBOL, "description").
 * It is expanded twice below: once into the snat_in2out_error_t
 * enumerators and once into the matching description strings, so both
 * always stay in the same order.
 */
#define foreach_snat_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(IN2OUT_PACKETS, "Good in2out packets processed")      \
_(OUT_OF_PORTS, "Out of ports")                         \
_(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
_(BAD_ICMP_TYPE, "icmp type not echo-request")          \
_(NO_TRANSLATION, "No translation")

/* Error codes; SNAT_IN2OUT_N_ERROR is the number of codes. */
typedef enum
{
#define _(name, text) SNAT_IN2OUT_ERROR_##name,
  foreach_snat_in2out_error
#undef _
  SNAT_IN2OUT_N_ERROR,
} snat_in2out_error_t;

/* Description strings, indexed by snat_in2out_error_t. */
static char *snat_in2out_error_strings[] = {
#define _(name, text) text,
  foreach_snat_in2out_error
#undef _
};
109
/* Next-node slots used by the in2out nodes. */
typedef enum
{
  /* initial disposition of every packet */
  SNAT_IN2OUT_NEXT_LOOKUP = 0,
  /* packet is discarded; the error is recorded in b->error */
  SNAT_IN2OUT_NEXT_DROP,
  /* fast path could not handle the packet, pass it to the slow path */
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  /* number of next slots */
  SNAT_IN2OUT_N_NEXT,
} snat_in2out_next_t;
116
117 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
118                       ip4_header_t * ip0,
119                       u32 rx_fib_index0,
120                       snat_session_key_t * key0,
121                       snat_session_t ** sessionp,
122                       vlib_node_runtime_t * node,
123                       u32 next0,
124                       u32 cpu_index)
125 {
126   snat_user_t *u;
127   snat_user_key_t user_key;
128   snat_session_t *s;
129   clib_bihash_kv_8_8_t kv0, value0;
130   u32 oldest_per_user_translation_list_index;
131   dlist_elt_t * oldest_per_user_translation_list_elt;
132   dlist_elt_t * per_user_translation_list_elt;
133   dlist_elt_t * per_user_list_head_elt;
134   u32 session_index;
135   snat_session_key_t key1;
136   u32 address_index = ~0;
137   u32 outside_fib_index;
138   uword * p;
139   snat_static_mapping_key_t worker_by_out_key;
140
141   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
142   if (! p)
143     {
144       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
145       return SNAT_IN2OUT_NEXT_DROP;
146     }
147   outside_fib_index = p[0];
148
149   user_key.addr = ip0->src_address;
150   user_key.fib_index = rx_fib_index0;
151   kv0.key = user_key.as_u64;
152   
153   /* Ever heard of the "user" = src ip4 address before? */
154   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
155     {
156       /* no, make a new one */
157       pool_get (sm->per_thread_data[cpu_index].users, u);
158       memset (u, 0, sizeof (*u));
159       u->addr = ip0->src_address;
160
161       pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt);
162
163       u->sessions_per_user_list_head_index = per_user_list_head_elt -
164         sm->per_thread_data[cpu_index].list_pool;
165
166       clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
167                        u->sessions_per_user_list_head_index);
168
169       kv0.value = u - sm->per_thread_data[cpu_index].users;
170
171       /* add user */
172       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
173     }
174   else
175     {
176       u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
177                              value0.value);
178     }
179
180   /* Over quota? Recycle the least recently used dynamic translation */
181   if (u->nsessions >= sm->max_translations_per_user)
182     {
183       /* Remove the oldest dynamic translation */
184       do {
185           oldest_per_user_translation_list_index =
186             clib_dlist_remove_head (sm->per_thread_data[cpu_index].list_pool,
187                                     u->sessions_per_user_list_head_index);
188
189           ASSERT (oldest_per_user_translation_list_index != ~0);
190
191           /* add it back to the end of the LRU list */
192           clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
193                               u->sessions_per_user_list_head_index,
194                               oldest_per_user_translation_list_index);
195           /* Get the list element */
196           oldest_per_user_translation_list_elt =
197             pool_elt_at_index (sm->per_thread_data[cpu_index].list_pool,
198                                oldest_per_user_translation_list_index);
199
200           /* Get the session index from the list element */
201           session_index = oldest_per_user_translation_list_elt->value;
202
203           /* Get the session */
204           s = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
205                                  session_index);
206       } while (snat_is_session_static (s));
207
208       /* Remove in2out, out2in keys */
209       kv0.key = s->in2out.as_u64;
210       if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
211           clib_warning ("in2out key delete failed");
212       kv0.key = s->out2in.as_u64;
213       if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
214           clib_warning ("out2in key delete failed");
215
216       snat_free_outside_address_and_port 
217         (sm, &s->out2in, s->outside_address_index);
218       s->outside_address_index = ~0;
219
220       if (snat_alloc_outside_address_and_port (sm, &key1, &address_index))
221         {
222           ASSERT(0);
223
224           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
225           return SNAT_IN2OUT_NEXT_DROP;
226         }
227       s->outside_address_index = address_index;
228     }
229   else
230     {
231       u8 static_mapping = 1;
232
233       /* First try to match static mapping by local address and port */
234       if (snat_static_mapping_match (sm, *key0, &key1, 0))
235         {
236           static_mapping = 0;
237           /* Try to create dynamic translation */
238           if (snat_alloc_outside_address_and_port (sm, &key1, &address_index))
239             {
240               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
241               return SNAT_IN2OUT_NEXT_DROP;
242             }
243         }
244
245       /* Create a new session */
246       pool_get (sm->per_thread_data[cpu_index].sessions, s);
247       memset (s, 0, sizeof (*s));
248       
249       s->outside_address_index = address_index;
250
251       if (static_mapping)
252         {
253           u->nstaticsessions++;
254           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
255         }
256       else
257         {
258           u->nsessions++;
259         }
260
261       /* Create list elts */
262       pool_get (sm->per_thread_data[cpu_index].list_pool,
263                 per_user_translation_list_elt);
264       clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
265                        per_user_translation_list_elt -
266                        sm->per_thread_data[cpu_index].list_pool);
267
268       per_user_translation_list_elt->value =
269         s - sm->per_thread_data[cpu_index].sessions;
270       s->per_user_index = per_user_translation_list_elt -
271                           sm->per_thread_data[cpu_index].list_pool;
272       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
273
274       clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
275                           s->per_user_list_head_index,
276                           per_user_translation_list_elt -
277                           sm->per_thread_data[cpu_index].list_pool);
278    }
279   
280   s->in2out = *key0;
281   s->out2in = key1;
282   s->out2in.protocol = key0->protocol;
283   s->out2in.fib_index = outside_fib_index;
284   *sessionp = s;
285
286   /* Add to translation hashes */
287   kv0.key = s->in2out.as_u64;
288   kv0.value = s - sm->per_thread_data[cpu_index].sessions;
289   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
290       clib_warning ("in2out key add failed");
291   
292   kv0.key = s->out2in.as_u64;
293   kv0.value = s - sm->per_thread_data[cpu_index].sessions;
294   
295   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
296       clib_warning ("out2in key add failed");
297
298   /* Add to translated packets worker lookup */
299   worker_by_out_key.addr = s->out2in.addr;
300   worker_by_out_key.port = s->out2in.port;
301   worker_by_out_key.fib_index = s->out2in.fib_index;
302   kv0.key = worker_by_out_key.as_u64;
303   kv0.value = cpu_index;
304   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
305   return next0;
306 }
307                       
308 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
309                                          vlib_buffer_t * b0,
310                                          ip4_header_t * ip0,
311                                          icmp46_header_t * icmp0,
312                                          u32 sw_if_index0,
313                                          u32 rx_fib_index0,
314                                          vlib_node_runtime_t * node,
315                                          u32 next0,
316                                          f64 now,
317                                          u32 cpu_index)
318 {
319   snat_session_key_t key0;
320   icmp_echo_header_t *echo0;
321   clib_bihash_kv_8_8_t kv0, value0;
322   snat_session_t * s0;
323   u32 new_addr0, old_addr0;
324   u16 old_id0, new_id0;
325   ip_csum_t sum0;
326   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
327
328   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request))
329     {
330       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
331       return SNAT_IN2OUT_NEXT_DROP;
332     }
333   
334   echo0 = (icmp_echo_header_t *)(icmp0+1);
335
336   key0.addr = ip0->src_address;
337   key0.port = echo0->identifier;
338   key0.protocol = SNAT_PROTOCOL_ICMP;
339   key0.fib_index = rx_fib_index0;
340   
341   kv0.key = key0.as_u64;
342   
343   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
344     {
345       ip4_address_t * first_int_addr;
346
347       if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
348         {
349           first_int_addr = 
350             ip4_interface_first_address (sm->ip4_main, sw_if_index0,
351                                          0 /* just want the address */);
352           rt->cached_sw_if_index = sw_if_index0;
353           rt->cached_ip4_address = first_int_addr->as_u32;
354         }
355       
356       /* Don't NAT packet aimed at the intfc address */
357       if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
358                                 rt->cached_ip4_address))
359         return next0;
360       
361       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
362                          &s0, node, next0, cpu_index);
363       
364       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
365         return next0;
366     }
367   else
368     s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
369                             value0.value);
370
371   old_addr0 = ip0->src_address.as_u32;
372   ip0->src_address = s0->out2in.addr;
373   new_addr0 = ip0->src_address.as_u32;
374   vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
375   
376   sum0 = ip0->checksum;
377   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
378                          ip4_header_t,
379                          src_address /* changed member */);
380   ip0->checksum = ip_csum_fold (sum0);
381   
382   old_id0 = echo0->identifier;
383   new_id0 = s0->out2in.port;
384   echo0->identifier = new_id0;
385
386   sum0 = icmp0->checksum;
387   sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
388                          identifier);
389   icmp0->checksum = ip_csum_fold (sum0);
390
391   /* Accounting */
392   s0->last_heard = now;
393   s0->total_pkts++;
394   s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
395   /* Per-user LRU list maintenance for dynamic translations */
396   if (!snat_is_session_static (s0))
397     {
398       clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
399                          s0->per_user_index);
400       clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
401                           s0->per_user_list_head_index,
402                           s0->per_user_index);
403     }
404
405   return next0;
406 }
407
408 /**
409  * @brief Hairpinning
410  *
411  * Hairpinning allows two endpoints on the internal side of the NAT to
412  * communicate even if they only use each other's external IP addresses
413  * and ports.
414  *
415  * @param sm     SNAT main.
416  * @param b0     Vlib buffer.
417  * @param ip0    IP header.
418  * @param udp0   UDP header.
419  * @param tcp0   TCP header.
420  * @param proto0 SNAT protocol.
421  */
422 static inline void
423 snat_hairpinning (snat_main_t *sm,
424                   vlib_buffer_t * b0,
425                   ip4_header_t * ip0,
426                   udp_header_t * udp0,
427                   tcp_header_t * tcp0,
428                   u32 proto0)
429 {
430   snat_session_key_t key0, sm0;
431   snat_static_mapping_key_t k0;
432   snat_session_t * s0;
433   clib_bihash_kv_8_8_t kv0, value0;
434   ip_csum_t sum0;
435   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
436   u16 new_dst_port0, old_dst_port0;
437
438   key0.addr = ip0->dst_address;
439   key0.port = udp0->dst_port;
440   key0.protocol = proto0;
441   key0.fib_index = sm->outside_fib_index;
442   kv0.key = key0.as_u64;
443
444   /* Check if destination is in active sessions */
445   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
446     {
447       /* or static mappings */
448       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
449         {
450           new_dst_addr0 = sm0.addr.as_u32;
451           new_dst_port0 = sm0.port;
452           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
453         }
454     }
455   else
456     {
457       si = value0.value;
458       if (sm->num_workers > 1)
459         {
460           k0.addr = ip0->dst_address;
461           k0.port = udp0->dst_port;
462           k0.fib_index = sm->outside_fib_index;
463           kv0.key = k0.as_u64;
464           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
465             ASSERT(0);
466           else
467             ti = value0.value;
468         }
469       else
470         ti = sm->num_workers;
471
472       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
473       new_dst_addr0 = s0->in2out.addr.as_u32;
474       new_dst_port0 = s0->in2out.port;
475       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
476     }
477
478   /* Destination is behind the same NAT, use internal address and port */
479   if (new_dst_addr0)
480     {
481       old_dst_addr0 = ip0->dst_address.as_u32;
482       ip0->dst_address.as_u32 = new_dst_addr0;
483       sum0 = ip0->checksum;
484       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
485                              ip4_header_t, dst_address);
486       ip0->checksum = ip_csum_fold (sum0);
487
488       old_dst_port0 = tcp0->ports.dst;
489       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
490         {
491           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
492             {
493               tcp0->ports.dst = new_dst_port0;
494               sum0 = tcp0->checksum;
495               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
496                                      ip4_header_t, dst_address);
497               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
498                                      ip4_header_t /* cheat */, length);
499               tcp0->checksum = ip_csum_fold(sum0);
500             }
501           else
502             {
503               udp0->dst_port = new_dst_port0;
504               udp0->checksum = 0;
505             }
506         }
507     }
508 }
509
510 static inline uword
511 snat_in2out_node_fn_inline (vlib_main_t * vm,
512                             vlib_node_runtime_t * node,
513                             vlib_frame_t * frame, int is_slow_path)
514 {
515   u32 n_left_from, * from, * to_next;
516   snat_in2out_next_t next_index;
517   u32 pkts_processed = 0;
518   snat_main_t * sm = &snat_main;
519   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
520   f64 now = vlib_time_now (vm);
521   u32 stats_node_index;
522   u32 cpu_index = os_get_cpu_number ();
523
524   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
525     snat_in2out_node.index;
526
527   from = vlib_frame_vector_args (frame);
528   n_left_from = frame->n_vectors;
529   next_index = node->cached_next_index;
530
531   while (n_left_from > 0)
532     {
533       u32 n_left_to_next;
534
535       vlib_get_next_frame (vm, node, next_index,
536                            to_next, n_left_to_next);
537
538       while (n_left_from >= 4 && n_left_to_next >= 2)
539         {
540           u32 bi0, bi1;
541           vlib_buffer_t * b0, * b1;
542           u32 next0, next1;
543           u32 sw_if_index0, sw_if_index1;
544           ip4_header_t * ip0, * ip1;
545           ip_csum_t sum0, sum1;
546           u32 new_addr0, old_addr0, new_addr1, old_addr1;
547           u16 old_port0, new_port0, old_port1, new_port1;
548           udp_header_t * udp0, * udp1;
549           tcp_header_t * tcp0, * tcp1;
550           icmp46_header_t * icmp0, * icmp1;
551           snat_session_key_t key0, key1;
552           u32 rx_fib_index0, rx_fib_index1;
553           u32 proto0, proto1;
554           snat_session_t * s0 = 0, * s1 = 0;
555           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
556           
557           /* Prefetch next iteration. */
558           {
559             vlib_buffer_t * p2, * p3;
560             
561             p2 = vlib_get_buffer (vm, from[2]);
562             p3 = vlib_get_buffer (vm, from[3]);
563             
564             vlib_prefetch_buffer_header (p2, LOAD);
565             vlib_prefetch_buffer_header (p3, LOAD);
566
567             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
568             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
569           }
570
571           /* speculatively enqueue b0 and b1 to the current next frame */
572           to_next[0] = bi0 = from[0];
573           to_next[1] = bi1 = from[1];
574           from += 2;
575           to_next += 2;
576           n_left_from -= 2;
577           n_left_to_next -= 2;
578           
579           b0 = vlib_get_buffer (vm, bi0);
580           b1 = vlib_get_buffer (vm, bi1);
581
582           ip0 = vlib_buffer_get_current (b0);
583           udp0 = ip4_next_header (ip0);
584           tcp0 = (tcp_header_t *) udp0;
585           icmp0 = (icmp46_header_t *) udp0;
586
587           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
588           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
589                                    sw_if_index0);
590
591           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
592
593           proto0 = ~0;
594           proto0 = (ip0->protocol == IP_PROTOCOL_UDP) 
595             ? SNAT_PROTOCOL_UDP : proto0;
596           proto0 = (ip0->protocol == IP_PROTOCOL_TCP) 
597             ? SNAT_PROTOCOL_TCP : proto0;
598           proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) 
599             ? SNAT_PROTOCOL_ICMP : proto0;
600
601           /* Next configured feature, probably ip4-lookup */
602           if (is_slow_path)
603             {
604               if (PREDICT_FALSE (proto0 == ~0))
605                 goto trace00;
606               
607               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
608                 {
609                   next0 = icmp_in2out_slow_path 
610                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
611                      node, next0, now, cpu_index);
612                   goto trace00;
613                 }
614             }
615           else
616             {
617               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
618                 {
619                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
620                   goto trace00;
621                 }
622             }
623
624           key0.addr = ip0->src_address;
625           key0.port = udp0->src_port;
626           key0.protocol = proto0;
627           key0.fib_index = rx_fib_index0;
628           
629           kv0.key = key0.as_u64;
630
631           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
632             {
633               if (is_slow_path)
634                 {
635                   ip4_address_t * first_int_addr;
636                   
637                   if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
638                     {
639                       first_int_addr = 
640                         ip4_interface_first_address (sm->ip4_main, sw_if_index0,
641                                                      0 /* just want the address */);
642                       rt->cached_sw_if_index = sw_if_index0;
643                       rt->cached_ip4_address = first_int_addr->as_u32;
644                     }
645                   
646                   /* Don't NAT packet aimed at the intfc address */
647                   if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
648                                     rt->cached_ip4_address))
649                     goto trace00;
650                   
651                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
652                                      &s0, node, next0, cpu_index);
653                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
654                     goto trace00;
655                 }
656               else
657                 {
658                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
659                   goto trace00;
660                 }
661             }
662           else
663             s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
664                                     value0.value);
665
666           old_addr0 = ip0->src_address.as_u32;
667           ip0->src_address = s0->out2in.addr;
668           new_addr0 = ip0->src_address.as_u32;
669           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
670
671           sum0 = ip0->checksum;
672           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
673                                  ip4_header_t,
674                                  src_address /* changed member */);
675           ip0->checksum = ip_csum_fold (sum0);
676
677           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
678             {
679               old_port0 = tcp0->ports.src;
680               tcp0->ports.src = s0->out2in.port;
681               new_port0 = tcp0->ports.src;
682
683               sum0 = tcp0->checksum;
684               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
685                                      ip4_header_t,
686                                      dst_address /* changed member */);
687               sum0 = ip_csum_update (sum0, old_port0, new_port0,
688                                      ip4_header_t /* cheat */,
689                                      length /* changed member */);
690               tcp0->checksum = ip_csum_fold(sum0);
691             }
692           else
693             {
694               old_port0 = udp0->src_port;
695               udp0->src_port = s0->out2in.port;
696               udp0->checksum = 0;
697             }
698
699           /* Hairpinning */
700           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
701
702           /* Accounting */
703           s0->last_heard = now;
704           s0->total_pkts++;
705           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
706           /* Per-user LRU list maintenance for dynamic translation */
707           if (!snat_is_session_static (s0))
708             {
709               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
710                                  s0->per_user_index);
711               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
712                                   s0->per_user_list_head_index,
713                                   s0->per_user_index);
714             }
715         trace00:
716
717           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
718                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
719             {
720               snat_in2out_trace_t *t = 
721                  vlib_add_trace (vm, node, b0, sizeof (*t));
722               t->is_slow_path = is_slow_path;
723               t->sw_if_index = sw_if_index0;
724               t->next_index = next0;
725                   t->session_index = ~0;
726               if (s0)
727                 t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
728             }
729
730           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
731
732           ip1 = vlib_buffer_get_current (b1);
733           udp1 = ip4_next_header (ip1);
734           tcp1 = (tcp_header_t *) udp1;
735           icmp1 = (icmp46_header_t *) udp1;
736
737           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
738           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
739                                    sw_if_index1);
740
741           proto1 = ~0;
742           proto1 = (ip1->protocol == IP_PROTOCOL_UDP) 
743             ? SNAT_PROTOCOL_UDP : proto1;
744           proto1 = (ip1->protocol == IP_PROTOCOL_TCP) 
745             ? SNAT_PROTOCOL_TCP : proto1;
746           proto1 = (ip1->protocol == IP_PROTOCOL_ICMP) 
747             ? SNAT_PROTOCOL_ICMP : proto1;
748
749           /* Next configured feature, probably ip4-lookup */
750           if (is_slow_path)
751             {
752               if (PREDICT_FALSE (proto1 == ~0))
753                 goto trace01;
754               
755               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
756                 {
757                   next1 = icmp_in2out_slow_path 
758                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
759                      next1, now, cpu_index);
760                   goto trace01;
761                 }
762             }
763           else
764             {
765               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
766                 {
767                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
768                   goto trace01;
769                 }
770             }
771
772           key1.addr = ip1->src_address;
773           key1.port = udp1->src_port;
774           key1.protocol = proto1;
775           key1.fib_index = rx_fib_index1;
776           
777           kv1.key = key1.as_u64;
778
779             if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
780             {
781               if (is_slow_path)
782                 {
783                   ip4_address_t * first_int_addr;
784                   
785                   if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index1))
786                     {
787                       first_int_addr = 
788                         ip4_interface_first_address (sm->ip4_main, sw_if_index1,
789                                                      0 /* just want the address */);
790                       rt->cached_sw_if_index = sw_if_index1;
791                       rt->cached_ip4_address = first_int_addr->as_u32;
792                     }
793                   
794                   /* Don't NAT packet aimed at the intfc address */
795                   if (PREDICT_FALSE(ip1->dst_address.as_u32 ==
796                                     rt->cached_ip4_address))
797                     goto trace01;
798                   
799                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
800                                      &s1, node, next1, cpu_index);
801                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
802                     goto trace01;
803                 }
804               else
805                 {
806                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
807                   goto trace01;
808                 }
809             }
810           else
811             s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
812                                     value1.value);
813
814           old_addr1 = ip1->src_address.as_u32;
815           ip1->src_address = s1->out2in.addr;
816           new_addr1 = ip1->src_address.as_u32;
817           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
818
819           sum1 = ip1->checksum;
820           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
821                                  ip4_header_t,
822                                  src_address /* changed member */);
823           ip1->checksum = ip_csum_fold (sum1);
824
825           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
826             {
827               old_port1 = tcp1->ports.src;
828               tcp1->ports.src = s1->out2in.port;
829               new_port1 = tcp1->ports.src;
830
831               sum1 = tcp1->checksum;
832               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
833                                      ip4_header_t,
834                                      dst_address /* changed member */);
835               sum1 = ip_csum_update (sum1, old_port1, new_port1,
836                                      ip4_header_t /* cheat */,
837                                      length /* changed member */);
838               tcp1->checksum = ip_csum_fold(sum1);
839             }
840           else
841             {
842               old_port1 = udp1->src_port;
843               udp1->src_port = s1->out2in.port;
844               udp1->checksum = 0;
845             }
846
847           /* Hairpinning */
848           snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
849
850           /* Accounting */
851           s1->last_heard = now;
852           s1->total_pkts++;
853           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
854           /* Per-user LRU list maintenance for dynamic translation */
855           if (!snat_is_session_static (s1))
856             {
857               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
858                                  s1->per_user_index);
859               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
860                                   s1->per_user_list_head_index,
861                                   s1->per_user_index);
862             }
863         trace01:
864
865           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
866                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
867             {
868               snat_in2out_trace_t *t = 
869                  vlib_add_trace (vm, node, b1, sizeof (*t));
870               t->sw_if_index = sw_if_index1;
871               t->next_index = next1;
872               t->session_index = ~0;
873               if (s1)
874                 t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
875             }
876
877           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
878
879           /* verify speculative enqueues, maybe switch current next frame */
880           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
881                                            to_next, n_left_to_next,
882                                            bi0, bi1, next0, next1);
883         }
884
885       while (n_left_from > 0 && n_left_to_next > 0)
886         {
887           u32 bi0;
888           vlib_buffer_t * b0;
889           u32 next0;
890           u32 sw_if_index0;
891           ip4_header_t * ip0;
892           ip_csum_t sum0;
893           u32 new_addr0, old_addr0;
894           u16 old_port0, new_port0;
895           udp_header_t * udp0;
896           tcp_header_t * tcp0;
897           icmp46_header_t * icmp0;
898           snat_session_key_t key0;
899           u32 rx_fib_index0;
900           u32 proto0;
901           snat_session_t * s0 = 0;
902           clib_bihash_kv_8_8_t kv0, value0;
903           
904           /* speculatively enqueue b0 to the current next frame */
905           bi0 = from[0];
906           to_next[0] = bi0;
907           from += 1;
908           to_next += 1;
909           n_left_from -= 1;
910           n_left_to_next -= 1;
911
912           b0 = vlib_get_buffer (vm, bi0);
913           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
914
915           ip0 = vlib_buffer_get_current (b0);
916           udp0 = ip4_next_header (ip0);
917           tcp0 = (tcp_header_t *) udp0;
918           icmp0 = (icmp46_header_t *) udp0;
919
920           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
921           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
922                                    sw_if_index0);
923
924           proto0 = ~0;
925           proto0 = (ip0->protocol == IP_PROTOCOL_UDP) 
926             ? SNAT_PROTOCOL_UDP : proto0;
927           proto0 = (ip0->protocol == IP_PROTOCOL_TCP) 
928             ? SNAT_PROTOCOL_TCP : proto0;
929           proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) 
930             ? SNAT_PROTOCOL_ICMP : proto0;
931
932           /* Next configured feature, probably ip4-lookup */
933           if (is_slow_path)
934             {
935               if (PREDICT_FALSE (proto0 == ~0))
936                 goto trace0;
937               
938               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
939                 {
940                   next0 = icmp_in2out_slow_path 
941                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
942                      next0, now, cpu_index);
943                   goto trace0;
944                 }
945             }
946           else
947             {
948               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
949                 {
950                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
951                   goto trace0;
952                 }
953             }
954
955           key0.addr = ip0->src_address;
956           key0.port = udp0->src_port;
957           key0.protocol = proto0;
958           key0.fib_index = rx_fib_index0;
959           
960           kv0.key = key0.as_u64;
961
962           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
963             {
964               if (is_slow_path)
965                 {
966                   ip4_address_t * first_int_addr;
967                   
968                   if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
969                     {
970                       first_int_addr = 
971                         ip4_interface_first_address (sm->ip4_main, sw_if_index0,
972                                                      0 /* just want the address */);
973                       rt->cached_sw_if_index = sw_if_index0;
974                       rt->cached_ip4_address = first_int_addr->as_u32;
975                     }
976                   
977                   /* Don't NAT packet aimed at the intfc address */
978                   if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
979                                     rt->cached_ip4_address))
980                     goto trace0;
981                   
982                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
983                                      &s0, node, next0, cpu_index);
984                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
985                     goto trace0;
986                 }
987               else
988                 {
989                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
990                   goto trace0;
991                 }
992             }
993           else
994             s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
995                                     value0.value);
996
997           old_addr0 = ip0->src_address.as_u32;
998           ip0->src_address = s0->out2in.addr;
999           new_addr0 = ip0->src_address.as_u32;
1000           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1001
1002           sum0 = ip0->checksum;
1003           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1004                                  ip4_header_t,
1005                                  src_address /* changed member */);
1006           ip0->checksum = ip_csum_fold (sum0);
1007
1008           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1009             {
1010               old_port0 = tcp0->ports.src;
1011               tcp0->ports.src = s0->out2in.port;
1012               new_port0 = tcp0->ports.src;
1013
1014               sum0 = tcp0->checksum;
1015               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1016                                      ip4_header_t,
1017                                      dst_address /* changed member */);
1018               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1019                                      ip4_header_t /* cheat */,
1020                                      length /* changed member */);
1021               tcp0->checksum = ip_csum_fold(sum0);
1022             }
1023           else
1024             {
1025               old_port0 = udp0->src_port;
1026               udp0->src_port = s0->out2in.port;
1027               udp0->checksum = 0;
1028             }
1029
1030           /* Hairpinning */
1031           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1032
1033           /* Accounting */
1034           s0->last_heard = now;
1035           s0->total_pkts++;
1036           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1037           /* Per-user LRU list maintenance for dynamic translation */
1038           if (!snat_is_session_static (s0))
1039             {
1040               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
1041                                  s0->per_user_index);
1042               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
1043                                   s0->per_user_list_head_index,
1044                                   s0->per_user_index);
1045             }
1046
1047         trace0:
1048           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1049                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1050             {
1051               snat_in2out_trace_t *t = 
1052                  vlib_add_trace (vm, node, b0, sizeof (*t));
1053               t->is_slow_path = is_slow_path;
1054               t->sw_if_index = sw_if_index0;
1055               t->next_index = next0;
1056                   t->session_index = ~0;
1057               if (s0)
1058                 t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
1059             }
1060
1061           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1062
1063           /* verify speculative enqueue, maybe switch current next frame */
1064           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1065                                            to_next, n_left_to_next,
1066                                            bi0, next0);
1067         }
1068
1069       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1070     }
1071
1072   vlib_node_increment_counter (vm, stats_node_index, 
1073                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, 
1074                                pkts_processed);
1075   return frame->n_vectors;
1076 }
1077
1078 static uword
1079 snat_in2out_fast_path_fn (vlib_main_t * vm,
1080                           vlib_node_runtime_t * node,
1081                           vlib_frame_t * frame)
1082 {
1083   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */);
1084 }
1085
1086 VLIB_REGISTER_NODE (snat_in2out_node) = {
1087   .function = snat_in2out_fast_path_fn,
1088   .name = "snat-in2out",
1089   .vector_size = sizeof (u32),
1090   .format_trace = format_snat_in2out_trace,
1091   .type = VLIB_NODE_TYPE_INTERNAL,
1092   
1093   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1094   .error_strings = snat_in2out_error_strings,
1095
1096   .runtime_data_bytes = sizeof (snat_runtime_t),
1097   
1098   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1099
1100   /* edit / add dispositions here */
1101   .next_nodes = {
1102     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1103     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1104     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1105   },
1106 };
1107
1108 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1109
1110 static uword
1111 snat_in2out_slow_path_fn (vlib_main_t * vm,
1112                           vlib_node_runtime_t * node,
1113                           vlib_frame_t * frame)
1114 {
1115   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */);
1116 }
1117
1118 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1119   .function = snat_in2out_slow_path_fn,
1120   .name = "snat-in2out-slowpath",
1121   .vector_size = sizeof (u32),
1122   .format_trace = format_snat_in2out_trace,
1123   .type = VLIB_NODE_TYPE_INTERNAL,
1124   
1125   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1126   .error_strings = snat_in2out_error_strings,
1127
1128   .runtime_data_bytes = sizeof (snat_runtime_t),
1129   
1130   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1131
1132   /* edit / add dispositions here */
1133   .next_nodes = {
1134     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1135     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1136     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1137   },
1138 };
1139
1140 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
1141
1142 static uword
1143 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
1144                                vlib_node_runtime_t * node,
1145                                vlib_frame_t * frame)
1146 {
1147   snat_main_t *sm = &snat_main;
1148   vlib_thread_main_t *tm = vlib_get_thread_main ();
1149   u32 n_left_from, *from, *to_next = 0;
1150   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
1151   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
1152     = 0;
1153   vlib_frame_queue_elt_t *hf = 0;
1154   vlib_frame_t *f = 0;
1155   int i;
1156   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
1157   u32 next_worker_index = 0;
1158   u32 current_worker_index = ~0;
1159   u32 cpu_index = os_get_cpu_number ();
1160
1161   ASSERT (vec_len (sm->workers));
1162
1163   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
1164     {
1165       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
1166
1167       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
1168                                sm->first_worker_index + sm->num_workers - 1,
1169                                (vlib_frame_queue_t *) (~0));
1170     }
1171
1172   from = vlib_frame_vector_args (frame);
1173   n_left_from = frame->n_vectors;
1174
1175   while (n_left_from > 0)
1176     {
1177       u32 bi0;
1178       vlib_buffer_t *b0;
1179       u32 sw_if_index0;
1180       u32 rx_fib_index0;
1181       ip4_header_t * ip0;
1182       snat_user_key_t key0;
1183       clib_bihash_kv_8_8_t kv0, value0;
1184       u8 do_handoff;
1185
1186       bi0 = from[0];
1187       from += 1;
1188       n_left_from -= 1;
1189
1190       b0 = vlib_get_buffer (vm, bi0);
1191
1192       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1193       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1194
1195       ip0 = vlib_buffer_get_current (b0);
1196
1197       key0.addr = ip0->src_address;
1198       key0.fib_index = rx_fib_index0;
1199
1200       kv0.key = key0.as_u64;
1201
1202       /* Ever heard of of the "user" before? */
1203       if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0))
1204         {
1205           /* No, assign next available worker (RR) */
1206           next_worker_index = sm->first_worker_index +
1207             sm->workers[sm->next_worker++ % vec_len (sm->workers)];
1208
1209           /* add non-traslated packets worker lookup */
1210           kv0.value = next_worker_index;
1211           clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
1212         }
1213       else
1214         next_worker_index = value0.value;
1215
1216       if (PREDICT_FALSE (next_worker_index != cpu_index))
1217         {
1218           do_handoff = 1;
1219
1220           if (next_worker_index != current_worker_index)
1221             {
1222               if (hf)
1223                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1224
1225               hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
1226                                                       next_worker_index,
1227                                                       handoff_queue_elt_by_worker_index);
1228
1229               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
1230               to_next_worker = &hf->buffer_index[hf->n_vectors];
1231               current_worker_index = next_worker_index;
1232             }
1233
1234           /* enqueue to correct worker thread */
1235           to_next_worker[0] = bi0;
1236           to_next_worker++;
1237           n_left_to_next_worker--;
1238
1239           if (n_left_to_next_worker == 0)
1240             {
1241               hf->n_vectors = VLIB_FRAME_SIZE;
1242               vlib_put_frame_queue_elt (hf);
1243               current_worker_index = ~0;
1244               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
1245               hf = 0;
1246             }
1247         }
1248       else
1249         {
1250           do_handoff = 0;
1251           /* if this is 1st frame */
1252           if (!f)
1253             {
1254               f = vlib_get_frame_to_node (vm, snat_in2out_node.index);
1255               to_next = vlib_frame_vector_args (f);
1256             }
1257
1258           to_next[0] = bi0;
1259           to_next += 1;
1260           f->n_vectors++;
1261         }
1262
1263       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1264                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1265         {
1266           snat_in2out_worker_handoff_trace_t *t =
1267             vlib_add_trace (vm, node, b0, sizeof (*t));
1268           t->next_worker_index = next_worker_index;
1269           t->do_handoff = do_handoff;
1270         }
1271     }
1272
1273   if (f)
1274     vlib_put_frame_to_node (vm, snat_in2out_node.index, f);
1275
1276   if (hf)
1277     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1278
1279   /* Ship frames to the worker nodes */
1280   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
1281     {
1282       if (handoff_queue_elt_by_worker_index[i])
1283         {
1284           hf = handoff_queue_elt_by_worker_index[i];
1285           /*
1286            * It works better to let the handoff node
1287            * rate-adapt, always ship the handoff queue element.
1288            */
1289           if (1 || hf->n_vectors == hf->last_n_vectors)
1290             {
1291               vlib_put_frame_queue_elt (hf);
1292               handoff_queue_elt_by_worker_index[i] = 0;
1293             }
1294           else
1295             hf->last_n_vectors = hf->n_vectors;
1296         }
1297       congested_handoff_queue_by_worker_index[i] =
1298         (vlib_frame_queue_t *) (~0);
1299     }
1300   hf = 0;
1301   current_worker_index = ~0;
1302   return frame->n_vectors;
1303 }
1304
1305 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
1306   .function = snat_in2out_worker_handoff_fn,
1307   .name = "snat-in2out-worker-handoff",
1308   .vector_size = sizeof (u32),
1309   .format_trace = format_snat_in2out_worker_handoff_trace,
1310   .type = VLIB_NODE_TYPE_INTERNAL,
1311   
1312   .n_next_nodes = 1,
1313
1314   .next_nodes = {
1315     [0] = "error-drop",
1316   },
1317 };
1318
1319 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
1320
1321 static inline u32 icmp_in2out_static_map (snat_main_t *sm,
1322                                           vlib_buffer_t * b0,
1323                                           ip4_header_t * ip0,
1324                                           icmp46_header_t * icmp0,
1325                                           u32 sw_if_index0,
1326                                           vlib_node_runtime_t * node,
1327                                           u32 next0,
1328                                           u32 rx_fib_index0)
1329 {
1330   snat_session_key_t key0, sm0;
1331   icmp_echo_header_t *echo0;
1332   u32 new_addr0, old_addr0;
1333   u16 old_id0, new_id0;
1334   ip_csum_t sum0;
1335   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
1336
1337   echo0 = (icmp_echo_header_t *)(icmp0+1);
1338
1339   key0.addr = ip0->src_address;
1340   key0.port = echo0->identifier;
1341   key0.fib_index = rx_fib_index0;
1342   
1343   if (snat_static_mapping_match(sm, key0, &sm0, 0))
1344     {
1345       ip4_address_t * first_int_addr;
1346
1347       if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
1348         {
1349           first_int_addr =
1350             ip4_interface_first_address (sm->ip4_main, sw_if_index0,
1351                                          0 /* just want the address */);
1352           rt->cached_sw_if_index = sw_if_index0;
1353           rt->cached_ip4_address = first_int_addr->as_u32;
1354         }
1355
1356       /* Don't NAT packet aimed at the intfc address */
1357       if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
1358                                 rt->cached_ip4_address))
1359         return next0;
1360
1361       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1362       return SNAT_IN2OUT_NEXT_DROP;
1363     }
1364
1365   new_addr0 = sm0.addr.as_u32;
1366   new_id0 = sm0.port;
1367   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1368   old_addr0 = ip0->src_address.as_u32;
1369   ip0->src_address.as_u32 = new_addr0;
1370   
1371   sum0 = ip0->checksum;
1372   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1373                          ip4_header_t,
1374                          src_address /* changed member */);
1375   ip0->checksum = ip_csum_fold (sum0);
1376   
1377   if (PREDICT_FALSE(new_id0 != echo0->identifier))
1378     {
1379       old_id0 = echo0->identifier;
1380       echo0->identifier = new_id0;
1381
1382       sum0 = icmp0->checksum;
1383       sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
1384                              identifier);
1385       icmp0->checksum = ip_csum_fold (sum0);
1386     }
1387
1388   return next0;
1389 }
1390
1391 static uword
1392 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
1393                                 vlib_node_runtime_t * node,
1394                                 vlib_frame_t * frame)
1395 {
1396   u32 n_left_from, * from, * to_next;
1397   snat_in2out_next_t next_index;
1398   u32 pkts_processed = 0;
1399   snat_main_t * sm = &snat_main;
1400   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
1401   u32 stats_node_index;
1402
1403   stats_node_index = snat_in2out_fast_node.index;
1404
1405   from = vlib_frame_vector_args (frame);
1406   n_left_from = frame->n_vectors;
1407   next_index = node->cached_next_index;
1408
1409   while (n_left_from > 0)
1410     {
1411       u32 n_left_to_next;
1412
1413       vlib_get_next_frame (vm, node, next_index,
1414                            to_next, n_left_to_next);
1415
1416       while (n_left_from > 0 && n_left_to_next > 0)
1417         {
1418           u32 bi0;
1419           vlib_buffer_t * b0;
1420           u32 next0;
1421           u32 sw_if_index0;
1422           ip4_header_t * ip0;
1423           ip_csum_t sum0;
1424           u32 new_addr0, old_addr0;
1425           u16 old_port0, new_port0;
1426           udp_header_t * udp0;
1427           tcp_header_t * tcp0;
1428           icmp46_header_t * icmp0;
1429           snat_session_key_t key0, sm0;
1430           u32 proto0;
1431           u32 rx_fib_index0;
1432
1433           /* speculatively enqueue b0 to the current next frame */
1434           bi0 = from[0];
1435           to_next[0] = bi0;
1436           from += 1;
1437           to_next += 1;
1438           n_left_from -= 1;
1439           n_left_to_next -= 1;
1440
1441           b0 = vlib_get_buffer (vm, bi0);
1442           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1443
1444           ip0 = vlib_buffer_get_current (b0);
1445           udp0 = ip4_next_header (ip0);
1446           tcp0 = (tcp_header_t *) udp0;
1447           icmp0 = (icmp46_header_t *) udp0;
1448
1449           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1450           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1451
1452           proto0 = ~0;
1453           proto0 = (ip0->protocol == IP_PROTOCOL_UDP)
1454             ? SNAT_PROTOCOL_UDP : proto0;
1455           proto0 = (ip0->protocol == IP_PROTOCOL_TCP)
1456             ? SNAT_PROTOCOL_TCP : proto0;
1457           proto0 = (ip0->protocol == IP_PROTOCOL_ICMP)
1458             ? SNAT_PROTOCOL_ICMP : proto0;
1459
1460           if (PREDICT_FALSE (proto0 == ~0))
1461               goto trace0;
1462
1463           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1464             {
1465               ip4_address_t * first_int_addr;
1466               
1467               if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
1468                 {
1469                   first_int_addr = 
1470                     ip4_interface_first_address (sm->ip4_main, sw_if_index0,
1471                                                  0 /* just want the address */);
1472                   rt->cached_sw_if_index = sw_if_index0;
1473                   rt->cached_ip4_address = first_int_addr->as_u32;
1474                 }
1475               
1476               /* Don't NAT packet aimed at the intfc address */
1477               if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
1478                                 rt->cached_ip4_address))
1479                 goto trace0;
1480
1481               next0 = icmp_in2out_static_map
1482                 (sm, b0, ip0, icmp0, sw_if_index0, node, next0, rx_fib_index0);
1483               goto trace0;
1484             }
1485
1486           key0.addr = ip0->src_address;
1487           key0.port = udp0->src_port;
1488           key0.fib_index = rx_fib_index0;
1489
1490           if (snat_static_mapping_match(sm, key0, &sm0, 0))
1491             {
1492               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1493               next0= SNAT_IN2OUT_NEXT_DROP;
1494               goto trace0;
1495             }
1496
1497           new_addr0 = sm0.addr.as_u32;
1498           new_port0 = sm0.port;
1499           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1500           old_addr0 = ip0->src_address.as_u32;
1501           ip0->src_address.as_u32 = new_addr0;
1502
1503           sum0 = ip0->checksum;
1504           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1505                                  ip4_header_t,
1506                                  src_address /* changed member */);
1507           ip0->checksum = ip_csum_fold (sum0);
1508
1509           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
1510             {
1511               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1512                 {
1513                   old_port0 = tcp0->ports.src;
1514                   tcp0->ports.src = new_port0;
1515
1516                   sum0 = tcp0->checksum;
1517                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1518                                          ip4_header_t,
1519                                          dst_address /* changed member */);
1520                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
1521                                          ip4_header_t /* cheat */,
1522                                          length /* changed member */);
1523                   tcp0->checksum = ip_csum_fold(sum0);
1524                 }
1525               else
1526                 {
1527                   old_port0 = udp0->src_port;
1528                   udp0->src_port = new_port0;
1529                   udp0->checksum = 0;
1530                 }
1531             }
1532           else
1533             {
1534               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1535                 {
1536                   sum0 = tcp0->checksum;
1537                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1538                                          ip4_header_t,
1539                                          dst_address /* changed member */);
1540                   tcp0->checksum = ip_csum_fold(sum0);
1541                 }
1542             }
1543
1544           /* Hairpinning */
1545           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1546
1547         trace0:
1548           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1549                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1550             {
1551               snat_in2out_trace_t *t =
1552                  vlib_add_trace (vm, node, b0, sizeof (*t));
1553               t->sw_if_index = sw_if_index0;
1554               t->next_index = next0;
1555             }
1556
1557           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1558
1559           /* verify speculative enqueue, maybe switch current next frame */
1560           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1561                                            to_next, n_left_to_next,
1562                                            bi0, next0);
1563         }
1564
1565       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1566     }
1567
1568   vlib_node_increment_counter (vm, stats_node_index,
1569                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1570                                pkts_processed);
1571   return frame->n_vectors;
1572 }
1573
1574
1575 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
1576   .function = snat_in2out_fast_static_map_fn,
1577   .name = "snat-in2out-fast",
1578   .vector_size = sizeof (u32),
1579   .format_trace = format_snat_in2out_fast_trace,
1580   .type = VLIB_NODE_TYPE_INTERNAL,
1581   
1582   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1583   .error_strings = snat_in2out_error_strings,
1584
1585   .runtime_data_bytes = sizeof (snat_runtime_t),
1586   
1587   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1588
1589   /* edit / add dispositions here */
1590   .next_nodes = {
1591     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1592     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1593     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1594   },
1595 };
1596
1597 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);