SNAT: fixed crash - interface without IP address (VPP-599)
[vpp.git] src/plugins/snat/in2out.c
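
Context for the fix: ip4_interface_first_address() returns NULL when an interface has no IPv4 address configured, and the in2out nodes cache that address in their runtime data to avoid translating packets aimed at the interface itself. Below is a minimal sketch of the guarded lookup pattern the node runtimes in this file use (illustrative only; the names sm, rt and sw_if_index0 follow the code below):

    ip4_address_t * first_int_addr =
      ip4_interface_first_address (sm->ip4_main, sw_if_index0,
                                   0 /* just want the address */);
    rt->cached_sw_if_index = sw_if_index0;
    if (first_int_addr)
      rt->cached_ip4_address = first_int_addr->as_u32;
    else
      rt->cached_ip4_address = 0; /* no address configured on the interface */
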
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <snat/snat.h>
25
26 #include <vppinfra/hash.h>
27 #include <vppinfra/error.h>
28 #include <vppinfra/elog.h>
29
30 typedef struct {
31   u32 sw_if_index;
32   u32 next_index;
33   u32 session_index;
34   u32 is_slow_path;
35 } snat_in2out_trace_t;
36
37 typedef struct {
38   u32 next_worker_index;
39   u8 do_handoff;
40 } snat_in2out_worker_handoff_trace_t;
41
42 /* packet trace format function */
43 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
44 {
45   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
46   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
47   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
48   char * tag;
49
50   tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
51   
52   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
53               t->sw_if_index, t->next_index, t->session_index);
54
55   return s;
56 }
57
58 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
59 {
60   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
61   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
62   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
63
64   s = format (s, "SNAT_IN2OUT_FAST: sw_if_index %d, next index %d",
65               t->sw_if_index, t->next_index);
66
67   return s;
68 }
69
70 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
71 {
72   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
73   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
74   snat_in2out_worker_handoff_trace_t * t =
75     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
76   char * m;
77
78   m = t->do_handoff ? "next worker" : "same worker";
79   s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
80
81   return s;
82 }
83
84 vlib_node_registration_t snat_in2out_node;
85 vlib_node_registration_t snat_in2out_slowpath_node;
86 vlib_node_registration_t snat_in2out_fast_node;
87 vlib_node_registration_t snat_in2out_worker_handoff_node;
88
89 #define foreach_snat_in2out_error                       \
90 _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
91 _(IN2OUT_PACKETS, "Good in2out packets processed")      \
92 _(OUT_OF_PORTS, "Out of ports")                         \
93 _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
94 _(BAD_ICMP_TYPE, "icmp type not echo-request")          \
95 _(NO_TRANSLATION, "No translation")
96   
97 typedef enum {
98 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
99   foreach_snat_in2out_error
100 #undef _
101   SNAT_IN2OUT_N_ERROR,
102 } snat_in2out_error_t;
103
104 static char * snat_in2out_error_strings[] = {
105 #define _(sym,string) string,
106   foreach_snat_in2out_error
107 #undef _
108 };
109
110 typedef enum {
111   SNAT_IN2OUT_NEXT_LOOKUP,
112   SNAT_IN2OUT_NEXT_DROP,
113   SNAT_IN2OUT_NEXT_SLOW_PATH,
114   SNAT_IN2OUT_N_NEXT,
115 } snat_in2out_next_t;
116
117 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
118                       ip4_header_t * ip0,
119                       u32 rx_fib_index0,
120                       snat_session_key_t * key0,
121                       snat_session_t ** sessionp,
122                       vlib_node_runtime_t * node,
123                       u32 next0,
124                       u32 cpu_index)
125 {
126   snat_user_t *u;
127   snat_user_key_t user_key;
128   snat_session_t *s;
129   clib_bihash_kv_8_8_t kv0, value0;
130   u32 oldest_per_user_translation_list_index;
131   dlist_elt_t * oldest_per_user_translation_list_elt;
132   dlist_elt_t * per_user_translation_list_elt;
133   dlist_elt_t * per_user_list_head_elt;
134   u32 session_index;
135   snat_session_key_t key1;
136   u32 address_index = ~0;
137   u32 outside_fib_index;
138   uword * p;
139   snat_static_mapping_key_t worker_by_out_key;
140
141   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
142   if (! p)
143     {
144       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
145       return SNAT_IN2OUT_NEXT_DROP;
146     }
147   outside_fib_index = p[0];
148
149   user_key.addr = ip0->src_address;
150   user_key.fib_index = rx_fib_index0;
151   kv0.key = user_key.as_u64;
152   
153   /* Ever heard of the "user" = src ip4 address before? */
154   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
155     {
156       /* no, make a new one */
157       pool_get (sm->per_thread_data[cpu_index].users, u);
158       memset (u, 0, sizeof (*u));
159       u->addr = ip0->src_address;
160
161       pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt);
162
163       u->sessions_per_user_list_head_index = per_user_list_head_elt -
164         sm->per_thread_data[cpu_index].list_pool;
165
166       clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
167                        u->sessions_per_user_list_head_index);
168
169       kv0.value = u - sm->per_thread_data[cpu_index].users;
170
171       /* add user */
172       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
173     }
174   else
175     {
176       u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
177                              value0.value);
178     }
179
180   /* Over quota? Recycle the least recently used dynamic translation */
181   if (u->nsessions >= sm->max_translations_per_user)
182     {
183       /* Remove the oldest dynamic translation */
184       do {
185           oldest_per_user_translation_list_index =
186             clib_dlist_remove_head (sm->per_thread_data[cpu_index].list_pool,
187                                     u->sessions_per_user_list_head_index);
188
189           ASSERT (oldest_per_user_translation_list_index != ~0);
190
191           /* add it back to the end of the LRU list */
192           clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
193                               u->sessions_per_user_list_head_index,
194                               oldest_per_user_translation_list_index);
195           /* Get the list element */
196           oldest_per_user_translation_list_elt =
197             pool_elt_at_index (sm->per_thread_data[cpu_index].list_pool,
198                                oldest_per_user_translation_list_index);
199
200           /* Get the session index from the list element */
201           session_index = oldest_per_user_translation_list_elt->value;
202
203           /* Get the session */
204           s = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
205                                  session_index);
206       } while (snat_is_session_static (s));
207
208       /* Remove in2out, out2in keys */
209       kv0.key = s->in2out.as_u64;
210       if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
211           clib_warning ("in2out key delete failed");
212       kv0.key = s->out2in.as_u64;
213       if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
214           clib_warning ("out2in key delete failed");
215
216       snat_free_outside_address_and_port 
217         (sm, &s->out2in, s->outside_address_index);
218       s->outside_address_index = ~0;
219
220       if (snat_alloc_outside_address_and_port (sm, &key1, &address_index))
221         {
222           ASSERT(0);
223
224           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
225           return SNAT_IN2OUT_NEXT_DROP;
226         }
227       s->outside_address_index = address_index;
228     }
229   else
230     {
231       u8 static_mapping = 1;
232
233       /* First try to match static mapping by local address and port */
234       if (snat_static_mapping_match (sm, *key0, &key1, 0))
235         {
236           static_mapping = 0;
237           /* Try to create dynamic translation */
238           if (snat_alloc_outside_address_and_port (sm, &key1, &address_index))
239             {
240               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
241               return SNAT_IN2OUT_NEXT_DROP;
242             }
243         }
244
245       /* Create a new session */
246       pool_get (sm->per_thread_data[cpu_index].sessions, s);
247       memset (s, 0, sizeof (*s));
248       
249       s->outside_address_index = address_index;
250
251       if (static_mapping)
252         {
253           u->nstaticsessions++;
254           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
255         }
256       else
257         {
258           u->nsessions++;
259         }
260
261       /* Create list elts */
262       pool_get (sm->per_thread_data[cpu_index].list_pool,
263                 per_user_translation_list_elt);
264       clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
265                        per_user_translation_list_elt -
266                        sm->per_thread_data[cpu_index].list_pool);
267
268       per_user_translation_list_elt->value =
269         s - sm->per_thread_data[cpu_index].sessions;
270       s->per_user_index = per_user_translation_list_elt -
271                           sm->per_thread_data[cpu_index].list_pool;
272       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
273
274       clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
275                           s->per_user_list_head_index,
276                           per_user_translation_list_elt -
277                           sm->per_thread_data[cpu_index].list_pool);
278     }
279   
280   s->in2out = *key0;
281   s->out2in = key1;
282   s->out2in.protocol = key0->protocol;
283   s->out2in.fib_index = outside_fib_index;
284   *sessionp = s;
285
286   /* Add to translation hashes */
287   kv0.key = s->in2out.as_u64;
288   kv0.value = s - sm->per_thread_data[cpu_index].sessions;
289   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
290       clib_warning ("in2out key add failed");
291   
292   kv0.key = s->out2in.as_u64;
293   kv0.value = s - sm->per_thread_data[cpu_index].sessions;
294   
295   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
296       clib_warning ("out2in key add failed");
297
298   /* Add to translated packets worker lookup */
299   worker_by_out_key.addr = s->out2in.addr;
300   worker_by_out_key.port = s->out2in.port;
301   worker_by_out_key.fib_index = s->out2in.fib_index;
302   kv0.key = worker_by_out_key.as_u64;
303   kv0.value = cpu_index;
304   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
305   return next0;
306 }
307                       
308 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
309                                          vlib_buffer_t * b0,
310                                          ip4_header_t * ip0,
311                                          icmp46_header_t * icmp0,
312                                          u32 sw_if_index0,
313                                          u32 rx_fib_index0,
314                                          vlib_node_runtime_t * node,
315                                          u32 next0,
316                                          f64 now,
317                                          u32 cpu_index)
318 {
319   snat_session_key_t key0;
320   icmp_echo_header_t *echo0;
321   clib_bihash_kv_8_8_t kv0, value0;
322   snat_session_t * s0;
323   u32 new_addr0, old_addr0;
324   u16 old_id0, new_id0;
325   ip_csum_t sum0;
326   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
327
328   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request))
329     {
330       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
331       return SNAT_IN2OUT_NEXT_DROP;
332     }
333   
334   echo0 = (icmp_echo_header_t *)(icmp0+1);
335
336   key0.addr = ip0->src_address;
337   key0.port = echo0->identifier;
338   key0.protocol = SNAT_PROTOCOL_ICMP;
339   key0.fib_index = rx_fib_index0;
340   
341   kv0.key = key0.as_u64;
342   
343   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
344     {
345       ip4_address_t * first_int_addr;
346
347       if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
348         {
349           first_int_addr = 
350             ip4_interface_first_address (sm->ip4_main, sw_if_index0,
351                                          0 /* just want the address */);
352           rt->cached_sw_if_index = sw_if_index0;
353           if (first_int_addr)
354             rt->cached_ip4_address = first_int_addr->as_u32;
355           else
356             rt->cached_ip4_address = 0;
357         }
358       
359       /* Don't NAT packet aimed at the intfc address */
360       if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
361                                 rt->cached_ip4_address))
362         return next0;
363       
364       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
365                          &s0, node, next0, cpu_index);
366       
367       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
368         return next0;
369     }
370   else
371     s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
372                             value0.value);
373
374   old_addr0 = ip0->src_address.as_u32;
375   ip0->src_address = s0->out2in.addr;
376   new_addr0 = ip0->src_address.as_u32;
377   vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
378   
379   sum0 = ip0->checksum;
380   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
381                          ip4_header_t,
382                          src_address /* changed member */);
383   ip0->checksum = ip_csum_fold (sum0);
384   
385   old_id0 = echo0->identifier;
386   new_id0 = s0->out2in.port;
387   echo0->identifier = new_id0;
388
389   sum0 = icmp0->checksum;
390   sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
391                          identifier);
392   icmp0->checksum = ip_csum_fold (sum0);
393
394   /* Accounting */
395   s0->last_heard = now;
396   s0->total_pkts++;
397   s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
398   /* Per-user LRU list maintenance for dynamic translations */
399   if (!snat_is_session_static (s0))
400     {
401       clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
402                          s0->per_user_index);
403       clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
404                           s0->per_user_list_head_index,
405                           s0->per_user_index);
406     }
407
408   return next0;
409 }
410
411 /**
412  * @brief Hairpinning
413  *
414  * Hairpinning allows two endpoints on the internal side of the NAT to
415  * communicate even if they only use each other's external IP addresses
416  * and ports.
417  *
418  * @param sm     SNAT main.
419  * @param b0     Vlib buffer.
420  * @param ip0    IP header.
421  * @param udp0   UDP header.
422  * @param tcp0   TCP header.
423  * @param proto0 SNAT protocol.
424  */
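/*
 * Illustrative example (addresses are hypothetical): host A 10.0.0.2 sends
 * a packet to 192.168.1.1:80, the external address/port mapped to host B
 * 10.0.0.3:8080 behind the same NAT. The out2in session (or static mapping)
 * lookup in this function recognizes the destination, so the packet's
 * destination is rewritten back to 10.0.0.3:8080 and it is forwarded inside
 * the NAT instead of out.
 */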
425 static inline void
426 snat_hairpinning (snat_main_t *sm,
427                   vlib_buffer_t * b0,
428                   ip4_header_t * ip0,
429                   udp_header_t * udp0,
430                   tcp_header_t * tcp0,
431                   u32 proto0)
432 {
433   snat_session_key_t key0, sm0;
434   snat_static_mapping_key_t k0;
435   snat_session_t * s0;
436   clib_bihash_kv_8_8_t kv0, value0;
437   ip_csum_t sum0;
438   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
439   u16 new_dst_port0, old_dst_port0;
440
441   key0.addr = ip0->dst_address;
442   key0.port = udp0->dst_port;
443   key0.protocol = proto0;
444   key0.fib_index = sm->outside_fib_index;
445   kv0.key = key0.as_u64;
446
447   /* Check if destination is in active sessions */
448   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
449     {
450       /* or static mappings */
451       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
452         {
453           new_dst_addr0 = sm0.addr.as_u32;
454           new_dst_port0 = sm0.port;
455           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
456         }
457     }
458   else
459     {
460       si = value0.value;
461       if (sm->num_workers > 1)
462         {
463           k0.addr = ip0->dst_address;
464           k0.port = udp0->dst_port;
465           k0.fib_index = sm->outside_fib_index;
466           kv0.key = k0.as_u64;
467           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
468             ASSERT(0);
469           else
470             ti = value0.value;
471         }
472       else
473         ti = sm->num_workers;
474
475       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
476       new_dst_addr0 = s0->in2out.addr.as_u32;
477       new_dst_port0 = s0->in2out.port;
478       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
479     }
480
481   /* Destination is behind the same NAT, use internal address and port */
482   if (new_dst_addr0)
483     {
484       old_dst_addr0 = ip0->dst_address.as_u32;
485       ip0->dst_address.as_u32 = new_dst_addr0;
486       sum0 = ip0->checksum;
487       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
488                              ip4_header_t, dst_address);
489       ip0->checksum = ip_csum_fold (sum0);
490
491       old_dst_port0 = tcp0->ports.dst;
492       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
493         {
494           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
495             {
496               tcp0->ports.dst = new_dst_port0;
497               sum0 = tcp0->checksum;
498               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
499                                      ip4_header_t, dst_address);
500               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
501                                      ip4_header_t /* cheat */, length);
502               tcp0->checksum = ip_csum_fold(sum0);
503             }
504           else
505             {
506               udp0->dst_port = new_dst_port0;
507               udp0->checksum = 0;
508             }
509         }
510     }
511 }
512
513 static inline uword
514 snat_in2out_node_fn_inline (vlib_main_t * vm,
515                             vlib_node_runtime_t * node,
516                             vlib_frame_t * frame, int is_slow_path)
517 {
518   u32 n_left_from, * from, * to_next;
519   snat_in2out_next_t next_index;
520   u32 pkts_processed = 0;
521   snat_main_t * sm = &snat_main;
522   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
523   f64 now = vlib_time_now (vm);
524   u32 stats_node_index;
525   u32 cpu_index = os_get_cpu_number ();
526
527   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
528     snat_in2out_node.index;
529
530   from = vlib_frame_vector_args (frame);
531   n_left_from = frame->n_vectors;
532   next_index = node->cached_next_index;
533
534   while (n_left_from > 0)
535     {
536       u32 n_left_to_next;
537
538       vlib_get_next_frame (vm, node, next_index,
539                            to_next, n_left_to_next);
540
541       while (n_left_from >= 4 && n_left_to_next >= 2)
542         {
543           u32 bi0, bi1;
544           vlib_buffer_t * b0, * b1;
545           u32 next0, next1;
546           u32 sw_if_index0, sw_if_index1;
547           ip4_header_t * ip0, * ip1;
548           ip_csum_t sum0, sum1;
549           u32 new_addr0, old_addr0, new_addr1, old_addr1;
550           u16 old_port0, new_port0, old_port1, new_port1;
551           udp_header_t * udp0, * udp1;
552           tcp_header_t * tcp0, * tcp1;
553           icmp46_header_t * icmp0, * icmp1;
554           snat_session_key_t key0, key1;
555           u32 rx_fib_index0, rx_fib_index1;
556           u32 proto0, proto1;
557           snat_session_t * s0 = 0, * s1 = 0;
558           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
559           
560           /* Prefetch next iteration. */
561           {
562             vlib_buffer_t * p2, * p3;
563             
564             p2 = vlib_get_buffer (vm, from[2]);
565             p3 = vlib_get_buffer (vm, from[3]);
566             
567             vlib_prefetch_buffer_header (p2, LOAD);
568             vlib_prefetch_buffer_header (p3, LOAD);
569
570             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
571             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
572           }
573
574           /* speculatively enqueue b0 and b1 to the current next frame */
575           to_next[0] = bi0 = from[0];
576           to_next[1] = bi1 = from[1];
577           from += 2;
578           to_next += 2;
579           n_left_from -= 2;
580           n_left_to_next -= 2;
581           
582           b0 = vlib_get_buffer (vm, bi0);
583           b1 = vlib_get_buffer (vm, bi1);
584
585           ip0 = vlib_buffer_get_current (b0);
586           udp0 = ip4_next_header (ip0);
587           tcp0 = (tcp_header_t *) udp0;
588           icmp0 = (icmp46_header_t *) udp0;
589
590           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
591           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
592                                    sw_if_index0);
593
594           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
595
596           proto0 = ~0;
597           proto0 = (ip0->protocol == IP_PROTOCOL_UDP) 
598             ? SNAT_PROTOCOL_UDP : proto0;
599           proto0 = (ip0->protocol == IP_PROTOCOL_TCP) 
600             ? SNAT_PROTOCOL_TCP : proto0;
601           proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) 
602             ? SNAT_PROTOCOL_ICMP : proto0;
603
604           /* Next configured feature, probably ip4-lookup */
605           if (is_slow_path)
606             {
607               if (PREDICT_FALSE (proto0 == ~0))
608                 goto trace00;
609               
610               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
611                 {
612                   next0 = icmp_in2out_slow_path 
613                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
614                      node, next0, now, cpu_index);
615                   goto trace00;
616                 }
617             }
618           else
619             {
620               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
621                 {
622                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
623                   goto trace00;
624                 }
625             }
626
627           key0.addr = ip0->src_address;
628           key0.port = udp0->src_port;
629           key0.protocol = proto0;
630           key0.fib_index = rx_fib_index0;
631           
632           kv0.key = key0.as_u64;
633
634           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
635             {
636               if (is_slow_path)
637                 {
638                   ip4_address_t * first_int_addr;
639                   
640                   if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
641                     {
642                       first_int_addr = 
643                         ip4_interface_first_address (sm->ip4_main, sw_if_index0,
644                                                      0 /* just want the address */);
645                       rt->cached_sw_if_index = sw_if_index0;
646                       if (first_int_addr)
647                         rt->cached_ip4_address = first_int_addr->as_u32;
648                       else
649                         rt->cached_ip4_address = 0;
650                     }
651                   
652                   /* Don't NAT packet aimed at the intfc address */
653                   if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
654                                     rt->cached_ip4_address))
655                     goto trace00;
656                   
657                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
658                                      &s0, node, next0, cpu_index);
659                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
660                     goto trace00;
661                 }
662               else
663                 {
664                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
665                   goto trace00;
666                 }
667             }
668           else
669             s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
670                                     value0.value);
671
672           old_addr0 = ip0->src_address.as_u32;
673           ip0->src_address = s0->out2in.addr;
674           new_addr0 = ip0->src_address.as_u32;
675           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
676
677           sum0 = ip0->checksum;
678           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
679                                  ip4_header_t,
680                                  src_address /* changed member */);
681           ip0->checksum = ip_csum_fold (sum0);
682
683           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
684             {
685               old_port0 = tcp0->ports.src;
686               tcp0->ports.src = s0->out2in.port;
687               new_port0 = tcp0->ports.src;
688
689               sum0 = tcp0->checksum;
690               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
691                                      ip4_header_t,
692                                      dst_address /* changed member */);
693               sum0 = ip_csum_update (sum0, old_port0, new_port0,
694                                      ip4_header_t /* cheat */,
695                                      length /* changed member */);
696               tcp0->checksum = ip_csum_fold(sum0);
697             }
698           else
699             {
700               old_port0 = udp0->src_port;
701               udp0->src_port = s0->out2in.port;
702               udp0->checksum = 0;
703             }
704
705           /* Hairpinning */
706           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
707
708           /* Accounting */
709           s0->last_heard = now;
710           s0->total_pkts++;
711           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
712           /* Per-user LRU list maintenance for dynamic translation */
713           if (!snat_is_session_static (s0))
714             {
715               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
716                                  s0->per_user_index);
717               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
718                                   s0->per_user_list_head_index,
719                                   s0->per_user_index);
720             }
721         trace00:
722
723           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
724                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
725             {
726               snat_in2out_trace_t *t = 
727                  vlib_add_trace (vm, node, b0, sizeof (*t));
728               t->is_slow_path = is_slow_path;
729               t->sw_if_index = sw_if_index0;
730               t->next_index = next0;
731               t->session_index = ~0;
732               if (s0)
733                 t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
734             }
735
736           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
737
738           ip1 = vlib_buffer_get_current (b1);
739           udp1 = ip4_next_header (ip1);
740           tcp1 = (tcp_header_t *) udp1;
741           icmp1 = (icmp46_header_t *) udp1;
742
743           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
744           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
745                                    sw_if_index1);
746
747           proto1 = ~0;
748           proto1 = (ip1->protocol == IP_PROTOCOL_UDP) 
749             ? SNAT_PROTOCOL_UDP : proto1;
750           proto1 = (ip1->protocol == IP_PROTOCOL_TCP) 
751             ? SNAT_PROTOCOL_TCP : proto1;
752           proto1 = (ip1->protocol == IP_PROTOCOL_ICMP) 
753             ? SNAT_PROTOCOL_ICMP : proto1;
754
755           /* Next configured feature, probably ip4-lookup */
756           if (is_slow_path)
757             {
758               if (PREDICT_FALSE (proto1 == ~0))
759                 goto trace01;
760               
761               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
762                 {
763                   next1 = icmp_in2out_slow_path 
764                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
765                      next1, now, cpu_index);
766                   goto trace01;
767                 }
768             }
769           else
770             {
771               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
772                 {
773                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
774                   goto trace01;
775                 }
776             }
777
778           key1.addr = ip1->src_address;
779           key1.port = udp1->src_port;
780           key1.protocol = proto1;
781           key1.fib_index = rx_fib_index1;
782           
783           kv1.key = key1.as_u64;
784
785           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
786             {
787               if (is_slow_path)
788                 {
789                   ip4_address_t * first_int_addr;
790                   
791                   if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index1))
792                     {
793                       first_int_addr = 
794                         ip4_interface_first_address (sm->ip4_main, sw_if_index1,
795                                                      0 /* just want the address */);
796                       rt->cached_sw_if_index = sw_if_index1;
797                       if (first_int_addr)
798                         rt->cached_ip4_address = first_int_addr->as_u32;
799                       else
800                         rt->cached_ip4_address = 0;
801                     }
802                   
803                   /* Don't NAT packet aimed at the intfc address */
804                   if (PREDICT_FALSE(ip1->dst_address.as_u32 ==
805                                     rt->cached_ip4_address))
806                     goto trace01;
807                   
808                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
809                                      &s1, node, next1, cpu_index);
810                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
811                     goto trace01;
812                 }
813               else
814                 {
815                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
816                   goto trace01;
817                 }
818             }
819           else
820             s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
821                                     value1.value);
822
823           old_addr1 = ip1->src_address.as_u32;
824           ip1->src_address = s1->out2in.addr;
825           new_addr1 = ip1->src_address.as_u32;
826           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
827
828           sum1 = ip1->checksum;
829           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
830                                  ip4_header_t,
831                                  src_address /* changed member */);
832           ip1->checksum = ip_csum_fold (sum1);
833
834           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
835             {
836               old_port1 = tcp1->ports.src;
837               tcp1->ports.src = s1->out2in.port;
838               new_port1 = tcp1->ports.src;
839
840               sum1 = tcp1->checksum;
841               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
842                                      ip4_header_t,
843                                      dst_address /* changed member */);
844               sum1 = ip_csum_update (sum1, old_port1, new_port1,
845                                      ip4_header_t /* cheat */,
846                                      length /* changed member */);
847               tcp1->checksum = ip_csum_fold(sum1);
848             }
849           else
850             {
851               old_port1 = udp1->src_port;
852               udp1->src_port = s1->out2in.port;
853               udp1->checksum = 0;
854             }
855
856           /* Hairpinning */
857           snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
858
859           /* Accounting */
860           s1->last_heard = now;
861           s1->total_pkts++;
862           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
863           /* Per-user LRU list maintenance for dynamic translation */
864           if (!snat_is_session_static (s1))
865             {
866               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
867                                  s1->per_user_index);
868               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
869                                   s1->per_user_list_head_index,
870                                   s1->per_user_index);
871             }
872         trace01:
873
874           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
875                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
876             {
877               snat_in2out_trace_t *t = 
878                  vlib_add_trace (vm, node, b1, sizeof (*t));
879               t->sw_if_index = sw_if_index1;
880               t->next_index = next1;
881               t->session_index = ~0;
882               if (s1)
883                 t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
884             }
885
886           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
887
888           /* verify speculative enqueues, maybe switch current next frame */
889           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
890                                            to_next, n_left_to_next,
891                                            bi0, bi1, next0, next1);
892         }
893
894       while (n_left_from > 0 && n_left_to_next > 0)
895         {
896           u32 bi0;
897           vlib_buffer_t * b0;
898           u32 next0;
899           u32 sw_if_index0;
900           ip4_header_t * ip0;
901           ip_csum_t sum0;
902           u32 new_addr0, old_addr0;
903           u16 old_port0, new_port0;
904           udp_header_t * udp0;
905           tcp_header_t * tcp0;
906           icmp46_header_t * icmp0;
907           snat_session_key_t key0;
908           u32 rx_fib_index0;
909           u32 proto0;
910           snat_session_t * s0 = 0;
911           clib_bihash_kv_8_8_t kv0, value0;
912           
913           /* speculatively enqueue b0 to the current next frame */
914           bi0 = from[0];
915           to_next[0] = bi0;
916           from += 1;
917           to_next += 1;
918           n_left_from -= 1;
919           n_left_to_next -= 1;
920
921           b0 = vlib_get_buffer (vm, bi0);
922           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
923
924           ip0 = vlib_buffer_get_current (b0);
925           udp0 = ip4_next_header (ip0);
926           tcp0 = (tcp_header_t *) udp0;
927           icmp0 = (icmp46_header_t *) udp0;
928
929           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
930           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
931                                    sw_if_index0);
932
933           proto0 = ~0;
934           proto0 = (ip0->protocol == IP_PROTOCOL_UDP) 
935             ? SNAT_PROTOCOL_UDP : proto0;
936           proto0 = (ip0->protocol == IP_PROTOCOL_TCP) 
937             ? SNAT_PROTOCOL_TCP : proto0;
938           proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) 
939             ? SNAT_PROTOCOL_ICMP : proto0;
940
941           /* Next configured feature, probably ip4-lookup */
942           if (is_slow_path)
943             {
944               if (PREDICT_FALSE (proto0 == ~0))
945                 goto trace0;
946               
947               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
948                 {
949                   next0 = icmp_in2out_slow_path 
950                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
951                      next0, now, cpu_index);
952                   goto trace0;
953                 }
954             }
955           else
956             {
957               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
958                 {
959                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
960                   goto trace0;
961                 }
962             }
963
964           key0.addr = ip0->src_address;
965           key0.port = udp0->src_port;
966           key0.protocol = proto0;
967           key0.fib_index = rx_fib_index0;
968           
969           kv0.key = key0.as_u64;
970
971           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
972             {
973               if (is_slow_path)
974                 {
975                   ip4_address_t * first_int_addr;
976                   
977                   if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
978                     {
979                       first_int_addr = 
980                         ip4_interface_first_address (sm->ip4_main, sw_if_index0,
981                                                      0 /* just want the address */);
982                       rt->cached_sw_if_index = sw_if_index0;
983                       if (first_int_addr)
984                         rt->cached_ip4_address = first_int_addr->as_u32;
985                       else
986                         rt->cached_ip4_address = 0;
987                     }
988                   
989                   /* Don't NAT packet aimed at the intfc address */
990                   if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
991                                     rt->cached_ip4_address))
992                     goto trace0;
993                   
994                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
995                                      &s0, node, next0, cpu_index);
996                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
997                     goto trace0;
998                 }
999               else
1000                 {
1001                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1002                   goto trace0;
1003                 }
1004             }
1005           else
1006             s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
1007                                     value0.value);
1008
1009           old_addr0 = ip0->src_address.as_u32;
1010           ip0->src_address = s0->out2in.addr;
1011           new_addr0 = ip0->src_address.as_u32;
1012           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1013
1014           sum0 = ip0->checksum;
1015           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1016                                  ip4_header_t,
1017                                  src_address /* changed member */);
1018           ip0->checksum = ip_csum_fold (sum0);
1019
1020           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1021             {
1022               old_port0 = tcp0->ports.src;
1023               tcp0->ports.src = s0->out2in.port;
1024               new_port0 = tcp0->ports.src;
1025
1026               sum0 = tcp0->checksum;
1027               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1028                                      ip4_header_t,
1029                                      dst_address /* changed member */);
1030               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1031                                      ip4_header_t /* cheat */,
1032                                      length /* changed member */);
1033               tcp0->checksum = ip_csum_fold(sum0);
1034             }
1035           else
1036             {
1037               old_port0 = udp0->src_port;
1038               udp0->src_port = s0->out2in.port;
1039               udp0->checksum = 0;
1040             }
1041
1042           /* Hairpinning */
1043           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1044
1045           /* Accounting */
1046           s0->last_heard = now;
1047           s0->total_pkts++;
1048           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1049           /* Per-user LRU list maintenance for dynamic translation */
1050           if (!snat_is_session_static (s0))
1051             {
1052               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
1053                                  s0->per_user_index);
1054               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
1055                                   s0->per_user_list_head_index,
1056                                   s0->per_user_index);
1057             }
1058
1059         trace0:
1060           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1061                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1062             {
1063               snat_in2out_trace_t *t = 
1064                  vlib_add_trace (vm, node, b0, sizeof (*t));
1065               t->is_slow_path = is_slow_path;
1066               t->sw_if_index = sw_if_index0;
1067               t->next_index = next0;
1068               t->session_index = ~0;
1069               if (s0)
1070                 t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
1071             }
1072
1073           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1074
1075           /* verify speculative enqueue, maybe switch current next frame */
1076           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1077                                            to_next, n_left_to_next,
1078                                            bi0, next0);
1079         }
1080
1081       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1082     }
1083
1084   vlib_node_increment_counter (vm, stats_node_index, 
1085                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, 
1086                                pkts_processed);
1087   return frame->n_vectors;
1088 }
1089
1090 static uword
1091 snat_in2out_fast_path_fn (vlib_main_t * vm,
1092                           vlib_node_runtime_t * node,
1093                           vlib_frame_t * frame)
1094 {
1095   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */);
1096 }
1097
1098 VLIB_REGISTER_NODE (snat_in2out_node) = {
1099   .function = snat_in2out_fast_path_fn,
1100   .name = "snat-in2out",
1101   .vector_size = sizeof (u32),
1102   .format_trace = format_snat_in2out_trace,
1103   .type = VLIB_NODE_TYPE_INTERNAL,
1104   
1105   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1106   .error_strings = snat_in2out_error_strings,
1107
1108   .runtime_data_bytes = sizeof (snat_runtime_t),
1109   
1110   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1111
1112   /* edit / add dispositions here */
1113   .next_nodes = {
1114     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1115     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1116     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1117   },
1118 };
1119
1120 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1121
1122 static uword
1123 snat_in2out_slow_path_fn (vlib_main_t * vm,
1124                           vlib_node_runtime_t * node,
1125                           vlib_frame_t * frame)
1126 {
1127   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */);
1128 }
1129
1130 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1131   .function = snat_in2out_slow_path_fn,
1132   .name = "snat-in2out-slowpath",
1133   .vector_size = sizeof (u32),
1134   .format_trace = format_snat_in2out_trace,
1135   .type = VLIB_NODE_TYPE_INTERNAL,
1136   
1137   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1138   .error_strings = snat_in2out_error_strings,
1139
1140   .runtime_data_bytes = sizeof (snat_runtime_t),
1141   
1142   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1143
1144   /* edit / add dispositions here */
1145   .next_nodes = {
1146     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1147     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1148     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1149   },
1150 };
1151
1152 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
1153
1154 static uword
1155 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
1156                                vlib_node_runtime_t * node,
1157                                vlib_frame_t * frame)
1158 {
1159   snat_main_t *sm = &snat_main;
1160   vlib_thread_main_t *tm = vlib_get_thread_main ();
1161   u32 n_left_from, *from, *to_next = 0;
1162   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
1163   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
1164     = 0;
1165   vlib_frame_queue_elt_t *hf = 0;
1166   vlib_frame_t *f = 0;
1167   int i;
1168   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
1169   u32 next_worker_index = 0;
1170   u32 current_worker_index = ~0;
1171   u32 cpu_index = os_get_cpu_number ();
1172
1173   ASSERT (vec_len (sm->workers));
1174
1175   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
1176     {
1177       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
1178
1179       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
1180                                sm->first_worker_index + sm->num_workers - 1,
1181                                (vlib_frame_queue_t *) (~0));
1182     }
1183
1184   from = vlib_frame_vector_args (frame);
1185   n_left_from = frame->n_vectors;
1186
1187   while (n_left_from > 0)
1188     {
1189       u32 bi0;
1190       vlib_buffer_t *b0;
1191       u32 sw_if_index0;
1192       u32 rx_fib_index0;
1193       ip4_header_t * ip0;
1194       snat_user_key_t key0;
1195       clib_bihash_kv_8_8_t kv0, value0;
1196       u8 do_handoff;
1197
1198       bi0 = from[0];
1199       from += 1;
1200       n_left_from -= 1;
1201
1202       b0 = vlib_get_buffer (vm, bi0);
1203
1204       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1205       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1206
1207       ip0 = vlib_buffer_get_current (b0);
1208
1209       key0.addr = ip0->src_address;
1210       key0.fib_index = rx_fib_index0;
1211
1212       kv0.key = key0.as_u64;
1213
1214       /* Ever heard of the "user" before? */
1215       if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0))
1216         {
1217           /* No, assign next available worker (RR) */
1218           next_worker_index = sm->first_worker_index +
1219             sm->workers[sm->next_worker++ % vec_len (sm->workers)];
1220
1221           /* Add non-translated packets worker lookup */
1222           kv0.value = next_worker_index;
1223           clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
1224         }
1225       else
1226         next_worker_index = value0.value;
1227
1228       if (PREDICT_FALSE (next_worker_index != cpu_index))
1229         {
1230           do_handoff = 1;
1231
1232           if (next_worker_index != current_worker_index)
1233             {
1234               if (hf)
1235                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1236
1237               hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
1238                                                       next_worker_index,
1239                                                       handoff_queue_elt_by_worker_index);
1240
1241               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
1242               to_next_worker = &hf->buffer_index[hf->n_vectors];
1243               current_worker_index = next_worker_index;
1244             }
1245
1246           /* enqueue to correct worker thread */
1247           to_next_worker[0] = bi0;
1248           to_next_worker++;
1249           n_left_to_next_worker--;
1250
1251           if (n_left_to_next_worker == 0)
1252             {
1253               hf->n_vectors = VLIB_FRAME_SIZE;
1254               vlib_put_frame_queue_elt (hf);
1255               current_worker_index = ~0;
1256               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
1257               hf = 0;
1258             }
1259         }
1260       else
1261         {
1262           do_handoff = 0;
1263           /* if this is 1st frame */
1264           if (!f)
1265             {
1266               f = vlib_get_frame_to_node (vm, snat_in2out_node.index);
1267               to_next = vlib_frame_vector_args (f);
1268             }
1269
1270           to_next[0] = bi0;
1271           to_next += 1;
1272           f->n_vectors++;
1273         }
1274
1275       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1276                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1277         {
1278           snat_in2out_worker_handoff_trace_t *t =
1279             vlib_add_trace (vm, node, b0, sizeof (*t));
1280           t->next_worker_index = next_worker_index;
1281           t->do_handoff = do_handoff;
1282         }
1283     }
1284
1285   if (f)
1286     vlib_put_frame_to_node (vm, snat_in2out_node.index, f);
1287
1288   if (hf)
1289     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1290
1291   /* Ship frames to the worker nodes */
1292   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
1293     {
1294       if (handoff_queue_elt_by_worker_index[i])
1295         {
1296           hf = handoff_queue_elt_by_worker_index[i];
1297           /*
1298            * It works better to let the handoff node
1299            * rate-adapt, always ship the handoff queue element.
1300            */
1301           if (1 || hf->n_vectors == hf->last_n_vectors)
1302             {
1303               vlib_put_frame_queue_elt (hf);
1304               handoff_queue_elt_by_worker_index[i] = 0;
1305             }
1306           else
1307             hf->last_n_vectors = hf->n_vectors;
1308         }
1309       congested_handoff_queue_by_worker_index[i] =
1310         (vlib_frame_queue_t *) (~0);
1311     }
1312   hf = 0;
1313   current_worker_index = ~0;
1314   return frame->n_vectors;
1315 }
1316
1317 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
1318   .function = snat_in2out_worker_handoff_fn,
1319   .name = "snat-in2out-worker-handoff",
1320   .vector_size = sizeof (u32),
1321   .format_trace = format_snat_in2out_worker_handoff_trace,
1322   .type = VLIB_NODE_TYPE_INTERNAL,
1323   
1324   .n_next_nodes = 1,
1325
1326   .next_nodes = {
1327     [0] = "error-drop",
1328   },
1329 };
1330
1331 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
1332
1333 static inline u32 icmp_in2out_static_map (snat_main_t *sm,
1334                                           vlib_buffer_t * b0,
1335                                           ip4_header_t * ip0,
1336                                           icmp46_header_t * icmp0,
1337                                           u32 sw_if_index0,
1338                                           vlib_node_runtime_t * node,
1339                                           u32 next0,
1340                                           u32 rx_fib_index0)
1341 {
1342   snat_session_key_t key0, sm0;
1343   icmp_echo_header_t *echo0;
1344   u32 new_addr0, old_addr0;
1345   u16 old_id0, new_id0;
1346   ip_csum_t sum0;
1347   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
1348
1349   echo0 = (icmp_echo_header_t *)(icmp0+1);
1350
1351   key0.addr = ip0->src_address;
1352   key0.port = echo0->identifier;
1353   key0.fib_index = rx_fib_index0;
1354   
1355   if (snat_static_mapping_match(sm, key0, &sm0, 0))
1356     {
1357       ip4_address_t * first_int_addr;
1358
1359       if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
1360         {
1361           first_int_addr =
1362             ip4_interface_first_address (sm->ip4_main, sw_if_index0,
1363                                          0 /* just want the address */);
1364           rt->cached_sw_if_index = sw_if_index0;
1365           if (first_int_addr)
1366             rt->cached_ip4_address = first_int_addr->as_u32;
1367           else
1368             rt->cached_ip4_address = 0;
1369         }
1370
1371       /* Don't NAT packet aimed at the intfc address */
1372       if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
1373                                 rt->cached_ip4_address))
1374         return next0;
1375
1376       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1377       return SNAT_IN2OUT_NEXT_DROP;
1378     }
1379
1380   new_addr0 = sm0.addr.as_u32;
1381   new_id0 = sm0.port;
1382   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1383   old_addr0 = ip0->src_address.as_u32;
1384   ip0->src_address.as_u32 = new_addr0;
1385   
1386   sum0 = ip0->checksum;
1387   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1388                          ip4_header_t,
1389                          src_address /* changed member */);
1390   ip0->checksum = ip_csum_fold (sum0);
1391   
1392   if (PREDICT_FALSE(new_id0 != echo0->identifier))
1393     {
1394       old_id0 = echo0->identifier;
1395       echo0->identifier = new_id0;
1396
1397       sum0 = icmp0->checksum;
1398       sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
1399                              identifier);
1400       icmp0->checksum = ip_csum_fold (sum0);
1401     }
1402
1403   return next0;
1404 }
1405
1406 static uword
1407 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
1408                                 vlib_node_runtime_t * node,
1409                                 vlib_frame_t * frame)
1410 {
1411   u32 n_left_from, * from, * to_next;
1412   snat_in2out_next_t next_index;
1413   u32 pkts_processed = 0;
1414   snat_main_t * sm = &snat_main;
1415   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
1416   u32 stats_node_index;
1417
1418   stats_node_index = snat_in2out_fast_node.index;
1419
1420   from = vlib_frame_vector_args (frame);
1421   n_left_from = frame->n_vectors;
1422   next_index = node->cached_next_index;
1423
1424   while (n_left_from > 0)
1425     {
1426       u32 n_left_to_next;
1427
1428       vlib_get_next_frame (vm, node, next_index,
1429                            to_next, n_left_to_next);
1430
1431       while (n_left_from > 0 && n_left_to_next > 0)
1432         {
1433           u32 bi0;
1434           vlib_buffer_t * b0;
1435           u32 next0;
1436           u32 sw_if_index0;
1437           ip4_header_t * ip0;
1438           ip_csum_t sum0;
1439           u32 new_addr0, old_addr0;
1440           u16 old_port0, new_port0;
1441           udp_header_t * udp0;
1442           tcp_header_t * tcp0;
1443           icmp46_header_t * icmp0;
1444           snat_session_key_t key0, sm0;
1445           u32 proto0;
1446           u32 rx_fib_index0;
1447
1448           /* speculatively enqueue b0 to the current next frame */
1449           bi0 = from[0];
1450           to_next[0] = bi0;
1451           from += 1;
1452           to_next += 1;
1453           n_left_from -= 1;
1454           n_left_to_next -= 1;
1455
1456           b0 = vlib_get_buffer (vm, bi0);
1457           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1458
1459           ip0 = vlib_buffer_get_current (b0);
1460           udp0 = ip4_next_header (ip0);
1461           tcp0 = (tcp_header_t *) udp0;
1462           icmp0 = (icmp46_header_t *) udp0;
1463
1464           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1465           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1466
1467           proto0 = ~0;
1468           proto0 = (ip0->protocol == IP_PROTOCOL_UDP)
1469             ? SNAT_PROTOCOL_UDP : proto0;
1470           proto0 = (ip0->protocol == IP_PROTOCOL_TCP)
1471             ? SNAT_PROTOCOL_TCP : proto0;
1472           proto0 = (ip0->protocol == IP_PROTOCOL_ICMP)
1473             ? SNAT_PROTOCOL_ICMP : proto0;
1474
1475           if (PREDICT_FALSE (proto0 == ~0))
1476               goto trace0;
1477
1478           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1479             {
1480               ip4_address_t * first_int_addr;
1481               
1482               if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
1483                 {
1484                   first_int_addr = 
1485                     ip4_interface_first_address (sm->ip4_main, sw_if_index0,
1486                                                  0 /* just want the address */);
1487                   rt->cached_sw_if_index = sw_if_index0;
1488                   rt->cached_ip4_address = first_int_addr ? first_int_addr->as_u32 : 0;
1489                 }
1490               
1491               /* Don't NAT packet aimed at the intfc address */
1492               if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
1493                                 rt->cached_ip4_address))
1494                 goto trace0;
1495
1496               next0 = icmp_in2out_static_map
1497                 (sm, b0, ip0, icmp0, sw_if_index0, node, next0, rx_fib_index0);
1498               goto trace0;
1499             }
1500
1501           key0.addr = ip0->src_address;
1502           key0.port = udp0->src_port;
1503           key0.fib_index = rx_fib_index0;
1504
1505           if (snat_static_mapping_match(sm, key0, &sm0, 0))
1506             {
1507               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1508               next0 = SNAT_IN2OUT_NEXT_DROP;
1509               goto trace0;
1510             }
1511
1512           new_addr0 = sm0.addr.as_u32;
1513           new_port0 = sm0.port;
1514           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1515           old_addr0 = ip0->src_address.as_u32;
1516           ip0->src_address.as_u32 = new_addr0;
1517
1518           sum0 = ip0->checksum;
1519           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1520                                  ip4_header_t,
1521                                  src_address /* changed member */);
1522           ip0->checksum = ip_csum_fold (sum0);
1523
1524           if (PREDICT_FALSE(new_port0 != udp0->src_port))
1525             {
1526               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1527                 {
1528                   old_port0 = tcp0->ports.src;
1529                   tcp0->ports.src = new_port0;
1530
1531                   sum0 = tcp0->checksum;
1532                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1533                                          ip4_header_t,
1534                                          dst_address /* changed member */);
1535                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
1536                                          ip4_header_t /* cheat */,
1537                                          length /* changed member */);
1538                   tcp0->checksum = ip_csum_fold(sum0);
1539                 }
1540               else
1541                 {
1542                   old_port0 = udp0->src_port;
1543                   udp0->src_port = new_port0;
1544                   udp0->checksum = 0;
1545                 }
1546             }
1547           else
1548             {
1549               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1550                 {
1551                   sum0 = tcp0->checksum;
1552                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1553                                          ip4_header_t,
1554                                          dst_address /* changed member */);
1555                   tcp0->checksum = ip_csum_fold(sum0);
1556                 }
1557             }
1558
1559           /* Hairpinning */
1560           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1561
1562         trace0:
1563           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1564                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1565             {
1566               snat_in2out_trace_t *t =
1567                  vlib_add_trace (vm, node, b0, sizeof (*t));
1568               t->sw_if_index = sw_if_index0;
1569               t->next_index = next0;
1570             }
1571
1572           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1573
1574           /* verify speculative enqueue, maybe switch current next frame */
1575           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1576                                            to_next, n_left_to_next,
1577                                            bi0, next0);
1578         }
1579
1580       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1581     }
1582
1583   vlib_node_increment_counter (vm, stats_node_index,
1584                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1585                                pkts_processed);
1586   return frame->n_vectors;
1587 }
1588
1589
1590 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
1591   .function = snat_in2out_fast_static_map_fn,
1592   .name = "snat-in2out-fast",
1593   .vector_size = sizeof (u32),
1594   .format_trace = format_snat_in2out_fast_trace,
1595   .type = VLIB_NODE_TYPE_INTERNAL,
1596   
1597   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1598   .error_strings = snat_in2out_error_strings,
1599
1600   .runtime_data_bytes = sizeof (snat_runtime_t),
1601   
1602   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1603
1604   /* edit / add dispositions here */
1605   .next_nodes = {
1606     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1607     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1608     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1609   },
1610 };
1611
1612 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);