SNAT: fix invalid outside FIB index
[vpp.git] / src / plugins / snat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <snat/snat.h>
25 #include <snat/snat_ipfix_logging.h>
26
27 #include <vppinfra/hash.h>
28 #include <vppinfra/error.h>
29 #include <vppinfra/elog.h>
30
31 typedef struct {
32   u32 sw_if_index;
33   u32 next_index;
34   u32 session_index;
35   u32 is_slow_path;
36 } snat_in2out_trace_t;
37
38 typedef struct {
39   u32 next_worker_index;
40   u8 do_handoff;
41 } snat_in2out_worker_handoff_trace_t;
42
43 /* packet trace format function */
44 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
45 {
46   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
47   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
48   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
49   char * tag;
50
51   tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
52   
53   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
54               t->sw_if_index, t->next_index, t->session_index);
55
56   return s;
57 }
58
59 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
60 {
61   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
62   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
63   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
64
65   s = format (s, "SANT_IN2OUT_FAST: sw_if_index %d, next index %d", 
66               t->sw_if_index, t->next_index);
67
68   return s;
69 }
70
71 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
72 {
73   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
74   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
75   snat_in2out_worker_handoff_trace_t * t =
76     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
77   char * m;
78
79   m = t->do_handoff ? "next worker" : "same worker";
80   s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
81
82   return s;
83 }
84
85 vlib_node_registration_t snat_in2out_node;
86 vlib_node_registration_t snat_in2out_slowpath_node;
87 vlib_node_registration_t snat_in2out_fast_node;
88 vlib_node_registration_t snat_in2out_worker_handoff_node;
89
/*
 * X-macro list of in2out error counters: _(symbol, description).
 * The entry order fixes the enum values and string table order derived
 * from this list, so it must not be changed casually.
 */
#define foreach_snat_in2out_error                               \
  _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")               \
  _(IN2OUT_PACKETS, "Good in2out packets processed")            \
  _(OUT_OF_PORTS, "Out of ports")                               \
  _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")                \
  _(BAD_ICMP_TYPE, "icmp type not echo-request")                \
  _(NO_TRANSLATION, "No translation")
97   
98 typedef enum {
99 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
100   foreach_snat_in2out_error
101 #undef _
102   SNAT_IN2OUT_N_ERROR,
103 } snat_in2out_error_t;
104
105 static char * snat_in2out_error_strings[] = {
106 #define _(sym,string) string,
107   foreach_snat_in2out_error
108 #undef _
109 };
110
/* Next-node dispositions used by the in2out nodes. */
typedef enum
{
  /* Continue with the next configured feature (probably ip4-lookup) */
  SNAT_IN2OUT_NEXT_LOOKUP = 0,
  /* Drop the packet; the buffer error says why */
  SNAT_IN2OUT_NEXT_DROP = 1,
  /* Defer the packet to the slow-path node */
  SNAT_IN2OUT_NEXT_SLOW_PATH = 2,
  /* Number of dispositions */
  SNAT_IN2OUT_N_NEXT = 3,
} snat_in2out_next_t;
117
118 /**
119  * @brief Check if packet should be translated
120  *
121  * Packets aimed at outside interface and external addresss with active session
122  * should be translated.
123  *
124  * @param sm            SNAT main
125  * @param rt            SNAT runtime data
126  * @param sw_if_index0  index of the inside interface
127  * @param ip0           IPv4 header
128  * @param proto0        SNAT protocol
129  * @param rx_fib_index0 RX FIB index
130  *
131  * @returns 0 if packet should be translated otherwise 1
132  */
133 static inline int
134 snat_not_translate (snat_main_t * sm, snat_runtime_t * rt, u32 sw_if_index0,
135                    ip4_header_t * ip0, u32 proto0, u32 rx_fib_index0)
136 {
137   ip4_address_t * first_int_addr;
138   udp_header_t * udp0 = ip4_next_header (ip0);
139   snat_session_key_t key0, sm0;
140   clib_bihash_kv_8_8_t kv0, value0;
141   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
142   fib_prefix_t pfx = {
143     .fp_proto = FIB_PROTOCOL_IP4,
144     .fp_len = 32,
145     .fp_addr = {
146         .ip4.as_u32 = ip0->dst_address.as_u32,
147     },
148   };
149
150   if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
151     {
152       first_int_addr =
153         ip4_interface_first_address (sm->ip4_main, sw_if_index0,
154                                      0 /* just want the address */);
155       rt->cached_sw_if_index = sw_if_index0;
156       if (first_int_addr)
157         rt->cached_ip4_address = first_int_addr->as_u32;
158       else
159         rt->cached_ip4_address = 0;
160     }
161
162   /* Don't NAT packet aimed at the intfc address */
163   if (PREDICT_FALSE(ip0->dst_address.as_u32 == rt->cached_ip4_address))
164     return 1;
165
166   /* If outside FIB index is not resolved yet */
167   if (sm->outside_fib_index == ~0)
168     {
169       sm->outside_fib_index =
170         ip4_fib_table_find_or_create_and_lock (sm->outside_vrf_id);
171     }
172
173   key0.addr = ip0->dst_address;
174   key0.port = udp0->dst_port;
175   key0.protocol = proto0;
176   key0.fib_index = sm->outside_fib_index;
177   kv0.key = key0.as_u64;
178
179   /* NAT packet aimed at external address if */
180   /* has active sessions */
181   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
182     {
183       /* or is static mappings */
184       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
185         return 0;
186     }
187   else
188     return 0;
189
190   fei = fib_table_lookup (rx_fib_index0, &pfx);
191   if (FIB_NODE_INDEX_INVALID != fei)
192     {
193       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
194       if (sw_if_index == ~0)
195         {
196           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
197           if (FIB_NODE_INDEX_INVALID != fei)
198             sw_if_index = fib_entry_get_resolving_interface (fei);
199         }
200       snat_interface_t *i;
201       pool_foreach (i, sm->interfaces,
202       ({
203         /* NAT packet aimed at outside interface */
204         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
205           return 0;
206       }));
207     }
208
209   return 1;
210 }
211
212 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
213                       ip4_header_t * ip0,
214                       u32 rx_fib_index0,
215                       snat_session_key_t * key0,
216                       snat_session_t ** sessionp,
217                       vlib_node_runtime_t * node,
218                       u32 next0,
219                       u32 cpu_index)
220 {
221   snat_user_t *u;
222   snat_user_key_t user_key;
223   snat_session_t *s;
224   clib_bihash_kv_8_8_t kv0, value0;
225   u32 oldest_per_user_translation_list_index;
226   dlist_elt_t * oldest_per_user_translation_list_elt;
227   dlist_elt_t * per_user_translation_list_elt;
228   dlist_elt_t * per_user_list_head_elt;
229   u32 session_index;
230   snat_session_key_t key1;
231   u32 address_index = ~0;
232   u32 outside_fib_index;
233   uword * p;
234   snat_worker_key_t worker_by_out_key;
235
236   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
237   if (! p)
238     {
239       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
240       return SNAT_IN2OUT_NEXT_DROP;
241     }
242   outside_fib_index = p[0];
243
244   key1.protocol = key0->protocol;
245   user_key.addr = ip0->src_address;
246   user_key.fib_index = rx_fib_index0;
247   kv0.key = user_key.as_u64;
248   
249   /* Ever heard of the "user" = src ip4 address before? */
250   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
251     {
252       /* no, make a new one */
253       pool_get (sm->per_thread_data[cpu_index].users, u);
254       memset (u, 0, sizeof (*u));
255       u->addr = ip0->src_address;
256
257       pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt);
258
259       u->sessions_per_user_list_head_index = per_user_list_head_elt -
260         sm->per_thread_data[cpu_index].list_pool;
261
262       clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
263                        u->sessions_per_user_list_head_index);
264
265       kv0.value = u - sm->per_thread_data[cpu_index].users;
266
267       /* add user */
268       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
269     }
270   else
271     {
272       u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
273                              value0.value);
274     }
275
276   /* Over quota? Recycle the least recently used dynamic translation */
277   if (u->nsessions >= sm->max_translations_per_user)
278     {
279       /* Remove the oldest dynamic translation */
280       do {
281           oldest_per_user_translation_list_index =
282             clib_dlist_remove_head (sm->per_thread_data[cpu_index].list_pool,
283                                     u->sessions_per_user_list_head_index);
284
285           ASSERT (oldest_per_user_translation_list_index != ~0);
286
287           /* add it back to the end of the LRU list */
288           clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
289                               u->sessions_per_user_list_head_index,
290                               oldest_per_user_translation_list_index);
291           /* Get the list element */
292           oldest_per_user_translation_list_elt =
293             pool_elt_at_index (sm->per_thread_data[cpu_index].list_pool,
294                                oldest_per_user_translation_list_index);
295
296           /* Get the session index from the list element */
297           session_index = oldest_per_user_translation_list_elt->value;
298
299           /* Get the session */
300           s = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
301                                  session_index);
302       } while (snat_is_session_static (s));
303
304       /* Remove in2out, out2in keys */
305       kv0.key = s->in2out.as_u64;
306       if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
307           clib_warning ("in2out key delete failed");
308       kv0.key = s->out2in.as_u64;
309       if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
310           clib_warning ("out2in key delete failed");
311
312       /* log NAT event */
313       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
314                                           s->out2in.addr.as_u32,
315                                           s->in2out.protocol,
316                                           s->in2out.port,
317                                           s->out2in.port,
318                                           s->in2out.fib_index);
319
320       snat_free_outside_address_and_port 
321         (sm, &s->out2in, s->outside_address_index);
322       s->outside_address_index = ~0;
323
324       if (snat_alloc_outside_address_and_port (sm, &key1, &address_index))
325         {
326           ASSERT(0);
327
328           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
329           return SNAT_IN2OUT_NEXT_DROP;
330         }
331       s->outside_address_index = address_index;
332     }
333   else
334     {
335       u8 static_mapping = 1;
336
337       /* First try to match static mapping by local address and port */
338       if (snat_static_mapping_match (sm, *key0, &key1, 0))
339         {
340           static_mapping = 0;
341           /* Try to create dynamic translation */
342           if (snat_alloc_outside_address_and_port (sm, &key1, &address_index))
343             {
344               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
345               return SNAT_IN2OUT_NEXT_DROP;
346             }
347         }
348
349       /* Create a new session */
350       pool_get (sm->per_thread_data[cpu_index].sessions, s);
351       memset (s, 0, sizeof (*s));
352       
353       s->outside_address_index = address_index;
354
355       if (static_mapping)
356         {
357           u->nstaticsessions++;
358           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
359         }
360       else
361         {
362           u->nsessions++;
363         }
364
365       /* Create list elts */
366       pool_get (sm->per_thread_data[cpu_index].list_pool,
367                 per_user_translation_list_elt);
368       clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
369                        per_user_translation_list_elt -
370                        sm->per_thread_data[cpu_index].list_pool);
371
372       per_user_translation_list_elt->value =
373         s - sm->per_thread_data[cpu_index].sessions;
374       s->per_user_index = per_user_translation_list_elt -
375                           sm->per_thread_data[cpu_index].list_pool;
376       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
377
378       clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
379                           s->per_user_list_head_index,
380                           per_user_translation_list_elt -
381                           sm->per_thread_data[cpu_index].list_pool);
382    }
383   
384   s->in2out = *key0;
385   s->out2in = key1;
386   s->out2in.protocol = key0->protocol;
387   s->out2in.fib_index = outside_fib_index;
388   *sessionp = s;
389
390   /* Add to translation hashes */
391   kv0.key = s->in2out.as_u64;
392   kv0.value = s - sm->per_thread_data[cpu_index].sessions;
393   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
394       clib_warning ("in2out key add failed");
395   
396   kv0.key = s->out2in.as_u64;
397   kv0.value = s - sm->per_thread_data[cpu_index].sessions;
398   
399   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
400       clib_warning ("out2in key add failed");
401
402   /* Add to translated packets worker lookup */
403   worker_by_out_key.addr = s->out2in.addr;
404   worker_by_out_key.port = s->out2in.port;
405   worker_by_out_key.fib_index = s->out2in.fib_index;
406   kv0.key = worker_by_out_key.as_u64;
407   kv0.value = cpu_index;
408   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
409
410   /* log NAT event */
411   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
412                                       s->out2in.addr.as_u32,
413                                       s->in2out.protocol,
414                                       s->in2out.port,
415                                       s->out2in.port,
416                                       s->in2out.fib_index);
417   return next0;
418 }
419                       
420 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
421                                          vlib_buffer_t * b0,
422                                          ip4_header_t * ip0,
423                                          icmp46_header_t * icmp0,
424                                          u32 sw_if_index0,
425                                          u32 rx_fib_index0,
426                                          vlib_node_runtime_t * node,
427                                          u32 next0,
428                                          f64 now,
429                                          u32 cpu_index)
430 {
431   snat_session_key_t key0;
432   icmp_echo_header_t *echo0;
433   clib_bihash_kv_8_8_t kv0, value0;
434   snat_session_t * s0;
435   u32 new_addr0, old_addr0;
436   u16 old_id0, new_id0;
437   ip_csum_t sum0;
438   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
439
440   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request))
441     {
442       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
443       return SNAT_IN2OUT_NEXT_DROP;
444     }
445   
446   echo0 = (icmp_echo_header_t *)(icmp0+1);
447
448   key0.addr = ip0->src_address;
449   key0.port = echo0->identifier;
450   key0.protocol = SNAT_PROTOCOL_ICMP;
451   key0.fib_index = rx_fib_index0;
452   
453   kv0.key = key0.as_u64;
454   
455   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
456     {
457       if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
458           IP_PROTOCOL_ICMP, rx_fib_index0)))
459         return next0;
460
461       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
462                          &s0, node, next0, cpu_index);
463       
464       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
465         return next0;
466     }
467   else
468     s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
469                             value0.value);
470
471   old_addr0 = ip0->src_address.as_u32;
472   ip0->src_address = s0->out2in.addr;
473   new_addr0 = ip0->src_address.as_u32;
474   vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
475   
476   sum0 = ip0->checksum;
477   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
478                          ip4_header_t,
479                          src_address /* changed member */);
480   ip0->checksum = ip_csum_fold (sum0);
481   
482   old_id0 = echo0->identifier;
483   new_id0 = s0->out2in.port;
484   echo0->identifier = new_id0;
485
486   sum0 = icmp0->checksum;
487   sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
488                          identifier);
489   icmp0->checksum = ip_csum_fold (sum0);
490
491   /* Accounting */
492   s0->last_heard = now;
493   s0->total_pkts++;
494   s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
495   /* Per-user LRU list maintenance for dynamic translations */
496   if (!snat_is_session_static (s0))
497     {
498       clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
499                          s0->per_user_index);
500       clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
501                           s0->per_user_list_head_index,
502                           s0->per_user_index);
503     }
504
505   return next0;
506 }
507
508 /**
509  * @brief Hairpinning
510  *
511  * Hairpinning allows two endpoints on the internal side of the NAT to
512  * communicate even if they only use each other's external IP addresses
513  * and ports.
514  *
515  * @param sm     SNAT main.
516  * @param b0     Vlib buffer.
517  * @param ip0    IP header.
518  * @param udp0   UDP header.
519  * @param tcp0   TCP header.
520  * @param proto0 SNAT protocol.
521  */
522 static inline void
523 snat_hairpinning (snat_main_t *sm,
524                   vlib_buffer_t * b0,
525                   ip4_header_t * ip0,
526                   udp_header_t * udp0,
527                   tcp_header_t * tcp0,
528                   u32 proto0)
529 {
530   snat_session_key_t key0, sm0;
531   snat_worker_key_t k0;
532   snat_session_t * s0;
533   clib_bihash_kv_8_8_t kv0, value0;
534   ip_csum_t sum0;
535   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
536   u16 new_dst_port0, old_dst_port0;
537
538   /* If outside FIB index is not resolved yet */
539   if (sm->outside_fib_index == ~0)
540     {
541       sm->outside_fib_index =
542         ip4_fib_table_find_or_create_and_lock (sm->outside_vrf_id);
543     }
544
545   key0.addr = ip0->dst_address;
546   key0.port = udp0->dst_port;
547   key0.protocol = proto0;
548   key0.fib_index = sm->outside_fib_index;
549   kv0.key = key0.as_u64;
550
551   /* Check if destination is in active sessions */
552   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
553     {
554       /* or static mappings */
555       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
556         {
557           new_dst_addr0 = sm0.addr.as_u32;
558           new_dst_port0 = sm0.port;
559           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
560         }
561     }
562   else
563     {
564       si = value0.value;
565       if (sm->num_workers > 1)
566         {
567           k0.addr = ip0->dst_address;
568           k0.port = udp0->dst_port;
569           k0.fib_index = sm->outside_fib_index;
570           kv0.key = k0.as_u64;
571           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
572             ASSERT(0);
573           else
574             ti = value0.value;
575         }
576       else
577         ti = sm->num_workers;
578
579       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
580       new_dst_addr0 = s0->in2out.addr.as_u32;
581       new_dst_port0 = s0->in2out.port;
582       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
583     }
584
585   /* Destination is behind the same NAT, use internal address and port */
586   if (new_dst_addr0)
587     {
588       old_dst_addr0 = ip0->dst_address.as_u32;
589       ip0->dst_address.as_u32 = new_dst_addr0;
590       sum0 = ip0->checksum;
591       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
592                              ip4_header_t, dst_address);
593       ip0->checksum = ip_csum_fold (sum0);
594
595       old_dst_port0 = tcp0->ports.dst;
596       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
597         {
598           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
599             {
600               tcp0->ports.dst = new_dst_port0;
601               sum0 = tcp0->checksum;
602               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
603                                      ip4_header_t, dst_address);
604               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
605                                      ip4_header_t /* cheat */, length);
606               tcp0->checksum = ip_csum_fold(sum0);
607             }
608           else
609             {
610               udp0->dst_port = new_dst_port0;
611               udp0->checksum = 0;
612             }
613         }
614     }
615 }
616
617 static inline uword
618 snat_in2out_node_fn_inline (vlib_main_t * vm,
619                             vlib_node_runtime_t * node,
620                             vlib_frame_t * frame, int is_slow_path)
621 {
622   u32 n_left_from, * from, * to_next;
623   snat_in2out_next_t next_index;
624   u32 pkts_processed = 0;
625   snat_main_t * sm = &snat_main;
626   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
627   f64 now = vlib_time_now (vm);
628   u32 stats_node_index;
629   u32 cpu_index = os_get_cpu_number ();
630
631   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
632     snat_in2out_node.index;
633
634   from = vlib_frame_vector_args (frame);
635   n_left_from = frame->n_vectors;
636   next_index = node->cached_next_index;
637
638   while (n_left_from > 0)
639     {
640       u32 n_left_to_next;
641
642       vlib_get_next_frame (vm, node, next_index,
643                            to_next, n_left_to_next);
644
645       while (n_left_from >= 4 && n_left_to_next >= 2)
646         {
647           u32 bi0, bi1;
648           vlib_buffer_t * b0, * b1;
649           u32 next0, next1;
650           u32 sw_if_index0, sw_if_index1;
651           ip4_header_t * ip0, * ip1;
652           ip_csum_t sum0, sum1;
653           u32 new_addr0, old_addr0, new_addr1, old_addr1;
654           u16 old_port0, new_port0, old_port1, new_port1;
655           udp_header_t * udp0, * udp1;
656           tcp_header_t * tcp0, * tcp1;
657           icmp46_header_t * icmp0, * icmp1;
658           snat_session_key_t key0, key1;
659           u32 rx_fib_index0, rx_fib_index1;
660           u32 proto0, proto1;
661           snat_session_t * s0 = 0, * s1 = 0;
662           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
663           
664           /* Prefetch next iteration. */
665           {
666             vlib_buffer_t * p2, * p3;
667             
668             p2 = vlib_get_buffer (vm, from[2]);
669             p3 = vlib_get_buffer (vm, from[3]);
670             
671             vlib_prefetch_buffer_header (p2, LOAD);
672             vlib_prefetch_buffer_header (p3, LOAD);
673
674             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
675             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
676           }
677
678           /* speculatively enqueue b0 and b1 to the current next frame */
679           to_next[0] = bi0 = from[0];
680           to_next[1] = bi1 = from[1];
681           from += 2;
682           to_next += 2;
683           n_left_from -= 2;
684           n_left_to_next -= 2;
685           
686           b0 = vlib_get_buffer (vm, bi0);
687           b1 = vlib_get_buffer (vm, bi1);
688
689           ip0 = vlib_buffer_get_current (b0);
690           udp0 = ip4_next_header (ip0);
691           tcp0 = (tcp_header_t *) udp0;
692           icmp0 = (icmp46_header_t *) udp0;
693
694           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
695           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
696                                    sw_if_index0);
697
698           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
699
700           proto0 = ip_proto_to_snat_proto (ip0->protocol);
701
702           /* Next configured feature, probably ip4-lookup */
703           if (is_slow_path)
704             {
705               if (PREDICT_FALSE (proto0 == ~0))
706                 goto trace00;
707               
708               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
709                 {
710                   next0 = icmp_in2out_slow_path 
711                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
712                      node, next0, now, cpu_index);
713                   goto trace00;
714                 }
715             }
716           else
717             {
718               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
719                 {
720                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
721                   goto trace00;
722                 }
723             }
724
725           key0.addr = ip0->src_address;
726           key0.port = udp0->src_port;
727           key0.protocol = proto0;
728           key0.fib_index = rx_fib_index0;
729           
730           kv0.key = key0.as_u64;
731
732           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
733             {
734               if (is_slow_path)
735                 {
736                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
737                       proto0, rx_fib_index0)))
738                     goto trace00;
739
740                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
741                                      &s0, node, next0, cpu_index);
742                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
743                     goto trace00;
744                 }
745               else
746                 {
747                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
748                   goto trace00;
749                 }
750             }
751           else
752             s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
753                                     value0.value);
754
755           old_addr0 = ip0->src_address.as_u32;
756           ip0->src_address = s0->out2in.addr;
757           new_addr0 = ip0->src_address.as_u32;
758           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
759
760           sum0 = ip0->checksum;
761           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
762                                  ip4_header_t,
763                                  src_address /* changed member */);
764           ip0->checksum = ip_csum_fold (sum0);
765
766           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
767             {
768               old_port0 = tcp0->ports.src;
769               tcp0->ports.src = s0->out2in.port;
770               new_port0 = tcp0->ports.src;
771
772               sum0 = tcp0->checksum;
773               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
774                                      ip4_header_t,
775                                      dst_address /* changed member */);
776               sum0 = ip_csum_update (sum0, old_port0, new_port0,
777                                      ip4_header_t /* cheat */,
778                                      length /* changed member */);
779               tcp0->checksum = ip_csum_fold(sum0);
780             }
781           else
782             {
783               old_port0 = udp0->src_port;
784               udp0->src_port = s0->out2in.port;
785               udp0->checksum = 0;
786             }
787
788           /* Hairpinning */
789           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
790
791           /* Accounting */
792           s0->last_heard = now;
793           s0->total_pkts++;
794           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
795           /* Per-user LRU list maintenance for dynamic translation */
796           if (!snat_is_session_static (s0))
797             {
798               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
799                                  s0->per_user_index);
800               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
801                                   s0->per_user_list_head_index,
802                                   s0->per_user_index);
803             }
804         trace00:
805
806           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
807                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
808             {
809               snat_in2out_trace_t *t = 
810                  vlib_add_trace (vm, node, b0, sizeof (*t));
811               t->is_slow_path = is_slow_path;
812               t->sw_if_index = sw_if_index0;
813               t->next_index = next0;
814                   t->session_index = ~0;
815               if (s0)
816                 t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
817             }
818
819           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
820
821           ip1 = vlib_buffer_get_current (b1);
822           udp1 = ip4_next_header (ip1);
823           tcp1 = (tcp_header_t *) udp1;
824           icmp1 = (icmp46_header_t *) udp1;
825
826           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
827           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
828                                    sw_if_index1);
829
830           proto1 = ip_proto_to_snat_proto (ip1->protocol);
831
832           /* Next configured feature, probably ip4-lookup */
833           if (is_slow_path)
834             {
835               if (PREDICT_FALSE (proto1 == ~0))
836                 goto trace01;
837               
838               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
839                 {
840                   next1 = icmp_in2out_slow_path 
841                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
842                      next1, now, cpu_index);
843                   goto trace01;
844                 }
845             }
846           else
847             {
848               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
849                 {
850                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
851                   goto trace01;
852                 }
853             }
854
855           key1.addr = ip1->src_address;
856           key1.port = udp1->src_port;
857           key1.protocol = proto1;
858           key1.fib_index = rx_fib_index1;
859           
860           kv1.key = key1.as_u64;
861
862             if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
863             {
864               if (is_slow_path)
865                 {
866                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index1, ip1,
867                       proto1, rx_fib_index1)))
868                     goto trace01;
869
870                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
871                                      &s1, node, next1, cpu_index);
872                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
873                     goto trace01;
874                 }
875               else
876                 {
877                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
878                   goto trace01;
879                 }
880             }
881           else
882             s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
883                                     value1.value);
884
885           old_addr1 = ip1->src_address.as_u32;
886           ip1->src_address = s1->out2in.addr;
887           new_addr1 = ip1->src_address.as_u32;
888           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
889
890           sum1 = ip1->checksum;
891           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
892                                  ip4_header_t,
893                                  src_address /* changed member */);
894           ip1->checksum = ip_csum_fold (sum1);
895
896           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
897             {
898               old_port1 = tcp1->ports.src;
899               tcp1->ports.src = s1->out2in.port;
900               new_port1 = tcp1->ports.src;
901
902               sum1 = tcp1->checksum;
903               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
904                                      ip4_header_t,
905                                      dst_address /* changed member */);
906               sum1 = ip_csum_update (sum1, old_port1, new_port1,
907                                      ip4_header_t /* cheat */,
908                                      length /* changed member */);
909               tcp1->checksum = ip_csum_fold(sum1);
910             }
911           else
912             {
913               old_port1 = udp1->src_port;
914               udp1->src_port = s1->out2in.port;
915               udp1->checksum = 0;
916             }
917
918           /* Hairpinning */
919           snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
920
921           /* Accounting */
922           s1->last_heard = now;
923           s1->total_pkts++;
924           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
925           /* Per-user LRU list maintenance for dynamic translation */
926           if (!snat_is_session_static (s1))
927             {
928               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
929                                  s1->per_user_index);
930               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
931                                   s1->per_user_list_head_index,
932                                   s1->per_user_index);
933             }
934         trace01:
935
936           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
937                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
938             {
939               snat_in2out_trace_t *t = 
940                  vlib_add_trace (vm, node, b1, sizeof (*t));
941               t->sw_if_index = sw_if_index1;
942               t->next_index = next1;
943               t->session_index = ~0;
944               if (s1)
945                 t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
946             }
947
948           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
949
950           /* verify speculative enqueues, maybe switch current next frame */
951           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
952                                            to_next, n_left_to_next,
953                                            bi0, bi1, next0, next1);
954         }
955
956       while (n_left_from > 0 && n_left_to_next > 0)
957         {
958           u32 bi0;
959           vlib_buffer_t * b0;
960           u32 next0;
961           u32 sw_if_index0;
962           ip4_header_t * ip0;
963           ip_csum_t sum0;
964           u32 new_addr0, old_addr0;
965           u16 old_port0, new_port0;
966           udp_header_t * udp0;
967           tcp_header_t * tcp0;
968           icmp46_header_t * icmp0;
969           snat_session_key_t key0;
970           u32 rx_fib_index0;
971           u32 proto0;
972           snat_session_t * s0 = 0;
973           clib_bihash_kv_8_8_t kv0, value0;
974           
975           /* speculatively enqueue b0 to the current next frame */
976           bi0 = from[0];
977           to_next[0] = bi0;
978           from += 1;
979           to_next += 1;
980           n_left_from -= 1;
981           n_left_to_next -= 1;
982
983           b0 = vlib_get_buffer (vm, bi0);
984           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
985
986           ip0 = vlib_buffer_get_current (b0);
987           udp0 = ip4_next_header (ip0);
988           tcp0 = (tcp_header_t *) udp0;
989           icmp0 = (icmp46_header_t *) udp0;
990
991           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
992           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
993                                    sw_if_index0);
994
995           proto0 = ip_proto_to_snat_proto (ip0->protocol);
996
997           /* Next configured feature, probably ip4-lookup */
998           if (is_slow_path)
999             {
1000               if (PREDICT_FALSE (proto0 == ~0))
1001                 goto trace0;
1002               
1003               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1004                 {
1005                   next0 = icmp_in2out_slow_path 
1006                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1007                      next0, now, cpu_index);
1008                   goto trace0;
1009                 }
1010             }
1011           else
1012             {
1013               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1014                 {
1015                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1016                   goto trace0;
1017                 }
1018             }
1019
1020           key0.addr = ip0->src_address;
1021           key0.port = udp0->src_port;
1022           key0.protocol = proto0;
1023           key0.fib_index = rx_fib_index0;
1024           
1025           kv0.key = key0.as_u64;
1026
1027           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
1028             {
1029               if (is_slow_path)
1030                 {
1031                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
1032                       proto0, rx_fib_index0)))
1033                     goto trace0;
1034
1035                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1036                                      &s0, node, next0, cpu_index);
1037                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1038                     goto trace0;
1039                 }
1040               else
1041                 {
1042                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1043                   goto trace0;
1044                 }
1045             }
1046           else
1047             s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
1048                                     value0.value);
1049
1050           old_addr0 = ip0->src_address.as_u32;
1051           ip0->src_address = s0->out2in.addr;
1052           new_addr0 = ip0->src_address.as_u32;
1053           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1054
1055           sum0 = ip0->checksum;
1056           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1057                                  ip4_header_t,
1058                                  src_address /* changed member */);
1059           ip0->checksum = ip_csum_fold (sum0);
1060
1061           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1062             {
1063               old_port0 = tcp0->ports.src;
1064               tcp0->ports.src = s0->out2in.port;
1065               new_port0 = tcp0->ports.src;
1066
1067               sum0 = tcp0->checksum;
1068               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1069                                      ip4_header_t,
1070                                      dst_address /* changed member */);
1071               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1072                                      ip4_header_t /* cheat */,
1073                                      length /* changed member */);
1074               tcp0->checksum = ip_csum_fold(sum0);
1075             }
1076           else
1077             {
1078               old_port0 = udp0->src_port;
1079               udp0->src_port = s0->out2in.port;
1080               udp0->checksum = 0;
1081             }
1082
1083           /* Hairpinning */
1084           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1085
1086           /* Accounting */
1087           s0->last_heard = now;
1088           s0->total_pkts++;
1089           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1090           /* Per-user LRU list maintenance for dynamic translation */
1091           if (!snat_is_session_static (s0))
1092             {
1093               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
1094                                  s0->per_user_index);
1095               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
1096                                   s0->per_user_list_head_index,
1097                                   s0->per_user_index);
1098             }
1099
1100         trace0:
1101           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1102                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1103             {
1104               snat_in2out_trace_t *t = 
1105                  vlib_add_trace (vm, node, b0, sizeof (*t));
1106               t->is_slow_path = is_slow_path;
1107               t->sw_if_index = sw_if_index0;
1108               t->next_index = next0;
1109                   t->session_index = ~0;
1110               if (s0)
1111                 t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
1112             }
1113
1114           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1115
1116           /* verify speculative enqueue, maybe switch current next frame */
1117           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1118                                            to_next, n_left_to_next,
1119                                            bi0, next0);
1120         }
1121
1122       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1123     }
1124
1125   vlib_node_increment_counter (vm, stats_node_index, 
1126                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, 
1127                                pkts_processed);
1128   return frame->n_vectors;
1129 }
1130
1131 static uword
1132 snat_in2out_fast_path_fn (vlib_main_t * vm,
1133                           vlib_node_runtime_t * node,
1134                           vlib_frame_t * frame)
1135 {
1136   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */);
1137 }
1138
1139 VLIB_REGISTER_NODE (snat_in2out_node) = {
1140   .function = snat_in2out_fast_path_fn,
1141   .name = "snat-in2out",
1142   .vector_size = sizeof (u32),
1143   .format_trace = format_snat_in2out_trace,
1144   .type = VLIB_NODE_TYPE_INTERNAL,
1145   
1146   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1147   .error_strings = snat_in2out_error_strings,
1148
1149   .runtime_data_bytes = sizeof (snat_runtime_t),
1150   
1151   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1152
1153   /* edit / add dispositions here */
1154   .next_nodes = {
1155     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1156     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1157     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1158   },
1159 };
1160
1161 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1162
1163 static uword
1164 snat_in2out_slow_path_fn (vlib_main_t * vm,
1165                           vlib_node_runtime_t * node,
1166                           vlib_frame_t * frame)
1167 {
1168   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */);
1169 }
1170
1171 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1172   .function = snat_in2out_slow_path_fn,
1173   .name = "snat-in2out-slowpath",
1174   .vector_size = sizeof (u32),
1175   .format_trace = format_snat_in2out_trace,
1176   .type = VLIB_NODE_TYPE_INTERNAL,
1177   
1178   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1179   .error_strings = snat_in2out_error_strings,
1180
1181   .runtime_data_bytes = sizeof (snat_runtime_t),
1182   
1183   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1184
1185   /* edit / add dispositions here */
1186   .next_nodes = {
1187     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1188     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1189     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1190   },
1191 };
1192
1193 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
1194
1195 static uword
1196 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
1197                                vlib_node_runtime_t * node,
1198                                vlib_frame_t * frame)
1199 {
1200   snat_main_t *sm = &snat_main;
1201   vlib_thread_main_t *tm = vlib_get_thread_main ();
1202   u32 n_left_from, *from, *to_next = 0;
1203   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
1204   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
1205     = 0;
1206   vlib_frame_queue_elt_t *hf = 0;
1207   vlib_frame_t *f = 0;
1208   int i;
1209   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
1210   u32 next_worker_index = 0;
1211   u32 current_worker_index = ~0;
1212   u32 cpu_index = os_get_cpu_number ();
1213
1214   ASSERT (vec_len (sm->workers));
1215
1216   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
1217     {
1218       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
1219
1220       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
1221                                sm->first_worker_index + sm->num_workers - 1,
1222                                (vlib_frame_queue_t *) (~0));
1223     }
1224
1225   from = vlib_frame_vector_args (frame);
1226   n_left_from = frame->n_vectors;
1227
1228   while (n_left_from > 0)
1229     {
1230       u32 bi0;
1231       vlib_buffer_t *b0;
1232       u32 sw_if_index0;
1233       u32 rx_fib_index0;
1234       ip4_header_t * ip0;
1235       snat_user_key_t key0;
1236       clib_bihash_kv_8_8_t kv0, value0;
1237       u8 do_handoff;
1238
1239       bi0 = from[0];
1240       from += 1;
1241       n_left_from -= 1;
1242
1243       b0 = vlib_get_buffer (vm, bi0);
1244
1245       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1246       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1247
1248       ip0 = vlib_buffer_get_current (b0);
1249
1250       key0.addr = ip0->src_address;
1251       key0.fib_index = rx_fib_index0;
1252
1253       kv0.key = key0.as_u64;
1254
1255       /* Ever heard of of the "user" before? */
1256       if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0))
1257         {
1258           /* No, assign next available worker (RR) */
1259           next_worker_index = sm->first_worker_index;
1260           if (vec_len (sm->workers))
1261             {
1262               next_worker_index += 
1263                 sm->workers[sm->next_worker++ % _vec_len (sm->workers)];
1264             }
1265
1266           /* add non-traslated packets worker lookup */
1267           kv0.value = next_worker_index;
1268           clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
1269         }
1270       else
1271         next_worker_index = value0.value;
1272
1273       if (PREDICT_FALSE (next_worker_index != cpu_index))
1274         {
1275           do_handoff = 1;
1276
1277           if (next_worker_index != current_worker_index)
1278             {
1279               if (hf)
1280                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1281
1282               hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
1283                                                       next_worker_index,
1284                                                       handoff_queue_elt_by_worker_index);
1285
1286               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
1287               to_next_worker = &hf->buffer_index[hf->n_vectors];
1288               current_worker_index = next_worker_index;
1289             }
1290
1291           /* enqueue to correct worker thread */
1292           to_next_worker[0] = bi0;
1293           to_next_worker++;
1294           n_left_to_next_worker--;
1295
1296           if (n_left_to_next_worker == 0)
1297             {
1298               hf->n_vectors = VLIB_FRAME_SIZE;
1299               vlib_put_frame_queue_elt (hf);
1300               current_worker_index = ~0;
1301               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
1302               hf = 0;
1303             }
1304         }
1305       else
1306         {
1307           do_handoff = 0;
1308           /* if this is 1st frame */
1309           if (!f)
1310             {
1311               f = vlib_get_frame_to_node (vm, snat_in2out_node.index);
1312               to_next = vlib_frame_vector_args (f);
1313             }
1314
1315           to_next[0] = bi0;
1316           to_next += 1;
1317           f->n_vectors++;
1318         }
1319
1320       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1321                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1322         {
1323           snat_in2out_worker_handoff_trace_t *t =
1324             vlib_add_trace (vm, node, b0, sizeof (*t));
1325           t->next_worker_index = next_worker_index;
1326           t->do_handoff = do_handoff;
1327         }
1328     }
1329
1330   if (f)
1331     vlib_put_frame_to_node (vm, snat_in2out_node.index, f);
1332
1333   if (hf)
1334     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1335
1336   /* Ship frames to the worker nodes */
1337   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
1338     {
1339       if (handoff_queue_elt_by_worker_index[i])
1340         {
1341           hf = handoff_queue_elt_by_worker_index[i];
1342           /*
1343            * It works better to let the handoff node
1344            * rate-adapt, always ship the handoff queue element.
1345            */
1346           if (1 || hf->n_vectors == hf->last_n_vectors)
1347             {
1348               vlib_put_frame_queue_elt (hf);
1349               handoff_queue_elt_by_worker_index[i] = 0;
1350             }
1351           else
1352             hf->last_n_vectors = hf->n_vectors;
1353         }
1354       congested_handoff_queue_by_worker_index[i] =
1355         (vlib_frame_queue_t *) (~0);
1356     }
1357   hf = 0;
1358   current_worker_index = ~0;
1359   return frame->n_vectors;
1360 }
1361
1362 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
1363   .function = snat_in2out_worker_handoff_fn,
1364   .name = "snat-in2out-worker-handoff",
1365   .vector_size = sizeof (u32),
1366   .format_trace = format_snat_in2out_worker_handoff_trace,
1367   .type = VLIB_NODE_TYPE_INTERNAL,
1368   
1369   .n_next_nodes = 1,
1370
1371   .next_nodes = {
1372     [0] = "error-drop",
1373   },
1374 };
1375
1376 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
1377
1378 static inline u32 icmp_in2out_static_map (snat_main_t *sm,
1379                                           vlib_buffer_t * b0,
1380                                           ip4_header_t * ip0,
1381                                           icmp46_header_t * icmp0,
1382                                           u32 sw_if_index0,
1383                                           vlib_node_runtime_t * node,
1384                                           u32 next0,
1385                                           u32 rx_fib_index0)
1386 {
1387   snat_session_key_t key0, sm0;
1388   icmp_echo_header_t *echo0;
1389   u32 new_addr0, old_addr0;
1390   u16 old_id0, new_id0;
1391   ip_csum_t sum0;
1392   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
1393
1394   echo0 = (icmp_echo_header_t *)(icmp0+1);
1395
1396   key0.addr = ip0->src_address;
1397   key0.port = echo0->identifier;
1398   key0.fib_index = rx_fib_index0;
1399   
1400   if (snat_static_mapping_match(sm, key0, &sm0, 0))
1401     {
1402       if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
1403           IP_PROTOCOL_ICMP, rx_fib_index0)))
1404         return next0;
1405
1406       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1407       return SNAT_IN2OUT_NEXT_DROP;
1408     }
1409
1410   new_addr0 = sm0.addr.as_u32;
1411   new_id0 = sm0.port;
1412   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1413   old_addr0 = ip0->src_address.as_u32;
1414   ip0->src_address.as_u32 = new_addr0;
1415   
1416   sum0 = ip0->checksum;
1417   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1418                          ip4_header_t,
1419                          src_address /* changed member */);
1420   ip0->checksum = ip_csum_fold (sum0);
1421   
1422   if (PREDICT_FALSE(new_id0 != echo0->identifier))
1423     {
1424       old_id0 = echo0->identifier;
1425       echo0->identifier = new_id0;
1426
1427       sum0 = icmp0->checksum;
1428       sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
1429                              identifier);
1430       icmp0->checksum = ip_csum_fold (sum0);
1431     }
1432
1433   return next0;
1434 }
1435
1436 static uword
1437 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
1438                                 vlib_node_runtime_t * node,
1439                                 vlib_frame_t * frame)
1440 {
1441   u32 n_left_from, * from, * to_next;
1442   snat_in2out_next_t next_index;
1443   u32 pkts_processed = 0;
1444   snat_main_t * sm = &snat_main;
1445   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
1446   u32 stats_node_index;
1447
1448   stats_node_index = snat_in2out_fast_node.index;
1449
1450   from = vlib_frame_vector_args (frame);
1451   n_left_from = frame->n_vectors;
1452   next_index = node->cached_next_index;
1453
1454   while (n_left_from > 0)
1455     {
1456       u32 n_left_to_next;
1457
1458       vlib_get_next_frame (vm, node, next_index,
1459                            to_next, n_left_to_next);
1460
1461       while (n_left_from > 0 && n_left_to_next > 0)
1462         {
1463           u32 bi0;
1464           vlib_buffer_t * b0;
1465           u32 next0;
1466           u32 sw_if_index0;
1467           ip4_header_t * ip0;
1468           ip_csum_t sum0;
1469           u32 new_addr0, old_addr0;
1470           u16 old_port0, new_port0;
1471           udp_header_t * udp0;
1472           tcp_header_t * tcp0;
1473           icmp46_header_t * icmp0;
1474           snat_session_key_t key0, sm0;
1475           u32 proto0;
1476           u32 rx_fib_index0;
1477
1478           /* speculatively enqueue b0 to the current next frame */
1479           bi0 = from[0];
1480           to_next[0] = bi0;
1481           from += 1;
1482           to_next += 1;
1483           n_left_from -= 1;
1484           n_left_to_next -= 1;
1485
1486           b0 = vlib_get_buffer (vm, bi0);
1487           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1488
1489           ip0 = vlib_buffer_get_current (b0);
1490           udp0 = ip4_next_header (ip0);
1491           tcp0 = (tcp_header_t *) udp0;
1492           icmp0 = (icmp46_header_t *) udp0;
1493
1494           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1495           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1496
1497           proto0 = ip_proto_to_snat_proto (ip0->protocol);
1498
1499           if (PREDICT_FALSE (proto0 == ~0))
1500               goto trace0;
1501
1502           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1503             {
1504               if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
1505                   proto0, rx_fib_index0)))
1506                 goto trace0;
1507
1508               next0 = icmp_in2out_static_map
1509                 (sm, b0, ip0, icmp0, sw_if_index0, node, next0, rx_fib_index0);
1510               goto trace0;
1511             }
1512
1513           key0.addr = ip0->src_address;
1514           key0.port = udp0->src_port;
1515           key0.fib_index = rx_fib_index0;
1516
1517           if (snat_static_mapping_match(sm, key0, &sm0, 0))
1518             {
1519               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1520               next0= SNAT_IN2OUT_NEXT_DROP;
1521               goto trace0;
1522             }
1523
1524           new_addr0 = sm0.addr.as_u32;
1525           new_port0 = sm0.port;
1526           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1527           old_addr0 = ip0->src_address.as_u32;
1528           ip0->src_address.as_u32 = new_addr0;
1529
1530           sum0 = ip0->checksum;
1531           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1532                                  ip4_header_t,
1533                                  src_address /* changed member */);
1534           ip0->checksum = ip_csum_fold (sum0);
1535
1536           if (PREDICT_FALSE(new_port0 != udp0->dst_port))
1537             {
1538               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1539                 {
1540                   old_port0 = tcp0->ports.src;
1541                   tcp0->ports.src = new_port0;
1542
1543                   sum0 = tcp0->checksum;
1544                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1545                                          ip4_header_t,
1546                                          dst_address /* changed member */);
1547                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
1548                                          ip4_header_t /* cheat */,
1549                                          length /* changed member */);
1550                   tcp0->checksum = ip_csum_fold(sum0);
1551                 }
1552               else
1553                 {
1554                   old_port0 = udp0->src_port;
1555                   udp0->src_port = new_port0;
1556                   udp0->checksum = 0;
1557                 }
1558             }
1559           else
1560             {
1561               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1562                 {
1563                   sum0 = tcp0->checksum;
1564                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1565                                          ip4_header_t,
1566                                          dst_address /* changed member */);
1567                   tcp0->checksum = ip_csum_fold(sum0);
1568                 }
1569             }
1570
1571           /* Hairpinning */
1572           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1573
1574         trace0:
1575           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1576                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1577             {
1578               snat_in2out_trace_t *t =
1579                  vlib_add_trace (vm, node, b0, sizeof (*t));
1580               t->sw_if_index = sw_if_index0;
1581               t->next_index = next0;
1582             }
1583
1584           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1585
1586           /* verify speculative enqueue, maybe switch current next frame */
1587           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1588                                            to_next, n_left_to_next,
1589                                            bi0, next0);
1590         }
1591
1592       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1593     }
1594
1595   vlib_node_increment_counter (vm, stats_node_index,
1596                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1597                                pkts_processed);
1598   return frame->n_vectors;
1599 }
1600
1601
1602 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
1603   .function = snat_in2out_fast_static_map_fn,
1604   .name = "snat-in2out-fast",
1605   .vector_size = sizeof (u32),
1606   .format_trace = format_snat_in2out_fast_trace,
1607   .type = VLIB_NODE_TYPE_INTERNAL,
1608   
1609   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1610   .error_strings = snat_in2out_error_strings,
1611
1612   .runtime_data_bytes = sizeof (snat_runtime_t),
1613   
1614   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1615
1616   /* edit / add dispositions here */
1617   .next_nodes = {
1618     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1619     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1620     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1621   },
1622 };
1623
1624 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);