SNAT: Multiple inside interfaces (VPP-447)
[vpp.git] / src / plugins / snat / in2out.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
20
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <snat/snat.h>
25 #include <snat/snat_ipfix_logging.h>
26
27 #include <vppinfra/hash.h>
28 #include <vppinfra/error.h>
29 #include <vppinfra/elog.h>
30
31 typedef struct {
32   u32 sw_if_index;
33   u32 next_index;
34   u32 session_index;
35   u32 is_slow_path;
36 } snat_in2out_trace_t;
37
38 typedef struct {
39   u32 next_worker_index;
40   u8 do_handoff;
41 } snat_in2out_worker_handoff_trace_t;
42
43 /* packet trace format function */
44 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
45 {
46   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
47   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
48   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
49   char * tag;
50
51   tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
52   
53   s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
54               t->sw_if_index, t->next_index, t->session_index);
55
56   return s;
57 }
58
59 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
60 {
61   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
62   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
63   snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
64
65   s = format (s, "SNAT_IN2OUT_FAST: sw_if_index %d, next index %d",
66               t->sw_if_index, t->next_index);
67
68   return s;
69 }
70
71 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
72 {
73   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
74   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
75   snat_in2out_worker_handoff_trace_t * t =
76     va_arg (*args, snat_in2out_worker_handoff_trace_t *);
77   char * m;
78
79   m = t->do_handoff ? "next worker" : "same worker";
80   s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
81
82   return s;
83 }
84
85 vlib_node_registration_t snat_in2out_node;
86 vlib_node_registration_t snat_in2out_slowpath_node;
87 vlib_node_registration_t snat_in2out_fast_node;
88 vlib_node_registration_t snat_in2out_worker_handoff_node;
89
90 #define foreach_snat_in2out_error                       \
91 _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
92 _(IN2OUT_PACKETS, "Good in2out packets processed")      \
93 _(OUT_OF_PORTS, "Out of ports")                         \
94 _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
95 _(BAD_ICMP_TYPE, "ICMP type not echo-request")          \
96 _(NO_TRANSLATION, "No translation")
97   
98 typedef enum {
99 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
100   foreach_snat_in2out_error
101 #undef _
102   SNAT_IN2OUT_N_ERROR,
103 } snat_in2out_error_t;
104
105 static char * snat_in2out_error_strings[] = {
106 #define _(sym,string) string,
107   foreach_snat_in2out_error
108 #undef _
109 };
110
111 typedef enum {
112   SNAT_IN2OUT_NEXT_LOOKUP,
113   SNAT_IN2OUT_NEXT_DROP,
114   SNAT_IN2OUT_NEXT_SLOW_PATH,
115   SNAT_IN2OUT_N_NEXT,
116 } snat_in2out_next_t;
117
118 /**
119  * @brief Check if packet should be translated
120  *
121  * Packets aimed at the outside interface, or at an external address with an active
122  * session or a static mapping, should be translated.
123  *
124  * @param sm            SNAT main
125  * @param rt            SNAT runtime data
126  * @param sw_if_index0  index of the inside interface
127  * @param ip0           IPv4 header
128  * @param proto0        SNAT protocol
129  * @param rx_fib_index0 RX FIB index
130  *
131  * @returns 0 if the packet should be translated, otherwise 1
132  */
133 static inline int
134 snat_not_translate (snat_main_t * sm, snat_runtime_t * rt, u32 sw_if_index0,
135                    ip4_header_t * ip0, u32 proto0, u32 rx_fib_index0)
136 {
137   ip4_address_t * first_int_addr;
138   udp_header_t * udp0 = ip4_next_header (ip0);
139   snat_session_key_t key0, sm0;
140   clib_bihash_kv_8_8_t kv0, value0;
141   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
142   fib_prefix_t pfx = {
143     .fp_proto = FIB_PROTOCOL_IP4,
144     .fp_len = 32,
145     .fp_addr = {
146         .ip4.as_u32 = ip0->dst_address.as_u32,
147     },
148   };
149
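      /* Per-node runtime cache of the RX interface's first IPv4 address;
         refreshed only when the RX interface changes */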
150   if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
151     {
152       first_int_addr =
153         ip4_interface_first_address (sm->ip4_main, sw_if_index0,
154                                      0 /* just want the address */);
155       rt->cached_sw_if_index = sw_if_index0;
156       if (first_int_addr)
157         rt->cached_ip4_address = first_int_addr->as_u32;
158       else
159         rt->cached_ip4_address = 0;
160     }
161
162   /* Don't NAT packet aimed at the intfc address */
163   if (PREDICT_FALSE(ip0->dst_address.as_u32 == rt->cached_ip4_address))
164     return 1;
165
166   key0.addr = ip0->dst_address;
167   key0.port = udp0->dst_port;
168   key0.protocol = proto0;
169   key0.fib_index = sm->outside_fib_index;
170   kv0.key = key0.as_u64;
171
172   /* NAT a packet aimed at an external address if */
173   /* it has an active session */
174   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
175     {
176       /* or matches a static mapping */
177       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
178         return 0;
179     }
180   else
181     return 0;
182
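      /* No active session and no static mapping for the destination:
         translate only if it routes out of a configured outside interface */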
183   fei = fib_table_lookup (rx_fib_index0, &pfx);
184   if (FIB_NODE_INDEX_INVALID != fei)
185     {
186       u32 sw_if_index = fib_entry_get_resolving_interface (fei);
187       if (sw_if_index == ~0)
188         {
189           fei = fib_table_lookup (sm->outside_fib_index, &pfx);
190           if (FIB_NODE_INDEX_INVALID != fei)
191             sw_if_index = fib_entry_get_resolving_interface (fei);
192         }
193       snat_interface_t *i;
194       pool_foreach (i, sm->interfaces,
195       ({
196         /* NAT packet aimed at outside interface */
197         if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
198           return 0;
199       }));
200     }
201
202   return 1;
203 }
204
205 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
206                       ip4_header_t * ip0,
207                       u32 rx_fib_index0,
208                       snat_session_key_t * key0,
209                       snat_session_t ** sessionp,
210                       vlib_node_runtime_t * node,
211                       u32 next0,
212                       u32 cpu_index)
213 {
214   snat_user_t *u;
215   snat_user_key_t user_key;
216   snat_session_t *s;
217   clib_bihash_kv_8_8_t kv0, value0;
218   u32 oldest_per_user_translation_list_index;
219   dlist_elt_t * oldest_per_user_translation_list_elt;
220   dlist_elt_t * per_user_translation_list_elt;
221   dlist_elt_t * per_user_list_head_elt;
222   u32 session_index;
223   snat_session_key_t key1;
224   u32 address_index = ~0;
225   u32 outside_fib_index;
226   uword * p;
227   snat_static_mapping_key_t worker_by_out_key;
228
229   p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
230   if (! p)
231     {
232       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
233       return SNAT_IN2OUT_NEXT_DROP;
234     }
235   outside_fib_index = p[0];
236
237   user_key.addr = ip0->src_address;
238   user_key.fib_index = rx_fib_index0;
239   kv0.key = user_key.as_u64;
240   
241   /* Ever heard of the "user" = src ip4 address before? */
242   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
243     {
244       /* no, make a new one */
245       pool_get (sm->per_thread_data[cpu_index].users, u);
246       memset (u, 0, sizeof (*u));
247       u->addr = ip0->src_address;
248
249       pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt);
250
251       u->sessions_per_user_list_head_index = per_user_list_head_elt -
252         sm->per_thread_data[cpu_index].list_pool;
253
254       clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
255                        u->sessions_per_user_list_head_index);
256
257       kv0.value = u - sm->per_thread_data[cpu_index].users;
258
259       /* add user */
260       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
261     }
262   else
263     {
264       u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
265                              value0.value);
266     }
267
268   /* Over quota? Recycle the least recently used dynamic translation */
269   if (u->nsessions >= sm->max_translations_per_user)
270     {
271       /* Remove the oldest dynamic translation */
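          /* Walk the per-user LRU list from the head, re-queueing entries at the
             tail; static sessions are never recycled, so keep going until a
             dynamic session is found */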
272       do {
273           oldest_per_user_translation_list_index =
274             clib_dlist_remove_head (sm->per_thread_data[cpu_index].list_pool,
275                                     u->sessions_per_user_list_head_index);
276
277           ASSERT (oldest_per_user_translation_list_index != ~0);
278
279           /* add it back to the end of the LRU list */
280           clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
281                               u->sessions_per_user_list_head_index,
282                               oldest_per_user_translation_list_index);
283           /* Get the list element */
284           oldest_per_user_translation_list_elt =
285             pool_elt_at_index (sm->per_thread_data[cpu_index].list_pool,
286                                oldest_per_user_translation_list_index);
287
288           /* Get the session index from the list element */
289           session_index = oldest_per_user_translation_list_elt->value;
290
291           /* Get the session */
292           s = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
293                                  session_index);
294       } while (snat_is_session_static (s));
295
296       /* Remove in2out, out2in keys */
297       kv0.key = s->in2out.as_u64;
298       if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
299           clib_warning ("in2out key delete failed");
300       kv0.key = s->out2in.as_u64;
301       if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
302           clib_warning ("out2in key delete failed");
303
304       /* log NAT event */
305       snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
306                                           s->out2in.addr.as_u32,
307                                           s->in2out.protocol,
308                                           s->in2out.port,
309                                           s->out2in.port,
310                                           s->in2out.fib_index);
311
312       snat_free_outside_address_and_port 
313         (sm, &s->out2in, s->outside_address_index);
314       s->outside_address_index = ~0;
315
316       if (snat_alloc_outside_address_and_port (sm, &key1, &address_index))
317         {
318           ASSERT(0);
319
320           b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
321           return SNAT_IN2OUT_NEXT_DROP;
322         }
323       s->outside_address_index = address_index;
324     }
325   else
326     {
327       u8 static_mapping = 1;
328
329       /* First try to match static mapping by local address and port */
330       if (snat_static_mapping_match (sm, *key0, &key1, 0))
331         {
332           static_mapping = 0;
333           /* Try to create dynamic translation */
334           if (snat_alloc_outside_address_and_port (sm, &key1, &address_index))
335             {
336               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
337               return SNAT_IN2OUT_NEXT_DROP;
338             }
339         }
340
341       /* Create a new session */
342       pool_get (sm->per_thread_data[cpu_index].sessions, s);
343       memset (s, 0, sizeof (*s));
344       
345       s->outside_address_index = address_index;
346
347       if (static_mapping)
348         {
349           u->nstaticsessions++;
350           s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
351         }
352       else
353         {
354           u->nsessions++;
355         }
356
357       /* Create list elts */
358       pool_get (sm->per_thread_data[cpu_index].list_pool,
359                 per_user_translation_list_elt);
360       clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
361                        per_user_translation_list_elt -
362                        sm->per_thread_data[cpu_index].list_pool);
363
364       per_user_translation_list_elt->value =
365         s - sm->per_thread_data[cpu_index].sessions;
366       s->per_user_index = per_user_translation_list_elt -
367                           sm->per_thread_data[cpu_index].list_pool;
368       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
369
370       clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
371                           s->per_user_list_head_index,
372                           per_user_translation_list_elt -
373                           sm->per_thread_data[cpu_index].list_pool);
374    }
375   
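      /* Fill in both directions of the session: in2out keeps the original inside
         key, out2in carries the allocated (or statically mapped) external
         address/port in the outside FIB */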
376   s->in2out = *key0;
377   s->out2in = key1;
378   s->out2in.protocol = key0->protocol;
379   s->out2in.fib_index = outside_fib_index;
380   *sessionp = s;
381
382   /* Add to translation hashes */
383   kv0.key = s->in2out.as_u64;
384   kv0.value = s - sm->per_thread_data[cpu_index].sessions;
385   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
386       clib_warning ("in2out key add failed");
387   
388   kv0.key = s->out2in.as_u64;
389   kv0.value = s - sm->per_thread_data[cpu_index].sessions;
390   
391   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
392       clib_warning ("out2in key add failed");
393
394   /* Add to translated packets worker lookup */
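      /* (keyed by the external address and port so out2in traffic for this
         session is handed off to the same worker thread) */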
395   worker_by_out_key.addr = s->out2in.addr;
396   worker_by_out_key.port = s->out2in.port;
397   worker_by_out_key.fib_index = s->out2in.fib_index;
398   kv0.key = worker_by_out_key.as_u64;
399   kv0.value = cpu_index;
400   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
401
402   /* log NAT event */
403   snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
404                                       s->out2in.addr.as_u32,
405                                       s->in2out.protocol,
406                                       s->in2out.port,
407                                       s->out2in.port,
408                                       s->in2out.fib_index);
409   return next0;
410 }
411                       
412 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
413                                          vlib_buffer_t * b0,
414                                          ip4_header_t * ip0,
415                                          icmp46_header_t * icmp0,
416                                          u32 sw_if_index0,
417                                          u32 rx_fib_index0,
418                                          vlib_node_runtime_t * node,
419                                          u32 next0,
420                                          f64 now,
421                                          u32 cpu_index)
422 {
423   snat_session_key_t key0;
424   icmp_echo_header_t *echo0;
425   clib_bihash_kv_8_8_t kv0, value0;
426   snat_session_t * s0;
427   u32 new_addr0, old_addr0;
428   u16 old_id0, new_id0;
429   ip_csum_t sum0;
430   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
431
432   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request))
433     {
434       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
435       return SNAT_IN2OUT_NEXT_DROP;
436     }
437   
438   echo0 = (icmp_echo_header_t *)(icmp0+1);
439
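      /* For ICMP echo, the identifier field takes the place of the L4 port in the
         session key */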
440   key0.addr = ip0->src_address;
441   key0.port = echo0->identifier;
442   key0.protocol = SNAT_PROTOCOL_ICMP;
443   key0.fib_index = rx_fib_index0;
444   
445   kv0.key = key0.as_u64;
446   
447   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
448     {
449       if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
450           IP_PROTOCOL_ICMP, rx_fib_index0)))
451         return next0;
452
453       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
454                          &s0, node, next0, cpu_index);
455       
456       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
457         return next0;
458     }
459   else
460     s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
461                             value0.value);
462
463   old_addr0 = ip0->src_address.as_u32;
464   ip0->src_address = s0->out2in.addr;
465   new_addr0 = ip0->src_address.as_u32;
466   vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
467   
468   sum0 = ip0->checksum;
469   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
470                          ip4_header_t,
471                          src_address /* changed member */);
472   ip0->checksum = ip_csum_fold (sum0);
473   
474   old_id0 = echo0->identifier;
475   new_id0 = s0->out2in.port;
476   echo0->identifier = new_id0;
477
478   sum0 = icmp0->checksum;
479   sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
480                          identifier);
481   icmp0->checksum = ip_csum_fold (sum0);
482
483   /* Accounting */
484   s0->last_heard = now;
485   s0->total_pkts++;
486   s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
487   /* Per-user LRU list maintenance for dynamic translations */
488   if (!snat_is_session_static (s0))
489     {
490       clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
491                          s0->per_user_index);
492       clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
493                           s0->per_user_list_head_index,
494                           s0->per_user_index);
495     }
496
497   return next0;
498 }
499
500 /**
501  * @brief Hairpinning
502  *
503  * Hairpinning allows two endpoints on the internal side of the NAT to
504  * communicate even if they only use each other's external IP addresses
505  * and ports.
506  *
507  * @param sm     SNAT main.
508  * @param b0     Vlib buffer.
509  * @param ip0    IP header.
510  * @param udp0   UDP header.
511  * @param tcp0   TCP header.
512  * @param proto0 SNAT protocol.
513  */
514 static inline void
515 snat_hairpinning (snat_main_t *sm,
516                   vlib_buffer_t * b0,
517                   ip4_header_t * ip0,
518                   udp_header_t * udp0,
519                   tcp_header_t * tcp0,
520                   u32 proto0)
521 {
522   snat_session_key_t key0, sm0;
523   snat_static_mapping_key_t k0;
524   snat_session_t * s0;
525   clib_bihash_kv_8_8_t kv0, value0;
526   ip_csum_t sum0;
527   u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
528   u16 new_dst_port0, old_dst_port0;
529
530   key0.addr = ip0->dst_address;
531   key0.port = udp0->dst_port;
532   key0.protocol = proto0;
533   key0.fib_index = sm->outside_fib_index;
534   kv0.key = key0.as_u64;
535
536   /* Check if destination is in active sessions */
537   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
538     {
539       /* or static mappings */
540       if (!snat_static_mapping_match(sm, key0, &sm0, 1))
541         {
542           new_dst_addr0 = sm0.addr.as_u32;
543           new_dst_port0 = sm0.port;
544           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
545         }
546     }
547   else
548     {
549       si = value0.value;
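          /* Active session found; determine which thread's session pool owns it */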
550       if (sm->num_workers > 1)
551         {
552           k0.addr = ip0->dst_address;
553           k0.port = udp0->dst_port;
554           k0.fib_index = sm->outside_fib_index;
555           kv0.key = k0.as_u64;
556           if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
557             ASSERT(0);
558           else
559             ti = value0.value;
560         }
561       else
562         ti = sm->num_workers;
563
564       s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
565       new_dst_addr0 = s0->in2out.addr.as_u32;
566       new_dst_port0 = s0->in2out.port;
567       vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
568     }
569
570   /* Destination is behind the same NAT, use internal address and port */
571   if (new_dst_addr0)
572     {
573       old_dst_addr0 = ip0->dst_address.as_u32;
574       ip0->dst_address.as_u32 = new_dst_addr0;
575       sum0 = ip0->checksum;
576       sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
577                              ip4_header_t, dst_address);
578       ip0->checksum = ip_csum_fold (sum0);
579
580       old_dst_port0 = tcp0->ports.dst;
581       if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
582         {
583           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
584             {
585               tcp0->ports.dst = new_dst_port0;
586               sum0 = tcp0->checksum;
587               sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
588                                      ip4_header_t, dst_address);
589               sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
590                                      ip4_header_t /* cheat */, length);
591               tcp0->checksum = ip_csum_fold(sum0);
592             }
593           else
594             {
595               udp0->dst_port = new_dst_port0;
596               udp0->checksum = 0;
597             }
598         }
599     }
600 }
601
602 static inline uword
603 snat_in2out_node_fn_inline (vlib_main_t * vm,
604                             vlib_node_runtime_t * node,
605                             vlib_frame_t * frame, int is_slow_path)
606 {
607   u32 n_left_from, * from, * to_next;
608   snat_in2out_next_t next_index;
609   u32 pkts_processed = 0;
610   snat_main_t * sm = &snat_main;
611   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
612   f64 now = vlib_time_now (vm);
613   u32 stats_node_index;
614   u32 cpu_index = os_get_cpu_number ();
615
616   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
617     snat_in2out_node.index;
618
619   from = vlib_frame_vector_args (frame);
620   n_left_from = frame->n_vectors;
621   next_index = node->cached_next_index;
622
623   while (n_left_from > 0)
624     {
625       u32 n_left_to_next;
626
627       vlib_get_next_frame (vm, node, next_index,
628                            to_next, n_left_to_next);
629
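          /* Dual-buffer loop: translate two packets per iteration while
             prefetching the headers and data of the following two */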
630       while (n_left_from >= 4 && n_left_to_next >= 2)
631         {
632           u32 bi0, bi1;
633           vlib_buffer_t * b0, * b1;
634           u32 next0, next1;
635           u32 sw_if_index0, sw_if_index1;
636           ip4_header_t * ip0, * ip1;
637           ip_csum_t sum0, sum1;
638           u32 new_addr0, old_addr0, new_addr1, old_addr1;
639           u16 old_port0, new_port0, old_port1, new_port1;
640           udp_header_t * udp0, * udp1;
641           tcp_header_t * tcp0, * tcp1;
642           icmp46_header_t * icmp0, * icmp1;
643           snat_session_key_t key0, key1;
644           u32 rx_fib_index0, rx_fib_index1;
645           u32 proto0, proto1;
646           snat_session_t * s0 = 0, * s1 = 0;
647           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
648           
649           /* Prefetch next iteration. */
650           {
651             vlib_buffer_t * p2, * p3;
652             
653             p2 = vlib_get_buffer (vm, from[2]);
654             p3 = vlib_get_buffer (vm, from[3]);
655             
656             vlib_prefetch_buffer_header (p2, LOAD);
657             vlib_prefetch_buffer_header (p3, LOAD);
658
659             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
660             CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
661           }
662
663           /* speculatively enqueue b0 and b1 to the current next frame */
664           to_next[0] = bi0 = from[0];
665           to_next[1] = bi1 = from[1];
666           from += 2;
667           to_next += 2;
668           n_left_from -= 2;
669           n_left_to_next -= 2;
670           
671           b0 = vlib_get_buffer (vm, bi0);
672           b1 = vlib_get_buffer (vm, bi1);
673
674           ip0 = vlib_buffer_get_current (b0);
675           udp0 = ip4_next_header (ip0);
676           tcp0 = (tcp_header_t *) udp0;
677           icmp0 = (icmp46_header_t *) udp0;
678
679           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
680           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
681                                    sw_if_index0);
682
683           next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
684
685           proto0 = ~0;
686           proto0 = (ip0->protocol == IP_PROTOCOL_UDP) 
687             ? SNAT_PROTOCOL_UDP : proto0;
688           proto0 = (ip0->protocol == IP_PROTOCOL_TCP) 
689             ? SNAT_PROTOCOL_TCP : proto0;
690           proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) 
691             ? SNAT_PROTOCOL_ICMP : proto0;
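              /* proto0 remains ~0 for protocols SNAT does not handle */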
692
693           /* Next configured feature, probably ip4-lookup */
694           if (is_slow_path)
695             {
696               if (PREDICT_FALSE (proto0 == ~0))
697                 goto trace00;
698               
699               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
700                 {
701                   next0 = icmp_in2out_slow_path 
702                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
703                      node, next0, now, cpu_index);
704                   goto trace00;
705                 }
706             }
707           else
708             {
709               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
710                 {
711                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
712                   goto trace00;
713                 }
714             }
715
716           key0.addr = ip0->src_address;
717           key0.port = udp0->src_port;
718           key0.protocol = proto0;
719           key0.fib_index = rx_fib_index0;
720           
721           kv0.key = key0.as_u64;
722
723           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
724             {
725               if (is_slow_path)
726                 {
727                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
728                       proto0, rx_fib_index0)))
729                     goto trace00;
730
731                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
732                                      &s0, node, next0, cpu_index);
733                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
734                     goto trace00;
735                 }
736               else
737                 {
738                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
739                   goto trace00;
740                 }
741             }
742           else
743             s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
744                                     value0.value);
745
746           old_addr0 = ip0->src_address.as_u32;
747           ip0->src_address = s0->out2in.addr;
748           new_addr0 = ip0->src_address.as_u32;
749           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
750
751           sum0 = ip0->checksum;
752           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
753                                  ip4_header_t,
754                                  src_address /* changed member */);
755           ip0->checksum = ip_csum_fold (sum0);
756
757           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
758             {
759               old_port0 = tcp0->ports.src;
760               tcp0->ports.src = s0->out2in.port;
761               new_port0 = tcp0->ports.src;
762
763               sum0 = tcp0->checksum;
764               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
765                                      ip4_header_t,
766                                      dst_address /* changed member */);
767               sum0 = ip_csum_update (sum0, old_port0, new_port0,
768                                      ip4_header_t /* cheat */,
769                                      length /* changed member */);
770               tcp0->checksum = ip_csum_fold(sum0);
771             }
772           else
773             {
774               old_port0 = udp0->src_port;
775               udp0->src_port = s0->out2in.port;
776               udp0->checksum = 0;
777             }
778
779           /* Hairpinning */
780           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
781
782           /* Accounting */
783           s0->last_heard = now;
784           s0->total_pkts++;
785           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
786           /* Per-user LRU list maintenance for dynamic translation */
787           if (!snat_is_session_static (s0))
788             {
789               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
790                                  s0->per_user_index);
791               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
792                                   s0->per_user_list_head_index,
793                                   s0->per_user_index);
794             }
795         trace00:
796
797           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
798                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
799             {
800               snat_in2out_trace_t *t = 
801                  vlib_add_trace (vm, node, b0, sizeof (*t));
802               t->is_slow_path = is_slow_path;
803               t->sw_if_index = sw_if_index0;
804               t->next_index = next0;
805               t->session_index = ~0;
806               if (s0)
807                 t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
808             }
809
810           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
811
812           ip1 = vlib_buffer_get_current (b1);
813           udp1 = ip4_next_header (ip1);
814           tcp1 = (tcp_header_t *) udp1;
815           icmp1 = (icmp46_header_t *) udp1;
816
817           sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
818           rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
819                                    sw_if_index1);
820
821           proto1 = ~0;
822           proto1 = (ip1->protocol == IP_PROTOCOL_UDP) 
823             ? SNAT_PROTOCOL_UDP : proto1;
824           proto1 = (ip1->protocol == IP_PROTOCOL_TCP) 
825             ? SNAT_PROTOCOL_TCP : proto1;
826           proto1 = (ip1->protocol == IP_PROTOCOL_ICMP) 
827             ? SNAT_PROTOCOL_ICMP : proto1;
828
829           /* Next configured feature, probably ip4-lookup */
830           if (is_slow_path)
831             {
832               if (PREDICT_FALSE (proto1 == ~0))
833                 goto trace01;
834               
835               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
836                 {
837                   next1 = icmp_in2out_slow_path 
838                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
839                      next1, now, cpu_index);
840                   goto trace01;
841                 }
842             }
843           else
844             {
845               if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
846                 {
847                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
848                   goto trace01;
849                 }
850             }
851
852           key1.addr = ip1->src_address;
853           key1.port = udp1->src_port;
854           key1.protocol = proto1;
855           key1.fib_index = rx_fib_index1;
856           
857           kv1.key = key1.as_u64;
858
859           if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
860             {
861               if (is_slow_path)
862                 {
863                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index1, ip1,
864                       proto1, rx_fib_index1)))
865                     goto trace01;
866
867                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
868                                      &s1, node, next1, cpu_index);
869                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
870                     goto trace01;
871                 }
872               else
873                 {
874                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
875                   goto trace01;
876                 }
877             }
878           else
879             s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
880                                     value1.value);
881
882           old_addr1 = ip1->src_address.as_u32;
883           ip1->src_address = s1->out2in.addr;
884           new_addr1 = ip1->src_address.as_u32;
885           vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
886
887           sum1 = ip1->checksum;
888           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
889                                  ip4_header_t,
890                                  src_address /* changed member */);
891           ip1->checksum = ip_csum_fold (sum1);
892
893           if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
894             {
895               old_port1 = tcp1->ports.src;
896               tcp1->ports.src = s1->out2in.port;
897               new_port1 = tcp1->ports.src;
898
899               sum1 = tcp1->checksum;
900               sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
901                                      ip4_header_t,
902                                      dst_address /* changed member */);
903               sum1 = ip_csum_update (sum1, old_port1, new_port1,
904                                      ip4_header_t /* cheat */,
905                                      length /* changed member */);
906               tcp1->checksum = ip_csum_fold(sum1);
907             }
908           else
909             {
910               old_port1 = udp1->src_port;
911               udp1->src_port = s1->out2in.port;
912               udp1->checksum = 0;
913             }
914
915           /* Hairpinning */
916           snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
917
918           /* Accounting */
919           s1->last_heard = now;
920           s1->total_pkts++;
921           s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
922           /* Per-user LRU list maintenance for dynamic translation */
923           if (!snat_is_session_static (s1))
924             {
925               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
926                                  s1->per_user_index);
927               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
928                                   s1->per_user_list_head_index,
929                                   s1->per_user_index);
930             }
931         trace01:
932
933           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
934                             && (b1->flags & VLIB_BUFFER_IS_TRACED))) 
935             {
936               snat_in2out_trace_t *t = 
937                  vlib_add_trace (vm, node, b1, sizeof (*t));
938               t->sw_if_index = sw_if_index1;
939               t->next_index = next1;
940               t->session_index = ~0;
941               if (s1)
942                 t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
943             }
944
945           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
946
947           /* verify speculative enqueues, maybe switch current next frame */
948           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
949                                            to_next, n_left_to_next,
950                                            bi0, bi1, next0, next1);
951         }
952
953       while (n_left_from > 0 && n_left_to_next > 0)
954         {
955           u32 bi0;
956           vlib_buffer_t * b0;
957           u32 next0;
958           u32 sw_if_index0;
959           ip4_header_t * ip0;
960           ip_csum_t sum0;
961           u32 new_addr0, old_addr0;
962           u16 old_port0, new_port0;
963           udp_header_t * udp0;
964           tcp_header_t * tcp0;
965           icmp46_header_t * icmp0;
966           snat_session_key_t key0;
967           u32 rx_fib_index0;
968           u32 proto0;
969           snat_session_t * s0 = 0;
970           clib_bihash_kv_8_8_t kv0, value0;
971           
972           /* speculatively enqueue b0 to the current next frame */
973           bi0 = from[0];
974           to_next[0] = bi0;
975           from += 1;
976           to_next += 1;
977           n_left_from -= 1;
978           n_left_to_next -= 1;
979
980           b0 = vlib_get_buffer (vm, bi0);
981           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
982
983           ip0 = vlib_buffer_get_current (b0);
984           udp0 = ip4_next_header (ip0);
985           tcp0 = (tcp_header_t *) udp0;
986           icmp0 = (icmp46_header_t *) udp0;
987
988           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
989           rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, 
990                                    sw_if_index0);
991
992           proto0 = ~0;
993           proto0 = (ip0->protocol == IP_PROTOCOL_UDP) 
994             ? SNAT_PROTOCOL_UDP : proto0;
995           proto0 = (ip0->protocol == IP_PROTOCOL_TCP) 
996             ? SNAT_PROTOCOL_TCP : proto0;
997           proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) 
998             ? SNAT_PROTOCOL_ICMP : proto0;
999
1000           /* Next configured feature, probably ip4-lookup */
1001           if (is_slow_path)
1002             {
1003               if (PREDICT_FALSE (proto0 == ~0))
1004                 goto trace0;
1005               
1006               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1007                 {
1008                   next0 = icmp_in2out_slow_path 
1009                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1010                      next0, now, cpu_index);
1011                   goto trace0;
1012                 }
1013             }
1014           else
1015             {
1016               if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1017                 {
1018                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1019                   goto trace0;
1020                 }
1021             }
1022
1023           key0.addr = ip0->src_address;
1024           key0.port = udp0->src_port;
1025           key0.protocol = proto0;
1026           key0.fib_index = rx_fib_index0;
1027           
1028           kv0.key = key0.as_u64;
1029
1030           if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
1031             {
1032               if (is_slow_path)
1033                 {
1034                   if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
1035                       proto0, rx_fib_index0)))
1036                     goto trace0;
1037
1038                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1039                                      &s0, node, next0, cpu_index);
1040                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1041                     goto trace0;
1042                 }
1043               else
1044                 {
1045                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1046                   goto trace0;
1047                 }
1048             }
1049           else
1050             s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
1051                                     value0.value);
1052
1053           old_addr0 = ip0->src_address.as_u32;
1054           ip0->src_address = s0->out2in.addr;
1055           new_addr0 = ip0->src_address.as_u32;
1056           vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1057
1058           sum0 = ip0->checksum;
1059           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1060                                  ip4_header_t,
1061                                  src_address /* changed member */);
1062           ip0->checksum = ip_csum_fold (sum0);
1063
1064           if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1065             {
1066               old_port0 = tcp0->ports.src;
1067               tcp0->ports.src = s0->out2in.port;
1068               new_port0 = tcp0->ports.src;
1069
1070               sum0 = tcp0->checksum;
1071               sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1072                                      ip4_header_t,
1073                                      dst_address /* changed member */);
1074               sum0 = ip_csum_update (sum0, old_port0, new_port0,
1075                                      ip4_header_t /* cheat */,
1076                                      length /* changed member */);
1077               tcp0->checksum = ip_csum_fold(sum0);
1078             }
1079           else
1080             {
1081               old_port0 = udp0->src_port;
1082               udp0->src_port = s0->out2in.port;
1083               udp0->checksum = 0;
1084             }
1085
1086           /* Hairpinning */
1087           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1088
1089           /* Accounting */
1090           s0->last_heard = now;
1091           s0->total_pkts++;
1092           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1093           /* Per-user LRU list maintenance for dynamic translation */
1094           if (!snat_is_session_static (s0))
1095             {
1096               clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
1097                                  s0->per_user_index);
1098               clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
1099                                   s0->per_user_list_head_index,
1100                                   s0->per_user_index);
1101             }
1102
1103         trace0:
1104           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
1105                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
1106             {
1107               snat_in2out_trace_t *t = 
1108                  vlib_add_trace (vm, node, b0, sizeof (*t));
1109               t->is_slow_path = is_slow_path;
1110               t->sw_if_index = sw_if_index0;
1111               t->next_index = next0;
1112               t->session_index = ~0;
1113               if (s0)
1114                 t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
1115             }
1116
1117           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1118
1119           /* verify speculative enqueue, maybe switch current next frame */
1120           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1121                                            to_next, n_left_to_next,
1122                                            bi0, next0);
1123         }
1124
1125       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1126     }
1127
1128   vlib_node_increment_counter (vm, stats_node_index, 
1129                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, 
1130                                pkts_processed);
1131   return frame->n_vectors;
1132 }
1133
1134 static uword
1135 snat_in2out_fast_path_fn (vlib_main_t * vm,
1136                           vlib_node_runtime_t * node,
1137                           vlib_frame_t * frame)
1138 {
1139   return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */);
1140 }
1141
1142 VLIB_REGISTER_NODE (snat_in2out_node) = {
1143   .function = snat_in2out_fast_path_fn,
1144   .name = "snat-in2out",
1145   .vector_size = sizeof (u32),
1146   .format_trace = format_snat_in2out_trace,
1147   .type = VLIB_NODE_TYPE_INTERNAL,
1148   
1149   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1150   .error_strings = snat_in2out_error_strings,
1151
1152   .runtime_data_bytes = sizeof (snat_runtime_t),
1153   
1154   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1155
1156   /* edit / add dispositions here */
1157   .next_nodes = {
1158     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1159     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1160     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1161   },
1162 };
1163
1164 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
1165
1166 static uword
1167 snat_in2out_slow_path_fn (vlib_main_t * vm,
1168                           vlib_node_runtime_t * node,
1169                           vlib_frame_t * frame)
1170 {
1171   return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */);
1172 }
1173
1174 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
1175   .function = snat_in2out_slow_path_fn,
1176   .name = "snat-in2out-slowpath",
1177   .vector_size = sizeof (u32),
1178   .format_trace = format_snat_in2out_trace,
1179   .type = VLIB_NODE_TYPE_INTERNAL,
1180   
1181   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1182   .error_strings = snat_in2out_error_strings,
1183
1184   .runtime_data_bytes = sizeof (snat_runtime_t),
1185   
1186   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1187
1188   /* edit / add dispositions here */
1189   .next_nodes = {
1190     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1191     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1192     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1193   },
1194 };
1195
1196 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
1197
1198 static uword
1199 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
1200                                vlib_node_runtime_t * node,
1201                                vlib_frame_t * frame)
1202 {
1203   snat_main_t *sm = &snat_main;
1204   vlib_thread_main_t *tm = vlib_get_thread_main ();
1205   u32 n_left_from, *from, *to_next = 0;
1206   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
1207   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
1208     = 0;
1209   vlib_frame_queue_elt_t *hf = 0;
1210   vlib_frame_t *f = 0;
1211   int i;
1212   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
1213   u32 next_worker_index = 0;
1214   u32 current_worker_index = ~0;
1215   u32 cpu_index = os_get_cpu_number ();
1216
1217   ASSERT (vec_len (sm->workers));
1218
1219   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
1220     {
1221       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
1222
1223       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
1224                                sm->first_worker_index + sm->num_workers - 1,
1225                                (vlib_frame_queue_t *) (~0));
1226     }
1227
1228   from = vlib_frame_vector_args (frame);
1229   n_left_from = frame->n_vectors;
1230
1231   while (n_left_from > 0)
1232     {
1233       u32 bi0;
1234       vlib_buffer_t *b0;
1235       u32 sw_if_index0;
1236       u32 rx_fib_index0;
1237       ip4_header_t * ip0;
1238       snat_user_key_t key0;
1239       clib_bihash_kv_8_8_t kv0, value0;
1240       u8 do_handoff;
1241
1242       bi0 = from[0];
1243       from += 1;
1244       n_left_from -= 1;
1245
1246       b0 = vlib_get_buffer (vm, bi0);
1247
1248       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1249       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1250
1251       ip0 = vlib_buffer_get_current (b0);
1252
1253       key0.addr = ip0->src_address;
1254       key0.fib_index = rx_fib_index0;
1255
1256       kv0.key = key0.as_u64;
1257
1258       /* Ever heard of the "user" before? */
1259       if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0))
1260         {
1261           /* No, assign next available worker (RR) */
1262           next_worker_index = sm->first_worker_index;
1263           if (vec_len (sm->workers))
1264             {
1265               next_worker_index += 
1266                 sm->workers[sm->next_worker++ % _vec_len (sm->workers)];
1267             }
1268
1269           /* add non-translated packets worker lookup */
1270           kv0.value = next_worker_index;
1271           clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
1272         }
1273       else
1274         next_worker_index = value0.value;
1275
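           /* Either hand the packet off to the owning worker's frame queue, or keep
              it on this thread and forward it directly to snat-in2out */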
1276       if (PREDICT_FALSE (next_worker_index != cpu_index))
1277         {
1278           do_handoff = 1;
1279
1280           if (next_worker_index != current_worker_index)
1281             {
1282               if (hf)
1283                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1284
1285               hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
1286                                                       next_worker_index,
1287                                                       handoff_queue_elt_by_worker_index);
1288
1289               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
1290               to_next_worker = &hf->buffer_index[hf->n_vectors];
1291               current_worker_index = next_worker_index;
1292             }
1293
1294           /* enqueue to correct worker thread */
1295           to_next_worker[0] = bi0;
1296           to_next_worker++;
1297           n_left_to_next_worker--;
1298
1299           if (n_left_to_next_worker == 0)
1300             {
1301               hf->n_vectors = VLIB_FRAME_SIZE;
1302               vlib_put_frame_queue_elt (hf);
1303               current_worker_index = ~0;
1304               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
1305               hf = 0;
1306             }
1307         }
1308       else
1309         {
1310           do_handoff = 0;
1311           /* if this is the first frame */
1312           if (!f)
1313             {
1314               f = vlib_get_frame_to_node (vm, snat_in2out_node.index);
1315               to_next = vlib_frame_vector_args (f);
1316             }
1317
1318           to_next[0] = bi0;
1319           to_next += 1;
1320           f->n_vectors++;
1321         }
1322
1323       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1324                          && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1325         {
1326           snat_in2out_worker_handoff_trace_t *t =
1327             vlib_add_trace (vm, node, b0, sizeof (*t));
1328           t->next_worker_index = next_worker_index;
1329           t->do_handoff = do_handoff;
1330         }
1331     }
1332
1333   if (f)
1334     vlib_put_frame_to_node (vm, snat_in2out_node.index, f);
1335
1336   if (hf)
1337     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1338
1339   /* Ship frames to the worker nodes */
1340   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
1341     {
1342       if (handoff_queue_elt_by_worker_index[i])
1343         {
1344           hf = handoff_queue_elt_by_worker_index[i];
1345           /*
1346            * It works better to let the handoff node
1347            * rate-adapt, always ship the handoff queue element.
1348            */
1349           if (1 || hf->n_vectors == hf->last_n_vectors)
1350             {
1351               vlib_put_frame_queue_elt (hf);
1352               handoff_queue_elt_by_worker_index[i] = 0;
1353             }
1354           else
1355             hf->last_n_vectors = hf->n_vectors;
1356         }
1357       congested_handoff_queue_by_worker_index[i] =
1358         (vlib_frame_queue_t *) (~0);
1359     }
1360   hf = 0;
1361   current_worker_index = ~0;
1362   return frame->n_vectors;
1363 }
1364
1365 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
1366   .function = snat_in2out_worker_handoff_fn,
1367   .name = "snat-in2out-worker-handoff",
1368   .vector_size = sizeof (u32),
1369   .format_trace = format_snat_in2out_worker_handoff_trace,
1370   .type = VLIB_NODE_TYPE_INTERNAL,
1371   
1372   .n_next_nodes = 1,
1373
1374   .next_nodes = {
1375     [0] = "error-drop",
1376   },
1377 };
1378
1379 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
1380
1381 static inline u32 icmp_in2out_static_map (snat_main_t *sm,
1382                                           vlib_buffer_t * b0,
1383                                           ip4_header_t * ip0,
1384                                           icmp46_header_t * icmp0,
1385                                           u32 sw_if_index0,
1386                                           vlib_node_runtime_t * node,
1387                                           u32 next0,
1388                                           u32 rx_fib_index0)
1389 {
1390   snat_session_key_t key0, sm0;
1391   icmp_echo_header_t *echo0;
1392   u32 new_addr0, old_addr0;
1393   u16 old_id0, new_id0;
1394   ip_csum_t sum0;
1395   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
1396
1397   echo0 = (icmp_echo_header_t *)(icmp0+1);
1398
1399   key0.addr = ip0->src_address;
1400   key0.port = echo0->identifier;
1401   key0.fib_index = rx_fib_index0;
1402   
1403   if (snat_static_mapping_match(sm, key0, &sm0, 0))
1404     {
1405       if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
1406           IP_PROTOCOL_ICMP, rx_fib_index0)))
1407         return next0;
1408
1409       b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1410       return SNAT_IN2OUT_NEXT_DROP;
1411     }
1412
1413   new_addr0 = sm0.addr.as_u32;
1414   new_id0 = sm0.port;
1415   vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1416   old_addr0 = ip0->src_address.as_u32;
1417   ip0->src_address.as_u32 = new_addr0;
1418   
1419   sum0 = ip0->checksum;
1420   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1421                          ip4_header_t,
1422                          src_address /* changed member */);
1423   ip0->checksum = ip_csum_fold (sum0);
1424   
1425   if (PREDICT_FALSE(new_id0 != echo0->identifier))
1426     {
1427       old_id0 = echo0->identifier;
1428       echo0->identifier = new_id0;
1429
1430       sum0 = icmp0->checksum;
1431       sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
1432                              identifier);
1433       icmp0->checksum = ip_csum_fold (sum0);
1434     }
1435
1436   return next0;
1437 }
1438
1439 static uword
1440 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
1441                                 vlib_node_runtime_t * node,
1442                                 vlib_frame_t * frame)
1443 {
1444   u32 n_left_from, * from, * to_next;
1445   snat_in2out_next_t next_index;
1446   u32 pkts_processed = 0;
1447   snat_main_t * sm = &snat_main;
1448   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
1449   u32 stats_node_index;
1450
1451   stats_node_index = snat_in2out_fast_node.index;
1452
1453   from = vlib_frame_vector_args (frame);
1454   n_left_from = frame->n_vectors;
1455   next_index = node->cached_next_index;
1456
1457   while (n_left_from > 0)
1458     {
1459       u32 n_left_to_next;
1460
1461       vlib_get_next_frame (vm, node, next_index,
1462                            to_next, n_left_to_next);
1463
1464       while (n_left_from > 0 && n_left_to_next > 0)
1465         {
1466           u32 bi0;
1467           vlib_buffer_t * b0;
1468           u32 next0;
1469           u32 sw_if_index0;
1470           ip4_header_t * ip0;
1471           ip_csum_t sum0;
1472           u32 new_addr0, old_addr0;
1473           u16 old_port0, new_port0;
1474           udp_header_t * udp0;
1475           tcp_header_t * tcp0;
1476           icmp46_header_t * icmp0;
1477           snat_session_key_t key0, sm0;
1478           u32 proto0;
1479           u32 rx_fib_index0;
1480
1481           /* speculatively enqueue b0 to the current next frame */
1482           bi0 = from[0];
1483           to_next[0] = bi0;
1484           from += 1;
1485           to_next += 1;
1486           n_left_from -= 1;
1487           n_left_to_next -= 1;
1488
1489           b0 = vlib_get_buffer (vm, bi0);
1490           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1491
1492           ip0 = vlib_buffer_get_current (b0);
1493           udp0 = ip4_next_header (ip0);
1494           tcp0 = (tcp_header_t *) udp0;
1495           icmp0 = (icmp46_header_t *) udp0;
1496
1497           sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1498           rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1499
1500           proto0 = ~0;
1501           proto0 = (ip0->protocol == IP_PROTOCOL_UDP)
1502             ? SNAT_PROTOCOL_UDP : proto0;
1503           proto0 = (ip0->protocol == IP_PROTOCOL_TCP)
1504             ? SNAT_PROTOCOL_TCP : proto0;
1505           proto0 = (ip0->protocol == IP_PROTOCOL_ICMP)
1506             ? SNAT_PROTOCOL_ICMP : proto0;
1507
1508           if (PREDICT_FALSE (proto0 == ~0))
1509               goto trace0;
1510
1511           if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1512             {
1513               if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
1514                   proto0, rx_fib_index0)))
1515                 goto trace0;
1516
1517               next0 = icmp_in2out_static_map
1518                 (sm, b0, ip0, icmp0, sw_if_index0, node, next0, rx_fib_index0);
1519               goto trace0;
1520             }
1521
1522           key0.addr = ip0->src_address;
1523           key0.port = udp0->src_port;
1524           key0.fib_index = rx_fib_index0;
1525
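               /* The fast node translates purely from configured static mappings;
                  no dynamic session state is created */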
1526           if (snat_static_mapping_match(sm, key0, &sm0, 0))
1527             {
1528               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
1529               next0 = SNAT_IN2OUT_NEXT_DROP;
1530               goto trace0;
1531             }
1532
1533           new_addr0 = sm0.addr.as_u32;
1534           new_port0 = sm0.port;
1535           vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1536           old_addr0 = ip0->src_address.as_u32;
1537           ip0->src_address.as_u32 = new_addr0;
1538
1539           sum0 = ip0->checksum;
1540           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1541                                  ip4_header_t,
1542                                  src_address /* changed member */);
1543           ip0->checksum = ip_csum_fold (sum0);
1544
1545           if (PREDICT_FALSE(new_port0 != udp0->src_port))
1546             {
1547               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1548                 {
1549                   old_port0 = tcp0->ports.src;
1550                   tcp0->ports.src = new_port0;
1551
1552                   sum0 = tcp0->checksum;
1553                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1554                                          ip4_header_t,
1555                                          dst_address /* changed member */);
1556                   sum0 = ip_csum_update (sum0, old_port0, new_port0,
1557                                          ip4_header_t /* cheat */,
1558                                          length /* changed member */);
1559                   tcp0->checksum = ip_csum_fold(sum0);
1560                 }
1561               else
1562                 {
1563                   old_port0 = udp0->src_port;
1564                   udp0->src_port = new_port0;
1565                   udp0->checksum = 0;
1566                 }
1567             }
1568           else
1569             {
1570               if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1571                 {
1572                   sum0 = tcp0->checksum;
1573                   sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1574                                          ip4_header_t,
1575                                          dst_address /* changed member */);
1576                   tcp0->checksum = ip_csum_fold(sum0);
1577                 }
1578             }
1579
1580           /* Hairpinning */
1581           snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
1582
1583         trace0:
1584           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1585                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1586             {
1587               snat_in2out_trace_t *t =
1588                  vlib_add_trace (vm, node, b0, sizeof (*t));
1589               t->sw_if_index = sw_if_index0;
1590               t->next_index = next0;
1591             }
1592
1593           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
1594
1595           /* verify speculative enqueue, maybe switch current next frame */
1596           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1597                                            to_next, n_left_to_next,
1598                                            bi0, next0);
1599         }
1600
1601       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1602     }
1603
1604   vlib_node_increment_counter (vm, stats_node_index,
1605                                SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
1606                                pkts_processed);
1607   return frame->n_vectors;
1608 }
1609
1610
1611 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
1612   .function = snat_in2out_fast_static_map_fn,
1613   .name = "snat-in2out-fast",
1614   .vector_size = sizeof (u32),
1615   .format_trace = format_snat_in2out_fast_trace,
1616   .type = VLIB_NODE_TYPE_INTERNAL,
1617   
1618   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
1619   .error_strings = snat_in2out_error_strings,
1620
1621   .runtime_data_bytes = sizeof (snat_runtime_t),
1622   
1623   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
1624
1625   /* edit / add dispositions here */
1626   .next_nodes = {
1627     [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
1628     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
1629     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
1630   },
1631 };
1632
1633 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);