NAT: DS-Lite (VPP-1040)
[vpp.git] / src / plugins / nat / dslite_in2out.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <nat/dslite.h>
16
17 vlib_node_registration_t dslite_in2out_node;
18 vlib_node_registration_t dslite_in2out_slowpath_node;
19
/*
 * Next-node dispositions shared by the fast-path and slow-path nodes.
 * The values index the .next_nodes tables of both node registrations.
 */
typedef enum
{
  DSLITE_IN2OUT_NEXT_IP4_LOOKUP = 0,    /* "ip4-lookup" */
  DSLITE_IN2OUT_NEXT_IP6_LOOKUP = 1,    /* "ip6-lookup" */
  DSLITE_IN2OUT_NEXT_DROP = 2,          /* "error-drop" */
  DSLITE_IN2OUT_NEXT_SLOWPATH = 3,      /* "dslite-in2out-slowpath" */
  DSLITE_IN2OUT_N_NEXT = 4,             /* number of dispositions */
} dslite_in2out_next_t;
28
29 static char *dslite_in2out_error_strings[] = {
30 #define _(sym,string) string,
31   foreach_dslite_error
32 #undef _
33 };
34
35 static u32
36 slow_path (dslite_main_t * dm, dslite_session_key_t * in2out_key,
37            dslite_session_t ** sp, u32 next, u8 * error, u32 thread_index)
38 {
39   dslite_b4_t *b4;
40   clib_bihash_kv_16_8_t b4_kv, b4_value;
41   clib_bihash_kv_24_8_t in2out_kv;
42   clib_bihash_kv_8_8_t out2in_kv;
43   dlist_elt_t *head_elt, *oldest_elt, *elt;
44   u32 oldest_index;
45   dslite_session_t *s;
46   snat_session_key_t out2in_key;
47   u32 address_index;
48
49   out2in_key.protocol = in2out_key->proto;
50   out2in_key.fib_index = 0;
51
52   b4_kv.key[0] = in2out_key->softwire_id.as_u64[0];
53   b4_kv.key[1] = in2out_key->softwire_id.as_u64[1];
54
55   if (clib_bihash_search_16_8
56       (&dm->per_thread_data[thread_index].b4_hash, &b4_kv, &b4_value))
57     {
58       pool_get (dm->per_thread_data[thread_index].b4s, b4);
59       memset (b4, 0, sizeof (*b4));
60       b4->addr.as_u64[0] = in2out_key->softwire_id.as_u64[0];
61       b4->addr.as_u64[1] = in2out_key->softwire_id.as_u64[1];
62
63       pool_get (dm->per_thread_data[thread_index].list_pool, head_elt);
64       b4->sessions_per_b4_list_head_index =
65         head_elt - dm->per_thread_data[thread_index].list_pool;
66       clib_dlist_init (dm->per_thread_data[thread_index].list_pool,
67                        b4->sessions_per_b4_list_head_index);
68
69       b4_kv.value = b4 - dm->per_thread_data[thread_index].b4s;
70       clib_bihash_add_del_16_8 (&dm->per_thread_data[thread_index].b4_hash,
71                                 &b4_kv, 1);
72     }
73   else
74     {
75       b4 =
76         pool_elt_at_index (dm->per_thread_data[thread_index].b4s,
77                            b4_value.value);
78     }
79
80   //TODO configurable quota
81   if (b4->nsessions >= 1000)
82     {
83       oldest_index =
84         clib_dlist_remove_head (dm->per_thread_data[thread_index].list_pool,
85                                 b4->sessions_per_b4_list_head_index);
86       ASSERT (oldest_index != ~0);
87       clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
88                           b4->sessions_per_b4_list_head_index, oldest_index);
89       oldest_elt =
90         pool_elt_at_index (dm->per_thread_data[thread_index].list_pool,
91                            oldest_index);
92       s =
93         pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
94                            oldest_elt->value);
95
96       in2out_kv.key[0] = s->in2out.as_u64[0];
97       in2out_kv.key[1] = s->in2out.as_u64[1];
98       in2out_kv.key[2] = s->in2out.as_u64[2];
99       clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out,
100                                 &in2out_kv, 0);
101       out2in_kv.key = s->out2in.as_u64;
102       clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in,
103                                &out2in_kv, 0);
104       snat_free_outside_address_and_port (dm->addr_pool, thread_index,
105                                           &s->out2in,
106                                           s->outside_address_index);
107       s->outside_address_index = ~0;
108
109       if (snat_alloc_outside_address_and_port
110           (dm->addr_pool, 0, thread_index, &out2in_key,
111            &s->outside_address_index, 0, dm->port_per_thread, thread_index))
112         ASSERT (0);
113     }
114   else
115     {
116       if (snat_alloc_outside_address_and_port
117           (dm->addr_pool, 0, thread_index, &out2in_key, &address_index, 0,
118            dm->port_per_thread, thread_index))
119         {
120           *error = DSLITE_ERROR_OUT_OF_PORTS;
121           return DSLITE_IN2OUT_NEXT_DROP;
122         }
123       pool_get (dm->per_thread_data[thread_index].sessions, s);
124       memset (s, 0, sizeof (*s));
125       s->outside_address_index = address_index;
126       b4->nsessions++;
127
128       pool_get (dm->per_thread_data[thread_index].list_pool, elt);
129       clib_dlist_init (dm->per_thread_data[thread_index].list_pool,
130                        elt - dm->per_thread_data[thread_index].list_pool);
131       elt->value = s - dm->per_thread_data[thread_index].sessions;
132       s->per_b4_index = elt - dm->per_thread_data[thread_index].list_pool;
133       s->per_b4_list_head_index = b4->sessions_per_b4_list_head_index;
134       clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
135                           s->per_b4_list_head_index,
136                           elt - dm->per_thread_data[thread_index].list_pool);
137     }
138
139   s->in2out = *in2out_key;
140   s->out2in = out2in_key;
141   *sp = s;
142   in2out_kv.key[0] = s->in2out.as_u64[0];
143   in2out_kv.key[1] = s->in2out.as_u64[1];
144   in2out_kv.key[2] = s->in2out.as_u64[2];
145   in2out_kv.value = s - dm->per_thread_data[thread_index].sessions;
146   clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out,
147                             &in2out_kv, 1);
148   out2in_kv.key = s->out2in.as_u64;
149   out2in_kv.value = s - dm->per_thread_data[thread_index].sessions;
150   clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in,
151                            &out2in_kv, 1);
152
153   return next;
154 }
155
156 static inline u32
157 dslite_icmp_in2out (dslite_main_t * dm, ip6_header_t * ip6,
158                     ip4_header_t * ip4, dslite_session_t ** sp, u32 next,
159                     u8 * error, u32 thread_index)
160 {
161   dslite_session_t *s = 0;
162   icmp46_header_t *icmp = ip4_next_header (ip4);
163   clib_bihash_kv_24_8_t kv, value;
164   dslite_session_key_t key;
165   u32 n = next;
166   icmp_echo_header_t *echo;
167   u32 new_addr, old_addr;
168   u16 old_id, new_id;
169   ip_csum_t sum;
170
171   if (icmp_is_error_message (icmp))
172     {
173       n = DSLITE_IN2OUT_NEXT_DROP;
174       *error = DSLITE_ERROR_BAD_ICMP_TYPE;
175       goto done;
176     }
177
178   echo = (icmp_echo_header_t *) (icmp + 1);
179
180   key.addr = ip4->src_address;
181   key.port = echo->identifier;
182   key.proto = SNAT_PROTOCOL_ICMP;
183   key.softwire_id.as_u64[0] = ip6->src_address.as_u64[0];
184   key.softwire_id.as_u64[1] = ip6->src_address.as_u64[1];
185   key.pad = 0;
186   kv.key[0] = key.as_u64[0];
187   kv.key[1] = key.as_u64[1];
188   kv.key[2] = key.as_u64[2];
189
190   if (clib_bihash_search_24_8
191       (&dm->per_thread_data[thread_index].in2out, &kv, &value))
192     {
193       n = slow_path (dm, &key, &s, next, error, thread_index);
194       if (PREDICT_FALSE (next == DSLITE_IN2OUT_NEXT_DROP))
195         goto done;
196     }
197   else
198     {
199       s =
200         pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
201                            value.value);
202     }
203
204   old_addr = ip4->src_address.as_u32;
205   ip4->src_address = s->out2in.addr;
206   new_addr = ip4->src_address.as_u32;
207   sum = ip4->checksum;
208   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
209   ip4->checksum = ip_csum_fold (sum);
210
211   old_id = echo->identifier;
212   echo->identifier = new_id = s->out2in.port;
213   sum = icmp->checksum;
214   sum = ip_csum_update (sum, old_id, new_id, icmp_echo_header_t, identifier);
215   icmp->checksum = ip_csum_fold (sum);
216
217 done:
218   *sp = s;
219   return n;
220 }
221
222 static inline uword
223 dslite_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
224                               vlib_frame_t * frame, u8 is_slow_path)
225 {
226   u32 n_left_from, *from, *to_next;
227   dslite_in2out_next_t next_index;
228   u32 node_index;
229   vlib_node_runtime_t *error_node;
230   u32 thread_index = vlib_get_thread_index ();
231   f64 now = vlib_time_now (vm);
232   dslite_main_t *dm = &dslite_main;
233
234   node_index =
235     is_slow_path ? dslite_in2out_slowpath_node.
236     index : dslite_in2out_node.index;
237
238   error_node = vlib_node_get_runtime (vm, node_index);
239
240   from = vlib_frame_vector_args (frame);
241   n_left_from = frame->n_vectors;
242   next_index = node->cached_next_index;
243
244   while (n_left_from > 0)
245     {
246       u32 n_left_to_next;
247
248       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
249
250       while (n_left_from > 0 && n_left_to_next > 0)
251         {
252           u32 bi0;
253           vlib_buffer_t *b0;
254           u32 next0 = DSLITE_IN2OUT_NEXT_IP4_LOOKUP;
255           ip4_header_t *ip40;
256           ip6_header_t *ip60;
257           u8 error0 = DSLITE_ERROR_IN2OUT;
258           u32 proto0;
259           dslite_session_t *s0 = 0;
260           clib_bihash_kv_24_8_t kv0, value0;
261           dslite_session_key_t key0;
262           udp_header_t *udp0;
263           tcp_header_t *tcp0;
264           ip_csum_t sum0;
265           u32 new_addr0, old_addr0;
266           u16 old_port0, new_port0;
267
268           /* speculatively enqueue b0 to the current next frame */
269           bi0 = from[0];
270           to_next[0] = bi0;
271           from += 1;
272           to_next += 1;
273           n_left_from -= 1;
274           n_left_to_next -= 1;
275
276           b0 = vlib_get_buffer (vm, bi0);
277           ip60 = vlib_buffer_get_current (b0);
278
279           if (PREDICT_FALSE (ip60->protocol != IP_PROTOCOL_IP_IN_IP))
280             {
281               error0 = DSLITE_ERROR_BAD_IP6_PROTOCOL;
282               next0 = DSLITE_IN2OUT_NEXT_DROP;
283               goto trace0;
284             }
285
286           ip40 = vlib_buffer_get_current (b0) + sizeof (ip6_header_t);
287           proto0 = ip_proto_to_snat_proto (ip40->protocol);
288
289           if (PREDICT_FALSE (proto0 == ~0))
290             {
291               error0 = DSLITE_ERROR_UNSUPPORTED_PROTOCOL;
292               next0 = DSLITE_IN2OUT_NEXT_DROP;
293               goto trace0;
294             }
295
296           udp0 = ip4_next_header (ip40);
297           tcp0 = (tcp_header_t *) udp0;
298
299           if (is_slow_path)
300             {
301               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
302                 {
303                   next0 =
304                     dslite_icmp_in2out (dm, ip60, ip40, &s0, next0, &error0,
305                                         thread_index);
306                   if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP))
307                     goto trace0;
308
309                   goto accounting0;
310                 }
311             }
312           else
313             {
314               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
315                 {
316                   next0 = DSLITE_IN2OUT_NEXT_SLOWPATH;
317                   goto trace0;
318                 }
319             }
320
321           key0.addr = ip40->src_address;
322           key0.port = udp0->src_port;
323           key0.proto = proto0;
324           key0.softwire_id.as_u64[0] = ip60->src_address.as_u64[0];
325           key0.softwire_id.as_u64[1] = ip60->src_address.as_u64[1];
326           key0.pad = 0;
327           kv0.key[0] = key0.as_u64[0];
328           kv0.key[1] = key0.as_u64[1];
329           kv0.key[2] = key0.as_u64[2];
330
331           if (clib_bihash_search_24_8
332               (&dm->per_thread_data[thread_index].in2out, &kv0, &value0))
333             {
334               if (is_slow_path)
335                 {
336                   next0 =
337                     slow_path (dm, &key0, &s0, next0, &error0, thread_index);
338                   if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP))
339                     goto trace0;
340                 }
341               else
342                 {
343                   next0 = DSLITE_IN2OUT_NEXT_SLOWPATH;
344                   goto trace0;
345                 }
346             }
347           else
348             {
349               s0 =
350                 pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
351                                    value0.value);
352             }
353
354           old_addr0 = ip40->src_address.as_u32;
355           ip40->src_address = s0->out2in.addr;
356           new_addr0 = ip40->src_address.as_u32;
357           sum0 = ip40->checksum;
358           sum0 =
359             ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
360                             src_address);
361           ip40->checksum = ip_csum_fold (sum0);
362           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
363             {
364               old_port0 = tcp0->src_port;
365               tcp0->src_port = s0->out2in.port;
366               new_port0 = tcp0->src_port;
367
368               sum0 = tcp0->checksum;
369               sum0 =
370                 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
371                                 dst_address);
372               sum0 =
373                 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
374                                 length);
375               tcp0->checksum = ip_csum_fold (sum0);
376             }
377           else
378             {
379               old_port0 = udp0->src_port;
380               udp0->src_port = s0->out2in.port;
381               udp0->checksum = 0;
382             }
383
384         accounting0:
385           /* Accounting */
386           s0->last_heard = now;
387           s0->total_pkts++;
388           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
389           /* Per-B4 LRU list maintenance */
390           clib_dlist_remove (dm->per_thread_data[thread_index].list_pool,
391                              s0->per_b4_index);
392           clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
393                               s0->per_b4_list_head_index, s0->per_b4_index);
394
395           ip40->tos =
396             (clib_net_to_host_u32
397              (ip60->ip_version_traffic_class_and_flow_label) & 0x0ff00000) >>
398             20;
399           vlib_buffer_advance (b0, sizeof (ip6_header_t));
400
401         trace0:
402           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
403                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
404             {
405               dslite_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
406               t->next_index = next0;
407               t->session_index = ~0;
408               if (s0)
409                 t->session_index =
410                   s0 - dm->per_thread_data[thread_index].sessions;
411             }
412
413           b0->error = error_node->errors[error0];
414
415           /* verify speculative enqueue, maybe switch current next frame */
416           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
417                                            n_left_to_next, bi0, next0);
418         }
419       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
420     }
421
422   return frame->n_vectors;
423 }
424
425 static uword
426 dslite_in2out_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
427                        vlib_frame_t * frame)
428 {
429   return dslite_in2out_node_fn_inline (vm, node, frame, 0);
430 }
431
432 /* *INDENT-OFF* */
433 VLIB_REGISTER_NODE (dslite_in2out_node) = {
434   .function = dslite_in2out_node_fn,
435   .name = "dslite-in2out",
436   .vector_size = sizeof (u32),
437   .format_trace = format_dslite_trace,
438   .type = VLIB_NODE_TYPE_INTERNAL,
439   .n_errors = ARRAY_LEN (dslite_in2out_error_strings),
440   .error_strings = dslite_in2out_error_strings,
441   .n_next_nodes = DSLITE_IN2OUT_N_NEXT,
442   /* edit / add dispositions here */
443   .next_nodes = {
444     [DSLITE_IN2OUT_NEXT_DROP] = "error-drop",
445     [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
446     [DSLITE_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
447     [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
448   },
449 };
450 /* *INDENT-ON* */
451
452 VLIB_NODE_FUNCTION_MULTIARCH (dslite_in2out_node, dslite_in2out_node_fn);
453
454 static uword
455 dslite_in2out_slowpath_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
456                                 vlib_frame_t * frame)
457 {
458   return dslite_in2out_node_fn_inline (vm, node, frame, 1);
459 }
460
461 /* *INDENT-OFF* */
462 VLIB_REGISTER_NODE (dslite_in2out_slowpath_node) = {
463   .function = dslite_in2out_slowpath_node_fn,
464   .name = "dslite-in2out-slowpath",
465   .vector_size = sizeof (u32),
466   .format_trace = format_dslite_trace,
467   .type = VLIB_NODE_TYPE_INTERNAL,
468   .n_errors = ARRAY_LEN (dslite_in2out_error_strings),
469   .error_strings = dslite_in2out_error_strings,
470   .n_next_nodes = DSLITE_IN2OUT_N_NEXT,
471   /* edit / add dispositions here */
472   .next_nodes = {
473     [DSLITE_IN2OUT_NEXT_DROP] = "error-drop",
474     [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
475     [DSLITE_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
476     [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
477   },
478 };
479 /* *INDENT-ON* */
480
481 VLIB_NODE_FUNCTION_MULTIARCH (dslite_in2out_slowpath_node,
482                               dslite_in2out_slowpath_node_fn);
483
484 /*
485  * fd.io coding-style-patch-verification: ON
486  *
487  * Local Variables:
488  * eval: (c-set-style "gnu")
489  * End:
490  */