4eec5958da9b6f900b1655d5d77e47f8a6ae3a27
[vpp.git] / src / plugins / nat / dslite_in2out.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <nat/dslite.h>
16 #include <nat/nat_inlines.h>
17
18 vlib_node_registration_t dslite_in2out_node;
19 vlib_node_registration_t dslite_in2out_slowpath_node;
20
/* Next-node dispositions used by both the fast-path and slow-path nodes.
   The values index the .next_nodes tables of the node registrations. */
typedef enum
{
  /* Default disposition for a translated inner IPv4 packet. */
  DSLITE_IN2OUT_NEXT_IP4_LOOKUP = 0,
  /* Outer IPv6 packet carries ICMPv6 rather than IPv4-in-IPv6. */
  DSLITE_IN2OUT_NEXT_IP6_ICMP = 1,
  /* Packet is rejected; the error counter is selected separately. */
  DSLITE_IN2OUT_NEXT_DROP = 2,
  /* Fast path defers to the slow path (ICMP or session lookup miss). */
  DSLITE_IN2OUT_NEXT_SLOWPATH = 3,
  /* Number of dispositions; not a real next node. */
  DSLITE_IN2OUT_N_NEXT = 4,
} dslite_in2out_next_t;
29
30 static char *dslite_in2out_error_strings[] = {
31 #define _(sym,string) string,
32   foreach_dslite_error
33 #undef _
34 };
35
36 static u32
37 slow_path (dslite_main_t * dm, dslite_session_key_t * in2out_key,
38            dslite_session_t ** sp, u32 next, u8 * error, u32 thread_index)
39 {
40   dslite_b4_t *b4;
41   clib_bihash_kv_16_8_t b4_kv, b4_value;
42   clib_bihash_kv_24_8_t in2out_kv;
43   clib_bihash_kv_8_8_t out2in_kv;
44   dlist_elt_t *head_elt, *oldest_elt, *elt;
45   u32 oldest_index;
46   dslite_session_t *s;
47   snat_session_key_t out2in_key;
48
49   out2in_key.protocol = in2out_key->proto;
50   out2in_key.fib_index = 0;
51
52   b4_kv.key[0] = in2out_key->softwire_id.as_u64[0];
53   b4_kv.key[1] = in2out_key->softwire_id.as_u64[1];
54
55   if (clib_bihash_search_16_8
56       (&dm->per_thread_data[thread_index].b4_hash, &b4_kv, &b4_value))
57     {
58       pool_get (dm->per_thread_data[thread_index].b4s, b4);
59       memset (b4, 0, sizeof (*b4));
60       b4->addr.as_u64[0] = in2out_key->softwire_id.as_u64[0];
61       b4->addr.as_u64[1] = in2out_key->softwire_id.as_u64[1];
62
63       pool_get (dm->per_thread_data[thread_index].list_pool, head_elt);
64       b4->sessions_per_b4_list_head_index =
65         head_elt - dm->per_thread_data[thread_index].list_pool;
66       clib_dlist_init (dm->per_thread_data[thread_index].list_pool,
67                        b4->sessions_per_b4_list_head_index);
68
69       b4_kv.value = b4 - dm->per_thread_data[thread_index].b4s;
70       clib_bihash_add_del_16_8 (&dm->per_thread_data[thread_index].b4_hash,
71                                 &b4_kv, 1);
72     }
73   else
74     {
75       b4 =
76         pool_elt_at_index (dm->per_thread_data[thread_index].b4s,
77                            b4_value.value);
78     }
79
80   //TODO configurable quota
81   if (b4->nsessions >= 1000)
82     {
83       oldest_index =
84         clib_dlist_remove_head (dm->per_thread_data[thread_index].list_pool,
85                                 b4->sessions_per_b4_list_head_index);
86       ASSERT (oldest_index != ~0);
87       clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
88                           b4->sessions_per_b4_list_head_index, oldest_index);
89       oldest_elt =
90         pool_elt_at_index (dm->per_thread_data[thread_index].list_pool,
91                            oldest_index);
92       s =
93         pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
94                            oldest_elt->value);
95
96       in2out_kv.key[0] = s->in2out.as_u64[0];
97       in2out_kv.key[1] = s->in2out.as_u64[1];
98       in2out_kv.key[2] = s->in2out.as_u64[2];
99       clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out,
100                                 &in2out_kv, 0);
101       out2in_kv.key = s->out2in.as_u64;
102       clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in,
103                                &out2in_kv, 0);
104       snat_free_outside_address_and_port (dm->addr_pool, thread_index,
105                                           &s->out2in);
106
107       if (snat_alloc_outside_address_and_port
108           (dm->addr_pool, 0, thread_index, &out2in_key,
109            dm->port_per_thread, thread_index))
110         ASSERT (0);
111     }
112   else
113     {
114       if (snat_alloc_outside_address_and_port
115           (dm->addr_pool, 0, thread_index, &out2in_key,
116            dm->port_per_thread, thread_index))
117         {
118           *error = DSLITE_ERROR_OUT_OF_PORTS;
119           return DSLITE_IN2OUT_NEXT_DROP;
120         }
121       pool_get (dm->per_thread_data[thread_index].sessions, s);
122       memset (s, 0, sizeof (*s));
123       b4->nsessions++;
124
125       pool_get (dm->per_thread_data[thread_index].list_pool, elt);
126       clib_dlist_init (dm->per_thread_data[thread_index].list_pool,
127                        elt - dm->per_thread_data[thread_index].list_pool);
128       elt->value = s - dm->per_thread_data[thread_index].sessions;
129       s->per_b4_index = elt - dm->per_thread_data[thread_index].list_pool;
130       s->per_b4_list_head_index = b4->sessions_per_b4_list_head_index;
131       clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
132                           s->per_b4_list_head_index,
133                           elt - dm->per_thread_data[thread_index].list_pool);
134     }
135
136   s->in2out = *in2out_key;
137   s->out2in = out2in_key;
138   *sp = s;
139   in2out_kv.key[0] = s->in2out.as_u64[0];
140   in2out_kv.key[1] = s->in2out.as_u64[1];
141   in2out_kv.key[2] = s->in2out.as_u64[2];
142   in2out_kv.value = s - dm->per_thread_data[thread_index].sessions;
143   clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out,
144                             &in2out_kv, 1);
145   out2in_kv.key = s->out2in.as_u64;
146   out2in_kv.value = s - dm->per_thread_data[thread_index].sessions;
147   clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in,
148                            &out2in_kv, 1);
149
150   return next;
151 }
152
153 static inline u32
154 dslite_icmp_in2out (dslite_main_t * dm, ip6_header_t * ip6,
155                     ip4_header_t * ip4, dslite_session_t ** sp, u32 next,
156                     u8 * error, u32 thread_index)
157 {
158   dslite_session_t *s = 0;
159   icmp46_header_t *icmp = ip4_next_header (ip4);
160   clib_bihash_kv_24_8_t kv, value;
161   dslite_session_key_t key;
162   u32 n = next;
163   icmp_echo_header_t *echo;
164   u32 new_addr, old_addr;
165   u16 old_id, new_id;
166   ip_csum_t sum;
167
168   if (icmp_is_error_message (icmp))
169     {
170       n = DSLITE_IN2OUT_NEXT_DROP;
171       *error = DSLITE_ERROR_BAD_ICMP_TYPE;
172       goto done;
173     }
174
175   echo = (icmp_echo_header_t *) (icmp + 1);
176
177   key.addr = ip4->src_address;
178   key.port = echo->identifier;
179   key.proto = SNAT_PROTOCOL_ICMP;
180   key.softwire_id.as_u64[0] = ip6->src_address.as_u64[0];
181   key.softwire_id.as_u64[1] = ip6->src_address.as_u64[1];
182   key.pad = 0;
183   kv.key[0] = key.as_u64[0];
184   kv.key[1] = key.as_u64[1];
185   kv.key[2] = key.as_u64[2];
186
187   if (clib_bihash_search_24_8
188       (&dm->per_thread_data[thread_index].in2out, &kv, &value))
189     {
190       n = slow_path (dm, &key, &s, next, error, thread_index);
191       if (PREDICT_FALSE (next == DSLITE_IN2OUT_NEXT_DROP))
192         goto done;
193     }
194   else
195     {
196       s =
197         pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
198                            value.value);
199     }
200
201   old_addr = ip4->src_address.as_u32;
202   ip4->src_address = s->out2in.addr;
203   new_addr = ip4->src_address.as_u32;
204   sum = ip4->checksum;
205   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
206   ip4->checksum = ip_csum_fold (sum);
207
208   old_id = echo->identifier;
209   echo->identifier = new_id = s->out2in.port;
210   sum = icmp->checksum;
211   sum = ip_csum_update (sum, old_id, new_id, icmp_echo_header_t, identifier);
212   icmp->checksum = ip_csum_fold (sum);
213
214 done:
215   *sp = s;
216   return n;
217 }
218
219 static inline uword
220 dslite_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
221                               vlib_frame_t * frame, u8 is_slow_path)
222 {
223   u32 n_left_from, *from, *to_next;
224   dslite_in2out_next_t next_index;
225   u32 node_index;
226   vlib_node_runtime_t *error_node;
227   u32 thread_index = vm->thread_index;
228   f64 now = vlib_time_now (vm);
229   dslite_main_t *dm = &dslite_main;
230
231   node_index =
232     is_slow_path ? dslite_in2out_slowpath_node.
233     index : dslite_in2out_node.index;
234
235   error_node = vlib_node_get_runtime (vm, node_index);
236
237   from = vlib_frame_vector_args (frame);
238   n_left_from = frame->n_vectors;
239   next_index = node->cached_next_index;
240
241   while (n_left_from > 0)
242     {
243       u32 n_left_to_next;
244
245       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
246
247       while (n_left_from > 0 && n_left_to_next > 0)
248         {
249           u32 bi0;
250           vlib_buffer_t *b0;
251           u32 next0 = DSLITE_IN2OUT_NEXT_IP4_LOOKUP;
252           ip4_header_t *ip40;
253           ip6_header_t *ip60;
254           u8 error0 = DSLITE_ERROR_IN2OUT;
255           u32 proto0;
256           dslite_session_t *s0 = 0;
257           clib_bihash_kv_24_8_t kv0, value0;
258           dslite_session_key_t key0;
259           udp_header_t *udp0;
260           tcp_header_t *tcp0;
261           ip_csum_t sum0;
262           u32 new_addr0, old_addr0;
263           u16 old_port0, new_port0;
264
265           /* speculatively enqueue b0 to the current next frame */
266           bi0 = from[0];
267           to_next[0] = bi0;
268           from += 1;
269           to_next += 1;
270           n_left_from -= 1;
271           n_left_to_next -= 1;
272
273           b0 = vlib_get_buffer (vm, bi0);
274           ip60 = vlib_buffer_get_current (b0);
275
276           if (PREDICT_FALSE (ip60->protocol != IP_PROTOCOL_IP_IN_IP))
277             {
278               if (ip60->protocol == IP_PROTOCOL_ICMP6)
279                 {
280                   next0 = DSLITE_IN2OUT_NEXT_IP6_ICMP;
281                   goto trace0;
282                 }
283               error0 = DSLITE_ERROR_BAD_IP6_PROTOCOL;
284               next0 = DSLITE_IN2OUT_NEXT_DROP;
285               goto trace0;
286             }
287
288           ip40 = vlib_buffer_get_current (b0) + sizeof (ip6_header_t);
289           proto0 = ip_proto_to_snat_proto (ip40->protocol);
290
291           if (PREDICT_FALSE (proto0 == ~0))
292             {
293               error0 = DSLITE_ERROR_UNSUPPORTED_PROTOCOL;
294               next0 = DSLITE_IN2OUT_NEXT_DROP;
295               goto trace0;
296             }
297
298           udp0 = ip4_next_header (ip40);
299           tcp0 = (tcp_header_t *) udp0;
300
301           if (is_slow_path)
302             {
303               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
304                 {
305                   next0 =
306                     dslite_icmp_in2out (dm, ip60, ip40, &s0, next0, &error0,
307                                         thread_index);
308                   if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP))
309                     goto trace0;
310
311                   goto accounting0;
312                 }
313             }
314           else
315             {
316               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
317                 {
318                   next0 = DSLITE_IN2OUT_NEXT_SLOWPATH;
319                   goto trace0;
320                 }
321             }
322
323           key0.addr = ip40->src_address;
324           key0.port = udp0->src_port;
325           key0.proto = proto0;
326           key0.softwire_id.as_u64[0] = ip60->src_address.as_u64[0];
327           key0.softwire_id.as_u64[1] = ip60->src_address.as_u64[1];
328           key0.pad = 0;
329           kv0.key[0] = key0.as_u64[0];
330           kv0.key[1] = key0.as_u64[1];
331           kv0.key[2] = key0.as_u64[2];
332
333           if (clib_bihash_search_24_8
334               (&dm->per_thread_data[thread_index].in2out, &kv0, &value0))
335             {
336               if (is_slow_path)
337                 {
338                   next0 =
339                     slow_path (dm, &key0, &s0, next0, &error0, thread_index);
340                   if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP))
341                     goto trace0;
342                 }
343               else
344                 {
345                   next0 = DSLITE_IN2OUT_NEXT_SLOWPATH;
346                   goto trace0;
347                 }
348             }
349           else
350             {
351               s0 =
352                 pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
353                                    value0.value);
354             }
355
356           old_addr0 = ip40->src_address.as_u32;
357           ip40->src_address = s0->out2in.addr;
358           new_addr0 = ip40->src_address.as_u32;
359           sum0 = ip40->checksum;
360           sum0 =
361             ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
362                             src_address);
363           ip40->checksum = ip_csum_fold (sum0);
364           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
365             {
366               old_port0 = tcp0->src_port;
367               tcp0->src_port = s0->out2in.port;
368               new_port0 = tcp0->src_port;
369
370               sum0 = tcp0->checksum;
371               sum0 =
372                 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
373                                 dst_address);
374               sum0 =
375                 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
376                                 length);
377               mss_clamping (&snat_main, tcp0, &sum0);
378               tcp0->checksum = ip_csum_fold (sum0);
379             }
380           else
381             {
382               old_port0 = udp0->src_port;
383               udp0->src_port = s0->out2in.port;
384               udp0->checksum = 0;
385             }
386
387         accounting0:
388           /* Accounting */
389           s0->last_heard = now;
390           s0->total_pkts++;
391           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
392           /* Per-B4 LRU list maintenance */
393           clib_dlist_remove (dm->per_thread_data[thread_index].list_pool,
394                              s0->per_b4_index);
395           clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
396                               s0->per_b4_list_head_index, s0->per_b4_index);
397
398           ip40->tos =
399             (clib_net_to_host_u32
400              (ip60->ip_version_traffic_class_and_flow_label) & 0x0ff00000) >>
401             20;
402           vlib_buffer_advance (b0, sizeof (ip6_header_t));
403
404         trace0:
405           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
406                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
407             {
408               dslite_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
409               t->next_index = next0;
410               t->session_index = ~0;
411               if (s0)
412                 t->session_index =
413                   s0 - dm->per_thread_data[thread_index].sessions;
414             }
415
416           b0->error = error_node->errors[error0];
417
418           /* verify speculative enqueue, maybe switch current next frame */
419           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
420                                            n_left_to_next, bi0, next0);
421         }
422       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
423     }
424
425   return frame->n_vectors;
426 }
427
428 static uword
429 dslite_in2out_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
430                        vlib_frame_t * frame)
431 {
432   return dslite_in2out_node_fn_inline (vm, node, frame, 0);
433 }
434
435 /* *INDENT-OFF* */
436 VLIB_REGISTER_NODE (dslite_in2out_node) = {
437   .function = dslite_in2out_node_fn,
438   .name = "dslite-in2out",
439   .vector_size = sizeof (u32),
440   .format_trace = format_dslite_trace,
441   .type = VLIB_NODE_TYPE_INTERNAL,
442   .n_errors = ARRAY_LEN (dslite_in2out_error_strings),
443   .error_strings = dslite_in2out_error_strings,
444   .n_next_nodes = DSLITE_IN2OUT_N_NEXT,
445   /* edit / add dispositions here */
446   .next_nodes = {
447     [DSLITE_IN2OUT_NEXT_DROP] = "error-drop",
448     [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
449     [DSLITE_IN2OUT_NEXT_IP6_ICMP] = "ip6-icmp-input",
450     [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
451   },
452 };
453 /* *INDENT-ON* */
454
455 VLIB_NODE_FUNCTION_MULTIARCH (dslite_in2out_node, dslite_in2out_node_fn);
456
457 static uword
458 dslite_in2out_slowpath_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
459                                 vlib_frame_t * frame)
460 {
461   return dslite_in2out_node_fn_inline (vm, node, frame, 1);
462 }
463
464 /* *INDENT-OFF* */
465 VLIB_REGISTER_NODE (dslite_in2out_slowpath_node) = {
466   .function = dslite_in2out_slowpath_node_fn,
467   .name = "dslite-in2out-slowpath",
468   .vector_size = sizeof (u32),
469   .format_trace = format_dslite_trace,
470   .type = VLIB_NODE_TYPE_INTERNAL,
471   .n_errors = ARRAY_LEN (dslite_in2out_error_strings),
472   .error_strings = dslite_in2out_error_strings,
473   .n_next_nodes = DSLITE_IN2OUT_N_NEXT,
474   /* edit / add dispositions here */
475   .next_nodes = {
476     [DSLITE_IN2OUT_NEXT_DROP] = "error-drop",
477     [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
478     [DSLITE_IN2OUT_NEXT_IP6_ICMP] = "ip6-lookup",
479     [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
480   },
481 };
482 /* *INDENT-ON* */
483
484 VLIB_NODE_FUNCTION_MULTIARCH (dslite_in2out_slowpath_node,
485                               dslite_in2out_slowpath_node_fn);
486
487 /*
488  * fd.io coding-style-patch-verification: ON
489  *
490  * Local Variables:
491  * eval: (c-set-style "gnu")
492  * End:
493  */