991d6eda775d2c3cd84872745866a20771715544
[vpp.git] / src / plugins / nat / dslite_in2out.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <nat/dslite.h>
16 #include <nat/nat_inlines.h>
17
18 vlib_node_registration_t dslite_in2out_node;
19 vlib_node_registration_t dslite_in2out_slowpath_node;
20
/*
 * Next-node dispositions shared by the dslite-in2out and
 * dslite-in2out-slowpath nodes.  The values index the .next_nodes tables of
 * both node registrations, so the order must stay in sync with them.
 */
typedef enum
{
  /* translated IPv4 packet, continue with IPv4 forwarding */
  DSLITE_IN2OUT_NEXT_IP4_LOOKUP = 0,
  /* outer packet carries ICMPv6 rather than IPv4-in-IPv6 */
  DSLITE_IN2OUT_NEXT_IP6_ICMP = 1,
  /* packet is dropped, b0->error says why */
  DSLITE_IN2OUT_NEXT_DROP = 2,
  /* hand the packet to the slow path node */
  DSLITE_IN2OUT_NEXT_SLOWPATH = 3,
  /* number of dispositions, not a disposition itself */
  DSLITE_IN2OUT_N_NEXT = 4,
} dslite_in2out_next_t;
29
30 static char *dslite_in2out_error_strings[] = {
31 #define _(sym,string) string,
32   foreach_dslite_error
33 #undef _
34 };
35
36 static u32
37 slow_path (dslite_main_t * dm, dslite_session_key_t * in2out_key,
38            dslite_session_t ** sp, u32 next, u8 * error, u32 thread_index)
39 {
40   dslite_b4_t *b4;
41   clib_bihash_kv_16_8_t b4_kv, b4_value;
42   clib_bihash_kv_24_8_t in2out_kv;
43   clib_bihash_kv_8_8_t out2in_kv;
44   dlist_elt_t *head_elt, *oldest_elt, *elt;
45   u32 oldest_index;
46   dslite_session_t *s;
47   snat_session_key_t out2in_key;
48   u32 address_index;
49
50   out2in_key.protocol = in2out_key->proto;
51   out2in_key.fib_index = 0;
52
53   b4_kv.key[0] = in2out_key->softwire_id.as_u64[0];
54   b4_kv.key[1] = in2out_key->softwire_id.as_u64[1];
55
56   if (clib_bihash_search_16_8
57       (&dm->per_thread_data[thread_index].b4_hash, &b4_kv, &b4_value))
58     {
59       pool_get (dm->per_thread_data[thread_index].b4s, b4);
60       memset (b4, 0, sizeof (*b4));
61       b4->addr.as_u64[0] = in2out_key->softwire_id.as_u64[0];
62       b4->addr.as_u64[1] = in2out_key->softwire_id.as_u64[1];
63
64       pool_get (dm->per_thread_data[thread_index].list_pool, head_elt);
65       b4->sessions_per_b4_list_head_index =
66         head_elt - dm->per_thread_data[thread_index].list_pool;
67       clib_dlist_init (dm->per_thread_data[thread_index].list_pool,
68                        b4->sessions_per_b4_list_head_index);
69
70       b4_kv.value = b4 - dm->per_thread_data[thread_index].b4s;
71       clib_bihash_add_del_16_8 (&dm->per_thread_data[thread_index].b4_hash,
72                                 &b4_kv, 1);
73     }
74   else
75     {
76       b4 =
77         pool_elt_at_index (dm->per_thread_data[thread_index].b4s,
78                            b4_value.value);
79     }
80
81   //TODO configurable quota
82   if (b4->nsessions >= 1000)
83     {
84       oldest_index =
85         clib_dlist_remove_head (dm->per_thread_data[thread_index].list_pool,
86                                 b4->sessions_per_b4_list_head_index);
87       ASSERT (oldest_index != ~0);
88       clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
89                           b4->sessions_per_b4_list_head_index, oldest_index);
90       oldest_elt =
91         pool_elt_at_index (dm->per_thread_data[thread_index].list_pool,
92                            oldest_index);
93       s =
94         pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
95                            oldest_elt->value);
96
97       in2out_kv.key[0] = s->in2out.as_u64[0];
98       in2out_kv.key[1] = s->in2out.as_u64[1];
99       in2out_kv.key[2] = s->in2out.as_u64[2];
100       clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out,
101                                 &in2out_kv, 0);
102       out2in_kv.key = s->out2in.as_u64;
103       clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in,
104                                &out2in_kv, 0);
105       snat_free_outside_address_and_port (dm->addr_pool, thread_index,
106                                           &s->out2in,
107                                           s->outside_address_index);
108       s->outside_address_index = ~0;
109
110       if (snat_alloc_outside_address_and_port
111           (dm->addr_pool, 0, thread_index, &out2in_key,
112            &s->outside_address_index, dm->port_per_thread, thread_index))
113         ASSERT (0);
114     }
115   else
116     {
117       if (snat_alloc_outside_address_and_port
118           (dm->addr_pool, 0, thread_index, &out2in_key, &address_index,
119            dm->port_per_thread, thread_index))
120         {
121           *error = DSLITE_ERROR_OUT_OF_PORTS;
122           return DSLITE_IN2OUT_NEXT_DROP;
123         }
124       pool_get (dm->per_thread_data[thread_index].sessions, s);
125       memset (s, 0, sizeof (*s));
126       s->outside_address_index = address_index;
127       b4->nsessions++;
128
129       pool_get (dm->per_thread_data[thread_index].list_pool, elt);
130       clib_dlist_init (dm->per_thread_data[thread_index].list_pool,
131                        elt - dm->per_thread_data[thread_index].list_pool);
132       elt->value = s - dm->per_thread_data[thread_index].sessions;
133       s->per_b4_index = elt - dm->per_thread_data[thread_index].list_pool;
134       s->per_b4_list_head_index = b4->sessions_per_b4_list_head_index;
135       clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
136                           s->per_b4_list_head_index,
137                           elt - dm->per_thread_data[thread_index].list_pool);
138     }
139
140   s->in2out = *in2out_key;
141   s->out2in = out2in_key;
142   *sp = s;
143   in2out_kv.key[0] = s->in2out.as_u64[0];
144   in2out_kv.key[1] = s->in2out.as_u64[1];
145   in2out_kv.key[2] = s->in2out.as_u64[2];
146   in2out_kv.value = s - dm->per_thread_data[thread_index].sessions;
147   clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out,
148                             &in2out_kv, 1);
149   out2in_kv.key = s->out2in.as_u64;
150   out2in_kv.value = s - dm->per_thread_data[thread_index].sessions;
151   clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in,
152                            &out2in_kv, 1);
153
154   return next;
155 }
156
157 static inline u32
158 dslite_icmp_in2out (dslite_main_t * dm, ip6_header_t * ip6,
159                     ip4_header_t * ip4, dslite_session_t ** sp, u32 next,
160                     u8 * error, u32 thread_index)
161 {
162   dslite_session_t *s = 0;
163   icmp46_header_t *icmp = ip4_next_header (ip4);
164   clib_bihash_kv_24_8_t kv, value;
165   dslite_session_key_t key;
166   u32 n = next;
167   icmp_echo_header_t *echo;
168   u32 new_addr, old_addr;
169   u16 old_id, new_id;
170   ip_csum_t sum;
171
172   if (icmp_is_error_message (icmp))
173     {
174       n = DSLITE_IN2OUT_NEXT_DROP;
175       *error = DSLITE_ERROR_BAD_ICMP_TYPE;
176       goto done;
177     }
178
179   echo = (icmp_echo_header_t *) (icmp + 1);
180
181   key.addr = ip4->src_address;
182   key.port = echo->identifier;
183   key.proto = SNAT_PROTOCOL_ICMP;
184   key.softwire_id.as_u64[0] = ip6->src_address.as_u64[0];
185   key.softwire_id.as_u64[1] = ip6->src_address.as_u64[1];
186   key.pad = 0;
187   kv.key[0] = key.as_u64[0];
188   kv.key[1] = key.as_u64[1];
189   kv.key[2] = key.as_u64[2];
190
191   if (clib_bihash_search_24_8
192       (&dm->per_thread_data[thread_index].in2out, &kv, &value))
193     {
194       n = slow_path (dm, &key, &s, next, error, thread_index);
195       if (PREDICT_FALSE (next == DSLITE_IN2OUT_NEXT_DROP))
196         goto done;
197     }
198   else
199     {
200       s =
201         pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
202                            value.value);
203     }
204
205   old_addr = ip4->src_address.as_u32;
206   ip4->src_address = s->out2in.addr;
207   new_addr = ip4->src_address.as_u32;
208   sum = ip4->checksum;
209   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
210   ip4->checksum = ip_csum_fold (sum);
211
212   old_id = echo->identifier;
213   echo->identifier = new_id = s->out2in.port;
214   sum = icmp->checksum;
215   sum = ip_csum_update (sum, old_id, new_id, icmp_echo_header_t, identifier);
216   icmp->checksum = ip_csum_fold (sum);
217
218 done:
219   *sp = s;
220   return n;
221 }
222
223 static inline uword
224 dslite_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
225                               vlib_frame_t * frame, u8 is_slow_path)
226 {
227   u32 n_left_from, *from, *to_next;
228   dslite_in2out_next_t next_index;
229   u32 node_index;
230   vlib_node_runtime_t *error_node;
231   u32 thread_index = vm->thread_index;
232   f64 now = vlib_time_now (vm);
233   dslite_main_t *dm = &dslite_main;
234
235   node_index =
236     is_slow_path ? dslite_in2out_slowpath_node.
237     index : dslite_in2out_node.index;
238
239   error_node = vlib_node_get_runtime (vm, node_index);
240
241   from = vlib_frame_vector_args (frame);
242   n_left_from = frame->n_vectors;
243   next_index = node->cached_next_index;
244
245   while (n_left_from > 0)
246     {
247       u32 n_left_to_next;
248
249       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
250
251       while (n_left_from > 0 && n_left_to_next > 0)
252         {
253           u32 bi0;
254           vlib_buffer_t *b0;
255           u32 next0 = DSLITE_IN2OUT_NEXT_IP4_LOOKUP;
256           ip4_header_t *ip40;
257           ip6_header_t *ip60;
258           u8 error0 = DSLITE_ERROR_IN2OUT;
259           u32 proto0;
260           dslite_session_t *s0 = 0;
261           clib_bihash_kv_24_8_t kv0, value0;
262           dslite_session_key_t key0;
263           udp_header_t *udp0;
264           tcp_header_t *tcp0;
265           ip_csum_t sum0;
266           u32 new_addr0, old_addr0;
267           u16 old_port0, new_port0;
268
269           /* speculatively enqueue b0 to the current next frame */
270           bi0 = from[0];
271           to_next[0] = bi0;
272           from += 1;
273           to_next += 1;
274           n_left_from -= 1;
275           n_left_to_next -= 1;
276
277           b0 = vlib_get_buffer (vm, bi0);
278           ip60 = vlib_buffer_get_current (b0);
279
280           if (PREDICT_FALSE (ip60->protocol != IP_PROTOCOL_IP_IN_IP))
281             {
282               if (ip60->protocol == IP_PROTOCOL_ICMP6)
283                 {
284                   next0 = DSLITE_IN2OUT_NEXT_IP6_ICMP;
285                   goto trace0;
286                 }
287               error0 = DSLITE_ERROR_BAD_IP6_PROTOCOL;
288               next0 = DSLITE_IN2OUT_NEXT_DROP;
289               goto trace0;
290             }
291
292           ip40 = vlib_buffer_get_current (b0) + sizeof (ip6_header_t);
293           proto0 = ip_proto_to_snat_proto (ip40->protocol);
294
295           if (PREDICT_FALSE (proto0 == ~0))
296             {
297               error0 = DSLITE_ERROR_UNSUPPORTED_PROTOCOL;
298               next0 = DSLITE_IN2OUT_NEXT_DROP;
299               goto trace0;
300             }
301
302           udp0 = ip4_next_header (ip40);
303           tcp0 = (tcp_header_t *) udp0;
304
305           if (is_slow_path)
306             {
307               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
308                 {
309                   next0 =
310                     dslite_icmp_in2out (dm, ip60, ip40, &s0, next0, &error0,
311                                         thread_index);
312                   if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP))
313                     goto trace0;
314
315                   goto accounting0;
316                 }
317             }
318           else
319             {
320               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
321                 {
322                   next0 = DSLITE_IN2OUT_NEXT_SLOWPATH;
323                   goto trace0;
324                 }
325             }
326
327           key0.addr = ip40->src_address;
328           key0.port = udp0->src_port;
329           key0.proto = proto0;
330           key0.softwire_id.as_u64[0] = ip60->src_address.as_u64[0];
331           key0.softwire_id.as_u64[1] = ip60->src_address.as_u64[1];
332           key0.pad = 0;
333           kv0.key[0] = key0.as_u64[0];
334           kv0.key[1] = key0.as_u64[1];
335           kv0.key[2] = key0.as_u64[2];
336
337           if (clib_bihash_search_24_8
338               (&dm->per_thread_data[thread_index].in2out, &kv0, &value0))
339             {
340               if (is_slow_path)
341                 {
342                   next0 =
343                     slow_path (dm, &key0, &s0, next0, &error0, thread_index);
344                   if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP))
345                     goto trace0;
346                 }
347               else
348                 {
349                   next0 = DSLITE_IN2OUT_NEXT_SLOWPATH;
350                   goto trace0;
351                 }
352             }
353           else
354             {
355               s0 =
356                 pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
357                                    value0.value);
358             }
359
360           old_addr0 = ip40->src_address.as_u32;
361           ip40->src_address = s0->out2in.addr;
362           new_addr0 = ip40->src_address.as_u32;
363           sum0 = ip40->checksum;
364           sum0 =
365             ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
366                             src_address);
367           ip40->checksum = ip_csum_fold (sum0);
368           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
369             {
370               old_port0 = tcp0->src_port;
371               tcp0->src_port = s0->out2in.port;
372               new_port0 = tcp0->src_port;
373
374               sum0 = tcp0->checksum;
375               sum0 =
376                 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
377                                 dst_address);
378               sum0 =
379                 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
380                                 length);
381               tcp0->checksum = ip_csum_fold (sum0);
382             }
383           else
384             {
385               old_port0 = udp0->src_port;
386               udp0->src_port = s0->out2in.port;
387               udp0->checksum = 0;
388             }
389
390         accounting0:
391           /* Accounting */
392           s0->last_heard = now;
393           s0->total_pkts++;
394           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
395           /* Per-B4 LRU list maintenance */
396           clib_dlist_remove (dm->per_thread_data[thread_index].list_pool,
397                              s0->per_b4_index);
398           clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
399                               s0->per_b4_list_head_index, s0->per_b4_index);
400
401           ip40->tos =
402             (clib_net_to_host_u32
403              (ip60->ip_version_traffic_class_and_flow_label) & 0x0ff00000) >>
404             20;
405           vlib_buffer_advance (b0, sizeof (ip6_header_t));
406
407         trace0:
408           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
409                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
410             {
411               dslite_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
412               t->next_index = next0;
413               t->session_index = ~0;
414               if (s0)
415                 t->session_index =
416                   s0 - dm->per_thread_data[thread_index].sessions;
417             }
418
419           b0->error = error_node->errors[error0];
420
421           /* verify speculative enqueue, maybe switch current next frame */
422           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
423                                            n_left_to_next, bi0, next0);
424         }
425       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
426     }
427
428   return frame->n_vectors;
429 }
430
431 static uword
432 dslite_in2out_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
433                        vlib_frame_t * frame)
434 {
435   return dslite_in2out_node_fn_inline (vm, node, frame, 0);
436 }
437
438 /* *INDENT-OFF* */
439 VLIB_REGISTER_NODE (dslite_in2out_node) = {
440   .function = dslite_in2out_node_fn,
441   .name = "dslite-in2out",
442   .vector_size = sizeof (u32),
443   .format_trace = format_dslite_trace,
444   .type = VLIB_NODE_TYPE_INTERNAL,
445   .n_errors = ARRAY_LEN (dslite_in2out_error_strings),
446   .error_strings = dslite_in2out_error_strings,
447   .n_next_nodes = DSLITE_IN2OUT_N_NEXT,
448   /* edit / add dispositions here */
449   .next_nodes = {
450     [DSLITE_IN2OUT_NEXT_DROP] = "error-drop",
451     [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
452     [DSLITE_IN2OUT_NEXT_IP6_ICMP] = "ip6-icmp-input",
453     [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
454   },
455 };
456 /* *INDENT-ON* */
457
458 VLIB_NODE_FUNCTION_MULTIARCH (dslite_in2out_node, dslite_in2out_node_fn);
459
460 static uword
461 dslite_in2out_slowpath_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
462                                 vlib_frame_t * frame)
463 {
464   return dslite_in2out_node_fn_inline (vm, node, frame, 1);
465 }
466
467 /* *INDENT-OFF* */
468 VLIB_REGISTER_NODE (dslite_in2out_slowpath_node) = {
469   .function = dslite_in2out_slowpath_node_fn,
470   .name = "dslite-in2out-slowpath",
471   .vector_size = sizeof (u32),
472   .format_trace = format_dslite_trace,
473   .type = VLIB_NODE_TYPE_INTERNAL,
474   .n_errors = ARRAY_LEN (dslite_in2out_error_strings),
475   .error_strings = dslite_in2out_error_strings,
476   .n_next_nodes = DSLITE_IN2OUT_N_NEXT,
477   /* edit / add dispositions here */
478   .next_nodes = {
479     [DSLITE_IN2OUT_NEXT_DROP] = "error-drop",
480     [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
481     [DSLITE_IN2OUT_NEXT_IP6_ICMP] = "ip6-lookup",
482     [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
483   },
484 };
485 /* *INDENT-ON* */
486
487 VLIB_NODE_FUNCTION_MULTIARCH (dslite_in2out_slowpath_node,
488                               dslite_in2out_slowpath_node_fn);
489
490 /*
491  * fd.io coding-style-patch-verification: ON
492  *
493  * Local Variables:
494  * eval: (c-set-style "gnu")
495  * End:
496  */