d1ac17509c60bdffce643dac08dd1210f0efaa1f
[vpp.git] / src / plugins / nat / dslite / dslite_in2out.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <nat/dslite/dslite.h>
16 #include <nat/nat_inlines.h>
17 #include <nat/nat_syslog.h>
18
/**
 * Next-node dispositions used by both the dslite-in2out and the
 * dslite-in2out-slowpath nodes.  The values index the .next_nodes tables
 * of the two node registrations.
 */
typedef enum
{
  /* Translated IPv4 packet, default disposition */
  DSLITE_IN2OUT_NEXT_IP4_LOOKUP = 0,
  /* IPv6 packet whose next header is ICMPv6 (not a softwire packet) */
  DSLITE_IN2OUT_NEXT_IP6_ICMP,
  /* Packet is discarded; b->error carries the reason */
  DSLITE_IN2OUT_NEXT_DROP,
  /* Fast path hands the packet over (ICMP or no session found) */
  DSLITE_IN2OUT_NEXT_SLOWPATH,
  /* Number of dispositions, must stay last */
  DSLITE_IN2OUT_N_NEXT,
} dslite_in2out_next_t;
27
28 static char *dslite_in2out_error_strings[] = {
29 #define _(sym,string) string,
30   foreach_dslite_error
31 #undef _
32 };
33
34 static u32
35 slow_path (dslite_main_t * dm, dslite_session_key_t * in2out_key,
36            dslite_session_t ** sp, u32 next, u8 * error, u32 thread_index)
37 {
38   dslite_b4_t *b4;
39   clib_bihash_kv_16_8_t b4_kv, b4_value;
40   clib_bihash_kv_24_8_t in2out_kv;
41   clib_bihash_kv_8_8_t out2in_kv;
42   dlist_elt_t *head_elt, *oldest_elt, *elt;
43   u32 oldest_index;
44   dslite_session_t *s;
45   snat_session_key_t out2in_key;
46   nat_ip4_addr_port_t addr_port;
47   u32 b4_index;
48
49   out2in_key.protocol = in2out_key->proto;
50   out2in_key.fib_index = 0;
51
52   b4_kv.key[0] = in2out_key->softwire_id.as_u64[0];
53   b4_kv.key[1] = in2out_key->softwire_id.as_u64[1];
54
55   if (clib_bihash_search_16_8
56       (&dm->per_thread_data[thread_index].b4_hash, &b4_kv, &b4_value))
57     {
58       pool_get (dm->per_thread_data[thread_index].b4s, b4);
59       clib_memset (b4, 0, sizeof (*b4));
60       b4->addr.as_u64[0] = in2out_key->softwire_id.as_u64[0];
61       b4->addr.as_u64[1] = in2out_key->softwire_id.as_u64[1];
62
63       pool_get (dm->per_thread_data[thread_index].list_pool, head_elt);
64       b4->sessions_per_b4_list_head_index =
65         head_elt - dm->per_thread_data[thread_index].list_pool;
66       clib_dlist_init (dm->per_thread_data[thread_index].list_pool,
67                        b4->sessions_per_b4_list_head_index);
68
69       b4_index = b4_kv.value = b4 - dm->per_thread_data[thread_index].b4s;
70       clib_bihash_add_del_16_8 (&dm->per_thread_data[thread_index].b4_hash,
71                                 &b4_kv, 1);
72
73       vlib_set_simple_counter (&dm->total_b4s, thread_index, 0,
74                                pool_elts (dm->
75                                           per_thread_data[thread_index].b4s));
76     }
77   else
78     {
79       b4_index = b4_value.value;
80       b4 =
81         pool_elt_at_index (dm->per_thread_data[thread_index].b4s,
82                            b4_value.value);
83     }
84
85   //TODO configurable quota
86   if (b4->nsessions >= 1000)
87     {
88       oldest_index =
89         clib_dlist_remove_head (dm->per_thread_data[thread_index].list_pool,
90                                 b4->sessions_per_b4_list_head_index);
91       ASSERT (oldest_index != ~0);
92       clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
93                           b4->sessions_per_b4_list_head_index, oldest_index);
94       oldest_elt =
95         pool_elt_at_index (dm->per_thread_data[thread_index].list_pool,
96                            oldest_index);
97       s =
98         pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
99                            oldest_elt->value);
100
101       in2out_kv.key[0] = s->in2out.as_u64[0];
102       in2out_kv.key[1] = s->in2out.as_u64[1];
103       in2out_kv.key[2] = s->in2out.as_u64[2];
104       clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out,
105                                 &in2out_kv, 0);
106       out2in_kv.key = s->out2in.as_u64;
107       clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in,
108                                &out2in_kv, 0);
109
110       addr_port.addr.as_u32 = s->out2in.addr.as_u32;
111       addr_port.port = s->out2in.port;
112
113       nat_free_ip4_addr_and_port (&dm->pool, thread_index,
114                                   s->out2in.protocol, &addr_port);
115
116       nat_syslog_dslite_apmdel (b4_index, &s->in2out.softwire_id,
117                                 &s->in2out.addr, s->in2out.port,
118                                 &s->out2in.addr, s->out2in.port,
119                                 s->in2out.proto);
120
121       if (nat_alloc_ip4_addr_and_port
122           (&dm->pool, 0, thread_index, thread_index,
123            dm->port_per_thread, out2in_key.protocol, &addr_port))
124         ASSERT (0);
125
126       out2in_key.addr.as_u32 = addr_port.addr.as_u32;
127       out2in_key.port = addr_port.port;
128     }
129   else
130     {
131       if (nat_alloc_ip4_addr_and_port
132           (&dm->pool, 0, thread_index, thread_index,
133            dm->port_per_thread, out2in_key.protocol, &addr_port))
134         {
135           *error = DSLITE_ERROR_OUT_OF_PORTS;
136           return DSLITE_IN2OUT_NEXT_DROP;
137         }
138
139       out2in_key.addr.as_u32 = addr_port.addr.as_u32;
140       out2in_key.port = addr_port.port;
141
142       pool_get (dm->per_thread_data[thread_index].sessions, s);
143       clib_memset (s, 0, sizeof (*s));
144       b4->nsessions++;
145
146       pool_get (dm->per_thread_data[thread_index].list_pool, elt);
147       clib_dlist_init (dm->per_thread_data[thread_index].list_pool,
148                        elt - dm->per_thread_data[thread_index].list_pool);
149       elt->value = s - dm->per_thread_data[thread_index].sessions;
150       s->per_b4_index = elt - dm->per_thread_data[thread_index].list_pool;
151       s->per_b4_list_head_index = b4->sessions_per_b4_list_head_index;
152       clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
153                           s->per_b4_list_head_index,
154                           elt - dm->per_thread_data[thread_index].list_pool);
155
156       vlib_set_simple_counter (&dm->total_sessions, thread_index, 0,
157                                pool_elts (dm->per_thread_data
158                                           [thread_index].sessions));
159     }
160
161   s->in2out = *in2out_key;
162   s->out2in = out2in_key;
163   *sp = s;
164   in2out_kv.key[0] = s->in2out.as_u64[0];
165   in2out_kv.key[1] = s->in2out.as_u64[1];
166   in2out_kv.key[2] = s->in2out.as_u64[2];
167   in2out_kv.value = s - dm->per_thread_data[thread_index].sessions;
168   clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out,
169                             &in2out_kv, 1);
170   out2in_kv.key = s->out2in.as_u64;
171   out2in_kv.value = s - dm->per_thread_data[thread_index].sessions;
172   clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in,
173                            &out2in_kv, 1);
174
175   nat_syslog_dslite_apmadd (b4_index, &s->in2out.softwire_id, &s->in2out.addr,
176                             s->in2out.port, &s->out2in.addr, s->out2in.port,
177                             s->in2out.proto);
178
179   return next;
180 }
181
182 static inline u32
183 dslite_icmp_in2out (dslite_main_t * dm, ip6_header_t * ip6,
184                     ip4_header_t * ip4, dslite_session_t ** sp, u32 next,
185                     u8 * error, u32 thread_index)
186 {
187   dslite_session_t *s = 0;
188   icmp46_header_t *icmp = ip4_next_header (ip4);
189   clib_bihash_kv_24_8_t kv, value;
190   dslite_session_key_t key;
191   u32 n = next;
192   icmp_echo_header_t *echo;
193   u32 new_addr, old_addr;
194   u16 old_id, new_id;
195   ip_csum_t sum;
196
197   if (icmp_type_is_error_message (icmp->type))
198     {
199       n = DSLITE_IN2OUT_NEXT_DROP;
200       *error = DSLITE_ERROR_BAD_ICMP_TYPE;
201       goto done;
202     }
203
204   echo = (icmp_echo_header_t *) (icmp + 1);
205
206   key.addr = ip4->src_address;
207   key.port = echo->identifier;
208   key.proto = SNAT_PROTOCOL_ICMP;
209   key.softwire_id.as_u64[0] = ip6->src_address.as_u64[0];
210   key.softwire_id.as_u64[1] = ip6->src_address.as_u64[1];
211   key.pad = 0;
212   kv.key[0] = key.as_u64[0];
213   kv.key[1] = key.as_u64[1];
214   kv.key[2] = key.as_u64[2];
215
216   if (clib_bihash_search_24_8
217       (&dm->per_thread_data[thread_index].in2out, &kv, &value))
218     {
219       n = slow_path (dm, &key, &s, next, error, thread_index);
220       if (PREDICT_FALSE (next == DSLITE_IN2OUT_NEXT_DROP))
221         goto done;
222     }
223   else
224     {
225       s =
226         pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
227                            value.value);
228     }
229
230   old_addr = ip4->src_address.as_u32;
231   ip4->src_address = s->out2in.addr;
232   new_addr = ip4->src_address.as_u32;
233   sum = ip4->checksum;
234   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
235   ip4->checksum = ip_csum_fold (sum);
236
237   old_id = echo->identifier;
238   echo->identifier = new_id = s->out2in.port;
239   sum = icmp->checksum;
240   sum = ip_csum_update (sum, old_id, new_id, icmp_echo_header_t, identifier);
241   icmp->checksum = ip_csum_fold (sum);
242
243 done:
244   *sp = s;
245   return n;
246 }
247
248 static inline uword
249 dslite_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
250                               vlib_frame_t * frame, u8 is_slow_path)
251 {
252   u32 n_left_from, *from, *to_next;
253   dslite_in2out_next_t next_index;
254   u32 node_index;
255   vlib_node_runtime_t *error_node;
256   u32 thread_index = vm->thread_index;
257   f64 now = vlib_time_now (vm);
258   dslite_main_t *dm = &dslite_main;
259
260   node_index =
261     is_slow_path ? dm->dslite_in2out_slowpath_node_index :
262     dm->dslite_in2out_node_index;
263
264   error_node = vlib_node_get_runtime (vm, node_index);
265
266   from = vlib_frame_vector_args (frame);
267   n_left_from = frame->n_vectors;
268   next_index = node->cached_next_index;
269
270   while (n_left_from > 0)
271     {
272       u32 n_left_to_next;
273
274       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
275
276       while (n_left_from > 0 && n_left_to_next > 0)
277         {
278           u32 bi0;
279           vlib_buffer_t *b0;
280           u32 next0 = DSLITE_IN2OUT_NEXT_IP4_LOOKUP;
281           ip4_header_t *ip40;
282           ip6_header_t *ip60;
283           u8 error0 = DSLITE_ERROR_IN2OUT;
284           u32 proto0;
285           dslite_session_t *s0 = 0;
286           clib_bihash_kv_24_8_t kv0, value0;
287           dslite_session_key_t key0;
288           udp_header_t *udp0;
289           tcp_header_t *tcp0;
290           ip_csum_t sum0;
291           u32 new_addr0, old_addr0;
292           u16 old_port0, new_port0;
293
294           /* speculatively enqueue b0 to the current next frame */
295           bi0 = from[0];
296           to_next[0] = bi0;
297           from += 1;
298           to_next += 1;
299           n_left_from -= 1;
300           n_left_to_next -= 1;
301
302           b0 = vlib_get_buffer (vm, bi0);
303           ip60 = vlib_buffer_get_current (b0);
304
305           if (PREDICT_FALSE (ip60->protocol != IP_PROTOCOL_IP_IN_IP))
306             {
307               if (ip60->protocol == IP_PROTOCOL_ICMP6)
308                 {
309                   next0 = DSLITE_IN2OUT_NEXT_IP6_ICMP;
310                   goto trace0;
311                 }
312               error0 = DSLITE_ERROR_BAD_IP6_PROTOCOL;
313               next0 = DSLITE_IN2OUT_NEXT_DROP;
314               goto trace0;
315             }
316
317           ip40 = vlib_buffer_get_current (b0) + sizeof (ip6_header_t);
318           proto0 = ip_proto_to_snat_proto (ip40->protocol);
319
320           if (PREDICT_FALSE (proto0 == ~0))
321             {
322               error0 = DSLITE_ERROR_UNSUPPORTED_PROTOCOL;
323               next0 = DSLITE_IN2OUT_NEXT_DROP;
324               goto trace0;
325             }
326
327           udp0 = ip4_next_header (ip40);
328           tcp0 = (tcp_header_t *) udp0;
329
330           if (is_slow_path)
331             {
332               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
333                 {
334                   next0 =
335                     dslite_icmp_in2out (dm, ip60, ip40, &s0, next0, &error0,
336                                         thread_index);
337                   if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP))
338                     goto trace0;
339
340                   goto accounting0;
341                 }
342             }
343           else
344             {
345               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
346                 {
347                   next0 = DSLITE_IN2OUT_NEXT_SLOWPATH;
348                   goto trace0;
349                 }
350             }
351
352           key0.addr = ip40->src_address;
353           key0.port = udp0->src_port;
354           key0.proto = proto0;
355           key0.softwire_id.as_u64[0] = ip60->src_address.as_u64[0];
356           key0.softwire_id.as_u64[1] = ip60->src_address.as_u64[1];
357           key0.pad = 0;
358           kv0.key[0] = key0.as_u64[0];
359           kv0.key[1] = key0.as_u64[1];
360           kv0.key[2] = key0.as_u64[2];
361
362           if (clib_bihash_search_24_8
363               (&dm->per_thread_data[thread_index].in2out, &kv0, &value0))
364             {
365               if (is_slow_path)
366                 {
367                   next0 =
368                     slow_path (dm, &key0, &s0, next0, &error0, thread_index);
369                   if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP))
370                     goto trace0;
371                 }
372               else
373                 {
374                   next0 = DSLITE_IN2OUT_NEXT_SLOWPATH;
375                   goto trace0;
376                 }
377             }
378           else
379             {
380               s0 =
381                 pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
382                                    value0.value);
383             }
384
385           old_addr0 = ip40->src_address.as_u32;
386           ip40->src_address = s0->out2in.addr;
387           new_addr0 = ip40->src_address.as_u32;
388           sum0 = ip40->checksum;
389           sum0 =
390             ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
391                             src_address);
392           ip40->checksum = ip_csum_fold (sum0);
393           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
394             {
395               old_port0 = tcp0->src_port;
396               tcp0->src_port = s0->out2in.port;
397               new_port0 = tcp0->src_port;
398
399               sum0 = tcp0->checksum;
400               sum0 =
401                 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
402                                 dst_address);
403               sum0 =
404                 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
405                                 length);
406               //mss_clamping (&dslite_main, tcp0, &sum0);
407               tcp0->checksum = ip_csum_fold (sum0);
408             }
409           else
410             {
411               old_port0 = udp0->src_port;
412               udp0->src_port = s0->out2in.port;
413               udp0->checksum = 0;
414             }
415
416         accounting0:
417           /* Accounting */
418           s0->last_heard = now;
419           s0->total_pkts++;
420           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
421           /* Per-B4 LRU list maintenance */
422           clib_dlist_remove (dm->per_thread_data[thread_index].list_pool,
423                              s0->per_b4_index);
424           clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
425                               s0->per_b4_list_head_index, s0->per_b4_index);
426
427           ip40->tos =
428             (clib_net_to_host_u32
429              (ip60->ip_version_traffic_class_and_flow_label) & 0x0ff00000) >>
430             20;
431           vlib_buffer_advance (b0, sizeof (ip6_header_t));
432
433         trace0:
434           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
435                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
436             {
437               dslite_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
438               t->next_index = next0;
439               t->session_index = ~0;
440               if (s0)
441                 t->session_index =
442                   s0 - dm->per_thread_data[thread_index].sessions;
443             }
444
445           b0->error = error_node->errors[error0];
446
447           /* verify speculative enqueue, maybe switch current next frame */
448           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
449                                            n_left_to_next, bi0, next0);
450         }
451       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
452     }
453
454   return frame->n_vectors;
455 }
456
457 VLIB_NODE_FN (dslite_in2out_node) (vlib_main_t * vm,
458                                    vlib_node_runtime_t * node,
459                                    vlib_frame_t * frame)
460 {
461   return dslite_in2out_node_fn_inline (vm, node, frame, 0);
462 }
463
464 /* *INDENT-OFF* */
465 VLIB_REGISTER_NODE (dslite_in2out_node) = {
466   .name = "dslite-in2out",
467   .vector_size = sizeof (u32),
468   .format_trace = format_dslite_trace,
469   .type = VLIB_NODE_TYPE_INTERNAL,
470   .n_errors = ARRAY_LEN (dslite_in2out_error_strings),
471   .error_strings = dslite_in2out_error_strings,
472   .n_next_nodes = DSLITE_IN2OUT_N_NEXT,
473   /* edit / add dispositions here */
474   .next_nodes = {
475     [DSLITE_IN2OUT_NEXT_DROP] = "error-drop",
476     [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
477     [DSLITE_IN2OUT_NEXT_IP6_ICMP] = "ip6-icmp-input",
478     [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
479   },
480 };
481 /* *INDENT-ON* */
482
483 VLIB_NODE_FN (dslite_in2out_slowpath_node) (vlib_main_t * vm,
484                                             vlib_node_runtime_t * node,
485                                             vlib_frame_t * frame)
486 {
487   return dslite_in2out_node_fn_inline (vm, node, frame, 1);
488 }
489
490 /* *INDENT-OFF* */
491 VLIB_REGISTER_NODE (dslite_in2out_slowpath_node) = {
492   .name = "dslite-in2out-slowpath",
493   .vector_size = sizeof (u32),
494   .format_trace = format_dslite_trace,
495   .type = VLIB_NODE_TYPE_INTERNAL,
496   .n_errors = ARRAY_LEN (dslite_in2out_error_strings),
497   .error_strings = dslite_in2out_error_strings,
498   .n_next_nodes = DSLITE_IN2OUT_N_NEXT,
499   /* edit / add dispositions here */
500   .next_nodes = {
501     [DSLITE_IN2OUT_NEXT_DROP] = "error-drop",
502     [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
503     [DSLITE_IN2OUT_NEXT_IP6_ICMP] = "ip6-lookup",
504     [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
505   },
506 };
507 /* *INDENT-ON* */
508
509 /*
510  * fd.io coding-style-patch-verification: ON
511  *
512  * Local Variables:
513  * eval: (c-set-style "gnu")
514  * End:
515  */