nat: cleanup & reorganization
[vpp.git] / src / plugins / nat / dslite / dslite_in2out.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <nat/dslite/dslite.h>
16 #include <nat/lib/nat_syslog.h>
17
/*
 * Next-node dispositions used by the DS-Lite in2out nodes.  The values are
 * the indices used in the .next_nodes tables of the node registrations in
 * this file; DSLITE_IN2OUT_N_NEXT is the number of dispositions.
 */
typedef enum
{
  DSLITE_IN2OUT_NEXT_IP4_LOOKUP = 0,
  DSLITE_IN2OUT_NEXT_IP6_ICMP = 1,
  DSLITE_IN2OUT_NEXT_DROP = 2,
  DSLITE_IN2OUT_NEXT_SLOWPATH = 3,
  DSLITE_IN2OUT_N_NEXT = 4,
} dslite_in2out_next_t;
26
27 static char *dslite_in2out_error_strings[] = {
28 #define _(sym,string) string,
29   foreach_dslite_error
30 #undef _
31 };
32
33 static u32
34 slow_path (dslite_main_t * dm, dslite_session_key_t * in2out_key,
35            dslite_session_t ** sp, u32 next, u8 * error, u32 thread_index)
36 {
37   dslite_b4_t *b4;
38   clib_bihash_kv_16_8_t b4_kv, b4_value;
39   clib_bihash_kv_24_8_t in2out_kv;
40   clib_bihash_kv_8_8_t out2in_kv;
41   dlist_elt_t *head_elt, *oldest_elt, *elt;
42   u32 oldest_index;
43   dslite_session_t *s;
44   nat_session_key_t out2in_key;
45   nat_ip4_addr_port_t addr_port;
46   u32 b4_index;
47
48   out2in_key.protocol = in2out_key->proto;
49   out2in_key.fib_index = 0;
50
51   b4_kv.key[0] = in2out_key->softwire_id.as_u64[0];
52   b4_kv.key[1] = in2out_key->softwire_id.as_u64[1];
53
54   if (clib_bihash_search_16_8
55       (&dm->per_thread_data[thread_index].b4_hash, &b4_kv, &b4_value))
56     {
57       pool_get (dm->per_thread_data[thread_index].b4s, b4);
58       clib_memset (b4, 0, sizeof (*b4));
59       b4->addr.as_u64[0] = in2out_key->softwire_id.as_u64[0];
60       b4->addr.as_u64[1] = in2out_key->softwire_id.as_u64[1];
61
62       pool_get (dm->per_thread_data[thread_index].list_pool, head_elt);
63       b4->sessions_per_b4_list_head_index =
64         head_elt - dm->per_thread_data[thread_index].list_pool;
65       clib_dlist_init (dm->per_thread_data[thread_index].list_pool,
66                        b4->sessions_per_b4_list_head_index);
67
68       b4_index = b4_kv.value = b4 - dm->per_thread_data[thread_index].b4s;
69       clib_bihash_add_del_16_8 (&dm->per_thread_data[thread_index].b4_hash,
70                                 &b4_kv, 1);
71
72       vlib_set_simple_counter (&dm->total_b4s, thread_index, 0,
73                                pool_elts (dm->
74                                           per_thread_data[thread_index].b4s));
75     }
76   else
77     {
78       b4_index = b4_value.value;
79       b4 =
80         pool_elt_at_index (dm->per_thread_data[thread_index].b4s,
81                            b4_value.value);
82     }
83
84   //TODO configurable quota
85   if (b4->nsessions >= 1000)
86     {
87       oldest_index =
88         clib_dlist_remove_head (dm->per_thread_data[thread_index].list_pool,
89                                 b4->sessions_per_b4_list_head_index);
90       ASSERT (oldest_index != ~0);
91       clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
92                           b4->sessions_per_b4_list_head_index, oldest_index);
93       oldest_elt =
94         pool_elt_at_index (dm->per_thread_data[thread_index].list_pool,
95                            oldest_index);
96       s =
97         pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
98                            oldest_elt->value);
99
100       in2out_kv.key[0] = s->in2out.as_u64[0];
101       in2out_kv.key[1] = s->in2out.as_u64[1];
102       in2out_kv.key[2] = s->in2out.as_u64[2];
103       clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out,
104                                 &in2out_kv, 0);
105       out2in_kv.key = s->out2in.as_u64;
106       clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in,
107                                &out2in_kv, 0);
108
109       addr_port.addr.as_u32 = s->out2in.addr.as_u32;
110       addr_port.port = s->out2in.port;
111
112       nat_free_ip4_addr_and_port (&dm->pool, thread_index,
113                                   s->out2in.protocol, &addr_port);
114
115       nat_syslog_dslite_apmdel (b4_index, &s->in2out.softwire_id,
116                                 &s->in2out.addr, s->in2out.port,
117                                 &s->out2in.addr, s->out2in.port,
118                                 s->in2out.proto);
119
120       if (nat_alloc_ip4_addr_and_port
121           (&dm->pool, 0, thread_index, thread_index,
122            dm->port_per_thread, out2in_key.protocol, &addr_port))
123         ASSERT (0);
124
125       out2in_key.addr.as_u32 = addr_port.addr.as_u32;
126       out2in_key.port = addr_port.port;
127     }
128   else
129     {
130       if (nat_alloc_ip4_addr_and_port
131           (&dm->pool, 0, thread_index, thread_index,
132            dm->port_per_thread, out2in_key.protocol, &addr_port))
133         {
134           *error = DSLITE_ERROR_OUT_OF_PORTS;
135           return DSLITE_IN2OUT_NEXT_DROP;
136         }
137
138       out2in_key.addr.as_u32 = addr_port.addr.as_u32;
139       out2in_key.port = addr_port.port;
140
141       pool_get (dm->per_thread_data[thread_index].sessions, s);
142       clib_memset (s, 0, sizeof (*s));
143       b4->nsessions++;
144
145       pool_get (dm->per_thread_data[thread_index].list_pool, elt);
146       clib_dlist_init (dm->per_thread_data[thread_index].list_pool,
147                        elt - dm->per_thread_data[thread_index].list_pool);
148       elt->value = s - dm->per_thread_data[thread_index].sessions;
149       s->per_b4_index = elt - dm->per_thread_data[thread_index].list_pool;
150       s->per_b4_list_head_index = b4->sessions_per_b4_list_head_index;
151       clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
152                           s->per_b4_list_head_index,
153                           elt - dm->per_thread_data[thread_index].list_pool);
154
155       vlib_set_simple_counter (&dm->total_sessions, thread_index, 0,
156                                pool_elts (dm->per_thread_data
157                                           [thread_index].sessions));
158     }
159
160   s->in2out = *in2out_key;
161   s->out2in = out2in_key;
162   *sp = s;
163   in2out_kv.key[0] = s->in2out.as_u64[0];
164   in2out_kv.key[1] = s->in2out.as_u64[1];
165   in2out_kv.key[2] = s->in2out.as_u64[2];
166   in2out_kv.value = s - dm->per_thread_data[thread_index].sessions;
167   clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out,
168                             &in2out_kv, 1);
169   out2in_kv.key = s->out2in.as_u64;
170   out2in_kv.value = s - dm->per_thread_data[thread_index].sessions;
171   clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in,
172                            &out2in_kv, 1);
173
174   nat_syslog_dslite_apmadd (b4_index, &s->in2out.softwire_id, &s->in2out.addr,
175                             s->in2out.port, &s->out2in.addr, s->out2in.port,
176                             s->in2out.proto);
177
178   return next;
179 }
180
181 static inline u32
182 dslite_icmp_in2out (dslite_main_t * dm, ip6_header_t * ip6,
183                     ip4_header_t * ip4, dslite_session_t ** sp, u32 next,
184                     u8 * error, u32 thread_index)
185 {
186   dslite_session_t *s = 0;
187   icmp46_header_t *icmp = ip4_next_header (ip4);
188   clib_bihash_kv_24_8_t kv, value;
189   dslite_session_key_t key;
190   u32 n = next;
191   echo_header_t *echo;
192   u32 new_addr, old_addr;
193   u16 old_id, new_id;
194   ip_csum_t sum;
195
196   if (icmp_type_is_error_message (icmp->type))
197     {
198       n = DSLITE_IN2OUT_NEXT_DROP;
199       *error = DSLITE_ERROR_BAD_ICMP_TYPE;
200       goto done;
201     }
202
203   echo = (echo_header_t *) (icmp + 1);
204
205   key.addr = ip4->src_address;
206   key.port = echo->identifier;
207   key.proto = NAT_PROTOCOL_ICMP;
208   key.softwire_id.as_u64[0] = ip6->src_address.as_u64[0];
209   key.softwire_id.as_u64[1] = ip6->src_address.as_u64[1];
210   key.pad = 0;
211   kv.key[0] = key.as_u64[0];
212   kv.key[1] = key.as_u64[1];
213   kv.key[2] = key.as_u64[2];
214
215   if (clib_bihash_search_24_8
216       (&dm->per_thread_data[thread_index].in2out, &kv, &value))
217     {
218       n = slow_path (dm, &key, &s, next, error, thread_index);
219       if (PREDICT_FALSE (next == DSLITE_IN2OUT_NEXT_DROP))
220         goto done;
221     }
222   else
223     {
224       s =
225         pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
226                            value.value);
227     }
228
229   old_addr = ip4->src_address.as_u32;
230   ip4->src_address = s->out2in.addr;
231   new_addr = ip4->src_address.as_u32;
232   sum = ip4->checksum;
233   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
234   ip4->checksum = ip_csum_fold (sum);
235
236   old_id = echo->identifier;
237   echo->identifier = new_id = s->out2in.port;
238   sum = icmp->checksum;
239   sum = ip_csum_update (sum, old_id, new_id, echo_header_t, identifier);
240   icmp->checksum = ip_csum_fold (sum);
241
242 done:
243   *sp = s;
244   return n;
245 }
246
247 static inline uword
248 dslite_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
249                               vlib_frame_t * frame, u8 is_slow_path)
250 {
251   u32 n_left_from, *from, *to_next;
252   dslite_in2out_next_t next_index;
253   u32 node_index;
254   vlib_node_runtime_t *error_node;
255   u32 thread_index = vm->thread_index;
256   f64 now = vlib_time_now (vm);
257   dslite_main_t *dm = &dslite_main;
258
259   node_index =
260     is_slow_path ? dm->dslite_in2out_slowpath_node_index :
261     dm->dslite_in2out_node_index;
262
263   error_node = vlib_node_get_runtime (vm, node_index);
264
265   from = vlib_frame_vector_args (frame);
266   n_left_from = frame->n_vectors;
267   next_index = node->cached_next_index;
268
269   while (n_left_from > 0)
270     {
271       u32 n_left_to_next;
272
273       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
274
275       while (n_left_from > 0 && n_left_to_next > 0)
276         {
277           u32 bi0;
278           vlib_buffer_t *b0;
279           u32 next0 = DSLITE_IN2OUT_NEXT_IP4_LOOKUP;
280           ip4_header_t *ip40;
281           ip6_header_t *ip60;
282           u8 error0 = DSLITE_ERROR_IN2OUT;
283           u32 proto0;
284           dslite_session_t *s0 = 0;
285           clib_bihash_kv_24_8_t kv0, value0;
286           dslite_session_key_t key0;
287           udp_header_t *udp0;
288           tcp_header_t *tcp0;
289           ip_csum_t sum0;
290           u32 new_addr0, old_addr0;
291           u16 old_port0, new_port0;
292
293           /* speculatively enqueue b0 to the current next frame */
294           bi0 = from[0];
295           to_next[0] = bi0;
296           from += 1;
297           to_next += 1;
298           n_left_from -= 1;
299           n_left_to_next -= 1;
300
301           b0 = vlib_get_buffer (vm, bi0);
302           ip60 = vlib_buffer_get_current (b0);
303
304           if (PREDICT_FALSE (ip60->protocol != IP_PROTOCOL_IP_IN_IP))
305             {
306               if (ip60->protocol == IP_PROTOCOL_ICMP6)
307                 {
308                   next0 = DSLITE_IN2OUT_NEXT_IP6_ICMP;
309                   goto trace0;
310                 }
311               error0 = DSLITE_ERROR_BAD_IP6_PROTOCOL;
312               next0 = DSLITE_IN2OUT_NEXT_DROP;
313               goto trace0;
314             }
315
316           ip40 = vlib_buffer_get_current (b0) + sizeof (ip6_header_t);
317           proto0 = ip_proto_to_nat_proto (ip40->protocol);
318
319           if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
320             {
321               error0 = DSLITE_ERROR_UNSUPPORTED_PROTOCOL;
322               next0 = DSLITE_IN2OUT_NEXT_DROP;
323               goto trace0;
324             }
325
326           udp0 = ip4_next_header (ip40);
327           tcp0 = (tcp_header_t *) udp0;
328
329           if (is_slow_path)
330             {
331               if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
332                 {
333                   next0 =
334                     dslite_icmp_in2out (dm, ip60, ip40, &s0, next0, &error0,
335                                         thread_index);
336                   if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP))
337                     goto trace0;
338
339                   goto accounting0;
340                 }
341             }
342           else
343             {
344               if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
345                 {
346                   next0 = DSLITE_IN2OUT_NEXT_SLOWPATH;
347                   goto trace0;
348                 }
349             }
350
351           key0.addr = ip40->src_address;
352           key0.port = udp0->src_port;
353           key0.proto = proto0;
354           key0.softwire_id.as_u64[0] = ip60->src_address.as_u64[0];
355           key0.softwire_id.as_u64[1] = ip60->src_address.as_u64[1];
356           key0.pad = 0;
357           kv0.key[0] = key0.as_u64[0];
358           kv0.key[1] = key0.as_u64[1];
359           kv0.key[2] = key0.as_u64[2];
360
361           if (clib_bihash_search_24_8
362               (&dm->per_thread_data[thread_index].in2out, &kv0, &value0))
363             {
364               if (is_slow_path)
365                 {
366                   next0 =
367                     slow_path (dm, &key0, &s0, next0, &error0, thread_index);
368                   if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP))
369                     goto trace0;
370                 }
371               else
372                 {
373                   next0 = DSLITE_IN2OUT_NEXT_SLOWPATH;
374                   goto trace0;
375                 }
376             }
377           else
378             {
379               s0 =
380                 pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
381                                    value0.value);
382             }
383
384           old_addr0 = ip40->src_address.as_u32;
385           ip40->src_address = s0->out2in.addr;
386           new_addr0 = ip40->src_address.as_u32;
387           sum0 = ip40->checksum;
388           sum0 =
389             ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
390                             src_address);
391           ip40->checksum = ip_csum_fold (sum0);
392           if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
393             {
394               old_port0 = tcp0->src_port;
395               tcp0->src_port = s0->out2in.port;
396               new_port0 = tcp0->src_port;
397
398               sum0 = tcp0->checksum;
399               sum0 =
400                 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
401                                 dst_address);
402               sum0 =
403                 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
404                                 length);
405               //mss_clamping (&dslite_main, tcp0, &sum0);
406               tcp0->checksum = ip_csum_fold (sum0);
407             }
408           else
409             {
410               old_port0 = udp0->src_port;
411               udp0->src_port = s0->out2in.port;
412               udp0->checksum = 0;
413             }
414
415         accounting0:
416           /* Accounting */
417           s0->last_heard = now;
418           s0->total_pkts++;
419           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
420           /* Per-B4 LRU list maintenance */
421           clib_dlist_remove (dm->per_thread_data[thread_index].list_pool,
422                              s0->per_b4_index);
423           clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
424                               s0->per_b4_list_head_index, s0->per_b4_index);
425
426           ip40->tos =
427             (clib_net_to_host_u32
428              (ip60->ip_version_traffic_class_and_flow_label) & 0x0ff00000) >>
429             20;
430           vlib_buffer_advance (b0, sizeof (ip6_header_t));
431
432         trace0:
433           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
434                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
435             {
436               dslite_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
437               t->next_index = next0;
438               t->session_index = ~0;
439               if (s0)
440                 t->session_index =
441                   s0 - dm->per_thread_data[thread_index].sessions;
442             }
443
444           b0->error = error_node->errors[error0];
445
446           /* verify speculative enqueue, maybe switch current next frame */
447           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
448                                            n_left_to_next, bi0, next0);
449         }
450       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
451     }
452
453   return frame->n_vectors;
454 }
455
456 VLIB_NODE_FN (dslite_in2out_node) (vlib_main_t * vm,
457                                    vlib_node_runtime_t * node,
458                                    vlib_frame_t * frame)
459 {
460   return dslite_in2out_node_fn_inline (vm, node, frame, 0);
461 }
462
463 /* *INDENT-OFF* */
464 VLIB_REGISTER_NODE (dslite_in2out_node) = {
465   .name = "dslite-in2out",
466   .vector_size = sizeof (u32),
467   .format_trace = format_dslite_trace,
468   .type = VLIB_NODE_TYPE_INTERNAL,
469   .n_errors = ARRAY_LEN (dslite_in2out_error_strings),
470   .error_strings = dslite_in2out_error_strings,
471   .n_next_nodes = DSLITE_IN2OUT_N_NEXT,
472   /* edit / add dispositions here */
473   .next_nodes = {
474     [DSLITE_IN2OUT_NEXT_DROP] = "error-drop",
475     [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
476     [DSLITE_IN2OUT_NEXT_IP6_ICMP] = "ip6-icmp-input",
477     [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
478   },
479 };
480 /* *INDENT-ON* */
481
482 VLIB_NODE_FN (dslite_in2out_slowpath_node) (vlib_main_t * vm,
483                                             vlib_node_runtime_t * node,
484                                             vlib_frame_t * frame)
485 {
486   return dslite_in2out_node_fn_inline (vm, node, frame, 1);
487 }
488
489 /* *INDENT-OFF* */
490 VLIB_REGISTER_NODE (dslite_in2out_slowpath_node) = {
491   .name = "dslite-in2out-slowpath",
492   .vector_size = sizeof (u32),
493   .format_trace = format_dslite_trace,
494   .type = VLIB_NODE_TYPE_INTERNAL,
495   .n_errors = ARRAY_LEN (dslite_in2out_error_strings),
496   .error_strings = dslite_in2out_error_strings,
497   .n_next_nodes = DSLITE_IN2OUT_N_NEXT,
498   /* edit / add dispositions here */
499   .next_nodes = {
500     [DSLITE_IN2OUT_NEXT_DROP] = "error-drop",
501     [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
502     [DSLITE_IN2OUT_NEXT_IP6_ICMP] = "ip6-lookup",
503     [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
504   },
505 };
506 /* *INDENT-ON* */
507
508 /*
509  * fd.io coding-style-patch-verification: ON
510  *
511  * Local Variables:
512  * eval: (c-set-style "gnu")
513  * End:
514  */