NAT: total users and sessions gauges (VPP-1484)
[vpp.git] / src / plugins / nat / dslite_in2out.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <nat/dslite.h>
16 #include <nat/nat_inlines.h>
17 #include <nat/nat_syslog.h>
18
19 vlib_node_registration_t dslite_in2out_node;
20 vlib_node_registration_t dslite_in2out_slowpath_node;
21
22 typedef enum
23 {
24   DSLITE_IN2OUT_NEXT_IP4_LOOKUP,
25   DSLITE_IN2OUT_NEXT_IP6_ICMP,
26   DSLITE_IN2OUT_NEXT_DROP,
27   DSLITE_IN2OUT_NEXT_SLOWPATH,
28   DSLITE_IN2OUT_N_NEXT,
29 } dslite_in2out_next_t;
30
31 static char *dslite_in2out_error_strings[] = {
32 #define _(sym,string) string,
33   foreach_dslite_error
34 #undef _
35 };
36
37 static u32
38 slow_path (dslite_main_t * dm, dslite_session_key_t * in2out_key,
39            dslite_session_t ** sp, u32 next, u8 * error, u32 thread_index)
40 {
41   dslite_b4_t *b4;
42   clib_bihash_kv_16_8_t b4_kv, b4_value;
43   clib_bihash_kv_24_8_t in2out_kv;
44   clib_bihash_kv_8_8_t out2in_kv;
45   dlist_elt_t *head_elt, *oldest_elt, *elt;
46   u32 oldest_index;
47   dslite_session_t *s;
48   snat_session_key_t out2in_key;
49   u32 b4_index;
50
51   out2in_key.protocol = in2out_key->proto;
52   out2in_key.fib_index = 0;
53
54   b4_kv.key[0] = in2out_key->softwire_id.as_u64[0];
55   b4_kv.key[1] = in2out_key->softwire_id.as_u64[1];
56
57   if (clib_bihash_search_16_8
58       (&dm->per_thread_data[thread_index].b4_hash, &b4_kv, &b4_value))
59     {
60       pool_get (dm->per_thread_data[thread_index].b4s, b4);
61       clib_memset (b4, 0, sizeof (*b4));
62       b4->addr.as_u64[0] = in2out_key->softwire_id.as_u64[0];
63       b4->addr.as_u64[1] = in2out_key->softwire_id.as_u64[1];
64
65       pool_get (dm->per_thread_data[thread_index].list_pool, head_elt);
66       b4->sessions_per_b4_list_head_index =
67         head_elt - dm->per_thread_data[thread_index].list_pool;
68       clib_dlist_init (dm->per_thread_data[thread_index].list_pool,
69                        b4->sessions_per_b4_list_head_index);
70
71       b4_index = b4_kv.value = b4 - dm->per_thread_data[thread_index].b4s;
72       clib_bihash_add_del_16_8 (&dm->per_thread_data[thread_index].b4_hash,
73                                 &b4_kv, 1);
74
75       vlib_set_simple_counter (&dm->total_b4s, thread_index, 0,
76                                pool_elts (dm->
77                                           per_thread_data[thread_index].b4s));
78     }
79   else
80     {
81       b4_index = b4_value.value;
82       b4 =
83         pool_elt_at_index (dm->per_thread_data[thread_index].b4s,
84                            b4_value.value);
85     }
86
87   //TODO configurable quota
88   if (b4->nsessions >= 1000)
89     {
90       oldest_index =
91         clib_dlist_remove_head (dm->per_thread_data[thread_index].list_pool,
92                                 b4->sessions_per_b4_list_head_index);
93       ASSERT (oldest_index != ~0);
94       clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
95                           b4->sessions_per_b4_list_head_index, oldest_index);
96       oldest_elt =
97         pool_elt_at_index (dm->per_thread_data[thread_index].list_pool,
98                            oldest_index);
99       s =
100         pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
101                            oldest_elt->value);
102
103       in2out_kv.key[0] = s->in2out.as_u64[0];
104       in2out_kv.key[1] = s->in2out.as_u64[1];
105       in2out_kv.key[2] = s->in2out.as_u64[2];
106       clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out,
107                                 &in2out_kv, 0);
108       out2in_kv.key = s->out2in.as_u64;
109       clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in,
110                                &out2in_kv, 0);
111       snat_free_outside_address_and_port (dm->addr_pool, thread_index,
112                                           &s->out2in);
113
114       nat_syslog_dslite_apmdel (b4_index, &s->in2out.softwire_id,
115                                 &s->in2out.addr, s->in2out.port,
116                                 &s->out2in.addr, s->out2in.port,
117                                 s->in2out.proto);
118
119       if (snat_alloc_outside_address_and_port
120           (dm->addr_pool, 0, thread_index, &out2in_key,
121            dm->port_per_thread, thread_index))
122         ASSERT (0);
123     }
124   else
125     {
126       if (snat_alloc_outside_address_and_port
127           (dm->addr_pool, 0, thread_index, &out2in_key,
128            dm->port_per_thread, thread_index))
129         {
130           *error = DSLITE_ERROR_OUT_OF_PORTS;
131           return DSLITE_IN2OUT_NEXT_DROP;
132         }
133       pool_get (dm->per_thread_data[thread_index].sessions, s);
134       clib_memset (s, 0, sizeof (*s));
135       b4->nsessions++;
136
137       pool_get (dm->per_thread_data[thread_index].list_pool, elt);
138       clib_dlist_init (dm->per_thread_data[thread_index].list_pool,
139                        elt - dm->per_thread_data[thread_index].list_pool);
140       elt->value = s - dm->per_thread_data[thread_index].sessions;
141       s->per_b4_index = elt - dm->per_thread_data[thread_index].list_pool;
142       s->per_b4_list_head_index = b4->sessions_per_b4_list_head_index;
143       clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
144                           s->per_b4_list_head_index,
145                           elt - dm->per_thread_data[thread_index].list_pool);
146
147       vlib_set_simple_counter (&dm->total_sessions, thread_index, 0,
148                                pool_elts (dm->per_thread_data
149                                           [thread_index].sessions));
150     }
151
152   s->in2out = *in2out_key;
153   s->out2in = out2in_key;
154   *sp = s;
155   in2out_kv.key[0] = s->in2out.as_u64[0];
156   in2out_kv.key[1] = s->in2out.as_u64[1];
157   in2out_kv.key[2] = s->in2out.as_u64[2];
158   in2out_kv.value = s - dm->per_thread_data[thread_index].sessions;
159   clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out,
160                             &in2out_kv, 1);
161   out2in_kv.key = s->out2in.as_u64;
162   out2in_kv.value = s - dm->per_thread_data[thread_index].sessions;
163   clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in,
164                            &out2in_kv, 1);
165
166   nat_syslog_dslite_apmadd (b4_index, &s->in2out.softwire_id, &s->in2out.addr,
167                             s->in2out.port, &s->out2in.addr, s->out2in.port,
168                             s->in2out.proto);
169
170   return next;
171 }
172
173 static inline u32
174 dslite_icmp_in2out (dslite_main_t * dm, ip6_header_t * ip6,
175                     ip4_header_t * ip4, dslite_session_t ** sp, u32 next,
176                     u8 * error, u32 thread_index)
177 {
178   dslite_session_t *s = 0;
179   icmp46_header_t *icmp = ip4_next_header (ip4);
180   clib_bihash_kv_24_8_t kv, value;
181   dslite_session_key_t key;
182   u32 n = next;
183   icmp_echo_header_t *echo;
184   u32 new_addr, old_addr;
185   u16 old_id, new_id;
186   ip_csum_t sum;
187
188   if (icmp_is_error_message (icmp))
189     {
190       n = DSLITE_IN2OUT_NEXT_DROP;
191       *error = DSLITE_ERROR_BAD_ICMP_TYPE;
192       goto done;
193     }
194
195   echo = (icmp_echo_header_t *) (icmp + 1);
196
197   key.addr = ip4->src_address;
198   key.port = echo->identifier;
199   key.proto = SNAT_PROTOCOL_ICMP;
200   key.softwire_id.as_u64[0] = ip6->src_address.as_u64[0];
201   key.softwire_id.as_u64[1] = ip6->src_address.as_u64[1];
202   key.pad = 0;
203   kv.key[0] = key.as_u64[0];
204   kv.key[1] = key.as_u64[1];
205   kv.key[2] = key.as_u64[2];
206
207   if (clib_bihash_search_24_8
208       (&dm->per_thread_data[thread_index].in2out, &kv, &value))
209     {
210       n = slow_path (dm, &key, &s, next, error, thread_index);
211       if (PREDICT_FALSE (next == DSLITE_IN2OUT_NEXT_DROP))
212         goto done;
213     }
214   else
215     {
216       s =
217         pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
218                            value.value);
219     }
220
221   old_addr = ip4->src_address.as_u32;
222   ip4->src_address = s->out2in.addr;
223   new_addr = ip4->src_address.as_u32;
224   sum = ip4->checksum;
225   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
226   ip4->checksum = ip_csum_fold (sum);
227
228   old_id = echo->identifier;
229   echo->identifier = new_id = s->out2in.port;
230   sum = icmp->checksum;
231   sum = ip_csum_update (sum, old_id, new_id, icmp_echo_header_t, identifier);
232   icmp->checksum = ip_csum_fold (sum);
233
234 done:
235   *sp = s;
236   return n;
237 }
238
239 static inline uword
240 dslite_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
241                               vlib_frame_t * frame, u8 is_slow_path)
242 {
243   u32 n_left_from, *from, *to_next;
244   dslite_in2out_next_t next_index;
245   u32 node_index;
246   vlib_node_runtime_t *error_node;
247   u32 thread_index = vm->thread_index;
248   f64 now = vlib_time_now (vm);
249   dslite_main_t *dm = &dslite_main;
250
251   node_index =
252     is_slow_path ? dslite_in2out_slowpath_node.
253     index : dslite_in2out_node.index;
254
255   error_node = vlib_node_get_runtime (vm, node_index);
256
257   from = vlib_frame_vector_args (frame);
258   n_left_from = frame->n_vectors;
259   next_index = node->cached_next_index;
260
261   while (n_left_from > 0)
262     {
263       u32 n_left_to_next;
264
265       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
266
267       while (n_left_from > 0 && n_left_to_next > 0)
268         {
269           u32 bi0;
270           vlib_buffer_t *b0;
271           u32 next0 = DSLITE_IN2OUT_NEXT_IP4_LOOKUP;
272           ip4_header_t *ip40;
273           ip6_header_t *ip60;
274           u8 error0 = DSLITE_ERROR_IN2OUT;
275           u32 proto0;
276           dslite_session_t *s0 = 0;
277           clib_bihash_kv_24_8_t kv0, value0;
278           dslite_session_key_t key0;
279           udp_header_t *udp0;
280           tcp_header_t *tcp0;
281           ip_csum_t sum0;
282           u32 new_addr0, old_addr0;
283           u16 old_port0, new_port0;
284
285           /* speculatively enqueue b0 to the current next frame */
286           bi0 = from[0];
287           to_next[0] = bi0;
288           from += 1;
289           to_next += 1;
290           n_left_from -= 1;
291           n_left_to_next -= 1;
292
293           b0 = vlib_get_buffer (vm, bi0);
294           ip60 = vlib_buffer_get_current (b0);
295
296           if (PREDICT_FALSE (ip60->protocol != IP_PROTOCOL_IP_IN_IP))
297             {
298               if (ip60->protocol == IP_PROTOCOL_ICMP6)
299                 {
300                   next0 = DSLITE_IN2OUT_NEXT_IP6_ICMP;
301                   goto trace0;
302                 }
303               error0 = DSLITE_ERROR_BAD_IP6_PROTOCOL;
304               next0 = DSLITE_IN2OUT_NEXT_DROP;
305               goto trace0;
306             }
307
308           ip40 = vlib_buffer_get_current (b0) + sizeof (ip6_header_t);
309           proto0 = ip_proto_to_snat_proto (ip40->protocol);
310
311           if (PREDICT_FALSE (proto0 == ~0))
312             {
313               error0 = DSLITE_ERROR_UNSUPPORTED_PROTOCOL;
314               next0 = DSLITE_IN2OUT_NEXT_DROP;
315               goto trace0;
316             }
317
318           udp0 = ip4_next_header (ip40);
319           tcp0 = (tcp_header_t *) udp0;
320
321           if (is_slow_path)
322             {
323               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
324                 {
325                   next0 =
326                     dslite_icmp_in2out (dm, ip60, ip40, &s0, next0, &error0,
327                                         thread_index);
328                   if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP))
329                     goto trace0;
330
331                   goto accounting0;
332                 }
333             }
334           else
335             {
336               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
337                 {
338                   next0 = DSLITE_IN2OUT_NEXT_SLOWPATH;
339                   goto trace0;
340                 }
341             }
342
343           key0.addr = ip40->src_address;
344           key0.port = udp0->src_port;
345           key0.proto = proto0;
346           key0.softwire_id.as_u64[0] = ip60->src_address.as_u64[0];
347           key0.softwire_id.as_u64[1] = ip60->src_address.as_u64[1];
348           key0.pad = 0;
349           kv0.key[0] = key0.as_u64[0];
350           kv0.key[1] = key0.as_u64[1];
351           kv0.key[2] = key0.as_u64[2];
352
353           if (clib_bihash_search_24_8
354               (&dm->per_thread_data[thread_index].in2out, &kv0, &value0))
355             {
356               if (is_slow_path)
357                 {
358                   next0 =
359                     slow_path (dm, &key0, &s0, next0, &error0, thread_index);
360                   if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP))
361                     goto trace0;
362                 }
363               else
364                 {
365                   next0 = DSLITE_IN2OUT_NEXT_SLOWPATH;
366                   goto trace0;
367                 }
368             }
369           else
370             {
371               s0 =
372                 pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
373                                    value0.value);
374             }
375
376           old_addr0 = ip40->src_address.as_u32;
377           ip40->src_address = s0->out2in.addr;
378           new_addr0 = ip40->src_address.as_u32;
379           sum0 = ip40->checksum;
380           sum0 =
381             ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
382                             src_address);
383           ip40->checksum = ip_csum_fold (sum0);
384           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
385             {
386               old_port0 = tcp0->src_port;
387               tcp0->src_port = s0->out2in.port;
388               new_port0 = tcp0->src_port;
389
390               sum0 = tcp0->checksum;
391               sum0 =
392                 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
393                                 dst_address);
394               sum0 =
395                 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
396                                 length);
397               mss_clamping (&snat_main, tcp0, &sum0);
398               tcp0->checksum = ip_csum_fold (sum0);
399             }
400           else
401             {
402               old_port0 = udp0->src_port;
403               udp0->src_port = s0->out2in.port;
404               udp0->checksum = 0;
405             }
406
407         accounting0:
408           /* Accounting */
409           s0->last_heard = now;
410           s0->total_pkts++;
411           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
412           /* Per-B4 LRU list maintenance */
413           clib_dlist_remove (dm->per_thread_data[thread_index].list_pool,
414                              s0->per_b4_index);
415           clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
416                               s0->per_b4_list_head_index, s0->per_b4_index);
417
418           ip40->tos =
419             (clib_net_to_host_u32
420              (ip60->ip_version_traffic_class_and_flow_label) & 0x0ff00000) >>
421             20;
422           vlib_buffer_advance (b0, sizeof (ip6_header_t));
423
424         trace0:
425           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
426                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
427             {
428               dslite_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
429               t->next_index = next0;
430               t->session_index = ~0;
431               if (s0)
432                 t->session_index =
433                   s0 - dm->per_thread_data[thread_index].sessions;
434             }
435
436           b0->error = error_node->errors[error0];
437
438           /* verify speculative enqueue, maybe switch current next frame */
439           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
440                                            n_left_to_next, bi0, next0);
441         }
442       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
443     }
444
445   return frame->n_vectors;
446 }
447
448 static uword
449 dslite_in2out_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
450                        vlib_frame_t * frame)
451 {
452   return dslite_in2out_node_fn_inline (vm, node, frame, 0);
453 }
454
455 /* *INDENT-OFF* */
456 VLIB_REGISTER_NODE (dslite_in2out_node) = {
457   .function = dslite_in2out_node_fn,
458   .name = "dslite-in2out",
459   .vector_size = sizeof (u32),
460   .format_trace = format_dslite_trace,
461   .type = VLIB_NODE_TYPE_INTERNAL,
462   .n_errors = ARRAY_LEN (dslite_in2out_error_strings),
463   .error_strings = dslite_in2out_error_strings,
464   .n_next_nodes = DSLITE_IN2OUT_N_NEXT,
465   /* edit / add dispositions here */
466   .next_nodes = {
467     [DSLITE_IN2OUT_NEXT_DROP] = "error-drop",
468     [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
469     [DSLITE_IN2OUT_NEXT_IP6_ICMP] = "ip6-icmp-input",
470     [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
471   },
472 };
473 /* *INDENT-ON* */
474
475 VLIB_NODE_FUNCTION_MULTIARCH (dslite_in2out_node, dslite_in2out_node_fn);
476
477 static uword
478 dslite_in2out_slowpath_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
479                                 vlib_frame_t * frame)
480 {
481   return dslite_in2out_node_fn_inline (vm, node, frame, 1);
482 }
483
484 /* *INDENT-OFF* */
485 VLIB_REGISTER_NODE (dslite_in2out_slowpath_node) = {
486   .function = dslite_in2out_slowpath_node_fn,
487   .name = "dslite-in2out-slowpath",
488   .vector_size = sizeof (u32),
489   .format_trace = format_dslite_trace,
490   .type = VLIB_NODE_TYPE_INTERNAL,
491   .n_errors = ARRAY_LEN (dslite_in2out_error_strings),
492   .error_strings = dslite_in2out_error_strings,
493   .n_next_nodes = DSLITE_IN2OUT_N_NEXT,
494   /* edit / add dispositions here */
495   .next_nodes = {
496     [DSLITE_IN2OUT_NEXT_DROP] = "error-drop",
497     [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
498     [DSLITE_IN2OUT_NEXT_IP6_ICMP] = "ip6-lookup",
499     [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
500   },
501 };
502 /* *INDENT-ON* */
503
504 VLIB_NODE_FUNCTION_MULTIARCH (dslite_in2out_slowpath_node,
505                               dslite_in2out_slowpath_node_fn);
506
507 /*
508  * fd.io coding-style-patch-verification: ON
509  *
510  * Local Variables:
511  * eval: (c-set-style "gnu")
512  * End:
513  */