5c1de19b8b2cb994b6e6a3947cc2459e01dba178
[vpp.git] / src / plugins / nat / dslite_in2out.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <nat/dslite.h>
16 #include <nat/nat_inlines.h>
17 #include <nat/nat_syslog.h>
18
19 vlib_node_registration_t dslite_in2out_node;
20 vlib_node_registration_t dslite_in2out_slowpath_node;
21
/* Next-node dispositions shared by the dslite-in2out and
 * dslite-in2out-slowpath nodes; used as indices into .next_nodes. */
typedef enum
{
  DSLITE_IN2OUT_NEXT_IP4_LOOKUP = 0,    /* translated inner IPv4 packet */
  DSLITE_IN2OUT_NEXT_IP6_ICMP = 1,      /* outer packet carries ICMPv6 */
  DSLITE_IN2OUT_NEXT_DROP = 2,          /* packet is dropped */
  DSLITE_IN2OUT_NEXT_SLOWPATH = 3,      /* hand over to the slow-path node */
  DSLITE_IN2OUT_N_NEXT = 4,             /* number of dispositions */
} dslite_in2out_next_t;
30
31 static char *dslite_in2out_error_strings[] = {
32 #define _(sym,string) string,
33   foreach_dslite_error
34 #undef _
35 };
36
37 static u32
38 slow_path (dslite_main_t * dm, dslite_session_key_t * in2out_key,
39            dslite_session_t ** sp, u32 next, u8 * error, u32 thread_index)
40 {
41   dslite_b4_t *b4;
42   clib_bihash_kv_16_8_t b4_kv, b4_value;
43   clib_bihash_kv_24_8_t in2out_kv;
44   clib_bihash_kv_8_8_t out2in_kv;
45   dlist_elt_t *head_elt, *oldest_elt, *elt;
46   u32 oldest_index;
47   dslite_session_t *s;
48   snat_session_key_t out2in_key;
49   u32 b4_index;
50
51   out2in_key.protocol = in2out_key->proto;
52   out2in_key.fib_index = 0;
53
54   b4_kv.key[0] = in2out_key->softwire_id.as_u64[0];
55   b4_kv.key[1] = in2out_key->softwire_id.as_u64[1];
56
57   if (clib_bihash_search_16_8
58       (&dm->per_thread_data[thread_index].b4_hash, &b4_kv, &b4_value))
59     {
60       pool_get (dm->per_thread_data[thread_index].b4s, b4);
61       clib_memset (b4, 0, sizeof (*b4));
62       b4->addr.as_u64[0] = in2out_key->softwire_id.as_u64[0];
63       b4->addr.as_u64[1] = in2out_key->softwire_id.as_u64[1];
64
65       pool_get (dm->per_thread_data[thread_index].list_pool, head_elt);
66       b4->sessions_per_b4_list_head_index =
67         head_elt - dm->per_thread_data[thread_index].list_pool;
68       clib_dlist_init (dm->per_thread_data[thread_index].list_pool,
69                        b4->sessions_per_b4_list_head_index);
70
71       b4_index = b4_kv.value = b4 - dm->per_thread_data[thread_index].b4s;
72       clib_bihash_add_del_16_8 (&dm->per_thread_data[thread_index].b4_hash,
73                                 &b4_kv, 1);
74     }
75   else
76     {
77       b4_index = b4_value.value;
78       b4 =
79         pool_elt_at_index (dm->per_thread_data[thread_index].b4s,
80                            b4_value.value);
81     }
82
83   //TODO configurable quota
84   if (b4->nsessions >= 1000)
85     {
86       oldest_index =
87         clib_dlist_remove_head (dm->per_thread_data[thread_index].list_pool,
88                                 b4->sessions_per_b4_list_head_index);
89       ASSERT (oldest_index != ~0);
90       clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
91                           b4->sessions_per_b4_list_head_index, oldest_index);
92       oldest_elt =
93         pool_elt_at_index (dm->per_thread_data[thread_index].list_pool,
94                            oldest_index);
95       s =
96         pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
97                            oldest_elt->value);
98
99       in2out_kv.key[0] = s->in2out.as_u64[0];
100       in2out_kv.key[1] = s->in2out.as_u64[1];
101       in2out_kv.key[2] = s->in2out.as_u64[2];
102       clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out,
103                                 &in2out_kv, 0);
104       out2in_kv.key = s->out2in.as_u64;
105       clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in,
106                                &out2in_kv, 0);
107       snat_free_outside_address_and_port (dm->addr_pool, thread_index,
108                                           &s->out2in);
109
110       nat_syslog_dslite_apmdel (b4_index, &s->in2out.softwire_id,
111                                 &s->in2out.addr, s->in2out.port,
112                                 &s->out2in.addr, s->out2in.port,
113                                 s->in2out.proto);
114
115       if (snat_alloc_outside_address_and_port
116           (dm->addr_pool, 0, thread_index, &out2in_key,
117            dm->port_per_thread, thread_index))
118         ASSERT (0);
119     }
120   else
121     {
122       if (snat_alloc_outside_address_and_port
123           (dm->addr_pool, 0, thread_index, &out2in_key,
124            dm->port_per_thread, thread_index))
125         {
126           *error = DSLITE_ERROR_OUT_OF_PORTS;
127           return DSLITE_IN2OUT_NEXT_DROP;
128         }
129       pool_get (dm->per_thread_data[thread_index].sessions, s);
130       clib_memset (s, 0, sizeof (*s));
131       b4->nsessions++;
132
133       pool_get (dm->per_thread_data[thread_index].list_pool, elt);
134       clib_dlist_init (dm->per_thread_data[thread_index].list_pool,
135                        elt - dm->per_thread_data[thread_index].list_pool);
136       elt->value = s - dm->per_thread_data[thread_index].sessions;
137       s->per_b4_index = elt - dm->per_thread_data[thread_index].list_pool;
138       s->per_b4_list_head_index = b4->sessions_per_b4_list_head_index;
139       clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
140                           s->per_b4_list_head_index,
141                           elt - dm->per_thread_data[thread_index].list_pool);
142     }
143
144   s->in2out = *in2out_key;
145   s->out2in = out2in_key;
146   *sp = s;
147   in2out_kv.key[0] = s->in2out.as_u64[0];
148   in2out_kv.key[1] = s->in2out.as_u64[1];
149   in2out_kv.key[2] = s->in2out.as_u64[2];
150   in2out_kv.value = s - dm->per_thread_data[thread_index].sessions;
151   clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out,
152                             &in2out_kv, 1);
153   out2in_kv.key = s->out2in.as_u64;
154   out2in_kv.value = s - dm->per_thread_data[thread_index].sessions;
155   clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in,
156                            &out2in_kv, 1);
157
158   nat_syslog_dslite_apmadd (b4_index, &s->in2out.softwire_id, &s->in2out.addr,
159                             s->in2out.port, &s->out2in.addr, s->out2in.port,
160                             s->in2out.proto);
161
162   return next;
163 }
164
165 static inline u32
166 dslite_icmp_in2out (dslite_main_t * dm, ip6_header_t * ip6,
167                     ip4_header_t * ip4, dslite_session_t ** sp, u32 next,
168                     u8 * error, u32 thread_index)
169 {
170   dslite_session_t *s = 0;
171   icmp46_header_t *icmp = ip4_next_header (ip4);
172   clib_bihash_kv_24_8_t kv, value;
173   dslite_session_key_t key;
174   u32 n = next;
175   icmp_echo_header_t *echo;
176   u32 new_addr, old_addr;
177   u16 old_id, new_id;
178   ip_csum_t sum;
179
180   if (icmp_is_error_message (icmp))
181     {
182       n = DSLITE_IN2OUT_NEXT_DROP;
183       *error = DSLITE_ERROR_BAD_ICMP_TYPE;
184       goto done;
185     }
186
187   echo = (icmp_echo_header_t *) (icmp + 1);
188
189   key.addr = ip4->src_address;
190   key.port = echo->identifier;
191   key.proto = SNAT_PROTOCOL_ICMP;
192   key.softwire_id.as_u64[0] = ip6->src_address.as_u64[0];
193   key.softwire_id.as_u64[1] = ip6->src_address.as_u64[1];
194   key.pad = 0;
195   kv.key[0] = key.as_u64[0];
196   kv.key[1] = key.as_u64[1];
197   kv.key[2] = key.as_u64[2];
198
199   if (clib_bihash_search_24_8
200       (&dm->per_thread_data[thread_index].in2out, &kv, &value))
201     {
202       n = slow_path (dm, &key, &s, next, error, thread_index);
203       if (PREDICT_FALSE (next == DSLITE_IN2OUT_NEXT_DROP))
204         goto done;
205     }
206   else
207     {
208       s =
209         pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
210                            value.value);
211     }
212
213   old_addr = ip4->src_address.as_u32;
214   ip4->src_address = s->out2in.addr;
215   new_addr = ip4->src_address.as_u32;
216   sum = ip4->checksum;
217   sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
218   ip4->checksum = ip_csum_fold (sum);
219
220   old_id = echo->identifier;
221   echo->identifier = new_id = s->out2in.port;
222   sum = icmp->checksum;
223   sum = ip_csum_update (sum, old_id, new_id, icmp_echo_header_t, identifier);
224   icmp->checksum = ip_csum_fold (sum);
225
226 done:
227   *sp = s;
228   return n;
229 }
230
231 static inline uword
232 dslite_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
233                               vlib_frame_t * frame, u8 is_slow_path)
234 {
235   u32 n_left_from, *from, *to_next;
236   dslite_in2out_next_t next_index;
237   u32 node_index;
238   vlib_node_runtime_t *error_node;
239   u32 thread_index = vm->thread_index;
240   f64 now = vlib_time_now (vm);
241   dslite_main_t *dm = &dslite_main;
242
243   node_index =
244     is_slow_path ? dslite_in2out_slowpath_node.
245     index : dslite_in2out_node.index;
246
247   error_node = vlib_node_get_runtime (vm, node_index);
248
249   from = vlib_frame_vector_args (frame);
250   n_left_from = frame->n_vectors;
251   next_index = node->cached_next_index;
252
253   while (n_left_from > 0)
254     {
255       u32 n_left_to_next;
256
257       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
258
259       while (n_left_from > 0 && n_left_to_next > 0)
260         {
261           u32 bi0;
262           vlib_buffer_t *b0;
263           u32 next0 = DSLITE_IN2OUT_NEXT_IP4_LOOKUP;
264           ip4_header_t *ip40;
265           ip6_header_t *ip60;
266           u8 error0 = DSLITE_ERROR_IN2OUT;
267           u32 proto0;
268           dslite_session_t *s0 = 0;
269           clib_bihash_kv_24_8_t kv0, value0;
270           dslite_session_key_t key0;
271           udp_header_t *udp0;
272           tcp_header_t *tcp0;
273           ip_csum_t sum0;
274           u32 new_addr0, old_addr0;
275           u16 old_port0, new_port0;
276
277           /* speculatively enqueue b0 to the current next frame */
278           bi0 = from[0];
279           to_next[0] = bi0;
280           from += 1;
281           to_next += 1;
282           n_left_from -= 1;
283           n_left_to_next -= 1;
284
285           b0 = vlib_get_buffer (vm, bi0);
286           ip60 = vlib_buffer_get_current (b0);
287
288           if (PREDICT_FALSE (ip60->protocol != IP_PROTOCOL_IP_IN_IP))
289             {
290               if (ip60->protocol == IP_PROTOCOL_ICMP6)
291                 {
292                   next0 = DSLITE_IN2OUT_NEXT_IP6_ICMP;
293                   goto trace0;
294                 }
295               error0 = DSLITE_ERROR_BAD_IP6_PROTOCOL;
296               next0 = DSLITE_IN2OUT_NEXT_DROP;
297               goto trace0;
298             }
299
300           ip40 = vlib_buffer_get_current (b0) + sizeof (ip6_header_t);
301           proto0 = ip_proto_to_snat_proto (ip40->protocol);
302
303           if (PREDICT_FALSE (proto0 == ~0))
304             {
305               error0 = DSLITE_ERROR_UNSUPPORTED_PROTOCOL;
306               next0 = DSLITE_IN2OUT_NEXT_DROP;
307               goto trace0;
308             }
309
310           udp0 = ip4_next_header (ip40);
311           tcp0 = (tcp_header_t *) udp0;
312
313           if (is_slow_path)
314             {
315               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
316                 {
317                   next0 =
318                     dslite_icmp_in2out (dm, ip60, ip40, &s0, next0, &error0,
319                                         thread_index);
320                   if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP))
321                     goto trace0;
322
323                   goto accounting0;
324                 }
325             }
326           else
327             {
328               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
329                 {
330                   next0 = DSLITE_IN2OUT_NEXT_SLOWPATH;
331                   goto trace0;
332                 }
333             }
334
335           key0.addr = ip40->src_address;
336           key0.port = udp0->src_port;
337           key0.proto = proto0;
338           key0.softwire_id.as_u64[0] = ip60->src_address.as_u64[0];
339           key0.softwire_id.as_u64[1] = ip60->src_address.as_u64[1];
340           key0.pad = 0;
341           kv0.key[0] = key0.as_u64[0];
342           kv0.key[1] = key0.as_u64[1];
343           kv0.key[2] = key0.as_u64[2];
344
345           if (clib_bihash_search_24_8
346               (&dm->per_thread_data[thread_index].in2out, &kv0, &value0))
347             {
348               if (is_slow_path)
349                 {
350                   next0 =
351                     slow_path (dm, &key0, &s0, next0, &error0, thread_index);
352                   if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP))
353                     goto trace0;
354                 }
355               else
356                 {
357                   next0 = DSLITE_IN2OUT_NEXT_SLOWPATH;
358                   goto trace0;
359                 }
360             }
361           else
362             {
363               s0 =
364                 pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
365                                    value0.value);
366             }
367
368           old_addr0 = ip40->src_address.as_u32;
369           ip40->src_address = s0->out2in.addr;
370           new_addr0 = ip40->src_address.as_u32;
371           sum0 = ip40->checksum;
372           sum0 =
373             ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
374                             src_address);
375           ip40->checksum = ip_csum_fold (sum0);
376           if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
377             {
378               old_port0 = tcp0->src_port;
379               tcp0->src_port = s0->out2in.port;
380               new_port0 = tcp0->src_port;
381
382               sum0 = tcp0->checksum;
383               sum0 =
384                 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
385                                 dst_address);
386               sum0 =
387                 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
388                                 length);
389               mss_clamping (&snat_main, tcp0, &sum0);
390               tcp0->checksum = ip_csum_fold (sum0);
391             }
392           else
393             {
394               old_port0 = udp0->src_port;
395               udp0->src_port = s0->out2in.port;
396               udp0->checksum = 0;
397             }
398
399         accounting0:
400           /* Accounting */
401           s0->last_heard = now;
402           s0->total_pkts++;
403           s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
404           /* Per-B4 LRU list maintenance */
405           clib_dlist_remove (dm->per_thread_data[thread_index].list_pool,
406                              s0->per_b4_index);
407           clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
408                               s0->per_b4_list_head_index, s0->per_b4_index);
409
410           ip40->tos =
411             (clib_net_to_host_u32
412              (ip60->ip_version_traffic_class_and_flow_label) & 0x0ff00000) >>
413             20;
414           vlib_buffer_advance (b0, sizeof (ip6_header_t));
415
416         trace0:
417           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
418                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
419             {
420               dslite_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
421               t->next_index = next0;
422               t->session_index = ~0;
423               if (s0)
424                 t->session_index =
425                   s0 - dm->per_thread_data[thread_index].sessions;
426             }
427
428           b0->error = error_node->errors[error0];
429
430           /* verify speculative enqueue, maybe switch current next frame */
431           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
432                                            n_left_to_next, bi0, next0);
433         }
434       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
435     }
436
437   return frame->n_vectors;
438 }
439
440 static uword
441 dslite_in2out_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
442                        vlib_frame_t * frame)
443 {
444   return dslite_in2out_node_fn_inline (vm, node, frame, 0);
445 }
446
447 /* *INDENT-OFF* */
448 VLIB_REGISTER_NODE (dslite_in2out_node) = {
449   .function = dslite_in2out_node_fn,
450   .name = "dslite-in2out",
451   .vector_size = sizeof (u32),
452   .format_trace = format_dslite_trace,
453   .type = VLIB_NODE_TYPE_INTERNAL,
454   .n_errors = ARRAY_LEN (dslite_in2out_error_strings),
455   .error_strings = dslite_in2out_error_strings,
456   .n_next_nodes = DSLITE_IN2OUT_N_NEXT,
457   /* edit / add dispositions here */
458   .next_nodes = {
459     [DSLITE_IN2OUT_NEXT_DROP] = "error-drop",
460     [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
461     [DSLITE_IN2OUT_NEXT_IP6_ICMP] = "ip6-icmp-input",
462     [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
463   },
464 };
465 /* *INDENT-ON* */
466
467 VLIB_NODE_FUNCTION_MULTIARCH (dslite_in2out_node, dslite_in2out_node_fn);
468
469 static uword
470 dslite_in2out_slowpath_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
471                                 vlib_frame_t * frame)
472 {
473   return dslite_in2out_node_fn_inline (vm, node, frame, 1);
474 }
475
476 /* *INDENT-OFF* */
477 VLIB_REGISTER_NODE (dslite_in2out_slowpath_node) = {
478   .function = dslite_in2out_slowpath_node_fn,
479   .name = "dslite-in2out-slowpath",
480   .vector_size = sizeof (u32),
481   .format_trace = format_dslite_trace,
482   .type = VLIB_NODE_TYPE_INTERNAL,
483   .n_errors = ARRAY_LEN (dslite_in2out_error_strings),
484   .error_strings = dslite_in2out_error_strings,
485   .n_next_nodes = DSLITE_IN2OUT_N_NEXT,
486   /* edit / add dispositions here */
487   .next_nodes = {
488     [DSLITE_IN2OUT_NEXT_DROP] = "error-drop",
489     [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
490     [DSLITE_IN2OUT_NEXT_IP6_ICMP] = "ip6-lookup",
491     [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
492   },
493 };
494 /* *INDENT-ON* */
495
496 VLIB_NODE_FUNCTION_MULTIARCH (dslite_in2out_slowpath_node,
497                               dslite_in2out_slowpath_node_fn);
498
499 /*
500  * fd.io coding-style-patch-verification: ON
501  *
502  * Local Variables:
503  * eval: (c-set-style "gnu")
504  * End:
505  */