NAT44: asymmetrical load balancing static mapping rule (VPP-1132)
[vpp.git] / src / plugins / nat / nat.h
1
2 /*
3  * nat.h - NAT plugin definitions
4  *
5  * Copyright (c) 2016 Cisco and/or its affiliates.
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at:
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 #ifndef __included_nat_h__
19 #define __included_nat_h__
20
21 #include <vnet/vnet.h>
22 #include <vnet/ip/ip.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/ip/icmp46_packet.h>
25 #include <vnet/api_errno.h>
26 #include <vppinfra/bihash_8_8.h>
27 #include <vppinfra/bihash_16_8.h>
28 #include <vppinfra/dlist.h>
29 #include <vppinfra/error.h>
30 #include <vlibapi/api.h>
31
32
/*
 * Default session timeout constants.
 * NOTE(review): units are presumably seconds - confirm against the code
 * that consumes these values.
 */
#define SNAT_UDP_TIMEOUT              300
#define SNAT_UDP_TIMEOUT_MIN          120
#define SNAT_TCP_TRANSITORY_TIMEOUT   240
#define SNAT_TCP_ESTABLISHED_TIMEOUT  7440
#define SNAT_TCP_INCOMING_SYN         6
#define SNAT_ICMP_TIMEOUT             60

/* Flag bit 0: hairpinning. */
#define SNAT_FLAG_HAIRPINNING (1 << 0)
41
42 /* Key */
43 typedef struct {
44   union
45   {
46     struct
47     {
48       ip4_address_t addr;
49       u16 port;
50       u16 protocol:3,
51         fib_index:13;
52     };
53     u64 as_u64;
54   };
55 } snat_session_key_t;
56
57 typedef struct {
58   union
59   {
60     struct
61     {
62       ip4_address_t l_addr;
63       ip4_address_t r_addr;
64       u32 proto:8,
65           fib_index:24;
66       u16 l_port;
67       u16 r_port;
68     };
69     u64 as_u64[2];
70   };
71 } nat_ed_ses_key_t;
72
73 typedef struct {
74   union
75   {
76     struct
77     {
78       ip4_address_t ext_host_addr;
79       u16 ext_host_port;
80       u16 out_port;
81     };
82     u64 as_u64;
83   };
84 } snat_det_out_key_t;
85
86 typedef struct {
87   union
88   {
89     struct
90     {
91       ip4_address_t addr;
92       u32 fib_index;
93     };
94     u64 as_u64;
95   };
96 } snat_user_key_t;
97
98
/*
 * X-macro table of NAT protocols.
 * Row layout: _(NAME, numeric value, lower-case identifier, string).
 */
#define foreach_snat_protocol     \
  _(UDP,  0, udp,  "udp")         \
  _(TCP,  1, tcp,  "tcp")         \
  _(ICMP, 2, icmp, "icmp")

/* SNAT_PROTOCOL_<NAME> enumerators generated from foreach_snat_protocol. */
typedef enum
{
#define _(NAME, value, lower, str) SNAT_PROTOCOL_##NAME = value,
  foreach_snat_protocol
#undef _
} snat_protocol_t;
109
110
/*
 * X-macro table of session states.
 * Row layout: _(numeric value, NAME, string).
 */
#define foreach_snat_session_state            \
  _(0, UNKNOWN,         "unknown")            \
  _(1, UDP_ACTIVE,      "udp-active")         \
  _(2, TCP_SYN_SENT,    "tcp-syn-sent")       \
  _(3, TCP_ESTABLISHED, "tcp-established")    \
  _(4, TCP_FIN_WAIT,    "tcp-fin-wait")       \
  _(5, TCP_CLOSE_WAIT,  "tcp-close-wait")     \
  _(6, TCP_LAST_ACK,    "tcp-last-ack")       \
  _(7, ICMP_ACTIVE,     "icmp-active")

/* SNAT_SESSION_<NAME> enumerators generated from foreach_snat_session_state. */
typedef enum
{
#define _(value, NAME, str) SNAT_SESSION_##NAME = value,
  foreach_snat_session_state
#undef _
} snat_session_state_t;
126
127
/* Bits stored in snat_session_t.flags. */
#define SNAT_SESSION_FLAG_STATIC_MAPPING (1 << 0)
#define SNAT_SESSION_FLAG_UNKNOWN_PROTO  (1 << 1)
#define SNAT_SESSION_FLAG_LOAD_BALANCING (1 << 2)
#define SNAT_SESSION_FLAG_TWICE_NAT      (1 << 3)

/* Bits stored in snat_interface_t.flags. */
#define NAT_INTERFACE_FLAG_IS_INSIDE  (1 << 0)
#define NAT_INTERFACE_FLAG_IS_OUTSIDE (1 << 1)
135
136 typedef CLIB_PACKED(struct {
137   snat_session_key_t out2in;    /* 0-15 */
138
139   snat_session_key_t in2out;    /* 16-31 */
140
141   u32 flags;                    /* 32-35 */
142
143   /* per-user translations */
144   u32 per_user_index;           /* 36-39 */
145
146   u32 per_user_list_head_index; /* 40-43 */
147
148   /* Last heard timer */
149   f64 last_heard;               /* 44-51 */
150
151   u64 total_bytes;              /* 52-59 */
152
153   u32 total_pkts;               /* 60-63 */
154
155   /* Outside address */
156   u32 outside_address_index;    /* 64-67 */
157
158   /* External host address and port */
159   ip4_address_t ext_host_addr;  /* 68-71 */
160   u16 ext_host_port;            /* 72-73 */
161
162   /* External hos address and port after translation */
163   ip4_address_t ext_host_nat_addr; /* 74-77 */
164   u16 ext_host_nat_port;           /* 78-79 */
165 }) snat_session_t;
166
167
168 typedef struct {
169   ip4_address_t addr;
170   u32 fib_index;
171   u32 sessions_per_user_list_head_index;
172   u32 nsessions;
173   u32 nstaticsessions;
174 } snat_user_t;
175
176 typedef struct {
177   ip4_address_t addr;
178   u32 fib_index;
179 #define _(N, i, n, s) \
180   u16 busy_##n##_ports; \
181   u16 * busy_##n##_ports_per_thread; \
182   uword * busy_##n##_port_bitmap;
183   foreach_snat_protocol
184 #undef _
185 } snat_address_t;
186
187 typedef struct {
188   u16 in_port;
189   snat_det_out_key_t out;
190   u8 state;
191   u32 expire;
192 } snat_det_session_t;
193
194 typedef struct {
195   ip4_address_t in_addr;
196   u8 in_plen;
197   ip4_address_t out_addr;
198   u8 out_plen;
199   u32 sharing_ratio;
200   u16 ports_per_host;
201   u32 ses_num;
202   /* vector of sessions */
203   snat_det_session_t * sessions;
204 } snat_det_map_t;
205
206 typedef struct {
207   ip4_address_t addr;
208   u16 port;
209   u8 probability;
210   u8 prefix;
211 } nat44_lb_addr_port_t;
212
213 typedef struct {
214   ip4_address_t local_addr;
215   ip4_address_t external_addr;
216   u16 local_port;
217   u16 external_port;
218   u8 addr_only;
219   u8 twice_nat;
220   u8 out2in_only;
221   u32 vrf_id;
222   u32 fib_index;
223   snat_protocol_t proto;
224   u32 worker_index;
225   nat44_lb_addr_port_t *locals;
226 } snat_static_mapping_t;
227
228 typedef struct {
229   u32 sw_if_index;
230   u8 flags;
231 } snat_interface_t;
232
233 typedef struct {
234   ip4_address_t l_addr;
235   u16 l_port;
236   u16 e_port;
237   u32 sw_if_index;
238   u32 vrf_id;
239   snat_protocol_t proto;
240   int addr_only;
241   int twice_nat;
242   int is_add;
243 } snat_static_map_resolve_t;
244
245 typedef struct {
246   /* Main lookup tables */
247   clib_bihash_8_8_t out2in;
248   clib_bihash_8_8_t in2out;
249
250   /* Find-a-user => src address lookup */
251   clib_bihash_8_8_t user_hash;
252
253   /* User pool */
254   snat_user_t * users;
255
256   /* Session pool */
257   snat_session_t * sessions;
258
259   /* Pool of doubly-linked list elements */
260   dlist_elt_t * list_pool;
261
262   u32 snat_thread_index;
263 } snat_main_per_thread_data_t;
264
265 struct snat_main_s;
266
267 typedef u32 snat_icmp_match_function_t (struct snat_main_s *sm,
268                                         vlib_node_runtime_t *node,
269                                         u32 thread_index,
270                                         vlib_buffer_t *b0,
271                                         ip4_header_t *ip0,
272                                         u8 *p_proto,
273                                         snat_session_key_t *p_value,
274                                         u8 *p_dont_translate,
275                                         void *d,
276                                         void *e);
277
278 typedef u32 (snat_get_worker_function_t) (ip4_header_t * ip, u32 rx_fib_index);
279
280 typedef int nat_alloc_out_addr_and_port_function_t (snat_address_t * addresses,
281                                                     u32 fib_index,
282                                                     u32 thread_index,
283                                                     snat_session_key_t * k,
284                                                     u32 * address_indexp,
285                                                     u16 port_per_thread,
286                                                     u32 snat_thread_index);
287
288 typedef struct snat_main_s {
289   /* Endpoint address dependent sessions lookup tables */
290   clib_bihash_16_8_t out2in_ed;
291   clib_bihash_16_8_t in2out_ed;
292
293   snat_icmp_match_function_t * icmp_match_in2out_cb;
294   snat_icmp_match_function_t * icmp_match_out2in_cb;
295
296   u32 num_workers;
297   u32 first_worker_index;
298   u32 next_worker;
299   u32 * workers;
300   snat_get_worker_function_t * worker_in2out_cb;
301   snat_get_worker_function_t * worker_out2in_cb;
302   u16 port_per_thread;
303   u32 num_snat_thread;
304
305   /* Per thread data */
306   snat_main_per_thread_data_t * per_thread_data;
307
308   /* Find a static mapping by local */
309   clib_bihash_8_8_t static_mapping_by_local;
310
311   /* Find a static mapping by external */
312   clib_bihash_8_8_t static_mapping_by_external;
313
314   /* Static mapping pool */
315   snat_static_mapping_t * static_mappings;
316
317   /* Interface pool */
318   snat_interface_t * interfaces;
319   snat_interface_t * output_feature_interfaces;
320
321   /* Vector of outside addresses */
322   snat_address_t * addresses;
323   nat_alloc_out_addr_and_port_function_t *alloc_addr_and_port;
324   u8 psid_offset;
325   u8 psid_length;
326   u16 psid;
327
328   /* Vector of twice NAT addresses for extenal hosts */
329   snat_address_t * twice_nat_addresses;
330
331   /* sw_if_indices whose intfc addresses should be auto-added */
332   u32 * auto_add_sw_if_indices;
333   u32 * auto_add_sw_if_indices_twice_nat;
334
335   /* vector of interface address static mappings to resolve. */
336   snat_static_map_resolve_t *to_resolve;
337
338   /* Randomize port allocation order */
339   u32 random_seed;
340
341   /* Worker handoff index */
342   u32 fq_in2out_index;
343   u32 fq_in2out_output_index;
344   u32 fq_out2in_index;
345
346   /* in2out and out2in node index */
347   u32 in2out_node_index;
348   u32 in2out_output_node_index;
349   u32 out2in_node_index;
350
351   /* Deterministic NAT */
352   snat_det_map_t * det_maps;
353
354   /* If forwarding is enabled */
355   u8 forwarding_enabled;
356
357   /* Config parameters */
358   u8 static_mapping_only;
359   u8 static_mapping_connection_tracking;
360   u8 deterministic;
361   u8 out2in_dpo;
362   u32 translation_buckets;
363   u32 translation_memory_size;
364   u32 max_translations;
365   u32 user_buckets;
366   u32 user_memory_size;
367   u32 max_translations_per_user;
368   u32 outside_vrf_id;
369   u32 outside_fib_index;
370   u32 inside_vrf_id;
371   u32 inside_fib_index;
372
373   /* values of various timeouts */
374   u32 udp_timeout;
375   u32 tcp_established_timeout;
376   u32 tcp_transitory_timeout;
377   u32 icmp_timeout;
378
379   /* API message ID base */
380   u16 msg_id_base;
381
382   /* convenience */
383   vlib_main_t * vlib_main;
384   vnet_main_t * vnet_main;
385   ip4_main_t * ip4_main;
386   ip_lookup_main_t * ip4_lookup_main;
387   api_main_t * api_main;
388 } snat_main_t;
389
390 extern snat_main_t snat_main;
391 extern vlib_node_registration_t snat_in2out_node;
392 extern vlib_node_registration_t snat_in2out_output_node;
393 extern vlib_node_registration_t snat_out2in_node;
394 extern vlib_node_registration_t snat_in2out_fast_node;
395 extern vlib_node_registration_t snat_out2in_fast_node;
396 extern vlib_node_registration_t snat_in2out_worker_handoff_node;
397 extern vlib_node_registration_t snat_in2out_output_worker_handoff_node;
398 extern vlib_node_registration_t snat_out2in_worker_handoff_node;
399 extern vlib_node_registration_t snat_det_in2out_node;
400 extern vlib_node_registration_t snat_det_out2in_node;
401 extern vlib_node_registration_t snat_hairpin_dst_node;
402 extern vlib_node_registration_t snat_hairpin_src_node;
403
404 void snat_free_outside_address_and_port (snat_address_t * addresses,
405                                          u32 thread_index,
406                                          snat_session_key_t * k,
407                                          u32 address_index);
408
409 int snat_alloc_outside_address_and_port (snat_address_t * addresses,
410                                          u32 fib_index,
411                                          u32 thread_index,
412                                          snat_session_key_t * k,
413                                          u32 * address_indexp,
414                                          u16 port_per_thread,
415                                          u32 snat_thread_index);
416
417 int snat_static_mapping_match (snat_main_t * sm,
418                                snat_session_key_t match,
419                                snat_session_key_t * mapping,
420                                u8 by_external,
421                                u8 *is_addr_only,
422                                u8 *twice_nat);
423
424 void snat_add_del_addr_to_fib (ip4_address_t * addr,
425                                u8 p_len,
426                                u32 sw_if_index,
427                                int is_add);
428
429 format_function_t format_snat_user;
430
431 typedef struct {
432   u32 cached_sw_if_index;
433   u32 cached_ip4_address;
434 } snat_runtime_t;
435
/*
 * Session flag predicates.
 *
 * Every macro takes a pointer to a session and yields the masked flag
 * bit(s): non-zero when the flag is set, 0 otherwise (not necessarily 1).
 * The argument is parenthesized so that expressions such as `p + 1` or
 * `&ses` can be passed safely.
 */

/** \brief Check if SNAT session is created from static mapping.
    @param s SNAT session
    @return non-zero if SNAT session is created from static mapping otherwise 0
*/
#define snat_is_session_static(s) \
  ((s)->flags & SNAT_SESSION_FLAG_STATIC_MAPPING)

/** \brief Check if SNAT session for unknown protocol.
    @param s SNAT session
    @return non-zero if SNAT session for unknown protocol otherwise 0
*/
#define snat_is_unk_proto_session(s) \
  ((s)->flags & SNAT_SESSION_FLAG_UNKNOWN_PROTO)

/** \brief Check if NAT session is twice NAT.
    @param s NAT session
    @return non-zero if NAT session is twice NAT otherwise 0
*/
#define is_twice_nat_session(s) \
  ((s)->flags & SNAT_SESSION_FLAG_TWICE_NAT)

/** \brief Check if NAT session is load-balancing.
    @param s NAT session
    @return non-zero if NAT session is load-balancing otherwise 0
*/
#define is_lb_session(s) \
  ((s)->flags & SNAT_SESSION_FLAG_LOAD_BALANCING)

/** \brief Check if NAT session is endpoint dependent.
    A session is endpoint dependent when it is an unknown-protocol,
    twice NAT or load-balancing session. Note that the argument may be
    evaluated up to three times.
    @param s NAT session
    @return 1 if NAT session is endpoint dependent otherwise 0
*/
#define is_ed_session(s) \
  (snat_is_unk_proto_session (s) || is_twice_nat_session (s) || is_lb_session (s))
465
/*
 * Interface flag predicates. 'i' is a pointer to an object with a 'flags'
 * member; the result is the masked flag bit (non-zero if set, else 0).
 * Both the argument and the whole expansion are parenthesized so the
 * macros compose correctly with '!', '==' and other operators.
 */
#define nat_interface_is_inside(i) ((i)->flags & NAT_INTERFACE_FLAG_IS_INSIDE)
#define nat_interface_is_outside(i) ((i)->flags & NAT_INTERFACE_FLAG_IS_OUTSIDE)
468
469 /*
470  * Why is this here? Because we don't need to touch this layer to
471  * simply reply to an icmp. We need to change id to a unique
472  * value to NAT an echo request/reply.
473  */
474
475 typedef struct {
476   u16 identifier;
477   u16 sequence;
478 } icmp_echo_header_t;
479
480 always_inline u32
481 ip_proto_to_snat_proto (u8 ip_proto)
482 {
483   u32 snat_proto = ~0;
484
485   snat_proto = (ip_proto == IP_PROTOCOL_UDP) ? SNAT_PROTOCOL_UDP : snat_proto;
486   snat_proto = (ip_proto == IP_PROTOCOL_TCP) ? SNAT_PROTOCOL_TCP : snat_proto;
487   snat_proto = (ip_proto == IP_PROTOCOL_ICMP) ? SNAT_PROTOCOL_ICMP : snat_proto;
488   snat_proto = (ip_proto == IP_PROTOCOL_ICMP6) ? SNAT_PROTOCOL_ICMP : snat_proto;
489
490   return snat_proto;
491 }
492
493 always_inline u8
494 snat_proto_to_ip_proto (snat_protocol_t snat_proto)
495 {
496   u8 ip_proto = ~0;
497
498   ip_proto = (snat_proto == SNAT_PROTOCOL_UDP) ? IP_PROTOCOL_UDP : ip_proto;
499   ip_proto = (snat_proto == SNAT_PROTOCOL_TCP) ? IP_PROTOCOL_TCP : ip_proto;
500   ip_proto = (snat_proto == SNAT_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP : ip_proto;
501
502   return ip_proto;
503 }
504
505 typedef struct {
506   u16 src_port, dst_port;
507 } tcp_udp_header_t;
508
509 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
510                            u32 thread_index, vlib_buffer_t *b0,
511                            ip4_header_t *ip0, u8 *p_proto,
512                            snat_session_key_t *p_value,
513                            u8 *p_dont_translate, void *d, void *e);
514 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
515                            u32 thread_index, vlib_buffer_t *b0,
516                            ip4_header_t *ip0, u8 *p_proto,
517                            snat_session_key_t *p_value,
518                            u8 *p_dont_translate, void *d, void *e);
519 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
520                           u32 thread_index, vlib_buffer_t *b0,
521                           ip4_header_t *ip0, u8 *p_proto,
522                           snat_session_key_t *p_value,
523                           u8 *p_dont_translate, void *d, void *e);
524 u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
525                            u32 thread_index, vlib_buffer_t *b0,
526                            ip4_header_t *ip0, u8 *p_proto,
527                            snat_session_key_t *p_value,
528                            u8 *p_dont_translate, void *d, void *e);
529 u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
530                            u32 thread_index, vlib_buffer_t *b0,
531                            ip4_header_t *ip0, u8 *p_proto,
532                            snat_session_key_t *p_value,
533                            u8 *p_dont_translate, void *d, void *e);
534 u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node,
535                           u32 thread_index, vlib_buffer_t *b0,
536                           ip4_header_t *ip0, u8 *p_proto,
537                           snat_session_key_t *p_value,
538                           u8 *p_dont_translate, void *d, void *e);
539 void increment_v4_address(ip4_address_t * a);
540 void snat_add_address(snat_main_t *sm, ip4_address_t *addr, u32 vrf_id,
541                       u8 twice_nat);
542 int snat_del_address(snat_main_t *sm, ip4_address_t addr, u8 delete_sm,
543                      u8 twice_nat);
544 void nat44_add_del_address_dpo (ip4_address_t addr, u8 is_add);
545 int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
546                             u16 l_port, u16 e_port, u32 vrf_id, int addr_only,
547                             u32 sw_if_index, snat_protocol_t proto, int is_add,
548                             u8 twice_nat);
549 clib_error_t * snat_api_init(vlib_main_t * vm, snat_main_t * sm);
550 int snat_set_workers (uword * bitmap);
551 int snat_interface_add_del(u32 sw_if_index, u8 is_inside, int is_del);
552 int snat_interface_add_del_output_feature(u32 sw_if_index, u8 is_inside,
553                                           int is_del);
554 int snat_add_interface_address(snat_main_t *sm, u32 sw_if_index, int is_del,
555                                u8 twice_nat);
556 uword unformat_snat_protocol(unformat_input_t * input, va_list * args);
557 u8 * format_snat_protocol(u8 * s, va_list * args);
558 int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
559                                      snat_protocol_t proto, u32 vrf_id,
560                                      nat44_lb_addr_port_t *locals, u8 is_add,
561                                      u8 twice_nat, u8 out2in_only);
562 int nat44_del_session (snat_main_t *sm, ip4_address_t *addr, u16 port,
563                        snat_protocol_t proto, u32 vrf_id, int is_in);
564 void nat_free_session_data (snat_main_t * sm, snat_session_t * s,
565                             u32 thread_index);
566 snat_user_t * nat_user_get_or_create (snat_main_t *sm, ip4_address_t *addr,
567                                       u32 fib_index, u32 thread_index);
568 snat_session_t * nat_session_alloc_or_recycle (snat_main_t *sm, snat_user_t *u,
569                                                u32 thread_index);
570
571 static_always_inline u8
572 icmp_is_error_message (icmp46_header_t * icmp)
573 {
574   switch(icmp->type)
575     {
576     case ICMP4_destination_unreachable:
577     case ICMP4_time_exceeded:
578     case ICMP4_parameter_problem:
579     case ICMP4_source_quench:
580     case ICMP4_redirect:
581     case ICMP4_alternate_host_address:
582       return 1;
583     }
584   return 0;
585 }
586
587 static_always_inline u8
588 is_interface_addr(snat_main_t *sm, vlib_node_runtime_t *node, u32 sw_if_index0,
589                   u32 ip4_addr)
590 {
591   snat_runtime_t *rt = (snat_runtime_t *) node->runtime_data;
592   ip4_address_t * first_int_addr;
593
594   if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
595     {
596       first_int_addr =
597         ip4_interface_first_address (sm->ip4_main, sw_if_index0,
598                                      0 /* just want the address */);
599       rt->cached_sw_if_index = sw_if_index0;
600       if (first_int_addr)
601         rt->cached_ip4_address = first_int_addr->as_u32;
602       else
603         rt->cached_ip4_address = 0;
604     }
605
606   if (PREDICT_FALSE(ip4_addr == rt->cached_ip4_address))
607     return 1;
608   else
609     return 0;
610 }
611
612 always_inline u8
613 maximum_sessions_exceeded (snat_main_t *sm, u32 thread_index)
614 {
615   if (pool_elts (sm->per_thread_data[thread_index].sessions) >= sm->max_translations)
616     return 1;
617
618   return 0;
619 }
620
621 static_always_inline void
622 nat_send_all_to_node(vlib_main_t *vm, u32 *bi_vector,
623                      vlib_node_runtime_t *node, vlib_error_t *error, u32 next)
624 {
625   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
626
627   from = bi_vector;
628   n_left_from = vec_len(bi_vector);
629   next_index = node->cached_next_index;
630   while (n_left_from > 0) {
631     vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
632     while (n_left_from > 0 && n_left_to_next > 0) {
633       u32 bi0 = to_next[0] = from[0];
634       from += 1;
635       n_left_from -= 1;
636       to_next += 1;
637       n_left_to_next -= 1;
638       vlib_buffer_t *p0 = vlib_get_buffer(vm, bi0);
639       p0->error = *error;
640       vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
641                                       n_left_to_next, bi0, next);
642     }
643     vlib_put_next_frame(vm, node, next_index, n_left_to_next);
644   }
645 }
646
#endif /* __included_nat_h__ */