Add a new API to get the maximum number of entries in the connection table.
[vpp.git] / src / plugins / acl / fa_node.h
1 #ifndef _FA_NODE_H_
2 #define _FA_NODE_H_
3
4 #include <stddef.h>
5 #include <vppinfra/bihash_16_8.h>
6 #include <vppinfra/bihash_40_8.h>
7
8 #include <plugins/acl/exported_types.h>
9
10 // #define FA_NODE_VERBOSE_DEBUG 3
11
/* Individual TCP header flag bits. */
#define TCP_FLAG_FIN  0x01
#define TCP_FLAG_SYN  0x02
#define TCP_FLAG_RST  0x04
#define TCP_FLAG_PUSH 0x08
#define TCP_FLAG_ACK  0x10
#define TCP_FLAG_URG  0x20
#define TCP_FLAG_ECE  0x40
#define TCP_FLAG_CWR  0x80

/* Combined masks. The member bits are disjoint, so OR-ing them gives the
   same value as adding them. */
#define TCP_FLAGS_RSTFINACKSYN \
  (TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_ACK)
#define TCP_FLAGS_ACKSYN (TCP_FLAG_SYN | TCP_FLAG_ACK)
22
/* Default sizing parameters for the connection table. */
#define ACL_FA_CONN_TABLE_DEFAULT_MAX_ENTRIES      500000
#define ACL_FA_CONN_TABLE_DEFAULT_HASH_NUM_BUCKETS (64 * 1024)
#define ACL_FA_CONN_TABLE_DEFAULT_HASH_MEMORY_SIZE (1ULL << 30)
26
27 typedef union {
28   u64 as_u64;
29   struct {
30     u32 lc_index;
31     u16 mask_type_index_lsb;
32     u8 tcp_flags;
33     u8 tcp_flags_valid:1;
34     u8 l4_valid:1;
35     u8 is_nonfirst_fragment:1;
36     u8 is_ip6:1;
37     u8 flags_reserved:4;
38   };
39 } fa_packet_info_t;
40
/* Bit values used in the l4_flags byte of the session L4 key. */
typedef enum
{
  FA_SK_L4_FLAG_IS_INPUT = 0x01,
  FA_SK_L4_FLAG_IS_SLOWPATH = 0x02,
} fa_session_l4_key_l4_flags_t;
45
46 typedef union {
47   u64 as_u64;
48   struct {
49     u16 port[2];
50     union {
51       struct {
52         u8 proto;
53         u8 l4_flags;
54         u16 lsb_of_sw_if_index;
55       };
56       u32 non_port_l4_data;
57     };
58   };
59 } fa_session_l4_key_t;
60
61
62 static_always_inline
63 int is_session_l4_key_u64_slowpath(u64 l4key) {
64   fa_session_l4_key_t k = { .as_u64 = l4key };
65   return (k.l4_flags & FA_SK_L4_FLAG_IS_SLOWPATH) ? 1 : 0;
66 }
67
68 typedef union {
69   struct {
70     union {
71       struct {
72         /* we put the IPv4 addresses
73            after padding so we can still
74            use them as (shorter) key together with
75            L4 info */
76         u32 l3_zero_pad[6];
77         ip4_address_t ip4_addr[2];
78       };
79       ip6_address_t ip6_addr[2];
80     };
81     fa_session_l4_key_t l4;
82     /* This field should align with u64 value in bihash_40_8 and bihash_16_8 keyvalue struct */
83     fa_packet_info_t pkt;
84   };
85   clib_bihash_kv_40_8_t kv_40_8;
86   struct {
87     u64 padding_for_kv_16_8[3];
88     clib_bihash_kv_16_8_t kv_16_8;
89   };
90 } fa_5tuple_t;
91
92 static_always_inline u8 *
93 format_fa_session_l4_key(u8 * s, va_list * args)
94 {
95   fa_session_l4_key_t *l4 = va_arg (*args, fa_session_l4_key_t *);
96   int is_input = (l4->l4_flags & FA_SK_L4_FLAG_IS_INPUT) ? 1 : 0;
97   int is_slowpath = (l4->l4_flags & FA_SK_L4_FLAG_IS_SLOWPATH) ? 1 : 0;
98
99   return (format (s, "l4 lsb_of_sw_if_index %d proto %d l4_is_input %d l4_slow_path %d l4_flags 0x%02x port %d -> %d",
100                   l4->lsb_of_sw_if_index,
101                   l4->proto, is_input, is_slowpath,
102                   l4->l4_flags, l4->port[0], l4->port[1]));
103 }
104
105 typedef struct {
106   fa_5tuple_t info; /* (5+1)*8 = 48 bytes */
107   u64 last_active_time;   /* +8 bytes = 56 */
108   u32 sw_if_index;        /* +4 bytes = 60 */
109   union {
110     u8 as_u8[2];
111     u16 as_u16;
112   } tcp_flags_seen; ;     /* +2 bytes = 62 */
113   u16 thread_index;          /* +2 bytes = 64 */
114   u64 link_enqueue_time;  /* 8 byte = 8 */
115   u32 link_prev_idx;      /* +4 bytes = 12 */
116   u32 link_next_idx;      /* +4 bytes = 16 */
117   u8 link_list_id;        /* +1 bytes = 17 */
118   u8 deleted;             /* +1 bytes = 18 */
119   u8 is_ip6;              /* +1 bytes = 19 */
120   u8 reserved1[5];        /* +5 bytes = 24 */
121   u64 reserved2[5];       /* +5*8 bytes = 64 */
122 } fa_session_t;
123
/* Input policy epochs have the MSB (bit 15) set ... */
#define FA_POLICY_EPOCH_IS_INPUT 0x8000
/* ... and the remaining 15 bits are selected by this mask. */
#define FA_POLICY_EPOCH_MASK     0x7fff
127
128
129 /* This structure is used to fill in the u64 value
130    in the per-sw-if-index hash table */
131 typedef struct {
132   union {
133     u64 as_u64;
134     struct {
135       u32 session_index;
136       u16 thread_index;
137       u16 intf_policy_epoch;
138     };
139   };
140 } fa_full_session_id_t;
141
142 /*
143  * A few compile-time constraints on the size and the layout of the union, to ensure
144  * it makes sense both for bihash and for us.
145  */
146
147 #define CT_ASSERT_EQUAL(name, x,y) typedef int assert_ ## name ## _compile_time_assertion_failed[((x) == (y))-1]
148 CT_ASSERT_EQUAL(fa_l3_key_size_is_40, offsetof(fa_5tuple_t, pkt), offsetof(clib_bihash_kv_40_8_t, value));
149 CT_ASSERT_EQUAL(fa_ip6_kv_val_at_pkt, offsetof(fa_5tuple_t, pkt), offsetof(fa_5tuple_t, kv_40_8.value));
150 CT_ASSERT_EQUAL(fa_ip4_kv_val_at_pkt, offsetof(fa_5tuple_t, pkt), offsetof(fa_5tuple_t, kv_16_8.value));
151 CT_ASSERT_EQUAL(fa_l4_key_t_is_8, sizeof(fa_session_l4_key_t), sizeof(u64));
152 CT_ASSERT_EQUAL(fa_packet_info_t_is_8, sizeof(fa_packet_info_t), sizeof(u64));
153 CT_ASSERT_EQUAL(fa_l3_kv_size_is_48, sizeof(fa_5tuple_t), sizeof(clib_bihash_kv_40_8_t));
154 CT_ASSERT_EQUAL(fa_ip4_starts_at_kv16_key, offsetof(fa_5tuple_t, ip4_addr), offsetof(fa_5tuple_t, kv_16_8));
155 CT_ASSERT_EQUAL(fa_ip4_and_ip6_kv_value_match, offsetof(fa_5tuple_t, kv_16_8.value), offsetof(fa_5tuple_t, kv_40_8.value));
156
157 /* Let's try to fit within two cachelines */
158 CT_ASSERT_EQUAL(fa_session_t_size_is_128, sizeof(fa_session_t), 128);
159
160 /* Session ID MUST be the same as u64 */
161 CT_ASSERT_EQUAL(fa_full_session_id_size_is_64, sizeof(fa_full_session_id_t), sizeof(u64));
162
163 CT_ASSERT_EQUAL(fa_5tuple_opaque_t_must_match_5tuple, sizeof(fa_5tuple_opaque_t), sizeof(fa_5tuple_t));
164 #undef CT_ASSERT_EQUAL
165
/* All-ones sentinel for a bogus session index; parenthesized so the expansion
   stays a single operand wherever it is used. */
#define FA_SESSION_BOGUS_INDEX (~0)
167
168 typedef struct {
169   /* The pool of sessions managed by this worker */
170   fa_session_t *fa_sessions_pool;
171   /* incoming session change requests from other workers */
172   clib_spinlock_t pending_session_change_request_lock;
173   u64 *pending_session_change_requests;
174   u64 *wip_session_change_requests;
175   u64 rcvd_session_change_requests;
176   u64 sent_session_change_requests;
177   /* per-worker ACL_N_TIMEOUTS of conn lists */
178   u32 *fa_conn_list_head;
179   u32 *fa_conn_list_tail;
180   /* expiry time set whenever an element is enqueued */
181   u64 *fa_conn_list_head_expiry_time;
182   /* adds and deletes per-worker-per-interface */
183   u64 *fa_session_dels_by_sw_if_index;
184   u64 *fa_session_adds_by_sw_if_index;
185   /* sessions deleted due to epoch change */
186   u64 *fa_session_epoch_change_by_sw_if_index;
187   /* Vector of expired connections retrieved from lists */
188   u32 *expired;
189   /* the earliest next expiry time */
190   u64 next_expiry_time;
191   /* if not zero, look at all the elements until their enqueue timestamp is after below one */
192   u64 requeue_until_time;
193   /* Current time between the checks */
194   u64 current_time_wait_interval;
195   /* Counter of how many sessions we did delete */
196   u64 cnt_deleted_sessions;
197   /* Counter of already deleted sessions being deleted - should not increment unless a bug */
198   u64 cnt_already_deleted_sessions;
199   /* Number of times we requeued a session to a head of the list */
200   u64 cnt_session_timer_restarted;
201   /* swipe up to this enqueue time, rather than following the timeouts */
202   u64 swipe_end_time;
203   /* bitmap of sw_if_index serviced by this worker */
204   uword *serviced_sw_if_index_bitmap;
205   /* bitmap of sw_if_indices to clear. set by main thread, cleared by worker */
206   uword *pending_clear_sw_if_index_bitmap;
207   /* atomic, indicates that the swipe-deletion of connections is in progress */
208   u32 clear_in_process;
209   /* Interrupt is pending from main thread */
210   int interrupt_is_pending;
211   /*
212    * Interrupt node on the worker thread sets this if it knows there is
213    * more work to do, but it has to finish to avoid hogging the
214    * core for too long.
215    */
216   int interrupt_is_needed;
217   /*
218    * Set to indicate that the interrupt node wants to get less interrupts
219    * because there is not enough work for the current rate.
220    */
221   int interrupt_is_unwanted;
222   /*
223    * Set to copy of a "generation" counter in main thread so we can sync the interrupts.
224    */
225   int interrupt_generation;
226    /*
227     * work in progress data for the pipelined node operation
228     */
229   vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
230   u32 sw_if_indices[VLIB_FRAME_SIZE];
231   fa_5tuple_t fa_5tuples[VLIB_FRAME_SIZE];
232   u64 hashes[VLIB_FRAME_SIZE];
233   u16 nexts[VLIB_FRAME_SIZE];
234
235 } acl_fa_per_worker_data_t;
236
237
/* Next indices; ACL_FA_N_NEXT is the count of the entries before it. */
typedef enum
{
  ACL_FA_ERROR_DROP = 0,
  ACL_FA_N_NEXT,
} acl_fa_next_t;
242
243
/*
 * Event codes for the cleaner process.
 *
 * Declared as a typedef: without it this header would define a global
 * variable named acl_fa_cleaner_process_event_e in every translation unit
 * that includes it.
 */
typedef enum
{
  ACL_FA_CLEANER_RESCHEDULE = 1,
  ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX,
} acl_fa_cleaner_process_event_e;
249
250 void acl_fa_enable_disable(u32 sw_if_index, int is_input, int enable_disable);
251
252 void show_fa_sessions_hash(vlib_main_t * vm, u32 verbose);
253
254 u8 *format_acl_plugin_5tuple (u8 * s, va_list * args);
255
/*
 * Record one value in the event log if am->trace_sessions is set.
 *
 * use like: elog_acl_maybe_trace_X1(am, "foobar: %d", "i4", int32_value);
 *
 * am                          - pointer whose trace_sessions member gates logging
 * acl_elog_trace_format_label - string literal appended to the "(%02d) " prefix
 * acl_elog_trace_format_args  - string literal appended to the "i2" argument spec
 * acl_elog_val1               - value to record (evaluated once)
 *
 * The static_check declaration sizes an array as 18 minus the size of the
 * value, which is meant to break compilation when the value does not fit.
 * Arguments used as expressions are parenthesized so that callers may pass
 * e.g. &some_struct or a conditional expression.
 */
#define elog_acl_maybe_trace_X1(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1) \
do {                                                                                       \
  if ((am)->trace_sessions) {                                                              \
    CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1)]; } *static_check); \
    u16 thread_index = os_get_thread_index ();                                             \
    vlib_worker_thread_t * w = vlib_worker_threads + thread_index;                         \
    ELOG_TYPE_DECLARE (e) =                                                                \
      {                                                                                    \
        .format = "(%02d) " acl_elog_trace_format_label,                                   \
        .format_args = "i2" acl_elog_trace_format_args,                                    \
      };                                                                                   \
    CLIB_PACKED(struct                                                                     \
      {                                                                                    \
        u16 thread;                                                                        \
        typeof(acl_elog_val1) val1;                                                        \
      }) *ed;                                                                              \
    ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track);                  \
    ed->thread = thread_index;                                                             \
    ed->val1 = (acl_elog_val1);                                                            \
  }                                                                                        \
} while (0)
279
280
/*
 * Record two values in the event log if am->trace_sessions is set.
 *
 * use like: elog_acl_maybe_trace_X2(am, "foobar: %d some u64: %lu", "i4i8", int32_value, int64_value);
 *
 * am                          - pointer whose trace_sessions member gates logging
 * acl_elog_trace_format_label - string literal appended to the "(%02d) " prefix
 * acl_elog_trace_format_args  - string literal appended to the "i2" argument spec
 * acl_elog_val1, _val2        - values to record (each evaluated once)
 *
 * The static_check declaration sizes an array as 18 minus the combined size
 * of the values, which is meant to break compilation when they do not fit.
 * Arguments used as expressions are parenthesized so that callers may pass
 * e.g. &some_struct or a conditional expression.
 */
#define elog_acl_maybe_trace_X2(am, acl_elog_trace_format_label, acl_elog_trace_format_args, \
                                acl_elog_val1, acl_elog_val2)                              \
do {                                                                                       \
  if ((am)->trace_sessions) {                                                              \
    CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1)                     \
                                               - sizeof(acl_elog_val2)]; } *static_check); \
    u16 thread_index = os_get_thread_index ();                                             \
    vlib_worker_thread_t * w = vlib_worker_threads + thread_index;                         \
    ELOG_TYPE_DECLARE (e) =                                                                \
      {                                                                                    \
        .format = "(%02d) " acl_elog_trace_format_label,                                   \
        .format_args = "i2" acl_elog_trace_format_args,                                    \
      };                                                                                   \
    CLIB_PACKED(struct                                                                     \
      {                                                                                    \
        u16 thread;                                                                        \
        typeof(acl_elog_val1) val1;                                                        \
        typeof(acl_elog_val2) val2;                                                        \
      }) *ed;                                                                              \
    ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track);                  \
    ed->thread = thread_index;                                                             \
    ed->val1 = (acl_elog_val1);                                                            \
    ed->val2 = (acl_elog_val2);                                                            \
  }                                                                                        \
} while (0)
307
308
/*
 * Record three values in the event log if am->trace_sessions is set.
 *
 * use like: elog_acl_maybe_trace_X3(am, "foobar: %d some u64 %lu baz: %d", "i4i8i4", int32_value, u64_value, int_value);
 *
 * am                          - pointer whose trace_sessions member gates logging
 * acl_elog_trace_format_label - string literal appended to the "(%02d) " prefix
 * acl_elog_trace_format_args  - string literal appended to the "i2" argument spec
 * acl_elog_val1 .. _val3      - values to record (each evaluated once)
 *
 * The static_check declaration sizes an array as 18 minus the combined size
 * of the values, which is meant to break compilation when they do not fit.
 * Arguments used as expressions are parenthesized so that callers may pass
 * e.g. &some_struct or a conditional expression.
 */
#define elog_acl_maybe_trace_X3(am, acl_elog_trace_format_label, acl_elog_trace_format_args, \
                                acl_elog_val1, acl_elog_val2, acl_elog_val3)               \
do {                                                                                       \
  if ((am)->trace_sessions) {                                                              \
    CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1)                     \
                                               - sizeof(acl_elog_val2)                     \
                                               - sizeof(acl_elog_val3)]; } *static_check); \
    u16 thread_index = os_get_thread_index ();                                             \
    vlib_worker_thread_t * w = vlib_worker_threads + thread_index;                         \
    ELOG_TYPE_DECLARE (e) =                                                                \
      {                                                                                    \
        .format = "(%02d) " acl_elog_trace_format_label,                                   \
        .format_args = "i2" acl_elog_trace_format_args,                                    \
      };                                                                                   \
    CLIB_PACKED(struct                                                                     \
      {                                                                                    \
        u16 thread;                                                                        \
        typeof(acl_elog_val1) val1;                                                        \
        typeof(acl_elog_val2) val2;                                                        \
        typeof(acl_elog_val3) val3;                                                        \
      }) *ed;                                                                              \
    ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track);                  \
    ed->thread = thread_index;                                                             \
    ed->val1 = (acl_elog_val1);                                                            \
    ed->val2 = (acl_elog_val2);                                                            \
    ed->val3 = (acl_elog_val3);                                                            \
  }                                                                                        \
} while (0)
338
339
/*
 * Record four values in the event log if am->trace_sessions is set.
 *
 * use like: elog_acl_maybe_trace_X4(am, "foobar: %d some int %d baz: %d bar: %d", "i4i4i4i4", int32_value, int32_value2, int_value, int_value);
 *
 * am                          - pointer whose trace_sessions member gates logging
 * acl_elog_trace_format_label - string literal appended to the "(%02d) " prefix
 * acl_elog_trace_format_args  - string literal appended to the "i2" argument spec
 * acl_elog_val1 .. _val4      - values to record (each evaluated once)
 *
 * The static_check declaration sizes an array as 18 minus the combined size
 * of the values, which is meant to break compilation when they do not fit.
 * Arguments used as expressions are parenthesized so that callers may pass
 * e.g. &some_struct or a conditional expression.
 */
#define elog_acl_maybe_trace_X4(am, acl_elog_trace_format_label, acl_elog_trace_format_args, \
                                acl_elog_val1, acl_elog_val2, acl_elog_val3, acl_elog_val4) \
do {                                                                                       \
  if ((am)->trace_sessions) {                                                              \
    CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1)                     \
                                               - sizeof(acl_elog_val2)                     \
                                               - sizeof(acl_elog_val3)                     \
                                               - sizeof(acl_elog_val4)]; } *static_check); \
    u16 thread_index = os_get_thread_index ();                                             \
    vlib_worker_thread_t * w = vlib_worker_threads + thread_index;                         \
    ELOG_TYPE_DECLARE (e) =                                                                \
      {                                                                                    \
        .format = "(%02d) " acl_elog_trace_format_label,                                   \
        .format_args = "i2" acl_elog_trace_format_args,                                    \
      };                                                                                   \
    CLIB_PACKED(struct                                                                     \
      {                                                                                    \
        u16 thread;                                                                        \
        typeof(acl_elog_val1) val1;                                                        \
        typeof(acl_elog_val2) val2;                                                        \
        typeof(acl_elog_val3) val3;                                                        \
        typeof(acl_elog_val4) val4;                                                        \
      }) *ed;                                                                              \
    ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track);                  \
    ed->thread = thread_index;                                                             \
    ed->val1 = (acl_elog_val1);                                                            \
    ed->val2 = (acl_elog_val2);                                                            \
    ed->val3 = (acl_elog_val3);                                                            \
    ed->val4 = (acl_elog_val4);                                                            \
  }                                                                                        \
} while (0)
371
372
373 #endif