5 #include <vppinfra/bihash_16_8.h>
6 #include <vppinfra/bihash_40_8.h>
8 #include <plugins/acl/exported_types.h>
10 // #define FA_NODE_VERBOSE_DEBUG 3
/*
 * TCP header flag bits. Each flag occupies its own bit, so the flags can be
 * combined and tested with bitwise operators.
 */
#define TCP_FLAG_FIN 0x01
#define TCP_FLAG_SYN 0x02
#define TCP_FLAG_RST 0x04
#define TCP_FLAG_PUSH 0x08
#define TCP_FLAG_ACK 0x10
#define TCP_FLAG_URG 0x20
#define TCP_FLAG_ECE 0x40
#define TCP_FLAG_CWR 0x80

/* Composite masks: RST|FIN|SYN|ACK (0x17) and SYN|ACK (0x12). */
#define TCP_FLAGS_RSTFINACKSYN \
  (TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_ACK)
#define TCP_FLAGS_ACKSYN (TCP_FLAG_SYN | TCP_FLAG_ACK)
/*
 * Default sizing constants for the connection table.
 * NOTE(review): judging by the names these are the bihash bucket count, the
 * bihash memory size and the session limit - confirm against the users.
 */
#define ACL_FA_CONN_TABLE_DEFAULT_HASH_NUM_BUCKETS (64 * 1024)	/* 65536 */
#define ACL_FA_CONN_TABLE_DEFAULT_HASH_MEMORY_SIZE (1ULL << 30)	/* 2^30, 64-bit constant */
#define ACL_FA_CONN_TABLE_DEFAULT_MAX_ENTRIES 500000
31 u16 mask_type_index_lsb;
35 u8 is_nonfirst_fragment:1;
51 u16 lsb_of_sw_if_index;
56 } fa_session_l4_key_t;
62 /* we put the IPv4 addresses
63 after padding so we can still
64 use them as (shorter) key together with
67 ip4_address_t ip4_addr[2];
69 ip6_address_t ip6_addr[2];
71 fa_session_l4_key_t l4;
72 /* This field should align with u64 value in bihash_40_8 and bihash_16_8 keyvalue struct */
75 clib_bihash_kv_40_8_t kv_40_8;
77 u64 padding_for_kv_16_8[3];
78 clib_bihash_kv_16_8_t kv_16_8;
82 static_always_inline u8 *
83 format_fa_session_l4_key(u8 * s, va_list * args)
85 fa_session_l4_key_t *l4 = va_arg (*args, fa_session_l4_key_t *);
87 return (format (s, "l4 lsb_of_sw_if_index %d proto %d l4_is_input %d l4_slow_path %d reserved0 0x%02x port %d -> %d",
88 l4->lsb_of_sw_if_index,
89 l4->proto, l4->is_input, l4->is_slowpath,
90 l4->reserved0, l4->port[0], l4->port[1]));
94 fa_5tuple_t info; /* (5+1)*8 = 48 bytes */
95 u64 last_active_time; /* +8 bytes = 56 */
96 u32 sw_if_index; /* +4 bytes = 60 */
100 } tcp_flags_seen; ; /* +2 bytes = 62 */
101 u16 thread_index; /* +2 bytes = 64 */
102 u64 link_enqueue_time; /* 8 byte = 8 */
103 u32 link_prev_idx; /* +4 bytes = 12 */
104 u32 link_next_idx; /* +4 bytes = 16 */
105 u8 link_list_id; /* +1 bytes = 17 */
106 u8 deleted; /* +1 bytes = 18 */
107 u8 is_ip6; /* +1 bytes = 19 */
108 u8 reserved1[5]; /* +5 bytes = 24 */
109 u64 reserved2[5]; /* +5*8 bytes = 64 */
/*
 * Policy epoch bit layout: input policy epochs have the MSB (0x8000) set;
 * the mask covers the remaining low 15 bits (0x7fff).
 */
#define FA_POLICY_EPOCH_IS_INPUT 0x8000
#define FA_POLICY_EPOCH_MASK (FA_POLICY_EPOCH_IS_INPUT - 1)
117 /* This structure is used to fill in the u64 value
118 in the per-sw-if-index hash table */
125 u16 intf_policy_epoch;
128 } fa_full_session_id_t;
131 * A few compile-time constraints on the size and the layout of the union, to ensure
132 * it makes sense both for bihash and for us.
/*
 * CT_ASSERT_EQUAL(name, x, y): compile-time check that x == y.
 *
 * Declares a typedef of an int array whose size is 1 when the two constant
 * expressions are equal and -1 otherwise; a negative array size is rejected
 * by the compiler, and the typedef name (built from `name`) shows up in the
 * diagnostic. The success case uses size 1 rather than 0 because zero-length
 * arrays are a compiler extension, not valid ISO C.
 */
#define CT_ASSERT_EQUAL(name, x, y) \
  typedef int assert_ ## name ## _compile_time_assertion_failed[((x) == (y)) ? 1 : -1]
136 CT_ASSERT_EQUAL(fa_l3_key_size_is_40, offsetof(fa_5tuple_t, pkt), offsetof(clib_bihash_kv_40_8_t, value));
137 CT_ASSERT_EQUAL(fa_ip6_kv_val_at_pkt, offsetof(fa_5tuple_t, pkt), offsetof(fa_5tuple_t, kv_40_8.value));
138 CT_ASSERT_EQUAL(fa_ip4_kv_val_at_pkt, offsetof(fa_5tuple_t, pkt), offsetof(fa_5tuple_t, kv_16_8.value));
139 CT_ASSERT_EQUAL(fa_l4_key_t_is_8, sizeof(fa_session_l4_key_t), sizeof(u64));
140 CT_ASSERT_EQUAL(fa_packet_info_t_is_8, sizeof(fa_packet_info_t), sizeof(u64));
141 CT_ASSERT_EQUAL(fa_l3_kv_size_is_48, sizeof(fa_5tuple_t), sizeof(clib_bihash_kv_40_8_t));
142 CT_ASSERT_EQUAL(fa_ip4_starts_at_kv16_key, offsetof(fa_5tuple_t, ip4_addr), offsetof(fa_5tuple_t, kv_16_8));
143 CT_ASSERT_EQUAL(fa_ip4_and_ip6_kv_value_match, offsetof(fa_5tuple_t, kv_16_8.value), offsetof(fa_5tuple_t, kv_40_8.value));
145 /* Let's try to fit within two cachelines */
146 CT_ASSERT_EQUAL(fa_session_t_size_is_128, sizeof(fa_session_t), 128);
148 /* Session ID MUST be the same as u64 */
149 CT_ASSERT_EQUAL(fa_full_session_id_size_is_64, sizeof(fa_full_session_id_t), sizeof(u64));
151 CT_ASSERT_EQUAL(fa_5tuple_opaque_t_must_match_5tuple, sizeof(fa_5tuple_opaque_t), sizeof(fa_5tuple_t));
152 #undef CT_ASSERT_EQUAL
/*
 * All-ones value (int -1, i.e. 0xffffffff once converted to a 32-bit unsigned
 * index). The expansion is parenthesized so that it stays a single operand
 * wherever the macro is used inside a larger expression.
 * NOTE(review): by its name this marks "no valid session index" - confirm
 * against the users.
 */
#define FA_SESSION_BOGUS_INDEX (~0)
157 /* The pool of sessions managed by this worker */
158 fa_session_t *fa_sessions_pool;
159 /* incoming session change requests from other workers */
160 clib_spinlock_t pending_session_change_request_lock;
161 u64 *pending_session_change_requests;
162 u64 *wip_session_change_requests;
163 u64 rcvd_session_change_requests;
164 u64 sent_session_change_requests;
165 /* per-worker ACL_N_TIMEOUTS of conn lists */
166 u32 *fa_conn_list_head;
167 u32 *fa_conn_list_tail;
168 /* expiry time set whenever an element is enqueued */
169 u64 *fa_conn_list_head_expiry_time;
170 /* adds and deletes per-worker-per-interface */
171 u64 *fa_session_dels_by_sw_if_index;
172 u64 *fa_session_adds_by_sw_if_index;
173 /* sessions deleted due to epoch change */
174 u64 *fa_session_epoch_change_by_sw_if_index;
175 /* Vector of expired connections retrieved from lists */
177 /* the earliest next expiry time */
178 u64 next_expiry_time;
/* if not zero, look at all the elements until their enqueue timestamp is later than this value */
180 u64 requeue_until_time;
181 /* Current time between the checks */
182 u64 current_time_wait_interval;
183 /* Counter of how many sessions we did delete */
184 u64 cnt_deleted_sessions;
/* Counter of already-deleted sessions being deleted again - should not increment unless there is a bug */
186 u64 cnt_already_deleted_sessions;
/* Number of times we requeued a session to the head of a list */
188 u64 cnt_session_timer_restarted;
189 /* swipe up to this enqueue time, rather than following the timeouts */
191 /* bitmap of sw_if_index serviced by this worker */
192 uword *serviced_sw_if_index_bitmap;
193 /* bitmap of sw_if_indices to clear. set by main thread, cleared by worker */
194 uword *pending_clear_sw_if_index_bitmap;
195 /* atomic, indicates that the swipe-deletion of connections is in progress */
196 u32 clear_in_process;
197 /* Interrupt is pending from main thread */
198 int interrupt_is_pending;
200 * Interrupt node on the worker thread sets this if it knows there is
201 * more work to do, but it has to finish to avoid hogging the
204 int interrupt_is_needed;
 * Set to indicate that the interrupt node wants to get fewer interrupts
207 * because there is not enough work for the current rate.
209 int interrupt_is_unwanted;
 * Set to a copy of the "generation" counter in the main thread so we can sync the interrupts.
213 int interrupt_generation;
214 } acl_fa_per_worker_data_t;
225 ACL_FA_CLEANER_RESCHEDULE = 1,
226 ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX,
227 } acl_fa_cleaner_process_event_e;
229 void acl_fa_enable_disable(u32 sw_if_index, int is_input, int enable_disable);
231 void show_fa_sessions_hash(vlib_main_t * vm, u32 verbose);
233 u8 *format_acl_plugin_5tuple (u8 * s, va_list * args);
235 /* use like: elog_acl_maybe_trace_X1(am, "foobar: %d", "i4", int32_value); */
237 #define elog_acl_maybe_trace_X1(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1) \
239 if (am->trace_sessions) { \
240 CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1)]; } *static_check); \
241 u16 thread_index = os_get_thread_index (); \
242 vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \
243 ELOG_TYPE_DECLARE (e) = \
245 .format = "(%02d) " acl_elog_trace_format_label, \
246 .format_args = "i2" acl_elog_trace_format_args, \
251 typeof(acl_elog_val1) val1; \
253 ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
254 ed->thread = thread_index; \
255 ed->val1 = acl_elog_val1; \
260 /* use like: elog_acl_maybe_trace_X2(am, "foobar: %d some u64: %lu", "i4i8", int32_value, int64_value); */
262 #define elog_acl_maybe_trace_X2(am, acl_elog_trace_format_label, acl_elog_trace_format_args, \
263 acl_elog_val1, acl_elog_val2) \
265 if (am->trace_sessions) { \
266 CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2)]; } *static_check); \
267 u16 thread_index = os_get_thread_index (); \
268 vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \
269 ELOG_TYPE_DECLARE (e) = \
271 .format = "(%02d) " acl_elog_trace_format_label, \
272 .format_args = "i2" acl_elog_trace_format_args, \
277 typeof(acl_elog_val1) val1; \
278 typeof(acl_elog_val2) val2; \
280 ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
281 ed->thread = thread_index; \
282 ed->val1 = acl_elog_val1; \
283 ed->val2 = acl_elog_val2; \
288 /* use like: elog_acl_maybe_trace_X3(am, "foobar: %d some u64 %lu baz: %d", "i4i8i4", int32_value, u64_value, int_value); */
290 #define elog_acl_maybe_trace_X3(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1, \
291 acl_elog_val2, acl_elog_val3) \
293 if (am->trace_sessions) { \
294 CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2) \
295 - sizeof(acl_elog_val3)]; } *static_check); \
296 u16 thread_index = os_get_thread_index (); \
297 vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \
298 ELOG_TYPE_DECLARE (e) = \
300 .format = "(%02d) " acl_elog_trace_format_label, \
301 .format_args = "i2" acl_elog_trace_format_args, \
306 typeof(acl_elog_val1) val1; \
307 typeof(acl_elog_val2) val2; \
308 typeof(acl_elog_val3) val3; \
310 ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
311 ed->thread = thread_index; \
312 ed->val1 = acl_elog_val1; \
313 ed->val2 = acl_elog_val2; \
314 ed->val3 = acl_elog_val3; \
319 /* use like: elog_acl_maybe_trace_X4(am, "foobar: %d some int %d baz: %d bar: %d", "i4i4i4i4", int32_value, int32_value2, int_value, int_value); */
321 #define elog_acl_maybe_trace_X4(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1, \
322 acl_elog_val2, acl_elog_val3, acl_elog_val4) \
324 if (am->trace_sessions) { \
325 CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2) \
326 - sizeof(acl_elog_val3) -sizeof(acl_elog_val4)]; } *static_check); \
327 u16 thread_index = os_get_thread_index (); \
328 vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \
329 ELOG_TYPE_DECLARE (e) = \
331 .format = "(%02d) " acl_elog_trace_format_label, \
332 .format_args = "i2" acl_elog_trace_format_args, \
337 typeof(acl_elog_val1) val1; \
338 typeof(acl_elog_val2) val2; \
339 typeof(acl_elog_val3) val3; \
340 typeof(acl_elog_val4) val4; \
342 ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
343 ed->thread = thread_index; \
344 ed->val1 = acl_elog_val1; \
345 ed->val2 = acl_elog_val2; \
346 ed->val3 = acl_elog_val3; \
347 ed->val4 = acl_elog_val4; \