2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <netinet/in.h>
18 #include <vlib/vlib.h>
19 #include <vnet/vnet.h>
20 #include <vnet/pg/pg.h>
21 #include <vppinfra/error.h>
23 #include <vppinfra/bihash_40_8.h>
25 #include <vppinfra/bihash_template.h>
26 #include <vppinfra/bihash_template.c>
29 #include "hash_lookup.h"
/* ACL index recorded for the traced packet; format_acl_fa_trace prints it
   after "match: acl".  NOTE(review): the rest of the enclosing trace record
   definition is not visible in this view. */
35 u32 match_acl_in_index;
43 format_fa_5tuple (u8 * s, va_list * args)
45 fa_5tuple_t *p5t = va_arg (*args, fa_5tuple_t *);
47 return format(s, "%s sw_if_index %d (lsb16 %d) l3 %s%s %U -> %U"
48 " l4 proto %d l4_valid %d port %d -> %d tcp flags (%s) %02x rsvd %x",
49 p5t->pkt.is_input ? "input" : "output",
50 p5t->pkt.sw_if_index, p5t->l4.lsb_of_sw_if_index, p5t->pkt.is_ip6 ? "ip6" : "ip4",
51 p5t->pkt.is_nonfirst_fragment ? " non-initial fragment" : "",
52 format_ip46_address, &p5t->addr[0], p5t->pkt.is_ip6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
53 format_ip46_address, &p5t->addr[1], p5t->pkt.is_ip6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
54 p5t->l4.proto, p5t->pkt.l4_valid,
55 p5t->l4.port[0], p5t->l4.port[1],
56 p5t->pkt.tcp_flags_valid ? "valid": "invalid",
58 p5t->pkt.flags_reserved);
62 format_acl_plugin_5tuple (u8 * s, va_list * args)
64 return format_fa_5tuple(s, args);
67 /* packet trace format function */
/*
 * Renders one acl_fa_trace_t record.  The vm and node arguments are taken
 * off the va_list but not used.  The record's scalar fields and its six raw
 * 64-bit packet_info words are printed first, then packet_info is printed
 * again through format_fa_5tuple.
 * NOTE(review): the line that opens the first format () call and the
 * function's return are not visible in this view.
 */
69 format_acl_fa_trace (u8 * s, va_list * args)
71 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
72 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
73 acl_fa_trace_t *t = va_arg (*args, acl_fa_trace_t *);
77 "acl-plugin: sw_if_index %d, next index %d, action: %d, match: acl %d rule %d trace_bits %08x\n"
78 " pkt info %016llx %016llx %016llx %016llx %016llx %016llx",
79 t->sw_if_index, t->next_index, t->action, t->match_acl_in_index,
80 t->match_rule_index, t->trace_bitmap,
81 t->packet_info[0], t->packet_info[1], t->packet_info[2],
82 t->packet_info[3], t->packet_info[4], t->packet_info[5]);
84 /* Now also print out the packet_info in a form usable by humans */
/* packet_info is passed as the fa_5tuple_t pointer that format_fa_5tuple
   expects; the node fills it from the 5-tuple's five key words and value. */
85 s = format (s, "\n %U", format_fa_5tuple, t->packet_info);
/*
 * X-macro list of this node's counters.  Each _(sym, str) entry pairs a
 * symbol suffix with its display string.  The first "#define _" below turns
 * an entry into an ACL_FA_ERROR_<sym> enumerator; the second turns it into
 * the string stored in acl_fa_error_strings[], so both come from one list.
 * NOTE(review): the places where foreach_acl_fa_error is expanded are not
 * visible in this view.
 */
90 #define foreach_acl_fa_error \
91 _(ACL_DROP, "ACL deny packets") \
92 _(ACL_PERMIT, "ACL permit packets") \
93 _(ACL_NEW_SESSION, "new sessions added") \
94 _(ACL_EXIST_SESSION, "existing session packets") \
95 _(ACL_CHECK, "checked packets") \
96 _(ACL_RESTART_SESSION_TIMER, "restart session timer") \
97 _(ACL_TOO_MANY_SESSIONS, "too many sessions to add new") \
102 #define _(sym,str) ACL_FA_ERROR_##sym,
108 static char *acl_fa_error_strings[] = {
109 #define _(sym,string) string,
116 get_ptr_to_offset (vlib_buffer_t * b0, int offset)
118 u8 *p = vlib_buffer_get_current (b0) + offset;
124 fa_acl_match_addr (ip46_address_t * addr1, ip46_address_t * addr2,
125 int prefixlen, int is_ip6)
129 /* match any always succeeds */
134 if (memcmp (addr1, addr2, prefixlen / 8))
136 /* If the starting full bytes do not match, no point in bittwidling the thumbs further */
141 u8 b1 = *((u8 *) addr1 + 1 + prefixlen / 8);
142 u8 b2 = *((u8 *) addr2 + 1 + prefixlen / 8);
143 u8 mask0 = (0xff - ((1 << (8 - (prefixlen % 8))) - 1));
144 return (b1 & mask0) == b2;
148 /* The prefix fits into integer number of bytes, so nothing left to do */
154 uint32_t a1 = ntohl (addr1->ip4.as_u32);
155 uint32_t a2 = ntohl (addr2->ip4.as_u32);
156 uint32_t mask0 = 0xffffffff - ((1 << (32 - prefixlen)) - 1);
157 return (a1 & mask0) == a2;
162 fa_acl_match_port (u16 port, u16 port_first, u16 port_last, int is_ip6)
164 return ((port >= port_first) && (port <= port_last));
/*
 * Match one packet 5-tuple against the rules of a single ACL.
 *
 * Rules of am->acls[acl_index] are visited in index order.  The visible
 * checks compare, in this order: address family, destination prefix,
 * source prefix, L4 protocol, source port range, destination port range
 * and TCP flags under the rule's mask.  When a rule matches, *r_action
 * receives the rule's is_permit and *r_acl_match_p receives acl_index.
 * trace_bitmap gets bit 0x80000000 set when a non-initial fragment is
 * matched on L3/protocol only.
 *
 * NOTE(review): the return statements, several "continue"-style branches
 * and the declarations of i, a and r are not visible in this view, so the
 * exact return values are not documented here.
 */
168 single_acl_match_5tuple (acl_main_t * am, u32 acl_index, fa_5tuple_t * pkt_5tuple,
169 int is_ip6, u8 * r_action, u32 * r_acl_match_p,
170 u32 * r_rule_match_p, u32 * trace_bitmap)
/* An ACL index that is applied but has no pool entry is reported as a
   match on that ACL with rule index -1. */
176 if (pool_is_free_index (am->acls, acl_index))
179 *r_acl_match_p = acl_index;
181 *r_rule_match_p = -1;
182 /* the ACL does not exist but is used for policy. Block traffic. */
185 a = am->acls + acl_index;
186 for (i = 0; i < a->count; i++)
/* A rule only applies to packets of its own address family. */
189 if (is_ip6 != r->is_ipv6)
193 if (!fa_acl_match_addr
194 (&pkt_5tuple->addr[1], &r->dst, r->dst_prefixlen, is_ip6))
197 #ifdef FA_NODE_VERBOSE_DEBUG
199 ("ACL_FA_NODE_DBG acl %d rule %d pkt dst addr %U match rule addr %U/%d",
200 acl_index, i, format_ip46_address, &pkt_5tuple->addr[1],
201 r->is_ipv6 ? IP46_TYPE_IP6: IP46_TYPE_IP4, format_ip46_address,
202 &r->dst, r->is_ipv6 ? IP46_TYPE_IP6: IP46_TYPE_IP4,
206 if (!fa_acl_match_addr
207 (&pkt_5tuple->addr[0], &r->src, r->src_prefixlen, is_ip6))
210 #ifdef FA_NODE_VERBOSE_DEBUG
212 ("ACL_FA_NODE_DBG acl %d rule %d pkt src addr %U match rule addr %U/%d",
213 acl_index, i, format_ip46_address, &pkt_5tuple->addr[0],
214 r->is_ipv6 ? IP46_TYPE_IP6: IP46_TYPE_IP4, format_ip46_address,
215 &r->src, r->is_ipv6 ? IP46_TYPE_IP6: IP46_TYPE_IP4,
218 ("ACL_FA_NODE_DBG acl %d rule %d trying to match pkt proto %d with rule %d",
219 acl_index, i, pkt_5tuple->l4.proto, r->proto);
223 if (pkt_5tuple->l4.proto != r->proto)
/* Non-initial fragments carry no L4 header; when
   am->l4_match_nonfirst_fragment is set they take the rule's action
   without the port and flag checks below. */
226 if (PREDICT_FALSE (pkt_5tuple->pkt.is_nonfirst_fragment &&
227 am->l4_match_nonfirst_fragment))
229 /* non-initial fragment with frag match configured - match this rule */
230 *trace_bitmap |= 0x80000000;
231 *r_action = r->is_permit;
233 *r_acl_match_p = acl_index;
239 /* A sanity check just to ensure we are about to match the ports extracted from the packet */
240 if (PREDICT_FALSE (!pkt_5tuple->pkt.l4_valid))
243 #ifdef FA_NODE_VERBOSE_DEBUG
245 ("ACL_FA_NODE_DBG acl %d rule %d pkt proto %d match rule %d",
246 acl_index, i, pkt_5tuple->l4.proto, r->proto);
/* port[0] is compared with the rule's source port / type range and
   port[1] with its destination port / code range. */
249 if (!fa_acl_match_port
250 (pkt_5tuple->l4.port[0], r->src_port_or_type_first,
251 r->src_port_or_type_last, is_ip6))
254 #ifdef FA_NODE_VERBOSE_DEBUG
256 ("ACL_FA_NODE_DBG acl %d rule %d pkt sport %d match rule [%d..%d]",
257 acl_index, i, pkt_5tuple->l4.port[0], r->src_port_or_type_first,
258 r->src_port_or_type_last);
261 if (!fa_acl_match_port
262 (pkt_5tuple->l4.port[1], r->dst_port_or_code_first,
263 r->dst_port_or_code_last, is_ip6))
266 #ifdef FA_NODE_VERBOSE_DEBUG
268 ("ACL_FA_NODE_DBG acl %d rule %d pkt dport %d match rule [%d..%d]",
269 acl_index, i, pkt_5tuple->l4.port[1], r->dst_port_or_code_first,
270 r->dst_port_or_code_last);
/* TCP flags are only compared when the packet actually carried them. */
272 if (pkt_5tuple->pkt.tcp_flags_valid
273 && ((pkt_5tuple->pkt.tcp_flags & r->tcp_flags_mask) !=
277 /* everything matches! */
278 #ifdef FA_NODE_VERBOSE_DEBUG
279 clib_warning ("ACL_FA_NODE_DBG acl %d rule %d FULL-MATCH, action %d",
280 acl_index, i, r->is_permit);
282 *r_action = r->is_permit;
284 *r_acl_match_p = acl_index;
/*
 * Linear lookup over every ACL applied to an interface.
 *
 * Picks the input or output ACL vector for sw_if_index from acl_main
 * (growing the per-interface table with vec_validate so the index is
 * valid), then calls single_acl_match_5tuple for each ACL in vector order.
 * acl_match_p, rule_match_p and trace_bitmap are passed straight through.
 * Per the comments below, the result is a deny when ACLs exist but none
 * matched, and also when no ACL is applied.
 * NOTE(review): the branch on is_input and all return statements are not
 * visible in this view.
 */
293 linear_multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2,
294 int is_ip6, int is_input, u32 * acl_match_p,
295 u32 * rule_match_p, u32 * trace_bitmap)
297 acl_main_t *am = &acl_main;
304 vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index);
305 acl_vector = am->input_acl_vec_by_sw_if_index[sw_if_index];
309 vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index);
310 acl_vector = am->output_acl_vec_by_sw_if_index[sw_if_index];
/* acl_vector[i] is an ACL index into am->acls. */
312 for (i = 0; i < vec_len (acl_vector); i++)
314 #ifdef FA_NODE_VERBOSE_DEBUG
315 clib_warning ("ACL_FA_NODE_DBG: Trying to match ACL: %d",
318 if (single_acl_match_5tuple
319 (am, acl_vector[i], pkt_5tuple, is_ip6, &action,
320 acl_match_p, rule_match_p, trace_bitmap))
325 if (vec_len (acl_vector) > 0)
327 /* If there are ACLs and none matched, deny by default */
330 #ifdef FA_NODE_VERBOSE_DEBUG
331 clib_warning ("ACL_FA_NODE_DBG: No ACL on sw_if_index %d", sw_if_index);
333 /* Deny by default. If there are no ACLs defined we should not be here. */
338 multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2,
339 int is_ip6, int is_input, u32 * acl_match_p,
340 u32 * rule_match_p, u32 * trace_bitmap)
342 acl_main_t *am = &acl_main;
343 if (am->use_hash_acl_matching) {
344 return hash_multi_acl_match_5tuple(sw_if_index, pkt_5tuple, is_l2, is_ip6,
345 is_input, acl_match_p, rule_match_p, trace_bitmap);
347 return linear_multi_acl_match_5tuple(sw_if_index, pkt_5tuple, is_l2, is_ip6,
348 is_input, acl_match_p, rule_match_p, trace_bitmap);
353 offset_within_packet (vlib_buffer_t * b0, int offset)
355 /* For the purposes of this code, "within" means we have at least 8 bytes after it */
356 return (offset <= (b0->current_length - 8));
360 acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
361 int is_input, int is_l2_path, fa_5tuple_t * p5tuple_pkt)
368 /* IP4 and IP6 protocol numbers of ICMP */
369 static u8 icmp_protos[] = { IP_PROTOCOL_ICMP, IP_PROTOCOL_ICMP6 };
373 l3_offset = ethernet_buffer_header_size(b0);
380 l3_offset = vnet_buffer(b0)->ip.save_rewrite_length;
383 /* key[0..3] contains src/dst address and is cleared/set below */
384 /* Remainder of the key and per-packet non-key data */
385 p5tuple_pkt->kv.key[4] = 0;
386 p5tuple_pkt->kv.value = 0;
390 clib_memcpy (&p5tuple_pkt->addr,
391 get_ptr_to_offset (b0,
392 offsetof (ip6_header_t,
393 src_address) + l3_offset),
394 sizeof (p5tuple_pkt->addr));
396 *(u8 *) get_ptr_to_offset (b0,
397 offsetof (ip6_header_t,
398 protocol) + l3_offset);
399 l4_offset = l3_offset + sizeof (ip6_header_t);
400 #ifdef FA_NODE_VERBOSE_DEBUG
401 clib_warning ("ACL_FA_NODE_DBG: proto: %d, l4_offset: %d", proto,
404 /* IP6 EH handling is here, increment l4_offset if needs to, update the proto */
405 int need_skip_eh = clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto);
406 if (PREDICT_FALSE (need_skip_eh))
408 while (need_skip_eh && offset_within_packet (b0, l4_offset))
410 /* Fragment header needs special handling */
411 if (PREDICT_FALSE(ACL_EH_FRAGMENT == proto))
413 proto = *(u8 *) get_ptr_to_offset (b0, l4_offset);
415 clib_memcpy (&frag_offset, get_ptr_to_offset (b0, 2 + l4_offset), sizeof(frag_offset));
416 frag_offset = ntohs(frag_offset) >> 3;
419 p5tuple_pkt->pkt.is_nonfirst_fragment = 1;
420 /* invalidate L4 offset so we don't try to find L4 info */
421 l4_offset += b0->current_length;
425 /* First fragment: skip the frag header and move on. */
431 u8 nwords = *(u8 *) get_ptr_to_offset (b0, 1 + l4_offset);
432 proto = *(u8 *) get_ptr_to_offset (b0, l4_offset);
433 l4_offset += 8 * (1 + (u16) nwords);
435 #ifdef FA_NODE_VERBOSE_DEBUG
436 clib_warning ("ACL_FA_NODE_DBG: new proto: %d, new offset: %d",
440 clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto);
446 p5tuple_pkt->kv.key[0] = 0;
447 p5tuple_pkt->kv.key[1] = 0;
448 p5tuple_pkt->kv.key[2] = 0;
449 p5tuple_pkt->kv.key[3] = 0;
450 clib_memcpy (&p5tuple_pkt->addr[0].ip4,
451 get_ptr_to_offset (b0,
452 offsetof (ip4_header_t,
453 src_address) + l3_offset),
454 sizeof (p5tuple_pkt->addr[0].ip4));
455 clib_memcpy (&p5tuple_pkt->addr[1].ip4,
456 get_ptr_to_offset (b0,
457 offsetof (ip4_header_t,
458 dst_address) + l3_offset),
459 sizeof (p5tuple_pkt->addr[1].ip4));
461 *(u8 *) get_ptr_to_offset (b0,
462 offsetof (ip4_header_t,
463 protocol) + l3_offset);
464 l4_offset = l3_offset + sizeof (ip4_header_t);
465 u16 flags_and_fragment_offset;
466 clib_memcpy (&flags_and_fragment_offset,
467 get_ptr_to_offset (b0,
468 offsetof (ip4_header_t,
469 flags_and_fragment_offset)) + l3_offset,
470 sizeof(flags_and_fragment_offset));
471 flags_and_fragment_offset = ntohs (flags_and_fragment_offset);
473 /* non-initial fragments have non-zero offset */
474 if ((PREDICT_FALSE(0xfff & flags_and_fragment_offset)))
476 p5tuple_pkt->pkt.is_nonfirst_fragment = 1;
477 /* invalidate L4 offset so we don't try to find L4 info */
478 l4_offset += b0->current_length;
482 p5tuple_pkt->l4.proto = proto;
483 if (PREDICT_TRUE (offset_within_packet (b0, l4_offset)))
485 p5tuple_pkt->pkt.l4_valid = 1;
486 if (icmp_protos[is_ip6] == proto)
489 p5tuple_pkt->l4.port[0] =
490 *(u8 *) get_ptr_to_offset (b0,
491 l4_offset + offsetof (icmp46_header_t,
494 p5tuple_pkt->l4.port[1] =
495 *(u8 *) get_ptr_to_offset (b0,
496 l4_offset + offsetof (icmp46_header_t,
499 else if ((IPPROTO_TCP == proto) || (IPPROTO_UDP == proto))
502 get_ptr_to_offset (b0,
503 l4_offset + offsetof (tcp_header_t,
506 p5tuple_pkt->l4.port[0] = ntohs (ports[0]);
507 p5tuple_pkt->l4.port[1] = ntohs (ports[1]);
509 p5tuple_pkt->pkt.tcp_flags =
510 *(u8 *) get_ptr_to_offset (b0,
511 l4_offset + offsetof (tcp_header_t,
513 p5tuple_pkt->pkt.tcp_flags_valid = (proto == IPPROTO_TCP);
516 * FIXME: rather than the above conditional, here could
517 * be a nice generic mechanism to extract two L4 values:
519 * have a per-protocol array of 4 elements like this:
520 * u8 offset; to take the byte from, off L4 header
521 * u8 mask; to mask it with, before storing
523 * this way we can describe UDP, TCP and ICMP[46] semantics,
524 * and add a sort of FPM-type behavior for other protocols.
526 * Of course, is it faster ? and is it needed ?
533 /* Session keys match the packets received, and mirror the packets sent */
535 acl_make_5tuple_session_key (int is_input, fa_5tuple_t * p5tuple_pkt,
536 fa_5tuple_t * p5tuple_sess)
538 int src_index = is_input ? 0 : 1;
539 int dst_index = is_input ? 1 : 0;
540 p5tuple_sess->addr[src_index] = p5tuple_pkt->addr[0];
541 p5tuple_sess->addr[dst_index] = p5tuple_pkt->addr[1];
542 p5tuple_sess->l4.as_u64 = p5tuple_pkt->l4.as_u64;
543 p5tuple_sess->l4.port[src_index] = p5tuple_pkt->l4.port[0];
544 p5tuple_sess->l4.port[dst_index] = p5tuple_pkt->l4.port[1];
549 acl_fa_ifc_has_sessions (acl_main_t * am, int sw_if_index0)
551 return am->fa_sessions_hash_is_initialized;
555 acl_fa_ifc_has_in_acl (acl_main_t * am, int sw_if_index0)
557 int it_has = clib_bitmap_get (am->fa_in_acl_on_sw_if_index, sw_if_index0);
562 acl_fa_ifc_has_out_acl (acl_main_t * am, int sw_if_index0)
564 int it_has = clib_bitmap_get (am->fa_out_acl_on_sw_if_index, sw_if_index0);
/*
 * Pick the timeout class (an ACL_TIMEOUT_* value) for a session from its
 * L4 protocol and the TCP flags seen so far.  tcp_flags_seen holds one byte
 * of flags per direction; both bytes are masked with
 * TCP_FLAGS_RSTFINACKSYN.  The idle TCP class is chosen only when the
 * masked value is exactly SYN+ACK in both directions; otherwise the
 * transient TCP class is returned.
 * NOTE(review): the case labels of the switch are not visible in this
 * view; the last two returns both yield ACL_TIMEOUT_UDP_IDLE.
 */
570 fa_session_get_timeout_type (acl_main_t * am, fa_session_t * sess)
572 /* seen both SYNs and ACKs but not FINs means we are in established state */
574 sess->tcp_flags_seen.as_u16 & ((TCP_FLAGS_RSTFINACKSYN << 8) +
575 TCP_FLAGS_RSTFINACKSYN);
576 switch (sess->info.l4.proto)
579 if (((TCP_FLAGS_ACKSYN << 8) + TCP_FLAGS_ACKSYN) == masked_flags)
581 return ACL_TIMEOUT_TCP_IDLE;
585 return ACL_TIMEOUT_TCP_TRANSIENT;
589 return ACL_TIMEOUT_UDP_IDLE;
592 return ACL_TIMEOUT_UDP_IDLE;
/*
 * Scan am->session_timeout_sec[0 .. ACL_N_TIMEOUTS) and keep the smallest
 * configured value (in seconds) in `timeout`.
 * NOTE(review): the declaration and starting value of `timeout` and the
 * return statement are not visible in this view.
 */
598 fa_session_get_shortest_timeout(acl_main_t * am)
602 for(timeout_type = 0; timeout_type < ACL_N_TIMEOUTS; timeout_type++) {
603 if (timeout > am->session_timeout_sec[timeout_type]) {
604 timeout = am->session_timeout_sec[timeout_type];
/*
 * Time a session may sit in its list before it has to be looked at, in CPU
 * clock ticks: clocks-per-second multiplied by the shortest configured
 * timeout.  The same value applies to every list; `sess` does not
 * influence the result in the visible code.
 * NOTE(review): the return statement is not visible in this view.
 */
611 * Get the timeout of the session in a list since its enqueue time.
615 fa_session_get_list_timeout (acl_main_t * am, fa_session_t * sess)
617 u64 timeout = am->vlib_main->clib_time.clocks_per_second;
619 * we have the shortest possible timeout type in all the lists
620 * (see README-multicore for the rationale)
622 timeout *= fa_session_get_shortest_timeout(am);
/*
 * Idle timeout of one session in CPU clock ticks: clocks-per-second
 * multiplied by the configured number of seconds for the session's current
 * timeout class (see fa_session_get_timeout_type).
 * NOTE(review): the return statement is not visible in this view.
 */
627 * Get the idle timeout of a session.
631 fa_session_get_timeout (acl_main_t * am, fa_session_t * sess)
633 u64 timeout = am->vlib_main->clib_time.clocks_per_second;
634 int timeout_type = fa_session_get_timeout_type (am, sess);
635 timeout *= am->session_timeout_sec[timeout_type];
/*
 * Lazy one-time setup of the session storage, guarded by
 * am->fa_sessions_hash_is_initialized: creates a fixed-size session pool of
 * am->fa_conn_table_max_entries elements for every entry of
 * am->per_worker_data, initializes the shared session bihash with the
 * configured bucket count and memory size, then sets the guard flag so
 * later calls do nothing.
 */
640 acl_fa_verify_init_sessions (acl_main_t * am)
642 if (!am->fa_sessions_hash_is_initialized) {
644 /* Allocate the per-worker sessions pools */
645 for (wk = 0; wk < vec_len (am->per_worker_data); wk++) {
646 acl_fa_per_worker_data_t *pw = &am->per_worker_data[wk];
649 * // In lieu of trying to preallocate the pool and its free bitmap, rather use pool_init_fixed
650 * pool_alloc_aligned(pw->fa_sessions_pool, am->fa_conn_table_max_entries, CLIB_CACHE_LINE_BYTES);
651 * clib_bitmap_validate(pool_header(pw->fa_sessions_pool)->free_bitmap, am->fa_conn_table_max_entries);
653 pool_init_fixed(pw->fa_sessions_pool, am->fa_conn_table_max_entries);
656 /* ... and the interface session hash table */
657 BV (clib_bihash_init) (&am->fa_sessions_hash,
658 "ACL plugin FA session bihash",
659 am->fa_conn_table_hash_num_buckets,
660 am->fa_conn_table_hash_memory_size);
661 am->fa_sessions_hash_is_initialized = 1;
665 static inline fa_session_t *get_session_ptr(acl_main_t *am, u16 thread_index, u32 session_index)
667 acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
668 fa_session_t *sess = pool_is_free_index (pw->fa_sessions_pool, session_index) ? 0 : pool_elt_at_index(pw->fa_sessions_pool, session_index);
672 static inline int is_valid_session_ptr(acl_main_t *am, u16 thread_index, fa_session_t *sess)
674 acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
675 return ((sess != 0) && ((sess - pw->fa_sessions_pool) < pool_len(pw->fa_sessions_pool)));
/*
 * Append a session to the tail of the calling thread's connection list
 * for the session's current timeout class.
 *
 * The session is stamped with `now` as its enqueue time and with the list
 * id, linked after the previous tail (if any), and becomes the new tail; it
 * also becomes the head when the list was empty.  ~0 is the "no session"
 * link value.  The session's sw_if_index is marked in the worker's
 * serviced_sw_if_index_bitmap.  The ASSERTs require the session to belong
 * to the calling thread.
 */
679 acl_fa_conn_list_add_session (acl_main_t * am, fa_full_session_id_t sess_id, u64 now)
681 fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index);
/* the timeout class doubles as the list id */
682 u8 list_id = fa_session_get_timeout_type(am, sess);
683 uword thread_index = os_get_thread_index ();
684 acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
685 /* the retrieved session thread index must be necessarily the same as the one in the key */
686 ASSERT (sess->thread_index == sess_id.thread_index);
687 /* the retrieved session thread index must be the same as current thread */
688 ASSERT (sess->thread_index == thread_index);
689 sess->link_enqueue_time = now;
690 sess->link_list_id = list_id;
691 sess->link_next_idx = ~0;
692 sess->link_prev_idx = pw->fa_conn_list_tail[list_id];
693 if (~0 != pw->fa_conn_list_tail[list_id]) {
694 fa_session_t *prev_sess = get_session_ptr(am, thread_index, pw->fa_conn_list_tail[list_id]);
695 prev_sess->link_next_idx = sess_id.session_index;
696 /* We should never try to link with a session on another thread */
697 ASSERT(prev_sess->thread_index == sess->thread_index);
699 pw->fa_conn_list_tail[list_id] = sess_id.session_index;
700 pw->serviced_sw_if_index_bitmap = clib_bitmap_set(pw->serviced_sw_if_index_bitmap, sess->sw_if_index, 1);
/* first element of an empty list is also its head */
702 if (~0 == pw->fa_conn_list_head[list_id]) {
703 pw->fa_conn_list_head[list_id] = sess_id.session_index;
708 acl_fa_conn_list_delete_session (acl_main_t *am, fa_full_session_id_t sess_id)
710 uword thread_index = os_get_thread_index ();
711 acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
712 if (thread_index != sess_id.thread_index) {
713 /* If another thread attempts to delete the session, fail it. */
714 #ifdef FA_NODE_VERBOSE_DEBUG
715 clib_warning("thread id in key %d != curr thread index, not deleting");
719 fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index);
720 /* we should never try to delete the session with another thread index */
721 ASSERT(sess->thread_index == thread_index);
722 if (~0 != sess->link_prev_idx) {
723 fa_session_t *prev_sess = get_session_ptr(am, thread_index, sess->link_prev_idx);
724 /* the previous session must be in the same list as this one */
725 ASSERT(prev_sess->link_list_id == sess->link_list_id);
726 prev_sess->link_next_idx = sess->link_next_idx;
728 if (~0 != sess->link_next_idx) {
729 fa_session_t *next_sess = get_session_ptr(am, thread_index, sess->link_next_idx);
730 /* The next session must be in the same list as the one we are deleting */
731 ASSERT(next_sess->link_list_id == sess->link_list_id);
732 next_sess->link_prev_idx = sess->link_prev_idx;
734 if (pw->fa_conn_list_head[sess->link_list_id] == sess_id.session_index) {
735 pw->fa_conn_list_head[sess->link_list_id] = sess->link_next_idx;
737 if (pw->fa_conn_list_tail[sess->link_list_id] == sess_id.session_index) {
738 pw->fa_conn_list_tail[sess->link_list_id] = sess->link_prev_idx;
/*
 * Move a session to the tail of the list that matches its current timeout
 * class: the session is unlinked and, only if the unlink succeeded,
 * re-added with `now` as its new enqueue time.  When the unlink is refused
 * (see the comment below) nothing is done.
 */
744 acl_fa_restart_timer_for_session (acl_main_t * am, u64 now, fa_full_session_id_t sess_id)
746 if (acl_fa_conn_list_delete_session(am, sess_id)) {
747 acl_fa_conn_list_add_session(am, sess_id, now);
751 * Our thread does not own this connection, so we can not delete
752 * The session. To avoid the complicated signaling, we simply
753 * pick the list waiting time to be the shortest of the timeouts.
754 * This way we do not have to do anything special, and let
755 * the regular requeue check take care of everything.
/*
 * Record activity on a session: refreshes last_active_time with `now` and,
 * when the packet carried valid TCP flags, ORs them into the per-direction
 * byte of tcp_flags_seen selected by is_input.
 * NOTE(review): the return statement is not visible in this view.
 */
763 acl_fa_track_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
764 fa_session_t * sess, fa_5tuple_t * pkt_5tuple)
766 sess->last_active_time = now;
767 if (pkt_5tuple->pkt.tcp_flags_valid)
769 sess->tcp_flags_seen.as_u8[is_input] |= pkt_5tuple->pkt.tcp_flags;
/*
 * Release a session: works on the plugin heap (am->acl_mheap) while it
 * updates the session hash, returns the pool slot and makes sure the
 * per-interface delete counter exists; then restores the previous heap and
 * bumps the per-interface and the global delete counters.  The session must
 * belong to the calling thread (ASSERT).
 * NOTE(review): the remaining arguments of the clib_bihash_add_del call are
 * not visible in this view; presumably it removes the session's key.
 */
776 acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, fa_full_session_id_t sess_id)
778 void *oldheap = clib_mem_set_heap(am->acl_mheap);
779 fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index);
780 ASSERT(sess->thread_index == os_get_thread_index ());
781 BV (clib_bihash_add_del) (&am->fa_sessions_hash,
783 acl_fa_per_worker_data_t *pw = &am->per_worker_data[sess_id.thread_index];
784 pool_put_index (pw->fa_sessions_pool, sess_id.session_index);
785 /* Deleting from timer structures not needed,
786 as the caller must have dealt with the timers. */
787 vec_validate (pw->fa_session_dels_by_sw_if_index, sw_if_index);
788 clib_mem_set_heap (oldheap);
789 pw->fa_session_dels_by_sw_if_index[sw_if_index]++;
/* the global counter is shared between threads, hence the atomic add */
790 clib_smp_atomic_add(&am->fa_session_total_dels, 1);
/*
 * Whether there is room for one more session: the number of live sessions
 * is taken as total adds minus total deletes and compared against
 * am->fa_conn_table_max_entries.  is_input and sw_if_index are not
 * consulted.
 * NOTE(review): the declaration of curr_sess_count is not visible in this
 * view.
 */
794 acl_fa_can_add_session (acl_main_t * am, int is_input, u32 sw_if_index)
797 curr_sess_count = am->fa_session_total_adds - am->fa_session_total_dels;
798 return (curr_sess_count < am->fa_conn_table_max_entries);
/*
 * Time (in clock ticks) at which the head of the given worker list is due
 * for a check: its enqueue time plus the list timeout.  When the head does
 * not resolve to a valid session pointer, ~0LL ("infinity") is returned.
 * NOTE(review): the statement that returns the computed time is only partly
 * visible in this view.
 */
802 acl_fa_get_list_head_expiry_time(acl_main_t *am, acl_fa_per_worker_data_t *pw, u64 now, u16 thread_index, int timeout_type)
804 fa_session_t *sess = get_session_ptr(am, thread_index, pw->fa_conn_list_head[timeout_type]);
806 * We can not check just the index here because inbetween the worker thread might
807 * dequeue the connection from the head just as we are about to check it.
809 if (!is_valid_session_ptr(am, thread_index, sess)) {
810 return ~0LL; // infinity.
813 sess->link_enqueue_time + fa_session_get_list_timeout (am, sess);
/*
 * Whether a queued session is due for inspection: true when its list
 * timeout (enqueue time + list timeout) lies before `now`, or when it was
 * enqueued no later than the worker's swipe_end_time.
 * NOTE(review): the declaration of timeout_time is not visible in this
 * view.
 */
819 acl_fa_conn_time_to_check (acl_main_t *am, acl_fa_per_worker_data_t *pw, u64 now, u16 thread_index, u32 session_index)
821 fa_session_t *sess = get_session_ptr(am, thread_index, session_index);
823 sess->link_enqueue_time + fa_session_get_list_timeout (am, sess);
824 return (timeout_time < now) || (sess->link_enqueue_time <= pw->swipe_end_time);
828 * see if there are sessions ready to be checked,
829 * do the maintenance (requeue or delete), and
830 * return the total number of sessions reclaimed.
833 acl_fa_check_idle_sessions(acl_main_t *am, u16 thread_index, u64 now)
835 acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
836 fa_full_session_id_t fsid;
837 fsid.thread_index = thread_index;
838 int total_expired = 0;
842 for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) {
843 while((vec_len(pw->expired) < am->fa_max_deleted_sessions_per_interval)
844 && (~0 != pw->fa_conn_list_head[tt])
845 && (acl_fa_conn_time_to_check(am, pw, now, thread_index,
846 pw->fa_conn_list_head[tt]))) {
847 fsid.session_index = pw->fa_conn_list_head[tt];
848 vec_add1(pw->expired, fsid.session_index);
849 acl_fa_conn_list_delete_session(am, fsid);
855 vec_foreach (psid, pw->expired)
857 fsid.session_index = *psid;
858 if (!pool_is_free_index (pw->fa_sessions_pool, fsid.session_index))
860 fa_session_t *sess = get_session_ptr(am, thread_index, fsid.session_index);
861 u32 sw_if_index = sess->sw_if_index;
862 u64 sess_timeout_time =
863 sess->last_active_time + fa_session_get_timeout (am, sess);
864 if ((now < sess_timeout_time) && (0 == clib_bitmap_get(pw->pending_clear_sw_if_index_bitmap, sw_if_index)))
866 #ifdef FA_NODE_VERBOSE_DEBUG
867 clib_warning ("ACL_FA_NODE_CLEAN: Restarting timer for session %d",
868 (int) session_index);
870 /* There was activity on the session, so the idle timeout
871 has not passed. Enqueue for another time period. */
873 acl_fa_conn_list_add_session(am, fsid, now);
874 pw->cnt_session_timer_restarted++;
878 #ifdef FA_NODE_VERBOSE_DEBUG
879 clib_warning ("ACL_FA_NODE_CLEAN: Deleting session %d",
880 (int) session_index);
882 acl_fa_delete_session (am, sw_if_index, fsid);
883 pw->cnt_deleted_sessions++;
888 pw->cnt_already_deleted_sessions++;
891 total_expired = vec_len(pw->expired);
892 /* zero out the vector which we have acted on */
894 _vec_len (pw->expired) = 0;
895 /* if we were advancing and reached the end
896 * (no more sessions to recycle), reset the fast-forward timestamp */
898 if (pw->swipe_end_time && 0 == total_expired)
899 pw->swipe_end_time = 0;
900 return (total_expired);
/*
 * Make room for a new session by sacrificing the oldest transient TCP
 * session of the given thread: if that thread's ACL_TIMEOUT_TCP_TRANSIENT
 * list has a head (~0 means empty), the head is unlinked from the list and
 * deleted.  The delete is accounted against the sw_if_index passed in.
 * The result of the unlink call is not checked.
 */
904 acl_fa_try_recycle_session (acl_main_t * am, int is_input, u16 thread_index, u32 sw_if_index)
906 /* try to recycle a TCP transient session */
907 acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
908 u8 timeout_type = ACL_TIMEOUT_TCP_TRANSIENT;
909 fa_full_session_id_t sess_id;
910 sess_id.session_index = pw->fa_conn_list_head[timeout_type];
911 if (~0 != sess_id.session_index) {
912 sess_id.thread_index = thread_index;
913 acl_fa_conn_list_delete_session(am, sess_id);
914 acl_fa_delete_session(am, sw_if_index, sess_id);
/*
 * Create a session for the given 5-tuple on the calling thread.
 *
 * Works on the plugin heap: takes an element from the thread's session
 * pool, builds a hash entry whose key is the 5-tuple key and whose value is
 * the full session id (thread index + pool index), copies the key into the
 * session, initializes its bookkeeping fields, updates the session hash,
 * queues the session on its timeout list and makes sure the per-interface
 * add counter exists.  The previous heap is then restored and the
 * per-interface and global add counters are bumped.
 * NOTE(review): the declaration of sess, the remaining arguments of the
 * clib_bihash_add_del call and the return statement are not visible in this
 * view.
 */
918 static fa_session_t *
919 acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
920 fa_5tuple_t * p5tuple)
922 clib_bihash_kv_40_8_t *pkv = &p5tuple->kv;
923 clib_bihash_kv_40_8_t kv;
924 fa_full_session_id_t f_sess_id;
925 uword thread_index = os_get_thread_index();
926 void *oldheap = clib_mem_set_heap(am->acl_mheap);
927 acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
929 f_sess_id.thread_index = thread_index;
932 pool_get_aligned (pw->fa_sessions_pool, sess, CLIB_CACHE_LINE_BYTES);
/* the session index is the element's position in the pool */
933 f_sess_id.session_index = sess - pw->fa_sessions_pool;
935 kv.key[0] = pkv->key[0];
936 kv.key[1] = pkv->key[1];
937 kv.key[2] = pkv->key[2];
938 kv.key[3] = pkv->key[3];
939 kv.key[4] = pkv->key[4];
940 kv.value = f_sess_id.as_u64;
/* only the key part of the kv is copied to the start of the session */
942 memcpy (sess, pkv, sizeof (pkv->key));
943 sess->last_active_time = now;
944 sess->sw_if_index = sw_if_index;
945 sess->tcp_flags_seen.as_u16 = 0;
946 sess->thread_index = thread_index;
947 sess->link_list_id = ~0;
948 sess->link_prev_idx = ~0;
949 sess->link_next_idx = ~0;
953 ASSERT(am->fa_sessions_hash_is_initialized == 1);
954 BV (clib_bihash_add_del) (&am->fa_sessions_hash,
956 acl_fa_conn_list_add_session(am, f_sess_id, now);
958 vec_validate (pw->fa_session_adds_by_sw_if_index, sw_if_index);
959 clib_mem_set_heap (oldheap);
960 pw->fa_session_adds_by_sw_if_index[sw_if_index]++;
961 clib_smp_atomic_add(&am->fa_session_total_adds, 1);
/*
 * Look the 5-tuple's key up in the session hash and return the result of
 * the bihash search expression.  sw_if_index0 is not used in the visible
 * code.
 * NOTE(review): the tail of the return expression (including how
 * pvalue_sess is passed) is not visible in this view.
 */
966 acl_fa_find_session (acl_main_t * am, u32 sw_if_index0, fa_5tuple_t * p5tuple,
967 clib_bihash_kv_40_8_t * pvalue_sess)
969 return (BV (clib_bihash_search)
970 (&am->fa_sessions_hash, &p5tuple->kv,
/*
 * Common packet-processing loop shared by the ACL plugin's graph nodes.
 *
 * vm, node, frame         - standard node function arguments
 * is_ip6                  - non-zero when the node handles IPv6
 * is_input                - non-zero for the inbound direction
 * is_l2_path              - non-zero when running as an L2 feature
 * l2_feat_next_node_index - next-node table used on the L2 path
 * acl_fa_node             - registration of the calling node; its index is
 *                           used for error counters
 *
 * For every buffer in the frame: extract the 5-tuple, derive the
 * direction-independent session key, try an existing session first and
 * otherwise run the ACL match; a permitted new flow may get a session
 * (recycling a transient TCP session when the table is full).  The packet
 * is then sent to the next feature node and, if tracing is on, an
 * acl_fa_trace_t record is written.  Counters are updated once per frame.
 * Returns frame->n_vectors.
 *
 * NOTE(review): several statements (per-packet local declarations, the
 * assignments of action/next0/error0 and some conditionals) are not visible
 * in this view; the comments below only describe what is shown.
 */
976 acl_fa_node_fn (vlib_main_t * vm,
977 vlib_node_runtime_t * node, vlib_frame_t * frame, int is_ip6,
978 int is_input, int is_l2_path, u32 * l2_feat_next_node_index,
979 vlib_node_registration_t * acl_fa_node)
981 u32 n_left_from, *from, *to_next;
982 acl_fa_next_t next_index;
/* per-frame tallies, pushed into the node counters after the loop */
983 u32 pkts_acl_checked = 0;
984 u32 pkts_new_session = 0;
985 u32 pkts_exist_session = 0;
986 u32 pkts_acl_permit = 0;
987 u32 pkts_restart_session_timer = 0;
/* declared once per call, so bits set for one packet stay set for the
   following packets of the same frame */
988 u32 trace_bitmap = 0;
989 acl_main_t *am = &acl_main;
990 fa_5tuple_t fa_5tuple, kv_sess;
991 clib_bihash_kv_40_8_t value_sess;
992 vlib_node_runtime_t *error_node;
/* one timestamp is taken per frame and used for all its packets */
993 u64 now = clib_cpu_time_now ();
994 uword thread_index = os_get_thread_index ();
996 from = vlib_frame_vector_args (frame);
997 n_left_from = frame->n_vectors;
998 next_index = node->cached_next_index;
1000 error_node = vlib_node_get_runtime (vm, acl_fa_node->index);
1002 while (n_left_from > 0)
1006 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1008 while (n_left_from > 0 && n_left_to_next > 0)
1015 int acl_check_needed = 1;
1016 u32 match_acl_in_index = ~0;
1017 u32 match_rule_index = ~0;
1020 /* speculatively enqueue b0 to the current next frame */
1026 n_left_to_next -= 1;
1028 b0 = vlib_get_buffer (vm, bi0);
1031 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1033 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1036 * Extract the L3/L4 matching info into a 5-tuple structure,
1037 * then create a session key whose layout is independent on forward or reverse
1038 * direction of the packet.
1041 acl_fill_5tuple (am, b0, is_ip6, is_input, is_l2_path, &fa_5tuple);
/* the low 16 bits of the interface index are part of the key and are set
   before the session key is derived; the fields set after that only
   describe the packet */
1042 fa_5tuple.l4.lsb_of_sw_if_index = sw_if_index0 & 0xffff;
1043 acl_make_5tuple_session_key (is_input, &fa_5tuple, &kv_sess);
1044 fa_5tuple.pkt.sw_if_index = sw_if_index0;
1045 fa_5tuple.pkt.is_ip6 = is_ip6;
1046 fa_5tuple.pkt.is_input = is_input;
1047 fa_5tuple.pkt.mask_type_index_lsb = ~0;
1048 #ifdef FA_NODE_VERBOSE_DEBUG
1050 ("ACL_FA_NODE_DBG: session 5-tuple %016llx %016llx %016llx %016llx %016llx : %016llx",
1051 kv_sess.kv.key[0], kv_sess.kv.key[1], kv_sess.kv.key[2],
1052 kv_sess.kv.key[3], kv_sess.kv.key[4], kv_sess.kv.value);
1054 ("ACL_FA_NODE_DBG: packet 5-tuple %016llx %016llx %016llx %016llx %016llx : %016llx",
1055 fa_5tuple.kv.key[0], fa_5tuple.kv.key[1], fa_5tuple.kv.key[2],
1056 fa_5tuple.kv.key[3], fa_5tuple.kv.key[4], fa_5tuple.kv.value);
1059 /* Try to match an existing session first */
1061 if (acl_fa_ifc_has_sessions (am, sw_if_index0))
1063 if (acl_fa_find_session
1064 (am, sw_if_index0, &kv_sess, &value_sess))
1066 trace_bitmap |= 0x80000000;
1067 error0 = ACL_FA_ERROR_ACL_EXIST_SESSION;
1068 fa_full_session_id_t f_sess_id;
/* the hash value is the full session id (thread index + pool index) */
1070 f_sess_id.as_u64 = value_sess.value;
1071 ASSERT(f_sess_id.thread_index < vec_len(vlib_mains));
1073 fa_session_t *sess = get_session_ptr(am, f_sess_id.thread_index, f_sess_id.session_index);
1074 int old_timeout_type =
1075 fa_session_get_timeout_type (am, sess);
1077 acl_fa_track_session (am, is_input, sw_if_index0, now,
1079 /* expose the session id to the tracer */
1080 match_rule_index = f_sess_id.session_index;
1081 int new_timeout_type =
1082 fa_session_get_timeout_type (am, sess);
1083 acl_check_needed = 0;
1084 pkts_exist_session += 1;
1085 /* Tracking might have changed the session timeout type, e.g. from transient to established */
1086 if (PREDICT_FALSE (old_timeout_type != new_timeout_type))
1088 acl_fa_restart_timer_for_session (am, now, f_sess_id);
1089 pkts_restart_session_timer++;
1091 0x00010000 + ((0xff & old_timeout_type) << 8) +
1092 (0xff & new_timeout_type);
1095 * I estimate the likelihood to be very low - the VPP needs
1096 * to have >64K interfaces to start with and then on
1097 * exactly 64K indices apart needs to be exactly the same
1098 * 5-tuple... Anyway, since this probability is nonzero -
1099 * print an error and drop the unlucky packet.
1100 * If this shows up in real world, we would need to bump
1101 * the hash key length.
1103 if (PREDICT_FALSE(sess->sw_if_index != sw_if_index0)) {
1104 clib_warning("BUG: session LSB16(sw_if_index) and 5-tuple collision!");
1105 acl_check_needed = 0;
/* no session hit: fall back to matching against the applied ACLs */
1111 if (acl_check_needed)
1114 multi_acl_match_5tuple (sw_if_index0, &fa_5tuple, is_l2_path,
1115 is_ip6, is_input, &match_acl_in_index,
1116 &match_rule_index, &trace_bitmap);
1119 pkts_acl_permit += 1;
/* when the table is full, try to free one slot before re-checking */
1122 if (!acl_fa_can_add_session (am, is_input, sw_if_index0))
1123 acl_fa_try_recycle_session (am, is_input, thread_index, sw_if_index0);
1125 if (acl_fa_can_add_session (am, is_input, sw_if_index0))
1127 fa_session_t *sess = acl_fa_add_session (am, is_input, sw_if_index0, now,
1129 acl_fa_track_session (am, is_input, sw_if_index0, now,
1131 pkts_new_session += 1;
1136 error0 = ACL_FA_ERROR_ACL_TOO_MANY_SESSIONS;
1146 next0 = vnet_l2_feature_next (b0, l2_feat_next_node_index, 0);
1148 vnet_feature_next (sw_if_index0, &next0, b0);
1151 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1152 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1154 acl_fa_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
1155 t->sw_if_index = sw_if_index0;
1156 t->next_index = next0;
1157 t->match_acl_in_index = match_acl_in_index;
1158 t->match_rule_index = match_rule_index;
/* the raw 5-tuple (five key words + value) is stored for the trace
   formatter */
1159 t->packet_info[0] = fa_5tuple.kv.key[0];
1160 t->packet_info[1] = fa_5tuple.kv.key[1];
1161 t->packet_info[2] = fa_5tuple.kv.key[2];
1162 t->packet_info[3] = fa_5tuple.kv.key[3];
1163 t->packet_info[4] = fa_5tuple.kv.key[4];
1164 t->packet_info[5] = fa_5tuple.kv.value;
1166 t->trace_bitmap = trace_bitmap;
/* a next index outside the node's next-node range falls back to 0 */
1169 next0 = next0 < node->n_next_nodes ? next0 : 0;
1171 b0->error = error_node->errors[error0];
1173 pkts_acl_checked += 1;
1175 /* verify speculative enqueue, maybe switch current next frame */
1176 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1177 to_next, n_left_to_next, bi0,
1181 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1184 vlib_node_increment_counter (vm, acl_fa_node->index,
1185 ACL_FA_ERROR_ACL_CHECK, pkts_acl_checked);
1186 vlib_node_increment_counter (vm, acl_fa_node->index,
1187 ACL_FA_ERROR_ACL_PERMIT, pkts_acl_permit);
1188 vlib_node_increment_counter (vm, acl_fa_node->index,
1189 ACL_FA_ERROR_ACL_NEW_SESSION,
1191 vlib_node_increment_counter (vm, acl_fa_node->index,
1192 ACL_FA_ERROR_ACL_EXIST_SESSION,
1193 pkts_exist_session);
1194 vlib_node_increment_counter (vm, acl_fa_node->index,
1195 ACL_FA_ERROR_ACL_RESTART_SESSION_TIMER,
1196 pkts_restart_session_timer);
1197 return frame->n_vectors;
/*
 * Node function registered for "acl-plugin-in-ip6-l2": a thin wrapper that
 * hands the frame to acl_fa_node_fn with the constants (1, 1, 1), the
 * next-node index kept in acl_main for this node, and this node's own
 * registration (used by acl_fa_node_fn for its per-node counters).
 * NOTE(review): across the sibling wrappers the three constants vary as
 * (ip6?, input?, l2?) - presumably is_ip6 / is_input / is_l2_path; confirm
 * against the acl_fa_node_fn signature.
 */
1201 vlib_node_registration_t acl_in_l2_ip6_node;
1203 acl_in_ip6_l2_node_fn (vlib_main_t * vm,
1204 vlib_node_runtime_t * node, vlib_frame_t * frame)
1206 acl_main_t *am = &acl_main;
1207 return acl_fa_node_fn (vm, node, frame, 1, 1, 1,
1208 am->fa_acl_in_ip6_l2_node_feat_next_node_index,
1209 &acl_in_l2_ip6_node);
/*
 * Node function registered for "acl-plugin-in-ip4-l2": forwards the frame to
 * acl_fa_node_fn with the constants (0, 1, 1), the next-node index kept in
 * acl_main for this node, and this node's registration.
 * NOTE(review): the constants are presumably is_ip6=0, is_input=1,
 * is_l2_path=1 - confirm against the acl_fa_node_fn signature.
 */
1212 vlib_node_registration_t acl_in_l2_ip4_node;
1214 acl_in_ip4_l2_node_fn (vlib_main_t * vm,
1215 vlib_node_runtime_t * node, vlib_frame_t * frame)
1217 acl_main_t *am = &acl_main;
1218 return acl_fa_node_fn (vm, node, frame, 0, 1, 1,
1219 am->fa_acl_in_ip4_l2_node_feat_next_node_index,
1220 &acl_in_l2_ip4_node);
/*
 * Node function registered for "acl-plugin-out-ip6-l2": forwards the frame to
 * acl_fa_node_fn with the constants (1, 0, 1), the next-node index kept in
 * acl_main for this node, and this node's registration.
 * NOTE(review): the constants are presumably is_ip6=1, is_input=0,
 * is_l2_path=1 - confirm against the acl_fa_node_fn signature.
 */
1223 vlib_node_registration_t acl_out_l2_ip6_node;
1225 acl_out_ip6_l2_node_fn (vlib_main_t * vm,
1226 vlib_node_runtime_t * node, vlib_frame_t * frame)
1228 acl_main_t *am = &acl_main;
1229 return acl_fa_node_fn (vm, node, frame, 1, 0, 1,
1230 am->fa_acl_out_ip6_l2_node_feat_next_node_index,
1231 &acl_out_l2_ip6_node);
/*
 * Node function registered for "acl-plugin-out-ip4-l2": forwards the frame to
 * acl_fa_node_fn with the constants (0, 0, 1), the next-node index kept in
 * acl_main for this node, and this node's registration.
 * NOTE(review): the constants are presumably is_ip6=0, is_input=0,
 * is_l2_path=1 - confirm against the acl_fa_node_fn signature.
 */
1234 vlib_node_registration_t acl_out_l2_ip4_node;
1236 acl_out_ip4_l2_node_fn (vlib_main_t * vm,
1237 vlib_node_runtime_t * node, vlib_frame_t * frame)
1239 acl_main_t *am = &acl_main;
1240 return acl_fa_node_fn (vm, node, frame, 0, 0, 1,
1241 am->fa_acl_out_ip4_l2_node_feat_next_node_index,
1242 &acl_out_l2_ip4_node);
1246 /**** L3 processing path nodes ****/
/*
 * Node function registered for "acl-plugin-in-ip6-fa": forwards the frame to
 * acl_fa_node_fn with the constants (1, 1, 0), a next-node index of 0, and
 * this node's registration. Unlike the L2 wrappers, no next-node index is
 * read from acl_main here.
 * NOTE(review): the constants are presumably is_ip6=1, is_input=1,
 * is_l2_path=0 - confirm against the acl_fa_node_fn signature.
 */
1249 vlib_node_registration_t acl_in_fa_ip6_node;
1251 acl_in_ip6_fa_node_fn (vlib_main_t * vm,
1252 vlib_node_runtime_t * node, vlib_frame_t * frame)
1254 return acl_fa_node_fn (vm, node, frame, 1, 1, 0, 0, &acl_in_fa_ip6_node);
/*
 * Node function registered for "acl-plugin-in-ip4-fa": forwards the frame to
 * acl_fa_node_fn with the constants (0, 1, 0), a next-node index of 0, and
 * this node's registration.
 * NOTE(review): the constants are presumably is_ip6=0, is_input=1,
 * is_l2_path=0 - confirm against the acl_fa_node_fn signature.
 */
1257 vlib_node_registration_t acl_in_fa_ip4_node;
1259 acl_in_ip4_fa_node_fn (vlib_main_t * vm,
1260 vlib_node_runtime_t * node, vlib_frame_t * frame)
1262 return acl_fa_node_fn (vm, node, frame, 0, 1, 0, 0, &acl_in_fa_ip4_node);
/*
 * Node function registered for "acl-plugin-out-ip6-fa": forwards the frame to
 * acl_fa_node_fn with the constants (1, 0, 0), a next-node index of 0, and
 * this node's registration.
 * NOTE(review): the constants are presumably is_ip6=1, is_input=0,
 * is_l2_path=0 - confirm against the acl_fa_node_fn signature.
 */
1265 vlib_node_registration_t acl_out_fa_ip6_node;
1267 acl_out_ip6_fa_node_fn (vlib_main_t * vm,
1268 vlib_node_runtime_t * node, vlib_frame_t * frame)
1270 return acl_fa_node_fn (vm, node, frame, 1, 0, 0, 0, &acl_out_fa_ip6_node);
/*
 * Node function registered for "acl-plugin-out-ip4-fa": forwards the frame to
 * acl_fa_node_fn with the constants (0, 0, 0), a next-node index of 0, and
 * this node's registration.
 * NOTE(review): the constants are presumably is_ip6=0, is_input=0,
 * is_l2_path=0 - confirm against the acl_fa_node_fn signature.
 */
1273 vlib_node_registration_t acl_out_fa_ip4_node;
1275 acl_out_ip4_fa_node_fn (vlib_main_t * vm,
1276 vlib_node_runtime_t * node, vlib_frame_t * frame)
1278 return acl_fa_node_fn (vm, node, frame, 0, 0, 0, 0, &acl_out_fa_ip4_node);
1282 * This process ensures the connection cleanup happens every so often
1283 * even in absence of traffic, as well as provides general orchestration
1284 * for requests like connection deletion on a given sw_if_index.
/*
 * X-macro list of the cleaner process error counters. Each _(sym, str) entry
 * is expanded once into an ACL_FA_CLEANER_ERROR_<sym> enumerator (here) and
 * once into its human-readable string (acl_fa_cleaner_error_strings), which
 * keeps the two in the same order. ACL_FA_CLEANER_N_ERROR follows the
 * generated enumerators and therefore equals their count.
 */
1289 #define foreach_acl_fa_cleaner_error \
1290 _(UNKNOWN_EVENT, "unknown event received") \
1295 #define _(sym,str) ACL_FA_CLEANER_ERROR_##sym,
1296 foreach_acl_fa_cleaner_error
1298 ACL_FA_CLEANER_N_ERROR,
1299 } acl_fa_cleaner_error_t;
/*
 * Counter description strings for the cleaner process node, generated from
 * foreach_acl_fa_cleaner_error by keeping only the string half of each entry.
 * Referenced by the "acl-plugin-fa-cleaner-process" node registration.
 */
1301 static char *acl_fa_cleaner_error_strings[] = {
1302 #define _(sym,string) string,
1303 foreach_acl_fa_cleaner_error
/*
 * Forward declarations of the two cleaner node registrations, so that the
 * functions below can refer to their .index before the VLIB_REGISTER_NODE
 * definitions appear later in the file.
 */
1309 static vlib_node_registration_t acl_fa_session_cleaner_process_node;
1310 static vlib_node_registration_t acl_fa_worker_session_cleaner_process_node;
1313 * Per-worker thread interrupt-driven cleaner thread
1314 * to clean idle connections if there are no packets
/*
 * Node function of the per-thread cleaner (registered further down as an
 * interrupt-state input node named "acl-plugin-fa-worker-cleaner-process").
 *
 * On each invocation it:
 *  - clears interrupt_is_pending so that a new interrupt may be queued;
 *  - if a clear was requested (clear_in_process) and no sweep has started yet
 *    (swipe_end_time == 0), narrows the requested sw_if_index bitmap to the
 *    interfaces this thread services and either finishes immediately or
 *    records "now" as the sweep end time;
 *  - calls acl_fa_check_idle_sessions and uses its result (num_expired) to
 *    publish feedback in interrupt_is_needed / interrupt_is_unwanted;
 *  - stores the current am->fa_interrupt_generation in its per-worker data,
 *    which the central cleaner process compares against its own counter.
 */
1317 acl_fa_worker_conn_cleaner_process(vlib_main_t * vm,
1318 vlib_node_runtime_t * rt, vlib_frame_t * f)
1320 acl_main_t *am = &acl_main;
1321 u64 now = clib_cpu_time_now ();
1322 u16 thread_index = os_get_thread_index ();
/* all state touched below belongs to the calling thread's slot */
1323 acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
1325 #ifdef FA_NODE_VERBOSE_DEBUG
1326 clib_warning("\nacl_fa_worker_conn_cleaner: thread index %d now %lu\n\n", thread_index, now);
1328 /* allow another interrupt to be queued */
1329 pw->interrupt_is_pending = 0;
1330 if (pw->clear_in_process) {
1331 if (0 == pw->swipe_end_time) {
1333 * Someone has just set the flag to start clearing.
1334 * we do this by combing through the connections up to a "time T"
1335 * which is now, and requeueing everything except the expired
1336 * connections and those matching the interface(s) being cleared.
1340 * first filter the sw_if_index bitmap that they want from us, by
1341 * a bitmap of sw_if_index for which we actually have connections.
1343 if ((pw->pending_clear_sw_if_index_bitmap == 0)
1344 || (pw->serviced_sw_if_index_bitmap == 0)) {
1345 #ifdef FA_NODE_VERBOSE_DEBUG
1346 clib_warning("WORKER-CLEAR: someone tried to call clear, but one of the bitmaps are empty");
1348 clib_bitmap_zero(pw->pending_clear_sw_if_index_bitmap);
1350 #ifdef FA_NODE_VERBOSE_DEBUG
1351 clib_warning("WORKER-CLEAR: (before and) swiping sw-if-index bitmap: %U, my serviced bitmap %U",
1352 format_bitmap_hex, pw->pending_clear_sw_if_index_bitmap,
1353 format_bitmap_hex, pw->serviced_sw_if_index_bitmap);
/* keep only the interfaces that are both requested and serviced by this thread */
1355 pw->pending_clear_sw_if_index_bitmap = clib_bitmap_and(pw->pending_clear_sw_if_index_bitmap,
1356 pw->serviced_sw_if_index_bitmap);
1359 if (clib_bitmap_is_zero(pw->pending_clear_sw_if_index_bitmap)) {
1360 /* if the cross-section is a zero vector, no need to do anything. */
1361 #ifdef FA_NODE_VERBOSE_DEBUG
1362 clib_warning("WORKER: clearing done - nothing to do");
1364 pw->clear_in_process = 0;
1366 #ifdef FA_NODE_VERBOSE_DEBUG
1367 clib_warning("WORKER-CLEAR: swiping sw-if-index bitmap: %U, my serviced bitmap %U",
1368 format_bitmap_hex, pw->pending_clear_sw_if_index_bitmap,
1369 format_bitmap_hex, pw->serviced_sw_if_index_bitmap);
1371 /* swipe through the connection lists until enqueue timestamps become above "now" */
1372 pw->swipe_end_time = now;
/* the return value drives both the "clear finished" test and the interrupt-rate feedback below */
1376 num_expired = acl_fa_check_idle_sessions(am, thread_index, now);
1377 // clib_warning("WORKER-CLEAR: checked %d sessions (clear_in_progress: %d)", num_expired, pw->clear_in_process);
1378 if (pw->clear_in_process) {
1379 if (0 == num_expired) {
1380 /* we were clearing but we could not process any more connections. time to stop. */
1381 clib_bitmap_zero(pw->pending_clear_sw_if_index_bitmap);
1382 pw->clear_in_process = 0;
1383 #ifdef FA_NODE_VERBOSE_DEBUG
1384 clib_warning("WORKER: clearing done, all done");
1387 #ifdef FA_NODE_VERBOSE_DEBUG
1388 clib_warning("WORKER-CLEAR: more work to do, raising interrupt");
1390 /* should continue clearing.. So could they please send an interrupt again? */
1391 pw->interrupt_is_needed = 1;
/* compare against the configured per-interval thresholds to ask for more, fewer, or the same rate of interrupts */
1394 if (num_expired >= am->fa_max_deleted_sessions_per_interval) {
1395 /* there was too much work, we should get an interrupt ASAP */
1396 pw->interrupt_is_needed = 1;
1397 pw->interrupt_is_unwanted = 0;
1398 } else if (num_expired <= am->fa_min_deleted_sessions_per_interval) {
1399 /* signal that they should trigger us less */
1400 pw->interrupt_is_needed = 0;
1401 pw->interrupt_is_unwanted = 1;
1403 /* the current rate of interrupts is ok */
1404 pw->interrupt_is_needed = 0;
1405 pw->interrupt_is_unwanted = 0;
/* record which generation was handled; the central cleaner waits until this matches its own counter */
1408 pw->interrupt_generation = am->fa_interrupt_generation;
/*
 * Raise the worker-cleaner node interrupt on one thread.
 *
 * vm           - not referenced in the visible body.
 * am           - ACL plugin main structure holding the per-worker data.
 * thread_index - index into am->per_worker_data and vlib_mains.
 *
 * Does nothing when that worker already has an interrupt pending; otherwise
 * marks it pending and sets the interrupt on the worker's vlib_main for the
 * acl_fa_worker_session_cleaner_process_node. The worker clears the pending
 * flag itself when its cleaner node runs.
 */
1413 send_one_worker_interrupt (vlib_main_t * vm, acl_main_t *am, int thread_index)
1415 acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
1416 if (!pw->interrupt_is_pending) {
1417 pw->interrupt_is_pending = 1;
1418 vlib_node_set_interrupt_pending (vlib_mains[thread_index],
1419 acl_fa_worker_session_cleaner_process_node.index);
1420 /* if the interrupt was requested, mark that done. */
1421 /* pw->interrupt_is_needed = 0; */
/*
 * Call send_one_worker_interrupt for a range of thread indices.
 * When vlib_mains holds more than one entry the loop starts at index 1, so
 * index 0 is skipped (presumably the main thread - confirm); with a single
 * entry, index 0 itself is signalled.
 */
1426 send_interrupts_to_workers (vlib_main_t * vm, acl_main_t *am)
1429 /* Can't use vec_len(am->per_worker_data) since the threads might not have come up yet; */
1430 int n_threads = vec_len(vlib_mains);
1431 for (i = n_threads > 1 ? 1 : 0; i < n_threads; i++) {
1432 send_one_worker_interrupt(vm, am, i);
1436 /* centralized process to drive per-worker cleaners */
/*
 * Body of the "acl-plugin-fa-cleaner-process" process node.
 *
 * Each cycle, as far as the visible code shows:
 *  1. computes when to wake up next, from the per-thread connection list
 *     head expiry times and the current wait interval;
 *  2. waits for an event, with or without a timeout;
 *  3. handles the event: ACL_FA_CLEANER_RESCHEDULE is a no-op,
 *     ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX drives a clear on every worker and
 *     waits for it to finish, anything else bumps the UNKNOWN_EVENT counter;
 *  4. interrupts the workers, waits a bounded time for them to catch up with
 *     the current interrupt generation, then adapts the wait interval from
 *     their interrupt_is_needed / interrupt_is_unwanted feedback.
 */
1438 acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
1441 acl_main_t *am = &acl_main;
1443 f64 cpu_cps = vm->clib_time.clocks_per_second;
1445 /* We should check if there are connections to clean up - at least twice a second */
/* expressed in CPU clock ticks: half of clocks-per-second */
1446 u64 max_timer_wait_interval = cpu_cps / 2;
1447 uword event_type, *event_data = 0;
1448 acl_fa_per_worker_data_t *pw0;
1450 am->fa_current_cleaner_timer_wait_interval = max_timer_wait_interval;
/* publish this node's index; acl_fa_enable_disable signals events to it */
1451 am->fa_cleaner_node_index = acl_fa_session_cleaner_process_node.index;
/* the generation counter starts at 1 and is incremented at the end of every cycle */
1452 am->fa_interrupt_generation = 1;
1455 now = clib_cpu_time_now ();
1456 next_expire = now + am->fa_current_cleaner_timer_wait_interval;
1457 int has_pending_conns = 0;
1462 * walk over all per-thread list heads of different timeouts,
1463 * and see if there are any connections pending.
1464 * If there aren't - we do not need to wake up until the
1465 * worker code signals that it has added a connection.
1467 * Also, while we are at it, calculate the earliest we need to wake up.
1469 for(ti = 0; ti < vec_len(vlib_mains); ti++) {
1470 if (ti >= vec_len(am->per_worker_data)) {
1473 acl_fa_per_worker_data_t *pw = &am->per_worker_data[ti];
1474 for(tt = 0; tt < vec_len(pw->fa_conn_list_head); tt++) {
1475 u64 head_expiry = acl_fa_get_list_head_expiry_time(am, pw, now, ti, tt);
/* an earlier expiry only pulls the wake-up time in when that worker has no interrupt pending already */
1476 if ((head_expiry < next_expire) && !pw->interrupt_is_pending) {
1477 #ifdef FA_NODE_VERBOSE_DEBUG
1478 clib_warning("Head expiry: %lu, now: %lu, next_expire: %lu (worker: %d, tt: %d)", head_expiry, now, next_expire, ti, tt);
1480 next_expire = head_expiry;
/* ~0 in a list head is treated as "list empty" */
1482 if (~0 != pw->fa_conn_list_head[tt]) {
1483 has_pending_conns = 1;
1488 /* If no pending connections and no ACL applied then no point in timing out */
1489 if (!has_pending_conns && (0 == am->fa_total_enabled_count))
1491 am->fa_cleaner_cnt_wait_without_timeout++;
1492 (void) vlib_process_wait_for_event (vm);
1493 event_type = vlib_process_get_events (vm, &event_data);
/* next_expire and now are CPU clock counts; dividing by clocks-per-second gives seconds */
1497 f64 timeout = ((i64) next_expire - (i64) now) / cpu_cps;
1500 /* skip waiting altogether */
1505 am->fa_cleaner_cnt_wait_with_timeout++;
1506 (void) vlib_process_wait_for_event_or_clock (vm, timeout);
1507 event_type = vlib_process_get_events (vm, &event_data);
1516 case ACL_FA_CLEANER_RESCHEDULE:
1517 /* Nothing to do. */
1519 case ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX:
1521 uword *clear_sw_if_index_bitmap = 0;
1522 uword *sw_if_index0;
1524 #ifdef FA_NODE_VERBOSE_DEBUG
1525 clib_warning("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX received");
/* each element of event_data carries one sw_if_index to clear; collect them into a bitmap */
1527 vec_foreach (sw_if_index0, event_data)
1529 am->fa_cleaner_cnt_delete_by_sw_index++;
1530 #ifdef FA_NODE_VERBOSE_DEBUG
1532 ("ACL_FA_NODE_CLEAN: ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX: %d",
/* NOTE(review): ~0 appears to be the "clear everything" request (see the comment on the bitmap dup further down) - confirm */
1535 if (*sw_if_index0 == ~0)
/* only indices of interfaces that still exist in the sw_interfaces pool are added */
1541 if (!pool_is_free_index (am->vnet_main->interface_main.sw_interfaces, *sw_if_index0))
1543 clear_sw_if_index_bitmap = clib_bitmap_set(clear_sw_if_index_bitmap, *sw_if_index0, 1);
1547 #ifdef FA_NODE_VERBOSE_DEBUG
1548 clib_warning("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX bitmap: %U", format_bitmap_hex, clear_sw_if_index_bitmap);
/* first pass over the workers: wait out any earlier clear, then hand each worker its bitmap and set clear_in_process */
1550 vec_foreach(pw0, am->per_worker_data) {
1551 CLIB_MEMORY_BARRIER ();
1552 while (pw0->clear_in_process) {
1553 CLIB_MEMORY_BARRIER ();
1554 #ifdef FA_NODE_VERBOSE_DEBUG
1555 clib_warning("ACL_FA_NODE_CLEAN: waiting previous cleaning cycle to finish on %d...", pw0 - am->per_worker_data);
1557 vlib_process_suspend(vm, 0.0001);
1558 if (pw0->interrupt_is_needed) {
1559 send_one_worker_interrupt(vm, am, (pw0 - am->per_worker_data));
1562 if (pw0->clear_in_process) {
1563 clib_warning("ERROR-BUG! Could not initiate cleaning on worker because another cleanup in progress");
1567 /* if we need to clear all, then just clear the interfaces that we are servicing */
1568 pw0->pending_clear_sw_if_index_bitmap = clib_bitmap_dup(pw0->serviced_sw_if_index_bitmap);
/* otherwise each worker receives its own copy of the requested bitmap */
1572 pw0->pending_clear_sw_if_index_bitmap = clib_bitmap_dup(clear_sw_if_index_bitmap);
1574 pw0->clear_in_process = 1;
1577 /* send some interrupts so they can start working */
1578 send_interrupts_to_workers(vm, am);
1580 /* now wait till they all complete */
1581 #ifdef FA_NODE_VERBOSE_DEBUG
1582 clib_warning("CLEANER mains len: %d per-worker len: %d", vec_len(vlib_mains), vec_len(am->per_worker_data));
/* second pass: poll each worker until it drops clear_in_process, re-sending interrupts when it asks for one */
1584 vec_foreach(pw0, am->per_worker_data) {
1585 CLIB_MEMORY_BARRIER ();
1586 while (pw0->clear_in_process) {
1587 CLIB_MEMORY_BARRIER ();
1588 #ifdef FA_NODE_VERBOSE_DEBUG
1589 clib_warning("ACL_FA_NODE_CLEAN: waiting for my cleaning cycle to finish on %d...", pw0 - am->per_worker_data);
1591 vlib_process_suspend(vm, 0.0001);
1592 if (pw0->interrupt_is_needed) {
1593 send_one_worker_interrupt(vm, am, (pw0 - am->per_worker_data));
1597 #ifdef FA_NODE_VERBOSE_DEBUG
1598 clib_warning("ACL_FA_NODE_CLEAN: cleaning done");
1600 clib_bitmap_free(clear_sw_if_index_bitmap);
1604 #ifdef FA_NODE_VERBOSE_DEBUG
1605 clib_warning ("ACL plugin connection cleaner: unknown event %u",
1608 vlib_node_increment_counter (vm,
1609 acl_fa_session_cleaner_process_node.
1611 ACL_FA_CLEANER_ERROR_UNKNOWN_EVENT, 1);
1612 am->fa_cleaner_cnt_unknown_event++;
1616 send_interrupts_to_workers(vm, am);
/* reset the event vector length so the next vlib_process_get_events call starts from an empty vector */
1619 _vec_len (event_data) = 0;
1622 * If the interrupts were not processed yet, ensure we wait a bit,
1623 * but up to a point.
1625 int need_more_wait = 0;
/* together with the 0.0001 suspend below this bounds the wait to 100 short suspends */
1626 int max_wait_cycles = 100;
1629 vec_foreach(pw0, am->per_worker_data) {
/* a worker copies am->fa_interrupt_generation once it has run; a mismatch means it has not run yet this cycle */
1630 if (pw0->interrupt_generation != am->fa_interrupt_generation) {
1634 if (need_more_wait) {
1635 vlib_process_suspend(vm, 0.0001);
1637 } while (need_more_wait && (--max_wait_cycles > 0));
1639 int interrupts_needed = 0;
1640 int interrupts_unwanted = 0;
/* tally the feedback flags the workers published */
1642 vec_foreach(pw0, am->per_worker_data) {
1643 if (pw0->interrupt_is_needed) {
1644 interrupts_needed++;
1645 /* the per-worker value is reset when sending the interrupt */
1647 if (pw0->interrupt_is_unwanted) {
1648 interrupts_unwanted++;
1649 pw0->interrupt_is_unwanted = 0;
1652 if (interrupts_needed) {
1653 /* they need more interrupts, do less waiting around next time */
1654 am->fa_current_cleaner_timer_wait_interval /= 2;
1655 /* never go into zero-wait either though - we need to give the space to others */
1656 am->fa_current_cleaner_timer_wait_interval += 1;
1657 } else if (interrupts_unwanted) {
1658 /* slowly increase the amount of sleep up to a limit */
1659 if (am->fa_current_cleaner_timer_wait_interval < max_timer_wait_interval)
/* the interval is in clock ticks, so the increment is scaled by cpu_cps (the increment itself is presumably in seconds - confirm) */
1660 am->fa_current_cleaner_timer_wait_interval += cpu_cps * am->fa_cleaner_wait_time_increment;
1662 am->fa_cleaner_cnt_event_cycles++;
/* new generation: the values the workers stored are now stale until they run again */
1663 am->fa_interrupt_generation++;
/*
 * Enable or disable the flow-aware ACL feature nodes on an interface.
 *
 * sw_if_index    - interface to act on.
 * is_input       - selects the direction; the visible code toggles the
 *                  "ip4-unicast"/"ip6-unicast" arcs and the
 *                  fa_in_acl_on_sw_if_index bitmap in one branch, and the
 *                  "ip4-output"/"ip6-output" arcs and the
 *                  fa_out_acl_on_sw_if_index bitmap in the other.
 * enable_disable - non-zero to enable, zero to disable.
 *
 * On enable it also verifies the session tables are initialised, bumps
 * fa_total_enabled_count and sends ACL_FA_CLEANER_RESCHEDULE to the cleaner
 * process; on disable it decrements the count. When a disable leaves the
 * interface with neither an input nor an output ACL, the cleaner process is
 * sent ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX.
 *
 * Every vlib/vnet call here is bracketed by clib_mem_set_heap to switch to
 * am->vlib_main->heap_base and then restore the previous heap.
 */
1671 acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
1673 acl_main_t *am = &acl_main;
1674 if (enable_disable) {
1675 acl_fa_verify_init_sessions(am);
1676 am->fa_total_enabled_count++;
1677 void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
/* wake the cleaner process so it recomputes its wait (the RESCHEDULE handler itself does nothing) */
1678 vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
1679 ACL_FA_CLEANER_RESCHEDULE, 0);
1680 clib_mem_set_heap (oldheap);
1682 am->fa_total_enabled_count--;
/* the requested state must differ from the recorded one, i.e. no double enable or double disable */
1687 ASSERT(clib_bitmap_get(am->fa_in_acl_on_sw_if_index, sw_if_index) != enable_disable);
1688 void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
1689 vnet_feature_enable_disable ("ip4-unicast", "acl-plugin-in-ip4-fa",
1690 sw_if_index, enable_disable, 0, 0);
1691 vnet_feature_enable_disable ("ip6-unicast", "acl-plugin-in-ip6-fa",
1692 sw_if_index, enable_disable, 0, 0);
1693 clib_mem_set_heap (oldheap);
1694 am->fa_in_acl_on_sw_if_index =
1695 clib_bitmap_set (am->fa_in_acl_on_sw_if_index, sw_if_index,
/* same consistency check for the output direction */
1700 ASSERT(clib_bitmap_get(am->fa_out_acl_on_sw_if_index, sw_if_index) != enable_disable);
1701 void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
1702 vnet_feature_enable_disable ("ip4-output", "acl-plugin-out-ip4-fa",
1703 sw_if_index, enable_disable, 0, 0);
1704 vnet_feature_enable_disable ("ip6-output", "acl-plugin-out-ip6-fa",
1705 sw_if_index, enable_disable, 0, 0);
1706 clib_mem_set_heap (oldheap);
1707 am->fa_out_acl_on_sw_if_index =
1708 clib_bitmap_set (am->fa_out_acl_on_sw_if_index, sw_if_index,
/* last ACL removed from this interface: ask the cleaner to drop its sessions */
1711 if ((!enable_disable) && (!acl_fa_ifc_has_in_acl (am, sw_if_index))
1712 && (!acl_fa_ifc_has_out_acl (am, sw_if_index)))
1714 #ifdef FA_NODE_VERBOSE_DEBUG
1715 clib_warning("ENABLE-DISABLE: clean the connections on interface %d", sw_if_index);
1717 void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
1718 vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
1719 ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX,
1721 clib_mem_set_heap (oldheap);
/*
 * CLI helper: print the session lookup bihash via format_bihash, or a notice
 * that it has not been allocated when fa_sessions_hash_is_initialized is
 * not set.
 *
 * vm      - vlib main used for CLI output.
 * verbose - passed through to the bihash formatter.
 */
1726 show_fa_sessions_hash(vlib_main_t * vm, u32 verbose)
1728 acl_main_t *am = &acl_main;
1729 if (am->fa_sessions_hash_is_initialized) {
1730 vlib_cli_output(vm, "\nSession lookup hash table:\n%U\n\n",
1731 BV (format_bihash), &am->fa_sessions_hash, verbose);
1733 vlib_cli_output(vm, "\nSession lookup hash table is not allocated.\n\n");
/*
 * Registration of the per-thread cleaner: an input-type node kept in
 * interrupt state, so it runs when send_one_worker_interrupt marks an
 * interrupt pending for it.
 */
1740 VLIB_REGISTER_NODE (acl_fa_worker_session_cleaner_process_node, static) = {
1741 .function = acl_fa_worker_conn_cleaner_process,
1742 .name = "acl-plugin-fa-worker-cleaner-process",
1743 .type = VLIB_NODE_TYPE_INPUT,
1744 .state = VLIB_NODE_STATE_INTERRUPT,
/*
 * Registration of the central cleaner as a process-type node, with the
 * cleaner error strings as its counters.
 */
1747 VLIB_REGISTER_NODE (acl_fa_session_cleaner_process_node, static) = {
1748 .function = acl_fa_session_cleaner_process,
1749 .type = VLIB_NODE_TYPE_PROCESS,
1750 .name = "acl-plugin-fa-cleaner-process",
1751 .n_errors = ARRAY_LEN (acl_fa_cleaner_error_strings),
1752 .error_strings = acl_fa_cleaner_error_strings,
/*
 * Graph node "acl-plugin-in-ip6-l2": internal node running
 * acl_in_ip6_l2_node_fn, sharing the common ACL trace formatter and error
 * strings.
 * NOTE(review): the next-node slot is indexed with ACL_FA_ERROR_DROP (an
 * error-code-style name) while the count is ACL_FA_N_NEXT - confirm the
 * value matches the intended next index.
 */
1758 VLIB_REGISTER_NODE (acl_in_l2_ip6_node) =
1760 .function = acl_in_ip6_l2_node_fn,
1761 .name = "acl-plugin-in-ip6-l2",
1762 .vector_size = sizeof (u32),
1763 .format_trace = format_acl_fa_trace,
1764 .type = VLIB_NODE_TYPE_INTERNAL,
1765 .n_errors = ARRAY_LEN (acl_fa_error_strings),
1766 .error_strings = acl_fa_error_strings,
1767 .n_next_nodes = ACL_FA_N_NEXT,
1770 [ACL_FA_ERROR_DROP] = "error-drop",
/*
 * Graph node "acl-plugin-in-ip4-l2": internal node running
 * acl_in_ip4_l2_node_fn, sharing the common ACL trace formatter and error
 * strings.
 * NOTE(review): the next-node slot is indexed with ACL_FA_ERROR_DROP while
 * the count is ACL_FA_N_NEXT - confirm the value matches the intended next
 * index.
 */
1774 VLIB_REGISTER_NODE (acl_in_l2_ip4_node) =
1776 .function = acl_in_ip4_l2_node_fn,
1777 .name = "acl-plugin-in-ip4-l2",
1778 .vector_size = sizeof (u32),
1779 .format_trace = format_acl_fa_trace,
1780 .type = VLIB_NODE_TYPE_INTERNAL,
1781 .n_errors = ARRAY_LEN (acl_fa_error_strings),
1782 .error_strings = acl_fa_error_strings,
1783 .n_next_nodes = ACL_FA_N_NEXT,
1786 [ACL_FA_ERROR_DROP] = "error-drop",
/*
 * Graph node "acl-plugin-out-ip6-l2": internal node running
 * acl_out_ip6_l2_node_fn, sharing the common ACL trace formatter and error
 * strings.
 * NOTE(review): the next-node slot is indexed with ACL_FA_ERROR_DROP while
 * the count is ACL_FA_N_NEXT - confirm the value matches the intended next
 * index.
 */
1790 VLIB_REGISTER_NODE (acl_out_l2_ip6_node) =
1792 .function = acl_out_ip6_l2_node_fn,
1793 .name = "acl-plugin-out-ip6-l2",
1794 .vector_size = sizeof (u32),
1795 .format_trace = format_acl_fa_trace,
1796 .type = VLIB_NODE_TYPE_INTERNAL,
1797 .n_errors = ARRAY_LEN (acl_fa_error_strings),
1798 .error_strings = acl_fa_error_strings,
1799 .n_next_nodes = ACL_FA_N_NEXT,
1802 [ACL_FA_ERROR_DROP] = "error-drop",
/*
 * Graph node "acl-plugin-out-ip4-l2": internal node running
 * acl_out_ip4_l2_node_fn, sharing the common ACL trace formatter and error
 * strings.
 * NOTE(review): the next-node slot is indexed with ACL_FA_ERROR_DROP while
 * the count is ACL_FA_N_NEXT - confirm the value matches the intended next
 * index.
 */
1806 VLIB_REGISTER_NODE (acl_out_l2_ip4_node) =
1808 .function = acl_out_ip4_l2_node_fn,
1809 .name = "acl-plugin-out-ip4-l2",
1810 .vector_size = sizeof (u32),
1811 .format_trace = format_acl_fa_trace,
1812 .type = VLIB_NODE_TYPE_INTERNAL,
1813 .n_errors = ARRAY_LEN (acl_fa_error_strings),
1814 .error_strings = acl_fa_error_strings,
1815 .n_next_nodes = ACL_FA_N_NEXT,
1818 [ACL_FA_ERROR_DROP] = "error-drop",
/*
 * Graph node "acl-plugin-in-ip6-fa": internal node running
 * acl_in_ip6_fa_node_fn; this is the node name enabled on the "ip6-unicast"
 * arc by acl_fa_enable_disable.
 * NOTE(review): the next-node slot is indexed with ACL_FA_ERROR_DROP while
 * the count is ACL_FA_N_NEXT - confirm the value matches the intended next
 * index.
 */
1823 VLIB_REGISTER_NODE (acl_in_fa_ip6_node) =
1825 .function = acl_in_ip6_fa_node_fn,
1826 .name = "acl-plugin-in-ip6-fa",
1827 .vector_size = sizeof (u32),
1828 .format_trace = format_acl_fa_trace,
1829 .type = VLIB_NODE_TYPE_INTERNAL,
1830 .n_errors = ARRAY_LEN (acl_fa_error_strings),
1831 .error_strings = acl_fa_error_strings,
1832 .n_next_nodes = ACL_FA_N_NEXT,
1835 [ACL_FA_ERROR_DROP] = "error-drop",
/*
 * Places "acl-plugin-in-ip6-fa" on the "ip6-unicast" feature arc, ordered
 * before "ip6-flow-classify".
 */
1839 VNET_FEATURE_INIT (acl_in_ip6_fa_feature, static) =
1841 .arc_name = "ip6-unicast",
1842 .node_name = "acl-plugin-in-ip6-fa",
1843 .runs_before = VNET_FEATURES ("ip6-flow-classify"),
/*
 * Graph node "acl-plugin-in-ip4-fa": internal node running
 * acl_in_ip4_fa_node_fn; this is the node name enabled on the "ip4-unicast"
 * arc by acl_fa_enable_disable.
 * NOTE(review): the next-node slot is indexed with ACL_FA_ERROR_DROP while
 * the count is ACL_FA_N_NEXT - confirm the value matches the intended next
 * index.
 */
1846 VLIB_REGISTER_NODE (acl_in_fa_ip4_node) =
1848 .function = acl_in_ip4_fa_node_fn,
1849 .name = "acl-plugin-in-ip4-fa",
1850 .vector_size = sizeof (u32),
1851 .format_trace = format_acl_fa_trace,
1852 .type = VLIB_NODE_TYPE_INTERNAL,
1853 .n_errors = ARRAY_LEN (acl_fa_error_strings),
1854 .error_strings = acl_fa_error_strings,
1855 .n_next_nodes = ACL_FA_N_NEXT,
1858 [ACL_FA_ERROR_DROP] = "error-drop",
/*
 * Places "acl-plugin-in-ip4-fa" on the "ip4-unicast" feature arc, ordered
 * before "ip4-flow-classify".
 */
1862 VNET_FEATURE_INIT (acl_in_ip4_fa_feature, static) =
1864 .arc_name = "ip4-unicast",
1865 .node_name = "acl-plugin-in-ip4-fa",
1866 .runs_before = VNET_FEATURES ("ip4-flow-classify"),
/*
 * Graph node "acl-plugin-out-ip6-fa": internal node running
 * acl_out_ip6_fa_node_fn; this is the node name enabled on the "ip6-output"
 * arc by acl_fa_enable_disable.
 * NOTE(review): the next-node slot is indexed with ACL_FA_ERROR_DROP while
 * the count is ACL_FA_N_NEXT - confirm the value matches the intended next
 * index.
 */
1870 VLIB_REGISTER_NODE (acl_out_fa_ip6_node) =
1872 .function = acl_out_ip6_fa_node_fn,
1873 .name = "acl-plugin-out-ip6-fa",
1874 .vector_size = sizeof (u32),
1875 .format_trace = format_acl_fa_trace,
1876 .type = VLIB_NODE_TYPE_INTERNAL,
1877 .n_errors = ARRAY_LEN (acl_fa_error_strings),
1878 .error_strings = acl_fa_error_strings,
1879 .n_next_nodes = ACL_FA_N_NEXT,
1882 [ACL_FA_ERROR_DROP] = "error-drop",
/*
 * Places "acl-plugin-out-ip6-fa" on the "ip6-output" feature arc, ordered
 * before "interface-output".
 */
1886 VNET_FEATURE_INIT (acl_out_ip6_fa_feature, static) =
1888 .arc_name = "ip6-output",
1889 .node_name = "acl-plugin-out-ip6-fa",
1890 .runs_before = VNET_FEATURES ("interface-output"),
/*
 * Graph node "acl-plugin-out-ip4-fa": internal node running
 * acl_out_ip4_fa_node_fn; this is the node name enabled on the "ip4-output"
 * arc by acl_fa_enable_disable.
 * NOTE(review): the next-node slot is indexed with ACL_FA_ERROR_DROP while
 * the count is ACL_FA_N_NEXT - confirm the value matches the intended next
 * index.
 */
1893 VLIB_REGISTER_NODE (acl_out_fa_ip4_node) =
1895 .function = acl_out_ip4_fa_node_fn,
1896 .name = "acl-plugin-out-ip4-fa",
1897 .vector_size = sizeof (u32),
1898 .format_trace = format_acl_fa_trace,
1899 .type = VLIB_NODE_TYPE_INTERNAL,
1900 .n_errors = ARRAY_LEN (acl_fa_error_strings),
1901 .error_strings = acl_fa_error_strings,
1902 .n_next_nodes = ACL_FA_N_NEXT,
1903 /* edit / add dispositions here */
1906 [ACL_FA_ERROR_DROP] = "error-drop",
/*
 * Places "acl-plugin-out-ip4-fa" on the "ip4-output" feature arc, ordered
 * before "interface-output".
 */
1910 VNET_FEATURE_INIT (acl_out_ip4_fa_feature, static) =
1912 .arc_name = "ip4-output",
1913 .node_name = "acl-plugin-out-ip4-fa",
1914 .runs_before = VNET_FEATURES ("interface-output"),