X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Facl%2Ffa_node.c;h=d29576a4bce5753360ed311a31d208212b1e8e9c;hb=94f9a6de3f706243d138e05b63fef1d5c8174f6c;hp=c0ff1a5e01a9f79303257543bf94e918d97c2e28;hpb=eb46754ef6b3efd958a11ea2f0b110eb99ce3b27;p=vpp.git diff --git a/src/plugins/acl/fa_node.c b/src/plugins/acl/fa_node.c index c0ff1a5e01a..d29576a4bce 100644 --- a/src/plugins/acl/fa_node.c +++ b/src/plugins/acl/fa_node.c @@ -19,19 +19,25 @@ #include #include #include + + #include -#include "bihash_40_8.h" +#include +#include +#include +#include +#include + +#include #include #include -#include "fa_node.h" -#include "hash_lookup.h" - typedef struct { u32 next_index; u32 sw_if_index; + u32 lc_index; u32 match_acl_in_index; u32 match_rule_index; u64 packet_info[6]; @@ -39,6 +45,61 @@ typedef struct u8 action; } acl_fa_trace_t; +/* ICMPv4 invert type for stateful ACL */ +static const u8 icmp4_invmap[] = { + [ICMP4_echo_reply] = ICMP4_echo_request + 1, + [ICMP4_timestamp_reply] = ICMP4_timestamp_request + 1, + [ICMP4_information_reply] = ICMP4_information_request + 1, + [ICMP4_address_mask_reply] = ICMP4_address_mask_request + 1 +}; + +/* Supported ICMPv4 messages for session creation */ +static const u8 icmp4_valid_new[] = { + [ICMP4_echo_request] = 1, + [ICMP4_timestamp_request] = 1, + [ICMP4_information_request] = 1, + [ICMP4_address_mask_request] = 1 +}; + +/* ICMPv6 invert type for stateful ACL */ +static const u8 icmp6_invmap[] = { + [ICMP6_echo_reply - 128] = ICMP6_echo_request + 1, + [ICMP6_node_information_response - 128] = ICMP6_node_information_request + 1 +}; + +/* Supported ICMPv6 messages for session creation */ +static const u8 icmp6_valid_new[] = { + [ICMP6_echo_request - 128] = 1, + [ICMP6_node_information_request - 128] = 1 +}; + +/* IP4 and IP6 protocol numbers of ICMP */ +static u8 icmp_protos[] = { IP_PROTOCOL_ICMP, IP_PROTOCOL_ICMP6 }; + +static u8 * +format_fa_5tuple (u8 * s, va_list * args) +{ + fa_5tuple_t *p5t = va_arg (*args, fa_5tuple_t *); + + return format(s, "lc_index %d (lsb16 of sw_if_index %d) l3 %s%s %U -> %U" + " l4 proto %d l4_valid %d port %d -> %d tcp flags (%s) %02x rsvd %x", + p5t->pkt.lc_index, p5t->l4.lsb_of_sw_if_index, p5t->pkt.is_ip6 ? "ip6" : "ip4", + p5t->pkt.is_nonfirst_fragment ? " non-initial fragment" : "", + format_ip46_address, &p5t->addr[0], p5t->pkt.is_ip6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4, + format_ip46_address, &p5t->addr[1], p5t->pkt.is_ip6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4, + p5t->l4.proto, p5t->pkt.l4_valid, + p5t->l4.port[0], p5t->l4.port[1], + p5t->pkt.tcp_flags_valid ? "valid": "invalid", + p5t->pkt.tcp_flags, + p5t->pkt.flags_reserved); +} + +u8 * +format_acl_plugin_5tuple (u8 * s, va_list * args) +{ + return format_fa_5tuple(s, args); +} + /* packet trace format function */ static u8 * format_acl_fa_trace (u8 * s, va_list * args) @@ -49,12 +110,15 @@ format_acl_fa_trace (u8 * s, va_list * args) s = format (s, - "acl-plugin: sw_if_index %d, next index %d, action: %d, match: acl %d rule %d trace_bits %08x\n" + "acl-plugin: lc_index: %d, sw_if_index %d, next index %d, action: %d, match: acl %d rule %d trace_bits %08x\n" " pkt info %016llx %016llx %016llx %016llx %016llx %016llx", - t->sw_if_index, t->next_index, t->action, t->match_acl_in_index, + t->lc_index, t->sw_if_index, t->next_index, t->action, t->match_acl_in_index, t->match_rule_index, t->trace_bitmap, t->packet_info[0], t->packet_info[1], t->packet_info[2], t->packet_info[3], t->packet_info[4], t->packet_info[5]); + + /* Now also print out the packet_info in a form usable by humans */ + s = format (s, "\n %U", format_fa_5tuple, t->packet_info); return s; } @@ -84,447 +148,88 @@ static char *acl_fa_error_strings[] = { }; /* *INDENT-ON* */ -static void * -get_ptr_to_offset (vlib_buffer_t * b0, int offset) -{ - u8 *p = vlib_buffer_get_current (b0) + offset; - return p; -} - - static int -fa_acl_match_addr (ip46_address_t * addr1, ip46_address_t * addr2, - int prefixlen, int is_ip6) +acl_fa_ifc_has_sessions (acl_main_t * am, int sw_if_index0) { - if (prefixlen == 0) - { - /* match any always succeeds */ - return 1; - } - if (is_ip6) - { - if (memcmp (addr1, addr2, prefixlen / 8)) - { - /* If the starting full bytes do not match, no point in bittwidling the thumbs further */ - return 0; - } - if (prefixlen % 8) - { - u8 b1 = *((u8 *) addr1 + 1 + prefixlen / 8); - u8 b2 = *((u8 *) addr2 + 1 + prefixlen / 8); - u8 mask0 = (0xff - ((1 << (8 - (prefixlen % 8))) - 1)); - return (b1 & mask0) == b2; - } - else - { - /* The prefix fits into integer number of bytes, so nothing left to do */ - return 1; - } - } - else - { - uint32_t a1 = ntohl (addr1->ip4.as_u32); - uint32_t a2 = ntohl (addr2->ip4.as_u32); - uint32_t mask0 = 0xffffffff - ((1 << (32 - prefixlen)) - 1); - return (a1 & mask0) == a2; - } + return am->fa_sessions_hash_is_initialized; } static int -fa_acl_match_port (u16 port, u16 port_first, u16 port_last, int is_ip6) +acl_fa_ifc_has_in_acl (acl_main_t * am, int sw_if_index0) { - return ((port >= port_first) && (port <= port_last)); + int it_has = clib_bitmap_get (am->fa_in_acl_on_sw_if_index, sw_if_index0); + return it_has; } -int -single_acl_match_5tuple (acl_main_t * am, u32 acl_index, fa_5tuple_t * pkt_5tuple, - int is_ip6, u8 * r_action, u32 * r_acl_match_p, - u32 * r_rule_match_p, u32 * trace_bitmap) +static int +acl_fa_ifc_has_out_acl (acl_main_t * am, int sw_if_index0) { - int i; - acl_list_t *a; - acl_rule_t *r; - - if (pool_is_free_index (am->acls, acl_index)) - { - if (r_acl_match_p) - *r_acl_match_p = acl_index; - if (r_rule_match_p) - *r_rule_match_p = -1; - /* the ACL does not exist but is used for policy. Block traffic. */ - return 0; - } - a = am->acls + acl_index; - for (i = 0; i < a->count; i++) - { - r = a->rules + i; - if (is_ip6 != r->is_ipv6) - { - continue; - } - if (!fa_acl_match_addr - (&pkt_5tuple->addr[1], &r->dst, r->dst_prefixlen, is_ip6)) - continue; - -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning - ("ACL_FA_NODE_DBG acl %d rule %d pkt dst addr %U match rule addr %U/%d", - acl_index, i, format_ip46_address, &pkt_5tuple->addr[1], - IP46_TYPE_ANY, format_ip46_address, &r->dst, IP46_TYPE_ANY, - r->dst_prefixlen); -#endif - - if (!fa_acl_match_addr - (&pkt_5tuple->addr[0], &r->src, r->src_prefixlen, is_ip6)) - continue; - -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning - ("ACL_FA_NODE_DBG acl %d rule %d pkt src addr %U match rule addr %U/%d", - acl_index, i, format_ip46_address, &pkt_5tuple->addr[0], - IP46_TYPE_ANY, format_ip46_address, &r->src, IP46_TYPE_ANY, - r->src_prefixlen); - clib_warning - ("ACL_FA_NODE_DBG acl %d rule %d trying to match pkt proto %d with rule %d", - acl_index, i, pkt_5tuple->l4.proto, r->proto); -#endif - if (r->proto) - { - if (pkt_5tuple->l4.proto != r->proto) - continue; - - if (PREDICT_FALSE (pkt_5tuple->pkt.is_nonfirst_fragment && - am->l4_match_nonfirst_fragment)) - { - /* non-initial fragment with frag match configured - match this rule */ - *trace_bitmap |= 0x80000000; - *r_action = r->is_permit; - if (r_acl_match_p) - *r_acl_match_p = acl_index; - if (r_rule_match_p) - *r_rule_match_p = i; - return 1; - } - - /* A sanity check just to ensure we are about to match the ports extracted from the packet */ - if (PREDICT_FALSE (!pkt_5tuple->pkt.l4_valid)) - continue; - -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning - ("ACL_FA_NODE_DBG acl %d rule %d pkt proto %d match rule %d", - acl_index, i, pkt_5tuple->l4.proto, r->proto); -#endif - - if (!fa_acl_match_port - (pkt_5tuple->l4.port[0], r->src_port_or_type_first, - r->src_port_or_type_last, is_ip6)) - continue; - -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning - ("ACL_FA_NODE_DBG acl %d rule %d pkt sport %d match rule [%d..%d]", - acl_index, i, pkt_5tuple->l4.port[0], r->src_port_or_type_first, - r->src_port_or_type_last); -#endif - - if (!fa_acl_match_port - (pkt_5tuple->l4.port[1], r->dst_port_or_code_first, - r->dst_port_or_code_last, is_ip6)) - continue; - -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning - ("ACL_FA_NODE_DBG acl %d rule %d pkt dport %d match rule [%d..%d]", - acl_index, i, pkt_5tuple->l4.port[1], r->dst_port_or_code_first, - r->dst_port_or_code_last); -#endif - if (pkt_5tuple->pkt.tcp_flags_valid - && ((pkt_5tuple->pkt.tcp_flags & r->tcp_flags_mask) != - r->tcp_flags_value)) - continue; - } - /* everything matches! */ -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning ("ACL_FA_NODE_DBG acl %d rule %d FULL-MATCH, action %d", - acl_index, i, r->is_permit); -#endif - *r_action = r->is_permit; - if (r_acl_match_p) - *r_acl_match_p = acl_index; - if (r_rule_match_p) - *r_rule_match_p = i; - return 1; - } - return 0; + int it_has = clib_bitmap_get (am->fa_out_acl_on_sw_if_index, sw_if_index0); + return it_has; } -static u8 -linear_multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2, - int is_ip6, int is_input, u32 * acl_match_p, - u32 * rule_match_p, u32 * trace_bitmap) +/* Session keys match the packets received, and mirror the packets sent */ +static u32 +acl_make_5tuple_session_key (acl_main_t * am, int is_input, int is_ip6, + u32 sw_if_index, fa_5tuple_t * p5tuple_pkt, + fa_5tuple_t * p5tuple_sess) { - acl_main_t *am = &acl_main; - int i; - u32 *acl_vector; - u8 action = 0; + int src_index = is_input ? 0 : 1; + int dst_index = is_input ? 1 : 0; + u32 valid_new_sess = 1; + p5tuple_sess->addr[src_index] = p5tuple_pkt->addr[0]; + p5tuple_sess->addr[dst_index] = p5tuple_pkt->addr[1]; + p5tuple_sess->l4.as_u64 = p5tuple_pkt->l4.as_u64; - if (is_input) + if (PREDICT_TRUE(p5tuple_pkt->l4.proto != icmp_protos[is_ip6])) { - vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index); - acl_vector = am->input_acl_vec_by_sw_if_index[sw_if_index]; + p5tuple_sess->l4.port[src_index] = p5tuple_pkt->l4.port[0]; + p5tuple_sess->l4.port[dst_index] = p5tuple_pkt->l4.port[1]; } else { - vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index); - acl_vector = am->output_acl_vec_by_sw_if_index[sw_if_index]; - } - for (i = 0; i < vec_len (acl_vector); i++) - { -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning ("ACL_FA_NODE_DBG: Trying to match ACL: %d", - acl_vector[i]); -#endif - if (single_acl_match_5tuple - (am, acl_vector[i], pkt_5tuple, is_ip6, &action, - acl_match_p, rule_match_p, trace_bitmap)) - { - return action; - } - } - if (vec_len (acl_vector) > 0) - { - /* If there are ACLs and none matched, deny by default */ - return 0; - } -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning ("ACL_FA_NODE_DBG: No ACL on sw_if_index %d", sw_if_index); -#endif - /* Deny by default. If there are no ACLs defined we should not be here. */ - return 0; -} + static const u8 * icmp_invmap[] = { icmp4_invmap, icmp6_invmap }; + static const u8 * icmp_valid_new[] = { icmp4_valid_new, icmp6_valid_new }; + static const u8 icmp_invmap_size[] = { sizeof(icmp4_invmap), + sizeof(icmp6_invmap) }; + static const u8 icmp_valid_new_size[] = { sizeof(icmp4_valid_new), + sizeof(icmp6_valid_new) }; + int type = is_ip6 ? p5tuple_pkt->l4.port[0]-128: p5tuple_pkt->l4.port[0]; -static u8 -multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2, - int is_ip6, int is_input, u32 * acl_match_p, - u32 * rule_match_p, u32 * trace_bitmap) -{ - acl_main_t *am = &acl_main; - if (am->use_hash_acl_matching) { - return hash_multi_acl_match_5tuple(sw_if_index, pkt_5tuple, is_l2, is_ip6, - is_input, acl_match_p, rule_match_p, trace_bitmap); - } else { - return linear_multi_acl_match_5tuple(sw_if_index, pkt_5tuple, is_l2, is_ip6, - is_input, acl_match_p, rule_match_p, trace_bitmap); - } -} - -static int -offset_within_packet (vlib_buffer_t * b0, int offset) -{ - /* For the purposes of this code, "within" means we have at least 8 bytes after it */ - return (offset <= (b0->current_length - 8)); -} + p5tuple_sess->l4.port[0] = p5tuple_pkt->l4.port[0]; + p5tuple_sess->l4.port[1] = p5tuple_pkt->l4.port[1]; -static void -acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6, - int is_input, int is_l2_path, fa_5tuple_t * p5tuple_pkt) -{ - int l3_offset = ethernet_buffer_header_size(b0); - int l4_offset; - u16 ports[2]; - u16 proto; - /* IP4 and IP6 protocol numbers of ICMP */ - static u8 icmp_protos[] = { IP_PROTOCOL_ICMP, IP_PROTOCOL_ICMP6 }; - - if (is_input && !(is_l2_path)) - { - l3_offset = 0; - } - - /* key[0..3] contains src/dst address and is cleared/set below */ - /* Remainder of the key and per-packet non-key data */ - p5tuple_pkt->kv.key[4] = 0; - p5tuple_pkt->kv.value = 0; - - if (is_ip6) - { - clib_memcpy (&p5tuple_pkt->addr, - get_ptr_to_offset (b0, - offsetof (ip6_header_t, - src_address) + l3_offset), - sizeof (p5tuple_pkt->addr)); - proto = - *(u8 *) get_ptr_to_offset (b0, - offsetof (ip6_header_t, - protocol) + l3_offset); - l4_offset = l3_offset + sizeof (ip6_header_t); -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning ("ACL_FA_NODE_DBG: proto: %d, l4_offset: %d", proto, - l4_offset); -#endif - /* IP6 EH handling is here, increment l4_offset if needs to, update the proto */ - int need_skip_eh = clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto); - if (PREDICT_FALSE (need_skip_eh)) - { - while (need_skip_eh && offset_within_packet (b0, l4_offset)) - { - /* Fragment header needs special handling */ - if (PREDICT_FALSE(ACL_EH_FRAGMENT == proto)) - { - proto = *(u8 *) get_ptr_to_offset (b0, l4_offset); - u16 frag_offset; - clib_memcpy (&frag_offset, get_ptr_to_offset (b0, 2 + l4_offset), sizeof(frag_offset)); - frag_offset = ntohs(frag_offset) >> 3; - if (frag_offset) - { - p5tuple_pkt->pkt.is_nonfirst_fragment = 1; - /* invalidate L4 offset so we don't try to find L4 info */ - l4_offset += b0->current_length; - } - else - { - /* First fragment: skip the frag header and move on. */ - l4_offset += 8; - } - } - else - { - u8 nwords = *(u8 *) get_ptr_to_offset (b0, 1 + l4_offset); - proto = *(u8 *) get_ptr_to_offset (b0, l4_offset); - l4_offset += 8 * (1 + (u16) nwords); - } -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning ("ACL_FA_NODE_DBG: new proto: %d, new offset: %d", - proto, l4_offset); -#endif - need_skip_eh = - clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto); - } - } - } - else - { - p5tuple_pkt->kv.key[0] = 0; - p5tuple_pkt->kv.key[1] = 0; - p5tuple_pkt->kv.key[2] = 0; - p5tuple_pkt->kv.key[3] = 0; - clib_memcpy (&p5tuple_pkt->addr[0].ip4, - get_ptr_to_offset (b0, - offsetof (ip4_header_t, - src_address) + l3_offset), - sizeof (p5tuple_pkt->addr[0].ip4)); - clib_memcpy (&p5tuple_pkt->addr[1].ip4, - get_ptr_to_offset (b0, - offsetof (ip4_header_t, - dst_address) + l3_offset), - sizeof (p5tuple_pkt->addr[1].ip4)); - proto = - *(u8 *) get_ptr_to_offset (b0, - offsetof (ip4_header_t, - protocol) + l3_offset); - l4_offset = l3_offset + sizeof (ip4_header_t); - u16 flags_and_fragment_offset; - clib_memcpy (&flags_and_fragment_offset, - get_ptr_to_offset (b0, - offsetof (ip4_header_t, - flags_and_fragment_offset)) + l3_offset, - sizeof(flags_and_fragment_offset)); - flags_and_fragment_offset = ntohs (flags_and_fragment_offset); - - /* non-initial fragments have non-zero offset */ - if ((PREDICT_FALSE(0xfff & flags_and_fragment_offset))) + /* + * Invert ICMP type for valid icmp_invmap messages: + * 1) input node with outbound ACL interface + * 2) output node with inbound ACL interface + * + */ + if ((is_input && acl_fa_ifc_has_out_acl(am, sw_if_index)) || + (!is_input && acl_fa_ifc_has_in_acl(am, sw_if_index))) { - p5tuple_pkt->pkt.is_nonfirst_fragment = 1; - /* invalidate L4 offset so we don't try to find L4 info */ - l4_offset += b0->current_length; + if (type >= 0 && + type <= icmp_invmap_size[is_ip6] && + icmp_invmap[is_ip6][type]) + { + p5tuple_sess->l4.port[0] = icmp_invmap[is_ip6][type] - 1; + } } - } - p5tuple_pkt->l4.proto = proto; - if (PREDICT_TRUE (offset_within_packet (b0, l4_offset))) - { - p5tuple_pkt->pkt.l4_valid = 1; - if (icmp_protos[is_ip6] == proto) - { - /* type */ - p5tuple_pkt->l4.port[0] = - *(u8 *) get_ptr_to_offset (b0, - l4_offset + offsetof (icmp46_header_t, - type)); - /* code */ - p5tuple_pkt->l4.port[1] = - *(u8 *) get_ptr_to_offset (b0, - l4_offset + offsetof (icmp46_header_t, - code)); - } - else if ((IPPROTO_TCP == proto) || (IPPROTO_UDP == proto)) - { - clib_memcpy (&ports, - get_ptr_to_offset (b0, - l4_offset + offsetof (tcp_header_t, - src_port)), - sizeof (ports)); - p5tuple_pkt->l4.port[0] = ntohs (ports[0]); - p5tuple_pkt->l4.port[1] = ntohs (ports[1]); - - p5tuple_pkt->pkt.tcp_flags = - *(u8 *) get_ptr_to_offset (b0, - l4_offset + offsetof (tcp_header_t, - flags)); - p5tuple_pkt->pkt.tcp_flags_valid = (proto == IPPROTO_TCP); - } /* - * FIXME: rather than the above conditional, here could - * be a nice generic mechanism to extract two L4 values: - * - * have a per-protocol array of 4 elements like this: - * u8 offset; to take the byte from, off L4 header - * u8 mask; to mask it with, before storing - * - * this way we can describe UDP, TCP and ICMP[46] semantics, - * and add a sort of FPM-type behavior for other protocols. - * - * Of course, is it faster ? and is it needed ? - * + * ONLY ICMP messages defined in icmp4_valid_new/icmp6_valid_new table + * are allowed to create stateful ACL. + * The other messages will be forwarded without creating a reflexive ACL. */ + if (type < 0 || + type > icmp_valid_new_size[is_ip6] || + !icmp_valid_new[is_ip6][type]) + { + valid_new_sess = 0; + } } -} - - -/* Session keys match the packets received, and mirror the packets sent */ -static void -acl_make_5tuple_session_key (int is_input, fa_5tuple_t * p5tuple_pkt, - fa_5tuple_t * p5tuple_sess) -{ - int src_index = is_input ? 0 : 1; - int dst_index = is_input ? 1 : 0; - p5tuple_sess->addr[src_index] = p5tuple_pkt->addr[0]; - p5tuple_sess->addr[dst_index] = p5tuple_pkt->addr[1]; - p5tuple_sess->l4.as_u64 = p5tuple_pkt->l4.as_u64; - p5tuple_sess->l4.port[src_index] = p5tuple_pkt->l4.port[0]; - p5tuple_sess->l4.port[dst_index] = p5tuple_pkt->l4.port[1]; -} - -static int -acl_fa_ifc_has_sessions (acl_main_t * am, int sw_if_index0) -{ - return am->fa_sessions_hash_is_initialized; -} - -static int -acl_fa_ifc_has_in_acl (acl_main_t * am, int sw_if_index0) -{ - int it_has = clib_bitmap_get (am->fa_in_acl_on_sw_if_index, sw_if_index0); - return it_has; -} - -static int -acl_fa_ifc_has_out_acl (acl_main_t * am, int sw_if_index0) -{ - int it_has = clib_bitmap_get (am->fa_out_acl_on_sw_if_index, sw_if_index0); - return it_has; + return valid_new_sess; } @@ -599,29 +304,44 @@ fa_session_get_timeout (acl_main_t * am, fa_session_t * sess) } static void -acl_fa_ifc_init_sessions (acl_main_t * am, int sw_if_index0) +acl_fa_verify_init_sessions (acl_main_t * am) { - /// FIXME-MULTICORE: lock around this function -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning - ("Initializing bihash for sw_if_index %d num buckets %lu memory size %llu", - sw_if_index0, am->fa_conn_table_hash_num_buckets, - am->fa_conn_table_hash_memory_size); -#endif - BV (clib_bihash_init) (&am->fa_sessions_hash, + if (!am->fa_sessions_hash_is_initialized) { + u16 wk; + /* Allocate the per-worker sessions pools */ + for (wk = 0; wk < vec_len (am->per_worker_data); wk++) { + acl_fa_per_worker_data_t *pw = &am->per_worker_data[wk]; + + /* + * // In lieu of trying to preallocate the pool and its free bitmap, rather use pool_init_fixed + * pool_alloc_aligned(pw->fa_sessions_pool, am->fa_conn_table_max_entries, CLIB_CACHE_LINE_BYTES); + * clib_bitmap_validate(pool_header(pw->fa_sessions_pool)->free_bitmap, am->fa_conn_table_max_entries); + */ + pool_init_fixed(pw->fa_sessions_pool, am->fa_conn_table_max_entries); + } + + /* ... and the interface session hash table */ + BV (clib_bihash_init) (&am->fa_sessions_hash, "ACL plugin FA session bihash", am->fa_conn_table_hash_num_buckets, am->fa_conn_table_hash_memory_size); - am->fa_sessions_hash_is_initialized = 1; + am->fa_sessions_hash_is_initialized = 1; + } } static inline fa_session_t *get_session_ptr(acl_main_t *am, u16 thread_index, u32 session_index) { acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; - fa_session_t *sess = pw->fa_sessions_pool + session_index; + fa_session_t *sess = pool_is_free_index (pw->fa_sessions_pool, session_index) ? 0 : pool_elt_at_index(pw->fa_sessions_pool, session_index); return sess; } +static inline int is_valid_session_ptr(acl_main_t *am, u16 thread_index, fa_session_t *sess) +{ + acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; + return ((sess != 0) && ((sess - pw->fa_sessions_pool) < pool_len(pw->fa_sessions_pool))); +} + static void acl_fa_conn_list_add_session (acl_main_t * am, fa_full_session_id_t sess_id, u64 now) { @@ -644,13 +364,14 @@ acl_fa_conn_list_add_session (acl_main_t * am, fa_full_session_id_t sess_id, u64 ASSERT(prev_sess->thread_index == sess->thread_index); } pw->fa_conn_list_tail[list_id] = sess_id.session_index; + +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("FA-SESSION-DEBUG: add session id %d on thread %d sw_if_index %d", sess_id.session_index, thread_index, sess->sw_if_index); +#endif pw->serviced_sw_if_index_bitmap = clib_bitmap_set(pw->serviced_sw_if_index_bitmap, sess->sw_if_index, 1); if (~0 == pw->fa_conn_list_head[list_id]) { pw->fa_conn_list_head[list_id] = sess_id.session_index; - /* If it is a first conn in any list, kick the cleaner */ - vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index, - ACL_FA_CLEANER_RESCHEDULE, 0); } } @@ -725,6 +446,7 @@ acl_fa_track_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now, static void acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, fa_full_session_id_t sess_id) { + void *oldheap = clib_mem_set_heap(am->acl_mheap); fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index); ASSERT(sess->thread_index == os_get_thread_index ()); BV (clib_bihash_add_del) (&am->fa_sessions_hash, @@ -733,8 +455,9 @@ acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, fa_full_session_id_t se pool_put_index (pw->fa_sessions_pool, sess_id.session_index); /* Deleting from timer structures not needed, as the caller must have dealt with the timers. */ - vec_validate (am->fa_session_dels_by_sw_if_index, sw_if_index); - am->fa_session_dels_by_sw_if_index[sw_if_index]++; + vec_validate (pw->fa_session_dels_by_sw_if_index, sw_if_index); + clib_mem_set_heap (oldheap); + pw->fa_session_dels_by_sw_if_index[sw_if_index]++; clib_smp_atomic_add(&am->fa_session_total_dels, 1); } @@ -749,10 +472,14 @@ acl_fa_can_add_session (acl_main_t * am, int is_input, u32 sw_if_index) static u64 acl_fa_get_list_head_expiry_time(acl_main_t *am, acl_fa_per_worker_data_t *pw, u64 now, u16 thread_index, int timeout_type) { - if (~0 == pw->fa_conn_list_head[timeout_type]) { + fa_session_t *sess = get_session_ptr(am, thread_index, pw->fa_conn_list_head[timeout_type]); + /* + * We can not check just the index here because inbetween the worker thread might + * dequeue the connection from the head just as we are about to check it. + */ + if (!is_valid_session_ptr(am, thread_index, sess)) { return ~0LL; // infinity. } else { - fa_session_t *sess = get_session_ptr(am, thread_index, pw->fa_conn_list_head[timeout_type]); u64 timeout_time = sess->link_enqueue_time + fa_session_get_list_timeout (am, sess); return timeout_time; @@ -789,6 +516,7 @@ acl_fa_check_idle_sessions(acl_main_t *am, u16 thread_index, u64 now) && (acl_fa_conn_time_to_check(am, pw, now, thread_index, pw->fa_conn_list_head[tt]))) { fsid.session_index = pw->fa_conn_list_head[tt]; + elog_acl_maybe_trace_X2(am, "acl_fa_check_idle_sessions: expire session %d on thread %d", "i4i4", (u32)fsid.session_index, (u32)thread_index); vec_add1(pw->expired, fsid.session_index); acl_fa_conn_list_delete_session(am, fsid); } @@ -808,8 +536,8 @@ acl_fa_check_idle_sessions(acl_main_t *am, u16 thread_index, u64 now) if ((now < sess_timeout_time) && (0 == clib_bitmap_get(pw->pending_clear_sw_if_index_bitmap, sw_if_index))) { #ifdef FA_NODE_VERBOSE_DEBUG - clib_warning ("ACL_FA_NODE_CLEAN: Restarting timer for session %d", - (int) session_index); + clib_warning ("ACL_FA_NODE_CLEAN: Restarting timer for session %d, sw_if_index %d", + (int) fsid.session_index, sess->sw_if_index); #endif /* There was activity on the session, so the idle timeout has not passed. Enqueue for another time period. */ @@ -820,8 +548,8 @@ acl_fa_check_idle_sessions(acl_main_t *am, u16 thread_index, u64 now) else { #ifdef FA_NODE_VERBOSE_DEBUG - clib_warning ("ACL_FA_NODE_CLEAN: Deleting session %d", - (int) session_index); + clib_warning ("ACL_FA_NODE_CLEAN: Deleting session %d, sw_if_index %d", + (int) fsid.session_index, sess->sw_if_index); #endif acl_fa_delete_session (am, sw_if_index, fsid); pw->cnt_deleted_sessions++; @@ -859,7 +587,7 @@ acl_fa_try_recycle_session (acl_main_t * am, int is_input, u16 thread_index, u32 } } -static void +static fa_session_t * acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now, fa_5tuple_t * p5tuple) { @@ -867,6 +595,7 @@ acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now, clib_bihash_kv_40_8_t kv; fa_full_session_id_t f_sess_id; uword thread_index = os_get_thread_index(); + void *oldheap = clib_mem_set_heap(am->acl_mheap); acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; f_sess_id.thread_index = thread_index; @@ -893,27 +622,23 @@ acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now, - if (!acl_fa_ifc_has_sessions (am, sw_if_index)) - { - acl_fa_ifc_init_sessions (am, sw_if_index); - } - + ASSERT(am->fa_sessions_hash_is_initialized == 1); BV (clib_bihash_add_del) (&am->fa_sessions_hash, &kv, 1); acl_fa_conn_list_add_session(am, f_sess_id, now); - vec_validate (am->fa_session_adds_by_sw_if_index, sw_if_index); - am->fa_session_adds_by_sw_if_index[sw_if_index]++; + vec_validate (pw->fa_session_adds_by_sw_if_index, sw_if_index); + clib_mem_set_heap (oldheap); + pw->fa_session_adds_by_sw_if_index[sw_if_index]++; clib_smp_atomic_add(&am->fa_session_total_adds, 1); + return sess; } static int acl_fa_find_session (acl_main_t * am, u32 sw_if_index0, fa_5tuple_t * p5tuple, clib_bihash_kv_40_8_t * pvalue_sess) { - return (BV (clib_bihash_search) - (&am->fa_sessions_hash, &p5tuple->kv, - pvalue_sess) == 0); + return (clib_bihash_search_40_8 (&am->fa_sessions_hash, &p5tuple->kv, pvalue_sess) == 0); } @@ -931,7 +656,6 @@ acl_fa_node_fn (vlib_main_t * vm, u32 pkts_acl_permit = 0; u32 pkts_restart_session_timer = 0; u32 trace_bitmap = 0; - u32 feature_bitmap0; acl_main_t *am = &acl_main; fa_5tuple_t fa_5tuple, kv_sess; clib_bihash_kv_40_8_t value_sess; @@ -958,10 +682,13 @@ acl_fa_node_fn (vlib_main_t * vm, u32 next0 = 0; u8 action = 0; u32 sw_if_index0; + u32 lc_index0; int acl_check_needed = 1; u32 match_acl_in_index = ~0; + u32 match_acl_pos = ~0; u32 match_rule_index = ~0; u8 error0 = 0; + u32 valid_new_sess; /* speculatively enqueue b0 to the current next frame */ bi0 = from[0]; @@ -977,29 +704,31 @@ acl_fa_node_fn (vlib_main_t * vm, sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; else sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; - if (is_l2_path) - feature_bitmap0 = vnet_buffer (b0)->l2.feature_bitmap; + if (is_input) + lc_index0 = am->input_lc_index_by_sw_if_index[sw_if_index0]; + else + lc_index0 = am->output_lc_index_by_sw_if_index[sw_if_index0]; /* * Extract the L3/L4 matching info into a 5-tuple structure, * then create a session key whose layout is independent on forward or reverse * direction of the packet. */ - acl_fill_5tuple (am, b0, is_ip6, is_input, is_l2_path, &fa_5tuple); + acl_plugin_fill_5tuple_inline (lc_index0, b0, is_ip6, is_input, is_l2_path, (fa_5tuple_opaque_t *)&fa_5tuple); fa_5tuple.l4.lsb_of_sw_if_index = sw_if_index0 & 0xffff; - acl_make_5tuple_session_key (is_input, &fa_5tuple, &kv_sess); - fa_5tuple.pkt.sw_if_index = sw_if_index0; + fa_5tuple.pkt.lc_index = lc_index0; + valid_new_sess = acl_make_5tuple_session_key (am, is_input, is_ip6, sw_if_index0, &fa_5tuple, &kv_sess); fa_5tuple.pkt.is_ip6 = is_ip6; - fa_5tuple.pkt.is_input = is_input; + // XXDEL fa_5tuple.pkt.is_input = is_input; fa_5tuple.pkt.mask_type_index_lsb = ~0; #ifdef FA_NODE_VERBOSE_DEBUG clib_warning - ("ACL_FA_NODE_DBG: session 5-tuple %016llx %016llx %016llx %016llx %016llx : %016llx", + ("ACL_FA_NODE_DBG: session 5-tuple %016llx %016llx %016llx %016llx %016llx %016llx", kv_sess.kv.key[0], kv_sess.kv.key[1], kv_sess.kv.key[2], kv_sess.kv.key[3], kv_sess.kv.key[4], kv_sess.kv.value); clib_warning - ("ACL_FA_NODE_DBG: packet 5-tuple %016llx %016llx %016llx %016llx %016llx : %016llx", + ("ACL_FA_NODE_DBG: packet 5-tuple %016llx %016llx %016llx %016llx %016llx %016llx", fa_5tuple.kv.key[0], fa_5tuple.kv.key[1], fa_5tuple.kv.key[2], fa_5tuple.kv.key[3], fa_5tuple.kv.key[4], fa_5tuple.kv.value); #endif @@ -1058,9 +787,9 @@ acl_fa_node_fn (vlib_main_t * vm, if (acl_check_needed) { - action = - multi_acl_match_5tuple (sw_if_index0, &fa_5tuple, is_l2_path, - is_ip6, is_input, &match_acl_in_index, + action = 0; /* deny by default */ + acl_plugin_match_5tuple_inline (lc_index0, (fa_5tuple_opaque_t *)&fa_5tuple, + is_ip6, &action, &match_acl_pos, &match_acl_in_index, &match_rule_index, &trace_bitmap); error0 = action; if (1 == action) @@ -1072,9 +801,21 @@ acl_fa_node_fn (vlib_main_t * vm, if (acl_fa_can_add_session (am, is_input, sw_if_index0)) { - acl_fa_add_session (am, is_input, sw_if_index0, now, - &kv_sess); - pkts_new_session += 1; + if (PREDICT_TRUE (valid_new_sess)) { + fa_session_t *sess = acl_fa_add_session (am, is_input, + sw_if_index0, + now, &kv_sess); + acl_fa_track_session (am, is_input, sw_if_index0, now, + sess, &fa_5tuple); + pkts_new_session += 1; + } else { + /* + * ICMP packets with non-icmp_valid_new type will be + * forwared without being dropped. + */ + action = 1; + pkts_acl_permit += 1; + } } else { @@ -1089,18 +830,20 @@ acl_fa_node_fn (vlib_main_t * vm, if (action > 0) { if (is_l2_path) - next0 = - feat_bitmap_get_next_node_index (l2_feat_next_node_index, - feature_bitmap0); + next0 = vnet_l2_feature_next (b0, l2_feat_next_node_index, 0); else vnet_feature_next (sw_if_index0, &next0, b0); } +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("ACL_FA_NODE_DBG: sw_if_index %d lc_index %d action %d acl_index %d rule_index %d", sw_if_index0, lc_index0, action, match_acl_in_index, match_rule_index); +#endif if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && (b0->flags & VLIB_BUFFER_IS_TRACED))) { acl_fa_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); t->sw_if_index = sw_if_index0; + t->lc_index = lc_index0; t->next_index = next0; t->match_acl_in_index = match_acl_in_index; t->match_rule_index = match_rule_index; @@ -1270,9 +1013,7 @@ acl_fa_worker_conn_cleaner_process(vlib_main_t * vm, u16 thread_index = os_get_thread_index (); acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; int num_expired; -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning("\nacl_fa_worker_conn_cleaner: thread index %d now %lu\n\n", thread_index, now); -#endif + elog_acl_maybe_trace_X1(am, "acl_fa_worker_conn_cleaner interrupt: now %lu", "i8", now); /* allow another interrupt to be queued */ pw->interrupt_is_pending = 0; if (pw->clear_in_process) { @@ -1290,9 +1031,7 @@ acl_fa_worker_conn_cleaner_process(vlib_main_t * vm, */ if ((pw->pending_clear_sw_if_index_bitmap == 0) || (pw->serviced_sw_if_index_bitmap == 0)) { -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning("WORKER-CLEAR: someone tried to call clear, but one of the bitmaps are empty"); -#endif + elog_acl_maybe_trace_X1(am, "acl_fa_worker_conn_cleaner: now %lu, someone tried to call clear but one of the bitmaps are empty", "i8", now); clib_bitmap_zero(pw->pending_clear_sw_if_index_bitmap); } else { #ifdef FA_NODE_VERBOSE_DEBUG @@ -1306,9 +1045,7 @@ acl_fa_worker_conn_cleaner_process(vlib_main_t * vm, if (clib_bitmap_is_zero(pw->pending_clear_sw_if_index_bitmap)) { /* if the cross-section is a zero vector, no need to do anything. */ -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning("WORKER: clearing done - nothing to do"); -#endif + elog_acl_maybe_trace_X1(am, "acl_fa_worker_conn_cleaner: now %lu, clearing done, nothing to do", "i8", now); pw->clear_in_process = 0; } else { #ifdef FA_NODE_VERBOSE_DEBUG @@ -1316,6 +1053,7 @@ acl_fa_worker_conn_cleaner_process(vlib_main_t * vm, format_bitmap_hex, pw->pending_clear_sw_if_index_bitmap, format_bitmap_hex, pw->serviced_sw_if_index_bitmap); #endif + elog_acl_maybe_trace_X1(am, "acl_fa_worker_conn_cleaner: swiping until %lu", "i8", now); /* swipe through the connection lists until enqueue timestamps become above "now" */ pw->swipe_end_time = now; } @@ -1323,18 +1061,15 @@ acl_fa_worker_conn_cleaner_process(vlib_main_t * vm, } num_expired = acl_fa_check_idle_sessions(am, thread_index, now); // clib_warning("WORKER-CLEAR: checked %d sessions (clear_in_progress: %d)", num_expired, pw->clear_in_process); + elog_acl_maybe_trace_X2(am, "acl_fa_worker_conn_cleaner: checked %d sessions (clear_in_process: %d)", "i4i4", (u32)num_expired, (u32)pw->clear_in_process); if (pw->clear_in_process) { if (0 == num_expired) { /* we were clearing but we could not process any more connections. time to stop. */ clib_bitmap_zero(pw->pending_clear_sw_if_index_bitmap); pw->clear_in_process = 0; -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning("WORKER: clearing done, all done"); -#endif + elog_acl_maybe_trace_X1(am, "acl_fa_worker_conn_cleaner: now %lu, clearing done - all done", "i8", now); } else { -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning("WORKER-CLEAR: more work to do, raising interrupt"); -#endif + elog_acl_maybe_trace_X1(am, "acl_fa_worker_conn_cleaner: now %lu, more work to do - requesting interrupt", "i8", now); /* should continue clearing.. So could they please sent an interrupt again? */ pw->interrupt_is_needed = 1; } @@ -1342,15 +1077,19 @@ acl_fa_worker_conn_cleaner_process(vlib_main_t * vm, if (num_expired >= am->fa_max_deleted_sessions_per_interval) { /* there was too much work, we should get an interrupt ASAP */ pw->interrupt_is_needed = 1; + pw->interrupt_is_unwanted = 0; } else if (num_expired <= am->fa_min_deleted_sessions_per_interval) { /* signal that they should trigger us less */ + pw->interrupt_is_needed = 0; pw->interrupt_is_unwanted = 1; } else { /* the current rate of interrupts is ok */ pw->interrupt_is_needed = 0; pw->interrupt_is_unwanted = 0; } + elog_acl_maybe_trace_X3(am, "acl_fa_worker_conn_cleaner: now %lu, interrupt needed: %u, interrupt unwanted: %u", "i8i4i4", now, ((u32)pw->interrupt_is_needed), ((u32)pw->interrupt_is_unwanted)); } + pw->interrupt_generation = am->fa_interrupt_generation; return 0; } @@ -1359,11 +1098,12 @@ send_one_worker_interrupt (vlib_main_t * vm, acl_main_t *am, int thread_index) { acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; if (!pw->interrupt_is_pending) { + pw->interrupt_is_pending = 1; vlib_node_set_interrupt_pending (vlib_mains[thread_index], acl_fa_worker_session_cleaner_process_node.index); - pw->interrupt_is_pending = 1; + elog_acl_maybe_trace_X1(am, "send_one_worker_interrupt: send interrupt to worker %d", "i4", ((u32)thread_index)); /* if the interrupt was requested, mark that done. */ - pw->interrupt_is_needed = 0; + /* pw->interrupt_is_needed = 0; */ } } @@ -1373,7 +1113,7 @@ send_interrupts_to_workers (vlib_main_t * vm, acl_main_t *am) int i; /* Can't use vec_len(am->per_worker_data) since the threads might not have come up yet; */ int n_threads = vec_len(vlib_mains); - for (i = n_threads > 1 ? 1 : 0; i < n_threads; i++) { + for (i = 0; i < n_threads; i++) { send_one_worker_interrupt(vm, am, i); } } @@ -1394,7 +1134,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, am->fa_current_cleaner_timer_wait_interval = max_timer_wait_interval; am->fa_cleaner_node_index = acl_fa_session_cleaner_process_node.index; - + am->fa_interrupt_generation = 1; while (1) { now = clib_cpu_time_now (); @@ -1419,9 +1159,8 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, for(tt = 0; tt < vec_len(pw->fa_conn_list_head); tt++) { u64 head_expiry = acl_fa_get_list_head_expiry_time(am, pw, now, ti, tt); if ((head_expiry < next_expire) && !pw->interrupt_is_pending) { -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning("Head expiry: %lu, now: %lu, next_expire: %lu (worker: %d, tt: %d)", head_expiry, now, next_expire, ti, tt); -#endif + elog_acl_maybe_trace_X3(am, "acl_fa_session_cleaner_process: now %lu, worker: %d tt: %d", "i8i2i2", now, ti, tt); + elog_acl_maybe_trace_X2(am, "acl_fa_session_cleaner_process: head expiry: %lu, is earlier than curr next expire: %lu", "i8i8", head_expiry, next_expire); next_expire = head_expiry; } if (~0 != pw->fa_conn_list_head[tt]) { @@ -1430,10 +1169,11 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, } } - /* If no pending connections then no point in timing out */ - if (!has_pending_conns) + /* If no pending connections and no ACL applied then no point in timing out */ + if (!has_pending_conns && (0 == am->fa_total_enabled_count)) { am->fa_cleaner_cnt_wait_without_timeout++; + elog_acl_maybe_trace_X1(am, "acl_conn_cleaner: now %lu entering wait without timeout", "i8", now); (void) vlib_process_wait_for_event (vm); event_type = vlib_process_get_events (vm, &event_data); } @@ -1448,6 +1188,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, else { am->fa_cleaner_cnt_wait_with_timeout++; + elog_acl_maybe_trace_X2(am, "acl_conn_cleaner: now %lu entering wait with timeout %.6f sec", "i8f8", now, timeout); (void) vlib_process_wait_for_event_or_clock (vm, timeout); event_type = vlib_process_get_events (vm, &event_data); } @@ -1466,17 +1207,12 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, uword *clear_sw_if_index_bitmap = 0; uword *sw_if_index0; int clear_all = 0; -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX received"); -#endif + now = clib_cpu_time_now (); + elog_acl_maybe_trace_X1(am, "acl_fa_session_cleaner_process: now %lu, received ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX", "i8", now); vec_foreach (sw_if_index0, event_data) { am->fa_cleaner_cnt_delete_by_sw_index++; -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning - ("ACL_FA_NODE_CLEAN: ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX: %d", - *sw_if_index0); -#endif + elog_acl_maybe_trace_X1(am, "acl_fa_session_cleaner_process: ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX %d", "i4", *sw_if_index0); if (*sw_if_index0 == ~0) { clear_all = 1; @@ -1496,9 +1232,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, CLIB_MEMORY_BARRIER (); while (pw0->clear_in_process) { CLIB_MEMORY_BARRIER (); -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning("ACL_FA_NODE_CLEAN: waiting previous cleaning cycle to finish on %d...", pw0 - am->per_worker_data); -#endif + elog_acl_maybe_trace_X1(am, "ACL_FA_NODE_CLEAN: waiting previous cleaning cycle to finish on %d", "i4", (u32)(pw0 - am->per_worker_data)); vlib_process_suspend(vm, 0.0001); if (pw0->interrupt_is_needed) { send_one_worker_interrupt(vm, am, (pw0 - am->per_worker_data)); @@ -1530,9 +1264,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, CLIB_MEMORY_BARRIER (); while (pw0->clear_in_process) { CLIB_MEMORY_BARRIER (); -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning("ACL_FA_NODE_CLEAN: waiting for my cleaning cycle to finish on %d...", pw0 - am->per_worker_data); -#endif + elog_acl_maybe_trace_X1(am, "ACL_FA_NODE_CLEAN: waiting for my cleaning cycle to finish on %d", "i4", (u32)(pw0 - am->per_worker_data)); vlib_process_suspend(vm, 0.0001); if (pw0->interrupt_is_needed) { send_one_worker_interrupt(vm, am, (pw0 - am->per_worker_data)); @@ -1563,6 +1295,23 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, if (event_data) _vec_len (event_data) = 0; + /* + * If the interrupts were not processed yet, ensure we wait a bit, + * but up to a point. + */ + int need_more_wait = 0; + int max_wait_cycles = 100; + do { + need_more_wait = 0; + vec_foreach(pw0, am->per_worker_data) { + if (pw0->interrupt_generation != am->fa_interrupt_generation) { + need_more_wait = 1; + } + } + if (need_more_wait) { + vlib_process_suspend(vm, 0.0001); + } + } while (need_more_wait && (--max_wait_cycles > 0)); int interrupts_needed = 0; int interrupts_unwanted = 0; @@ -1580,12 +1329,15 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, if (interrupts_needed) { /* they need more interrupts, do less waiting around next time */ am->fa_current_cleaner_timer_wait_interval /= 2; + /* never go into zero-wait either though - we need to give the space to others */ + am->fa_current_cleaner_timer_wait_interval += 1; } else if (interrupts_unwanted) { /* slowly increase the amount of sleep up to a limit */ if (am->fa_current_cleaner_timer_wait_interval < max_timer_wait_interval) am->fa_current_cleaner_timer_wait_interval += cpu_cps * am->fa_cleaner_wait_time_increment; } am->fa_cleaner_cnt_event_cycles++; + am->fa_interrupt_generation++; } /* NOT REACHED */ return 0; @@ -1596,22 +1348,39 @@ void acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable) { acl_main_t *am = &acl_main; + if (enable_disable) { + acl_fa_verify_init_sessions(am); + am->fa_total_enabled_count++; + void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base); + vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index, + ACL_FA_CLEANER_RESCHEDULE, 0); + clib_mem_set_heap (oldheap); + } else { + am->fa_total_enabled_count--; + } + if (is_input) { + ASSERT(clib_bitmap_get(am->fa_in_acl_on_sw_if_index, sw_if_index) != enable_disable); + void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base); vnet_feature_enable_disable ("ip4-unicast", "acl-plugin-in-ip4-fa", sw_if_index, enable_disable, 0, 0); vnet_feature_enable_disable ("ip6-unicast", "acl-plugin-in-ip6-fa", sw_if_index, enable_disable, 0, 0); + clib_mem_set_heap (oldheap); am->fa_in_acl_on_sw_if_index = clib_bitmap_set (am->fa_in_acl_on_sw_if_index, sw_if_index, enable_disable); } else { + ASSERT(clib_bitmap_get(am->fa_out_acl_on_sw_if_index, sw_if_index) != enable_disable); + void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base); vnet_feature_enable_disable ("ip4-output", "acl-plugin-out-ip4-fa", sw_if_index, enable_disable, 0, 0); vnet_feature_enable_disable ("ip6-output", "acl-plugin-out-ip6-fa", sw_if_index, enable_disable, 0, 0); + clib_mem_set_heap (oldheap); am->fa_out_acl_on_sw_if_index = clib_bitmap_set (am->fa_out_acl_on_sw_if_index, sw_if_index, enable_disable); @@ -1622,9 +1391,11 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable) #ifdef FA_NODE_VERBOSE_DEBUG clib_warning("ENABLE-DISABLE: clean the connections on interface %d", sw_if_index); #endif + void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base); vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index, ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX, sw_if_index); + clib_mem_set_heap (oldheap); } }