2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <netinet/in.h>
18 #include <vlib/vlib.h>
19 #include <vnet/vnet.h>
20 #include <vnet/pg/pg.h>
21 #include <vppinfra/error.h>
23 #include "bihash_40_8.h"
25 #include <vppinfra/bihash_template.h>
26 #include <vppinfra/bihash_template.c>
/* Trace record field printed by format_acl_fa_trace as "match: acl %d". NOTE(review): the remaining fields of the record are not visible in this view. */
34 u32 match_acl_in_index;
41 /* packet trace format function */
/*
 * Takes three va_list arguments in this order: a vlib_main_t pointer and a
 * vlib_node_t pointer (both marked unused) and the acl_fa_trace_t record.
 * The record is rendered as one summary line followed by the six 64-bit
 * packet_info words printed as zero-padded hex.
 * NOTE(review): the format() call itself and the return statement are not
 * visible in this view.
 */
43 format_acl_fa_trace (u8 * s, va_list * args)
45 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
46 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
47 acl_fa_trace_t *t = va_arg (*args, acl_fa_trace_t *);
51 "acl-plugin: sw_if_index %d, next index %d, action: %d, match: acl %d rule %d trace_bits %08x\n"
52 " pkt info %016llx %016llx %016llx %016llx %016llx %016llx",
53 t->sw_if_index, t->next_index, t->action, t->match_acl_in_index,
54 t->match_rule_index, t->trace_bitmap,
55 t->packet_info[0], t->packet_info[1], t->packet_info[2],
56 t->packet_info[3], t->packet_info[4], t->packet_info[5]);
/*
 * X-macro listing the counters of the ACL fast-path nodes.
 * Each entry is _(SYMBOL, "description"); users define _() before expanding
 * the list (e.g. to build the ACL_FA_ERROR_<SYMBOL> enumerators or the
 * matching description strings) and undefine it afterwards.
 *
 * The last entry deliberately has no trailing line-continuation, so the
 * macro body never depends on what the next source line happens to be.
 */
#define foreach_acl_fa_error \
_(ACL_DROP, "ACL deny packets")  \
_(ACL_PERMIT, "ACL permit packets")  \
_(ACL_NEW_SESSION, "new sessions added") \
_(ACL_EXIST_SESSION, "existing session packets") \
_(ACL_CHECK, "checked packets") \
_(ACL_RESTART_SESSION_TIMER, "restart session timer") \
_(ACL_TOO_MANY_SESSIONS, "too many sessions to add new")
/* Expander that turns every foreach_acl_fa_error entry into an ACL_FA_ERROR_<sym> enumerator. */
73 #define _(sym,str) ACL_FA_ERROR_##sym,
/* Description strings of the counters; this expander keeps only the string of each entry. The node registrations in this file pass the array as .error_strings. */
79 static char *acl_fa_error_strings[] = {
80 #define _(sym,string) string,
87 get_ptr_to_offset (vlib_buffer_t * b0, int offset)
89 u8 *p = vlib_buffer_get_current (b0) + offset;
95 fa_acl_match_addr (ip46_address_t * addr1, ip46_address_t * addr2,
96 int prefixlen, int is_ip6)
100 /* match any always succeeds */
105 if (memcmp (addr1, addr2, prefixlen / 8))
107 /* If the starting full bytes do not match, no point in bittwidling the thumbs further */
112 u8 b1 = *((u8 *) addr1 + 1 + prefixlen / 8);
113 u8 b2 = *((u8 *) addr2 + 1 + prefixlen / 8);
114 u8 mask0 = (0xff - ((1 << (8 - (prefixlen % 8))) - 1));
115 return (b1 & mask0) == b2;
119 /* The prefix fits into integer number of bytes, so nothing left to do */
125 uint32_t a1 = ntohl (addr1->ip4.as_u32);
126 uint32_t a2 = ntohl (addr2->ip4.as_u32);
127 uint32_t mask0 = 0xffffffff - ((1 << (32 - prefixlen)) - 1);
128 return (a1 & mask0) == a2;
133 fa_acl_match_port (u16 port, u16 port_first, u16 port_last, int is_ip6)
135 return ((port >= port_first) && (port <= port_last));
/*
 * Compare one packet 5-tuple against the rules of the ACL at acl_index.
 * The rules are visited in index order (i = 0 .. a->count - 1) and the checks
 * visible below are, per rule: address family, destination prefix, source
 * prefix, protocol, L4 validity, source port/type range, destination
 * port/code range and TCP flags.
 * On a full match *r_action receives the rule's is_permit value and
 * *r_acl_match_p the ACL index.
 * NOTE(review): the return statements and several control-flow lines are not
 * visible in this view, so the return convention is not described here.
 */
139 acl_match_5tuple (acl_main_t * am, u32 acl_index, fa_5tuple_t * pkt_5tuple,
140 int is_ip6, u8 * r_action, u32 * r_acl_match_p,
141 u32 * r_rule_match_p, u32 * trace_bitmap)
/* An ACL index that is free in the pool is still reported back, with rule index -1. */
147 if (pool_is_free_index (am->acls, acl_index))
150 *r_acl_match_p = acl_index;
152 *r_rule_match_p = -1;
153 /* the ACL does not exist but is used for policy. Block traffic. */
156 a = am->acls + acl_index;
157 for (i = 0; i < a->count; i++)
/* Address-family check: the rule's is_ipv6 flag is compared with the packet's. */
160 if (is_ip6 != r->is_ipv6)
/* Destination address (addr[1]) against the rule's destination prefix. */
164 if (!fa_acl_match_addr
165 (&pkt_5tuple->addr[1], &r->dst, r->dst_prefixlen, is_ip6))
168 #ifdef FA_NODE_VERBOSE_DEBUG
170 ("ACL_FA_NODE_DBG acl %d rule %d pkt dst addr %U match rule addr %U/%d",
171 acl_index, i, format_ip46_address, &pkt_5tuple->addr[1],
172 IP46_TYPE_ANY, format_ip46_address, &r->dst, IP46_TYPE_ANY,
/* Source address (addr[0]) against the rule's source prefix. */
176 if (!fa_acl_match_addr
177 (&pkt_5tuple->addr[0], &r->src, r->src_prefixlen, is_ip6))
180 #ifdef FA_NODE_VERBOSE_DEBUG
182 ("ACL_FA_NODE_DBG acl %d rule %d pkt src addr %U match rule addr %U/%d",
183 acl_index, i, format_ip46_address, &pkt_5tuple->addr[0],
184 IP46_TYPE_ANY, format_ip46_address, &r->src, IP46_TYPE_ANY,
187 ("ACL_FA_NODE_DBG acl %d rule %d trying to match pkt proto %d with rule %d",
188 acl_index, i, pkt_5tuple->l4.proto, r->proto);
/* Protocol comparison; the enclosing condition is not visible in this view. */
192 if (pkt_5tuple->l4.proto != r->proto)
194 /* A sanity check just to ensure what we have just matched was a valid L4 extracted from the packet */
195 if (PREDICT_FALSE (!pkt_5tuple->pkt.l4_valid))
198 #ifdef FA_NODE_VERBOSE_DEBUG
200 ("ACL_FA_NODE_DBG acl %d rule %d pkt proto %d match rule %d",
201 acl_index, i, pkt_5tuple->l4.proto, r->proto);
/* port[0] is checked against the rule's source port / type range. */
204 if (!fa_acl_match_port
205 (pkt_5tuple->l4.port[0], r->src_port_or_type_first,
206 r->src_port_or_type_last, is_ip6))
209 #ifdef FA_NODE_VERBOSE_DEBUG
211 ("ACL_FA_NODE_DBG acl %d rule %d pkt sport %d match rule [%d..%d]",
212 acl_index, i, pkt_5tuple->l4.port[0], r->src_port_or_type_first,
213 r->src_port_or_type_last);
/* port[1] is checked against the rule's destination port / code range. */
216 if (!fa_acl_match_port
217 (pkt_5tuple->l4.port[1], r->dst_port_or_code_first,
218 r->dst_port_or_code_last, is_ip6))
221 #ifdef FA_NODE_VERBOSE_DEBUG
223 ("ACL_FA_NODE_DBG acl %d rule %d pkt dport %d match rule [%d..%d]",
224 acl_index, i, pkt_5tuple->l4.port[1], r->dst_port_or_code_first,
225 r->dst_port_or_code_last);
/* TCP flags are only compared when the packet carried valid flags; they are masked with the rule's tcp_flags_mask first. */
227 if (pkt_5tuple->pkt.tcp_flags_valid
228 && ((pkt_5tuple->pkt.tcp_flags & r->tcp_flags_mask) !=
232 /* everything matches! */
233 #ifdef FA_NODE_VERBOSE_DEBUG
234 clib_warning ("ACL_FA_NODE_DBG acl %d rule %d FULL-MATCH, action %d",
235 acl_index, i, r->is_permit);
237 *r_action = r->is_permit;
239 *r_acl_match_p = acl_index;
/*
 * Run a packet 5-tuple through every ACL applied to sw_if_index, using either
 * the per-interface input vector or the output vector.
 * Each ACL index of the chosen vector is passed in order to a per-ACL matcher
 * together with acl_match_p, rule_match_p and trace_bitmap.
 * The comments below state that traffic is denied by default when ACLs are
 * applied and none matches.
 * NOTE(review): the branch selecting input vs. output and the return
 * statements are not visible in this view.
 */
248 full_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2,
249 int is_ip6, int is_input, u32 * acl_match_p,
250 u32 * rule_match_p, u32 * trace_bitmap)
252 acl_main_t *am = &acl_main;
/* vec_validate is called before the per-interface vector is indexed with sw_if_index. */
259 vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index);
260 acl_vector = am->input_acl_vec_by_sw_if_index[sw_if_index];
264 vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index);
265 acl_vector = am->output_acl_vec_by_sw_if_index[sw_if_index];
267 for (i = 0; i < vec_len (acl_vector); i++)
269 #ifdef FA_NODE_VERBOSE_DEBUG
270 clib_warning ("ACL_FA_NODE_DBG: Trying to match ACL: %d",
274 (am, acl_vector[i], pkt_5tuple, is_ip6, &action,
275 acl_match_p, rule_match_p, trace_bitmap))
280 if (vec_len (acl_vector) > 0)
282 /* If there are ACLs and none matched, deny by default */
285 #ifdef FA_NODE_VERBOSE_DEBUG
286 clib_warning ("ACL_FA_NODE_DBG: No ACL on sw_if_index %d", sw_if_index);
288 /* Deny by default. If there are no ACLs defined we should not be here. */
/*
 * Tell whether `offset` is usable for header parsing in buffer b0.
 * The test is offset < current_length - 8, i.e. at least 8 more bytes must
 * follow the offset.
 * NOTE(review): the subtraction relies on the integer type of
 * current_length, which is declared outside this view.
 */
293 offset_within_packet (vlib_buffer_t * b0, int offset)
295 /* For the purposes of this code, "within" means we have at least 8 bytes after it */
296 return (offset < (b0->current_length - 8));
/*
 * Parse the L3/L4 headers of buffer b0 into the 5-tuple p5tuple_pkt.
 * IPv6: both addresses are copied with a single memcpy starting at the
 * header's src_address, the protocol byte is read, and extension headers
 * flagged in am->fa_ipv6_known_eh_bitmap are walked to find the L4 header.
 * IPv4: key words 0..3 are zeroed, then source and destination are copied
 * into the ip4 part of addr[0] / addr[1].
 * L4: for ICMP/ICMPv6 two single bytes of the ICMP header go into port[0] and
 * port[1]; for TCP/UDP both ports are converted with ntohs, and the TCP flags
 * byte is stored with tcp_flags_valid set only for TCP.
 * NOTE(review): how l3_offset is computed, and which ICMP/TCP header fields
 * are named in the offsetof() calls, is not visible in this view.
 */
300 acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
301 int is_input, int is_l2_path, fa_5tuple_t * p5tuple_pkt)
307 /* IP4 and IP6 protocol numbers of ICMP */
308 static u8 icmp_protos[] = { IP_PROTOCOL_ICMP, IP_PROTOCOL_ICMP6 };
310 if (is_input && !(is_l2_path))
/* sizeof (p5tuple_pkt->addr) covers both addr[] slots, so source and destination are copied in one go. */
318 clib_memcpy (&p5tuple_pkt->addr,
319 get_ptr_to_offset (b0,
320 offsetof (ip6_header_t,
321 src_address) + l3_offset),
322 sizeof (p5tuple_pkt->addr));
324 *(u8 *) get_ptr_to_offset (b0,
325 offsetof (ip6_header_t,
326 protocol) + l3_offset);
327 l4_offset = l3_offset + sizeof (ip6_header_t);
328 #ifdef FA_NODE_VERBOSE_DEBUG
329 clib_warning ("ACL_FA_NODE_DBG: proto: %d, l4_offset: %d", proto,
332 /* IP6 EH handling is here, increment l4_offset if needs to, update the proto */
333 int need_skip_eh = clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto);
334 if (PREDICT_FALSE (need_skip_eh))
336 /* FIXME: add fragment header special handling. Currently it is treated as an unknown header. */
337 while (need_skip_eh && offset_within_packet (b0, l4_offset))
/* Byte 0 of the extension header is the next protocol, byte 1 its length in 8-byte units beyond the first 8 bytes. */
339 u8 nwords = *(u8 *) get_ptr_to_offset (b0, 1 + l4_offset);
340 proto = *(u8 *) get_ptr_to_offset (b0, l4_offset);
341 l4_offset += 8 * (1 + (u16) nwords);
342 #ifdef FA_NODE_VERBOSE_DEBUG
343 clib_warning ("ACL_FA_NODE_DBG: new proto: %d, new offset: %d",
347 clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto);
/* IPv4: clear the first four key words before filling in the 4-byte addresses. */
353 p5tuple_pkt->kv.key[0] = 0;
354 p5tuple_pkt->kv.key[1] = 0;
355 p5tuple_pkt->kv.key[2] = 0;
356 p5tuple_pkt->kv.key[3] = 0;
357 clib_memcpy (&p5tuple_pkt->addr[0].ip4,
358 get_ptr_to_offset (b0,
359 offsetof (ip4_header_t,
360 src_address) + l3_offset),
361 sizeof (p5tuple_pkt->addr[0].ip4));
362 clib_memcpy (&p5tuple_pkt->addr[1].ip4,
363 get_ptr_to_offset (b0,
364 offsetof (ip4_header_t,
365 dst_address) + l3_offset),
366 sizeof (p5tuple_pkt->addr[1].ip4));
368 *(u8 *) get_ptr_to_offset (b0,
369 offsetof (ip4_header_t,
370 protocol) + l3_offset);
/* NOTE(review): the L4 offset uses the fixed ip4_header_t size; the header-length field is not consulted in the visible code. */
371 l4_offset = l3_offset + sizeof (ip4_header_t);
373 /* Remainder of the key and per-packet non-key data */
374 p5tuple_pkt->kv.key[4] = 0;
375 p5tuple_pkt->kv.value = 0;
/* L4 fields are only extracted when the L4 offset passes offset_within_packet. */
376 if (PREDICT_TRUE (offset_within_packet (b0, l4_offset)))
378 p5tuple_pkt->l4.proto = proto;
379 p5tuple_pkt->pkt.l4_valid = 1;
/* icmp_protos is indexed by is_ip6: entry 0 is ICMP, entry 1 is ICMPv6. */
380 if (icmp_protos[is_ip6] == proto)
383 p5tuple_pkt->l4.port[0] =
384 *(u8 *) get_ptr_to_offset (b0,
385 l4_offset + offsetof (icmp46_header_t,
388 p5tuple_pkt->l4.port[1] =
389 *(u8 *) get_ptr_to_offset (b0,
390 l4_offset + offsetof (icmp46_header_t,
393 else if ((IPPROTO_TCP == proto) || (IPPROTO_UDP == proto))
396 get_ptr_to_offset (b0,
397 l4_offset + offsetof (tcp_header_t,
400 p5tuple_pkt->l4.port[0] = ntohs (ports[0]);
401 p5tuple_pkt->l4.port[1] = ntohs (ports[1]);
403 p5tuple_pkt->pkt.tcp_flags =
404 *(u8 *) get_ptr_to_offset (b0,
405 l4_offset + offsetof (tcp_header_t,
407 p5tuple_pkt->pkt.tcp_flags_valid = (proto == IPPROTO_TCP);
410 * FIXME: rather than the above conditional, here could
411 * be a nice generic mechanism to extract two L4 values:
413 * have a per-protocol array of 4 elements like this:
414 * u8 offset; to take the byte from, off L4 header
415 * u8 mask; to mask it with, before storing
417 * this way we can describe UDP, TCP and ICMP[46] semantics,
418 * and add a sort of FPM-type behavior for other protocols.
420 * Of course, is it faster ? and is it needed ?
427 /* Session keys match the packets received, and mirror the packets sent */
429 acl_make_5tuple_session_key (int is_input, fa_5tuple_t * p5tuple_pkt,
430 fa_5tuple_t * p5tuple_sess)
432 int src_index = is_input ? 0 : 1;
433 int dst_index = is_input ? 1 : 0;
434 p5tuple_sess->addr[src_index] = p5tuple_pkt->addr[0];
435 p5tuple_sess->addr[dst_index] = p5tuple_pkt->addr[1];
436 p5tuple_sess->l4.as_u64 = p5tuple_pkt->l4.as_u64;
437 p5tuple_sess->l4.port[src_index] = p5tuple_pkt->l4.port[0];
438 p5tuple_sess->l4.port[dst_index] = p5tuple_pkt->l4.port[1];
443 acl_fa_ifc_has_sessions (acl_main_t * am, int sw_if_index0)
446 clib_bitmap_get (am->fa_sessions_on_sw_if_index, sw_if_index0);
451 acl_fa_ifc_has_in_acl (acl_main_t * am, int sw_if_index0)
453 int it_has = clib_bitmap_get (am->fa_in_acl_on_sw_if_index, sw_if_index0);
458 acl_fa_ifc_has_out_acl (acl_main_t * am, int sw_if_index0)
460 int it_has = clib_bitmap_get (am->fa_out_acl_on_sw_if_index, sw_if_index0);
/*
 * Pick the timeout class (ACL_TIMEOUT_*) of a session from its protocol and,
 * for the branch that inspects flags, from the TCP flags seen so far.
 * tcp_flags_seen.as_u16 holds one flags byte per direction; both bytes are
 * masked with TCP_FLAGS_RSTFINACKSYN, and the result must equal SYN+ACK in
 * both bytes for the session to get ACL_TIMEOUT_TCP_IDLE, otherwise it gets
 * ACL_TIMEOUT_TCP_TRANSIENT.
 * NOTE(review): the case labels of the switch are not visible in this view.
 */
466 fa_session_get_timeout_type (acl_main_t * am, fa_session_t * sess)
468 /* seen both SYNs and ACKs but not FINs means we are in established state */
470 sess->tcp_flags_seen.as_u16 & ((TCP_FLAGS_RSTFINACKSYN << 8) +
471 TCP_FLAGS_RSTFINACKSYN);
472 switch (sess->info.l4.proto)
475 if (((TCP_FLAGS_ACKSYN << 8) + TCP_FLAGS_ACKSYN) == masked_flags)
477 return ACL_TIMEOUT_TCP_IDLE;
481 return ACL_TIMEOUT_TCP_TRANSIENT;
485 return ACL_TIMEOUT_UDP_IDLE;
/* The last visible branch also returns the UDP idle class. */
488 return ACL_TIMEOUT_UDP_IDLE;
494 fa_session_get_timeout (acl_main_t * am, fa_session_t * sess)
496 u64 timeout = am->vlib_main->clib_time.clocks_per_second;
497 int timeout_type = fa_session_get_timeout_type (am, sess);
498 timeout *= am->session_timeout_sec[timeout_type];
/*
 * Set up session tracking for one interface: grow the per-interface table
 * vector to cover sw_if_index0, initialize that interface's bihash with the
 * configured bucket count and memory size, and set the interface's bit in
 * am->fa_sessions_on_sw_if_index.
 */
503 acl_fa_ifc_init_sessions (acl_main_t * am, int sw_if_index0)
505 #ifdef FA_NODE_VERBOSE_DEBUG
507 ("Initializing bihash for sw_if_index %d num buckets %lu memory size %llu",
508 sw_if_index0, am->fa_conn_table_hash_num_buckets,
509 am->fa_conn_table_hash_memory_size);
511 vec_validate (am->fa_sessions_by_sw_if_index, sw_if_index0);
512 BV (clib_bihash_init) (&am->fa_sessions_by_sw_if_index
513 [sw_if_index0], "ACL plugin FA session bihash",
514 am->fa_conn_table_hash_num_buckets,
515 am->fa_conn_table_hash_memory_size);
/* clib_bitmap_set returns the bitmap pointer, which is stored back. */
516 am->fa_sessions_on_sw_if_index =
517 clib_bitmap_set (am->fa_sessions_on_sw_if_index, sw_if_index0, 1);
521 acl_fa_conn_list_add_session (acl_main_t * am, u32 sess_id)
523 fa_session_t *sess = am->fa_sessions_pool + sess_id;
524 u8 list_id = fa_session_get_timeout_type(am, sess);
525 sess->link_list_id = list_id;
526 sess->link_next_idx = ~0;
527 sess->link_prev_idx = am->fa_conn_list_tail[list_id];
528 if (~0 != am->fa_conn_list_tail[list_id]) {
529 fa_session_t *prev_sess = am->fa_sessions_pool + am->fa_conn_list_tail[list_id];
530 prev_sess->link_next_idx = sess_id;
532 am->fa_conn_list_tail[list_id] = sess_id;
534 if (~0 == am->fa_conn_list_head[list_id]) {
535 am->fa_conn_list_head[list_id] = sess_id;
536 /* If it is a first conn in any list, kick off the cleaner */
537 vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
538 ACL_FA_CLEANER_RESCHEDULE, 0);
544 acl_fa_conn_list_delete_session (acl_main_t *am, u32 sess_id)
546 fa_session_t *sess = am->fa_sessions_pool + sess_id;
547 if (~0 != sess->link_prev_idx) {
548 fa_session_t *prev_sess = am->fa_sessions_pool + sess->link_prev_idx;
549 prev_sess->link_next_idx = sess->link_next_idx;
550 if (prev_sess->link_list_id != sess->link_list_id)
551 clib_warning("(prev_sess->link_list_id != sess->link_list_id)");
553 if (~0 != sess->link_next_idx) {
554 fa_session_t *next_sess = am->fa_sessions_pool + sess->link_next_idx;
555 next_sess->link_prev_idx = sess->link_prev_idx;
556 if (next_sess->link_list_id != sess->link_list_id)
557 clib_warning("(next_sess->link_list_id != sess->link_list_id)");
559 if (am->fa_conn_list_head[sess->link_list_id] == sess_id) {
560 am->fa_conn_list_head[sess->link_list_id] = sess->link_next_idx;
562 if (am->fa_conn_list_tail[sess->link_list_id] == sess_id) {
563 am->fa_conn_list_tail[sess->link_list_id] = sess->link_prev_idx;
/* NOTE(review): only the first line of this signature is visible; the remaining parameters and the body lie outside this view. */
569 acl_fa_session_is_dead (acl_main_t * am, u32 sw_if_index, u64 now,
576 acl_fa_restart_timer_for_session (acl_main_t * am, u64 now, u32 sess_id)
578 // fa_session_t *sess = am->fa_sessions_pool + sess_id;
579 acl_fa_conn_list_delete_session(am, sess_id);
580 acl_fa_conn_list_add_session(am, sess_id);
/*
 * Record activity on a session: refresh last_active_time and, when the packet
 * carried valid TCP flags, OR them into the per-direction flags byte
 * (tcp_flags_seen.as_u8 is indexed by is_input).
 * NOTE(review): the return statement is not visible in this view.
 */
585 acl_fa_track_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
586 fa_session_t * sess, fa_5tuple_t * pkt_5tuple)
588 sess->last_active_time = now;
589 if (pkt_5tuple->pkt.tcp_flags_valid)
591 sess->tcp_flags_seen.as_u8[is_input] |= pkt_5tuple->pkt.tcp_flags;
/*
 * Delete session sess_id: update the bihash of interface sw_if_index, return
 * the session to the pool and bump the per-interface delete counter.
 * This function does not touch the connection lists.
 * NOTE(review): the remaining arguments of the bihash call are not visible in
 * this view.
 */
598 acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, u32 sess_id)
600 fa_session_t *sess = (fa_session_t *) am->fa_sessions_pool + sess_id;
601 BV (clib_bihash_add_del) (&am->fa_sessions_by_sw_if_index[sw_if_index],
603 pool_put_index (am->fa_sessions_pool, sess_id);
604 /* Deleting from timer wheel not needed, as the cleaner deals with the timers. */
/* The counter vector is grown on demand before it is incremented. */
605 vec_validate (am->fa_session_dels_by_sw_if_index, sw_if_index);
606 am->fa_session_dels_by_sw_if_index[sw_if_index]++;
/*
 * Tell whether interface sw_if_index may take another session.
 * The current session count is the per-interface add counter minus the
 * per-interface delete counter; the result is true while that count is below
 * am->fa_conn_table_max_entries. Both counter vectors are grown on demand.
 * is_input is not used in the visible code.
 */
610 acl_fa_can_add_session (acl_main_t * am, int is_input, u32 sw_if_index)
613 vec_validate (am->fa_session_adds_by_sw_if_index, sw_if_index);
614 vec_validate (am->fa_session_dels_by_sw_if_index, sw_if_index);
616 am->fa_session_adds_by_sw_if_index[sw_if_index] -
617 am->fa_session_dels_by_sw_if_index[sw_if_index];
618 return (curr_sess < am->fa_conn_table_max_entries);
/*
 * Make room for a new session by dropping the session at the head of the
 * TCP-transient connection list: it is unlinked from the list and deleted.
 * NOTE(review): the list heads are indexed by timeout class only, while the
 * delete is issued against the caller's sw_if_index - confirm that the head
 * session always belongs to that interface (the session's own sw_if_index is
 * not consulted here).
 * NOTE(review): a guard on sess_id is presumably on a line not visible in
 * this view.
 */
622 acl_fa_try_recycle_session (acl_main_t * am, int is_input, u32 sw_if_index)
624 /* try to recycle a TCP transient session */
625 u8 timeout_type = ACL_TIMEOUT_TCP_TRANSIENT;
626 u32 sess_id = am->fa_conn_list_head[timeout_type];
628 acl_fa_conn_list_delete_session(am, sess_id);
629 acl_fa_delete_session(am, sw_if_index, sess_id);
/*
 * Create a session for the given 5-tuple on interface sw_if_index.
 * A pool entry is allocated, the five key words of the 5-tuple are copied
 * both into a local bihash key/value pair and into the start of the session,
 * the bookkeeping fields are reset, the interface's session table is
 * initialized on first use, the entry is added to the bihash and to the
 * connection list, and the per-interface add counter is bumped.
 * NOTE(review): the assignment of kv.value and the remaining bihash arguments
 * are not visible in this view.
 */
634 acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
635 fa_5tuple_t * p5tuple)
637 clib_bihash_kv_40_8_t *pkv = &p5tuple->kv;
638 clib_bihash_kv_40_8_t kv;
/* The session id is the index of the entry within the pool. */
642 pool_get (am->fa_sessions_pool, sess);
643 sess_id = sess - am->fa_sessions_pool;
646 kv.key[0] = pkv->key[0];
647 kv.key[1] = pkv->key[1];
648 kv.key[2] = pkv->key[2];
649 kv.key[3] = pkv->key[3];
650 kv.key[4] = pkv->key[4];
/* Only sizeof (pkv->key) bytes are copied, i.e. the key part without the value. */
653 memcpy (sess, pkv, sizeof (pkv->key));
654 sess->last_active_time = now;
655 sess->sw_if_index = sw_if_index;
656 sess->tcp_flags_seen.as_u16 = 0;
/* Not on any connection list yet. */
658 sess->link_list_id = ~0;
659 sess->link_prev_idx = ~0;
660 sess->link_next_idx = ~0;
664 if (!acl_fa_ifc_has_sessions (am, sw_if_index))
666 acl_fa_ifc_init_sessions (am, sw_if_index);
669 BV (clib_bihash_add_del) (&am->fa_sessions_by_sw_if_index[sw_if_index],
671 acl_fa_conn_list_add_session(am, sess_id);
673 vec_validate (am->fa_session_adds_by_sw_if_index, sw_if_index);
674 am->fa_session_adds_by_sw_if_index[sw_if_index]++;
/*
 * Look up the session key p5tuple->kv in the bihash of interface
 * sw_if_index0.
 * NOTE(review): the tail of the return expression is not visible in this
 * view; pvalue_sess is presumably the output key/value pair - confirm.
 */
678 acl_fa_find_session (acl_main_t * am, u32 sw_if_index0, fa_5tuple_t * p5tuple,
679 clib_bihash_kv_40_8_t * pvalue_sess)
681 return (BV (clib_bihash_search)
682 (&am->fa_sessions_by_sw_if_index[sw_if_index0], &p5tuple->kv,
/*
 * Common packet-processing routine shared by all ACL fast-path graph nodes.
 *
 * is_ip6 / is_input / is_l2_path select the flavour; the wrappers in this
 * file pass them as constants. l2_feat_next_node_index is the L2 feature
 * next-node table (the L3 wrappers pass 0) and acl_fa_node is the
 * registration whose counters and error table are used.
 *
 * Per packet, as far as visible here: the 5-tuple is extracted and turned
 * into a session key; an existing session is looked up first and, when found,
 * tracked (possibly moving it to another timeout list); otherwise the ACLs
 * are evaluated and a new session may be added, recycling a transient one
 * when the interface is at its session limit. Traced packets get an
 * acl_fa_trace_t record. Returns frame->n_vectors.
 *
 * NOTE(review): many control-flow lines are not visible in this view.
 */
688 acl_fa_node_fn (vlib_main_t * vm,
689 vlib_node_runtime_t * node, vlib_frame_t * frame, int is_ip6,
690 int is_input, int is_l2_path, u32 * l2_feat_next_node_index,
691 vlib_node_registration_t * acl_fa_node)
693 u32 n_left_from, *from, *to_next;
694 acl_fa_next_t next_index;
/* Per-frame counters, flushed to the node counters after the loop. */
695 u32 pkts_acl_checked = 0;
696 u32 pkts_new_session = 0;
697 u32 pkts_exist_session = 0;
698 u32 pkts_acl_permit = 0;
699 u32 pkts_restart_session_timer = 0;
/* NOTE(review): declared once per frame and OR-ed into below; no per-packet reset is visible in this view. */
700 u32 trace_bitmap = 0;
702 acl_main_t *am = &acl_main;
703 fa_5tuple_t fa_5tuple, kv_sess;
704 clib_bihash_kv_40_8_t value_sess;
705 vlib_node_runtime_t *error_node;
/* One timestamp is taken per frame and used for every packet in it. */
706 u64 now = clib_cpu_time_now ();
708 from = vlib_frame_vector_args (frame);
709 n_left_from = frame->n_vectors;
710 next_index = node->cached_next_index;
712 error_node = vlib_node_get_runtime (vm, acl_fa_node->index);
714 while (n_left_from > 0)
718 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
720 while (n_left_from > 0 && n_left_to_next > 0)
727 int acl_check_needed = 1;
728 u32 match_acl_in_index = ~0;
729 u32 match_rule_index = ~0;
732 /* speculatively enqueue b0 to the current next frame */
740 b0 = vlib_get_buffer (vm, bi0);
/* The interface index comes from the RX or the TX slot; the selecting condition is not visible here. */
743 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
745 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
747 feature_bitmap0 = vnet_buffer (b0)->l2.feature_bitmap;
750 * Extract the L3/L4 matching info into a 5-tuple structure,
751 * then create a session key whose layout is independent on forward or reverse
752 * direction of the packet.
755 acl_fill_5tuple (am, b0, is_ip6, is_input, is_l2_path, &fa_5tuple);
756 acl_make_5tuple_session_key (is_input, &fa_5tuple, &kv_sess);
757 #ifdef FA_NODE_VERBOSE_DEBUG
759 ("ACL_FA_NODE_DBG: session 5-tuple %016llx %016llx %016llx %016llx %016llx : %016llx",
760 kv_sess.kv.key[0], kv_sess.kv.key[1], kv_sess.kv.key[2],
761 kv_sess.kv.key[3], kv_sess.kv.key[4], kv_sess.kv.value);
763 ("ACL_FA_NODE_DBG: packet 5-tuple %016llx %016llx %016llx %016llx %016llx : %016llx",
764 fa_5tuple.kv.key[0], fa_5tuple.kv.key[1], fa_5tuple.kv.key[2],
765 fa_5tuple.kv.key[3], fa_5tuple.kv.key[4], fa_5tuple.kv.value);
768 /* Try to match an existing session first */
770 if (acl_fa_ifc_has_sessions (am, sw_if_index0))
772 if (acl_fa_find_session
773 (am, sw_if_index0, &kv_sess, &value_sess))
/* Session hit: flagged in the top bit of the trace bitmap. */
775 trace_bitmap |= 0x80000000;
776 error0 = ACL_FA_ERROR_ACL_EXIST_SESSION;
777 // FIXME assert(value_sess.value == (0xffffffff & value_sess.value));
/* The bihash value is used as the index of the session in the pool. */
778 u32 sess_id = value_sess.value;
779 fa_session_t *sess = am->fa_sessions_pool + sess_id;
780 int old_timeout_type =
781 fa_session_get_timeout_type (am, sess);
783 acl_fa_track_session (am, is_input, sw_if_index0, now,
785 /* expose the session id to the tracer */
786 match_rule_index = sess_id;
787 int new_timeout_type =
788 fa_session_get_timeout_type (am, sess);
789 acl_check_needed = 0;
790 pkts_exist_session += 1;
791 /* Tracking might have changed the session timeout type, e.g. from transient to established */
792 if (PREDICT_FALSE (old_timeout_type != new_timeout_type))
794 acl_fa_restart_timer_for_session (am, now, sess_id);
795 pkts_restart_session_timer++;
/* Value combining a marker bit with the old and new timeout classes; its destination is on a line not visible here. */
797 0x00010000 + ((0xff & old_timeout_type) << 8) +
798 (0xff & new_timeout_type);
/* No session hit: evaluate the ACLs applied to the interface. */
803 if (acl_check_needed)
806 full_acl_match_5tuple (sw_if_index0, &fa_5tuple, is_l2_path,
807 is_ip6, is_input, &match_acl_in_index,
808 &match_rule_index, &trace_bitmap);
811 pkts_acl_permit += 1;
/* At the session limit, try to free a slot before re-checking whether a session can be added. */
814 if (!acl_fa_can_add_session (am, is_input, sw_if_index0))
815 acl_fa_try_recycle_session (am, is_input, sw_if_index0);
817 if (acl_fa_can_add_session (am, is_input, sw_if_index0))
819 acl_fa_add_session (am, is_input, sw_if_index0, now,
821 pkts_new_session += 1;
826 error0 = ACL_FA_ERROR_ACL_TOO_MANY_SESSIONS;
/* Next node: L2 feature table or the feature arc; the selecting condition is not visible here. */
837 feat_bitmap_get_next_node_index (l2_feat_next_node_index,
840 vnet_feature_next (sw_if_index0, &next0, b0);
843 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
844 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
846 acl_fa_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
847 t->sw_if_index = sw_if_index0;
848 t->next_index = next0;
849 t->match_acl_in_index = match_acl_in_index;
850 t->match_rule_index = match_rule_index;
/* The trace keeps the packet 5-tuple (not the session key): five key words plus the value word. */
851 t->packet_info[0] = fa_5tuple.kv.key[0];
852 t->packet_info[1] = fa_5tuple.kv.key[1];
853 t->packet_info[2] = fa_5tuple.kv.key[2];
854 t->packet_info[3] = fa_5tuple.kv.key[3];
855 t->packet_info[4] = fa_5tuple.kv.key[4];
856 t->packet_info[5] = fa_5tuple.kv.value;
858 t->trace_bitmap = trace_bitmap;
/* A next index outside the node's next-node range falls back to index 0. */
861 next0 = next0 < node->n_next_nodes ? next0 : 0;
863 b0->error = error_node->errors[error0];
865 pkts_acl_checked += 1;
867 /* verify speculative enqueue, maybe switch current next frame */
868 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
869 to_next, n_left_to_next, bi0,
873 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
/* Flush the per-frame counters into the node's counters. */
876 vlib_node_increment_counter (vm, acl_fa_node->index,
877 ACL_FA_ERROR_ACL_CHECK, pkts_acl_checked);
878 vlib_node_increment_counter (vm, acl_fa_node->index,
879 ACL_FA_ERROR_ACL_PERMIT, pkts_acl_permit);
880 vlib_node_increment_counter (vm, acl_fa_node->index,
881 ACL_FA_ERROR_ACL_NEW_SESSION,
883 vlib_node_increment_counter (vm, acl_fa_node->index,
884 ACL_FA_ERROR_ACL_EXIST_SESSION,
886 vlib_node_increment_counter (vm, acl_fa_node->index,
887 ACL_FA_ERROR_ACL_RESTART_SESSION_TIMER,
888 pkts_restart_session_timer);
889 return frame->n_vectors;
893 vlib_node_registration_t acl_in_l2_ip6_node;
895 acl_in_ip6_l2_node_fn (vlib_main_t * vm,
896 vlib_node_runtime_t * node, vlib_frame_t * frame)
898 acl_main_t *am = &acl_main;
899 return acl_fa_node_fn (vm, node, frame, 1, 1, 1,
900 am->fa_acl_in_ip6_l2_node_feat_next_node_index,
901 &acl_in_l2_ip6_node);
904 vlib_node_registration_t acl_in_l2_ip4_node;
906 acl_in_ip4_l2_node_fn (vlib_main_t * vm,
907 vlib_node_runtime_t * node, vlib_frame_t * frame)
909 acl_main_t *am = &acl_main;
910 return acl_fa_node_fn (vm, node, frame, 0, 1, 1,
911 am->fa_acl_in_ip4_l2_node_feat_next_node_index,
912 &acl_in_l2_ip4_node);
915 vlib_node_registration_t acl_out_l2_ip6_node;
917 acl_out_ip6_l2_node_fn (vlib_main_t * vm,
918 vlib_node_runtime_t * node, vlib_frame_t * frame)
920 acl_main_t *am = &acl_main;
921 return acl_fa_node_fn (vm, node, frame, 1, 0, 1,
922 am->fa_acl_out_ip6_l2_node_feat_next_node_index,
923 &acl_out_l2_ip6_node);
926 vlib_node_registration_t acl_out_l2_ip4_node;
928 acl_out_ip4_l2_node_fn (vlib_main_t * vm,
929 vlib_node_runtime_t * node, vlib_frame_t * frame)
931 acl_main_t *am = &acl_main;
932 return acl_fa_node_fn (vm, node, frame, 0, 0, 1,
933 am->fa_acl_out_ip4_l2_node_feat_next_node_index,
934 &acl_out_l2_ip4_node);
938 /**** L3 processing path nodes ****/
941 vlib_node_registration_t acl_in_fa_ip6_node;
943 acl_in_ip6_fa_node_fn (vlib_main_t * vm,
944 vlib_node_runtime_t * node, vlib_frame_t * frame)
946 return acl_fa_node_fn (vm, node, frame, 1, 1, 0, 0, &acl_in_fa_ip6_node);
949 vlib_node_registration_t acl_in_fa_ip4_node;
951 acl_in_ip4_fa_node_fn (vlib_main_t * vm,
952 vlib_node_runtime_t * node, vlib_frame_t * frame)
954 return acl_fa_node_fn (vm, node, frame, 0, 1, 0, 0, &acl_in_fa_ip4_node);
957 vlib_node_registration_t acl_out_fa_ip6_node;
959 acl_out_ip6_fa_node_fn (vlib_main_t * vm,
960 vlib_node_runtime_t * node, vlib_frame_t * frame)
962 return acl_fa_node_fn (vm, node, frame, 1, 0, 0, 0, &acl_out_fa_ip6_node);
965 vlib_node_registration_t acl_out_fa_ip4_node;
967 acl_out_ip4_fa_node_fn (vlib_main_t * vm,
968 vlib_node_runtime_t * node, vlib_frame_t * frame)
970 return acl_fa_node_fn (vm, node, frame, 0, 0, 0, 0, &acl_out_fa_ip4_node);
974 * This process performs all the connection clean up - both for idle connections,
975 * as well as receiving the signals to clean up the connections in case of sw_if_index deletion,
976 * or (maybe in the future) the connection deletion due to policy reasons.
978 * The previous iteration (l2sess) attempted to clean up the connections in small increments,
979 * in-band, but the problem it tried to preemptively address (process starvation) is yet to be seen.
981 * The approach with a single thread deleting the connections is simpler, thus we use it until
982 * there is a real starvation problem to solve.
/*
 * X-macro listing the counters of the session cleaner process.
 * Each entry is _(SYMBOL, "description"); users define _() before expanding
 * the list and undefine it afterwards.
 *
 * The last entry deliberately has no trailing line-continuation, so the
 * macro body never depends on what the next source line happens to be.
 */
#define foreach_acl_fa_cleaner_error \
_(UNKNOWN_EVENT, "unknown event received")
/* Enumerators of the cleaner counters: one ACL_FA_CLEANER_ERROR_<sym> per list entry, followed by the count sentinel. */
994 #define _(sym,str) ACL_FA_CLEANER_ERROR_##sym,
995 foreach_acl_fa_cleaner_error
997 ACL_FA_CLEANER_N_ERROR,
998 } acl_fa_cleaner_error_t;
/* Description strings of the cleaner counters, in list order; passed as .error_strings in the cleaner node registration. */
1000 static char *acl_fa_cleaner_error_strings[] = {
1001 #define _(sym,string) string,
1002 foreach_acl_fa_cleaner_error
/*
 * Delete the sessions of one interface, or of every interface when
 * sw_if_index is ~0.
 * A first pass over the pool collects the indices of matching sessions into a
 * vector, the collected sessions are then deleted, and a second pass over the
 * pool looks at matching sessions again. The result is true when `undeleted`
 * is zero.
 * NOTE(review): acl_fa_delete_session is called here without a visible
 * acl_fa_conn_list_delete_session - confirm the deleted sessions cannot stay
 * linked on a connection list.
 * NOTE(review): how *count is filled in is not visible in this view.
 */
1007 acl_fa_clean_sessions_by_sw_if_index (acl_main_t *am, u32 sw_if_index, u32 *count)
/* Pass 1: remember the pool index of every matching session. */
1015 pool_foreach(sess, am->fa_sessions_pool, ({
1016 if ( (~0 == sw_if_index) || (sw_if_index == sess->sw_if_index) )
1017 vec_add1(dv, sess-am->fa_sessions_pool);
/* Each collected session is deleted from the table of its own interface. */
1021 sess = pool_elt_at_index(am->fa_sessions_pool, *ii);
1022 acl_fa_delete_session(am, sess->sw_if_index, *ii);
/* Pass 2: look for matching sessions that are still in the pool. */
1026 pool_foreach(sess, am->fa_sessions_pool, ({
1027 if ( (~0 == sw_if_index) || (sw_if_index == sess->sw_if_index) )
1032 if (~0 == sw_if_index)
1034 /* FIXME: clean-up tables ? */
1038 /* FIXME: clean-up tables ? */
1041 return (undeleted == 0);
1045 static vlib_node_registration_t acl_fa_session_cleaner_process_node;
1048 acl_fa_conn_has_timed_out (acl_main_t *am, u64 now, u32 session_index)
1050 fa_session_t *sess = am->fa_sessions_pool + session_index;
1051 u64 sess_timeout_time =
1052 sess->last_active_time + fa_session_get_timeout (am, sess);
1053 return (sess_timeout_time < now);
/*
 * Body of the session cleaner process node.
 *
 * Each cycle, as far as visible here:
 *  - waits for an event, without a timeout when no connection list has a
 *    head, otherwise with a timeout derived from the current wait interval;
 *  - handles the event: ACL_FA_CLEANER_RESCHEDULE needs no work,
 *    ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX cleans the sessions of every
 *    interface index in event_data, anything else bumps the unknown-event
 *    counter;
 *  - takes timed-out sessions off the head of each connection list, then
 *    either re-queues a session that saw activity in the meantime or deletes
 *    it;
 *  - adapts the wait interval to the number of sessions deleted.
 *
 * NOTE(review): the outer loop, the switch statement and several branches
 * are not visible in this view.
 */
1058 acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
1061 acl_main_t *am = &acl_main;
1062 u64 now = clib_cpu_time_now ();
1063 f64 cpu_cps = vm->clib_time.clocks_per_second;
1065 /* We should call timer wheel at least twice a second */
1066 u64 max_timer_wait_interval = cpu_cps / 2;
1067 am->fa_current_cleaner_timer_wait_interval = max_timer_wait_interval;
1069 u32 *expired = NULL;
1070 uword event_type, *event_data = 0;
/* Publish this process' node index; the signalling code in this file uses it as the event destination. */
1072 am->fa_cleaner_node_index = acl_fa_session_cleaner_process_node.index;
1076 u32 count_deleted_sessions = 0;
1077 u32 count_already_deleted = 0;
1078 now = clib_cpu_time_now ();
1079 next_expire = now + am->fa_current_cleaner_timer_wait_interval;
1080 int has_pending_conns = 0;
/* Any non-empty connection list means there is something to time out. */
1082 for(tt = 0; tt < ACL_N_TIMEOUTS; tt++)
1084 if (~0 != am->fa_conn_list_head[tt])
1085 has_pending_conns = 1;
1088 /* If no pending connections then no point in timing out */
1089 if (!has_pending_conns)
1091 am->fa_cleaner_cnt_wait_without_timeout++;
1092 (void) vlib_process_wait_for_event (vm);
1093 event_type = vlib_process_get_events (vm, &event_data);
/* Wait time in seconds: clock ticks until next_expire divided by ticks per second. */
1097 f64 timeout = ((i64) next_expire - (i64) now) / cpu_cps;
1100 /* skip waiting altogether */
1105 /* Timing wheel code is happier if it is called regularly */
1108 am->fa_cleaner_cnt_wait_with_timeout++;
1109 (void) vlib_process_wait_for_event_or_clock (vm, timeout);
1110 event_type = vlib_process_get_events (vm, &event_data);
1114 now = clib_cpu_time_now ();
1120 case ACL_FA_CLEANER_RESCHEDULE:
1121 /* Nothing to do. */
1123 case ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX:
/* Every element of event_data is an interface index to clean. */
1125 uword *sw_if_index0;
1126 vec_foreach (sw_if_index0, event_data)
1128 am->fa_cleaner_cnt_delete_by_sw_index++;
1129 #ifdef FA_NODE_VERBOSE_DEBUG
1131 ("ACL_FA_NODE_CLEAN: ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX: %d",
1136 acl_fa_clean_sessions_by_sw_if_index (am, *sw_if_index0,
1138 count_deleted_sessions += count;
1139 am->fa_cleaner_cnt_delete_by_sw_index_ok += result;
1144 #ifdef FA_NODE_VERBOSE_DEBUG
1145 clib_warning ("ACL plugin connection cleaner: unknown event %u",
1148 vlib_node_increment_counter (vm,
1149 acl_fa_session_cleaner_process_node.
1151 ACL_FA_CLEANER_ERROR_UNKNOWN_EVENT, 1);
1152 am->fa_cleaner_cnt_unknown_event++;
/* Harvest from the head of each list while the head has timed out, up to twice the per-interval delete budget in total. */
1158 for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) {
1159 while((vec_len(expired) < 2*am->fa_max_deleted_sessions_per_interval)
1160 && (~0 != am->fa_conn_list_head[tt])
1161 && (acl_fa_conn_has_timed_out(am, now,
1162 am->fa_conn_list_head[tt]))) {
1163 u32 sess_id = am->fa_conn_list_head[tt];
1164 vec_add1(expired, sess_id);
1165 acl_fa_conn_list_delete_session(am, sess_id);
/* Second look at every harvested session: it may have been freed or become active again. */
1171 vec_foreach (psid, expired)
1173 u32 session_index = *psid;
1174 if (!pool_is_free_index (am->fa_sessions_pool, session_index))
1176 fa_session_t *sess = am->fa_sessions_pool + session_index;
1177 u32 sw_if_index = sess->sw_if_index;
1178 u64 sess_timeout_time =
1179 sess->last_active_time + fa_session_get_timeout (am, sess);
1180 if (now < sess_timeout_time)
1182 /* clib_warning ("ACL_FA_NODE_CLEAN: Restarting timer for session %d",
1183 (int) session_index); */
1185 /* There was activity on the session, so the idle timeout
1186 has not passed. Enqueue for another time period. */
1188 acl_fa_conn_list_add_session(am, session_index);
1190 /* FIXME: When/if moving to timer wheel,
1191 pretend we did this in the past,
1192 at last_active moment, so the timer is accurate */
1193 am->fa_cleaner_cnt_timer_restarted++;
1197 /* clib_warning ("ACL_FA_NODE_CLEAN: Deleting session %d",
1198 (int) session_index); */
1199 acl_fa_delete_session (am, sw_if_index, session_index);
1200 count_deleted_sessions++;
1205 count_already_deleted++;
/* Both vectors are emptied by resetting their length, so they can be reused on the next cycle. */
1209 _vec_len (expired) = 0;
1211 _vec_len (event_data) = 0;
1213 if (count_deleted_sessions > am->fa_max_deleted_sessions_per_interval) {
1214 /* if there were too many sessions to delete, do less waiting around next time */
1215 am->fa_current_cleaner_timer_wait_interval /= 2;
1216 } else if (count_deleted_sessions < am->fa_min_deleted_sessions_per_interval) {
1217 /* Too few deleted sessions, slowly increase the amount of sleep up to a limit */
1218 if (am->fa_current_cleaner_timer_wait_interval < max_timer_wait_interval)
1219 am->fa_current_cleaner_timer_wait_interval += cpu_cps * am->fa_cleaner_wait_time_increment;
1221 am->fa_cleaner_cnt_event_cycles++;
1222 am->fa_cleaner_cnt_deleted_sessions += count_deleted_sessions;
1223 am->fa_cleaner_cnt_already_deleted += count_already_deleted;
/*
 * Enable or disable the ACL fast-path features on interface sw_if_index for
 * one direction.
 * Input side: the "acl-plugin-in-ip4-fa" / "acl-plugin-in-ip6-fa" features on
 * the ip4-unicast / ip6-unicast arcs and the fa_in_acl_on_sw_if_index bitmap.
 * Output side: the "acl-plugin-out-ip4-fa" / "acl-plugin-out-ip6-fa" features
 * on the ip4-output / ip6-output arcs and the fa_out_acl_on_sw_if_index
 * bitmap.
 * When disabling leaves the interface with neither an input nor an output
 * ACL, the cleaner process is signalled with
 * ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX.
 * NOTE(review): the branch on is_input and the tails of the bitmap and signal
 * calls are not visible in this view.
 */
1231 acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
1233 acl_main_t *am = &acl_main;
1236 vnet_feature_enable_disable ("ip4-unicast", "acl-plugin-in-ip4-fa",
1237 sw_if_index, enable_disable, 0, 0);
1238 vnet_feature_enable_disable ("ip6-unicast", "acl-plugin-in-ip6-fa",
1239 sw_if_index, enable_disable, 0, 0);
1240 am->fa_in_acl_on_sw_if_index =
1241 clib_bitmap_set (am->fa_in_acl_on_sw_if_index, sw_if_index,
1246 vnet_feature_enable_disable ("ip4-output", "acl-plugin-out-ip4-fa",
1247 sw_if_index, enable_disable, 0, 0);
1248 vnet_feature_enable_disable ("ip6-output", "acl-plugin-out-ip6-fa",
1249 sw_if_index, enable_disable, 0, 0);
1250 am->fa_out_acl_on_sw_if_index =
1251 clib_bitmap_set (am->fa_out_acl_on_sw_if_index, sw_if_index,
/* Last ACL direction removed from the interface: ask the cleaner to act on it. */
1254 if ((!enable_disable) && (!acl_fa_ifc_has_in_acl (am, sw_if_index))
1255 && (!acl_fa_ifc_has_out_acl (am, sw_if_index)))
1257 vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
1258 ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX,
/*
 * Node registration for "acl-plugin-fa-cleaner-process": a
 * VLIB_NODE_TYPE_PROCESS node whose function is
 * acl_fa_session_cleaner_process. Its error strings come from
 * acl_fa_cleaner_error_strings.
 */
1268 VLIB_REGISTER_NODE (acl_fa_session_cleaner_process_node, static) = {
1269 .function = acl_fa_session_cleaner_process,
1270 .type = VLIB_NODE_TYPE_PROCESS,
1271 .name = "acl-plugin-fa-cleaner-process",
1272 .n_errors = ARRAY_LEN (acl_fa_cleaner_error_strings),
1273 .error_strings = acl_fa_cleaner_error_strings,
/*
 * Node registration for "acl-plugin-in-ip6-l2": an internal node whose
 * function is acl_in_ip6_l2_node_fn. Traces are rendered by
 * format_acl_fa_trace and error strings come from acl_fa_error_strings.
 */
1279 VLIB_REGISTER_NODE (acl_in_l2_ip6_node) =
1281 .function = acl_in_ip6_l2_node_fn,
1282 .name = "acl-plugin-in-ip6-l2",
1283 .vector_size = sizeof (u32),
1284 .format_trace = format_acl_fa_trace,
1285 .type = VLIB_NODE_TYPE_INTERNAL,
1286 .n_errors = ARRAY_LEN (acl_fa_error_strings),
1287 .error_strings = acl_fa_error_strings,
1288 .n_next_nodes = ACL_FA_N_NEXT,
/* NOTE(review): ACL_FA_ERROR_DROP indexes a next-node slot here despite
   its error-like name - confirm it is a next-index enumerator. */
1291 [ACL_FA_ERROR_DROP] = "error-drop",
/*
 * Node registration for "acl-plugin-in-ip4-l2": an internal node whose
 * function is acl_in_ip4_l2_node_fn. Traces are rendered by
 * format_acl_fa_trace and error strings come from acl_fa_error_strings.
 */
1295 VLIB_REGISTER_NODE (acl_in_l2_ip4_node) =
1297 .function = acl_in_ip4_l2_node_fn,
1298 .name = "acl-plugin-in-ip4-l2",
1299 .vector_size = sizeof (u32),
1300 .format_trace = format_acl_fa_trace,
1301 .type = VLIB_NODE_TYPE_INTERNAL,
1302 .n_errors = ARRAY_LEN (acl_fa_error_strings),
1303 .error_strings = acl_fa_error_strings,
1304 .n_next_nodes = ACL_FA_N_NEXT,
/* NOTE(review): ACL_FA_ERROR_DROP indexes a next-node slot here despite
   its error-like name - confirm it is a next-index enumerator. */
1307 [ACL_FA_ERROR_DROP] = "error-drop",
/*
 * Node registration for "acl-plugin-out-ip6-l2": an internal node whose
 * function is acl_out_ip6_l2_node_fn. Traces are rendered by
 * format_acl_fa_trace and error strings come from acl_fa_error_strings.
 */
1311 VLIB_REGISTER_NODE (acl_out_l2_ip6_node) =
1313 .function = acl_out_ip6_l2_node_fn,
1314 .name = "acl-plugin-out-ip6-l2",
1315 .vector_size = sizeof (u32),
1316 .format_trace = format_acl_fa_trace,
1317 .type = VLIB_NODE_TYPE_INTERNAL,
1318 .n_errors = ARRAY_LEN (acl_fa_error_strings),
1319 .error_strings = acl_fa_error_strings,
1320 .n_next_nodes = ACL_FA_N_NEXT,
/* NOTE(review): ACL_FA_ERROR_DROP indexes a next-node slot here despite
   its error-like name - confirm it is a next-index enumerator. */
1323 [ACL_FA_ERROR_DROP] = "error-drop",
/*
 * Node registration for "acl-plugin-out-ip4-l2": an internal node whose
 * function is acl_out_ip4_l2_node_fn. Traces are rendered by
 * format_acl_fa_trace and error strings come from acl_fa_error_strings.
 */
1327 VLIB_REGISTER_NODE (acl_out_l2_ip4_node) =
1329 .function = acl_out_ip4_l2_node_fn,
1330 .name = "acl-plugin-out-ip4-l2",
1331 .vector_size = sizeof (u32),
1332 .format_trace = format_acl_fa_trace,
1333 .type = VLIB_NODE_TYPE_INTERNAL,
1334 .n_errors = ARRAY_LEN (acl_fa_error_strings),
1335 .error_strings = acl_fa_error_strings,
1336 .n_next_nodes = ACL_FA_N_NEXT,
/* NOTE(review): ACL_FA_ERROR_DROP indexes a next-node slot here despite
   its error-like name - confirm it is a next-index enumerator. */
1339 [ACL_FA_ERROR_DROP] = "error-drop",
/*
 * Node registration for "acl-plugin-in-ip6-fa": an internal node whose
 * function is acl_in_ip6_fa_node_fn. Traces are rendered by
 * format_acl_fa_trace and error strings come from acl_fa_error_strings.
 * The same node name is referenced by the ip6-unicast feature entry.
 */
1344 VLIB_REGISTER_NODE (acl_in_fa_ip6_node) =
1346 .function = acl_in_ip6_fa_node_fn,
1347 .name = "acl-plugin-in-ip6-fa",
1348 .vector_size = sizeof (u32),
1349 .format_trace = format_acl_fa_trace,
1350 .type = VLIB_NODE_TYPE_INTERNAL,
1351 .n_errors = ARRAY_LEN (acl_fa_error_strings),
1352 .error_strings = acl_fa_error_strings,
1353 .n_next_nodes = ACL_FA_N_NEXT,
/* NOTE(review): ACL_FA_ERROR_DROP indexes a next-node slot here despite
   its error-like name - confirm it is a next-index enumerator. */
1356 [ACL_FA_ERROR_DROP] = "error-drop",
/*
 * Feature entry placing node "acl-plugin-in-ip6-fa" on the "ip6-unicast"
 * arc, ordered to run before "ip6-flow-classify".
 */
1360 VNET_FEATURE_INIT (acl_in_ip6_fa_feature, static) =
1362 .arc_name = "ip6-unicast",
1363 .node_name = "acl-plugin-in-ip6-fa",
1364 .runs_before = VNET_FEATURES ("ip6-flow-classify"),
/*
 * Node registration for "acl-plugin-in-ip4-fa": an internal node whose
 * function is acl_in_ip4_fa_node_fn. Traces are rendered by
 * format_acl_fa_trace and error strings come from acl_fa_error_strings.
 * The same node name is referenced by the ip4-unicast feature entry.
 */
1367 VLIB_REGISTER_NODE (acl_in_fa_ip4_node) =
1369 .function = acl_in_ip4_fa_node_fn,
1370 .name = "acl-plugin-in-ip4-fa",
1371 .vector_size = sizeof (u32),
1372 .format_trace = format_acl_fa_trace,
1373 .type = VLIB_NODE_TYPE_INTERNAL,
1374 .n_errors = ARRAY_LEN (acl_fa_error_strings),
1375 .error_strings = acl_fa_error_strings,
1376 .n_next_nodes = ACL_FA_N_NEXT,
/* NOTE(review): ACL_FA_ERROR_DROP indexes a next-node slot here despite
   its error-like name - confirm it is a next-index enumerator. */
1379 [ACL_FA_ERROR_DROP] = "error-drop",
/*
 * Feature entry placing node "acl-plugin-in-ip4-fa" on the "ip4-unicast"
 * arc, ordered to run before "ip4-flow-classify".
 */
1383 VNET_FEATURE_INIT (acl_in_ip4_fa_feature, static) =
1385 .arc_name = "ip4-unicast",
1386 .node_name = "acl-plugin-in-ip4-fa",
1387 .runs_before = VNET_FEATURES ("ip4-flow-classify"),
/*
 * Node registration for "acl-plugin-out-ip6-fa": an internal node whose
 * function is acl_out_ip6_fa_node_fn. Traces are rendered by
 * format_acl_fa_trace and error strings come from acl_fa_error_strings.
 * The same node name is referenced by the ip6-output feature entry.
 */
1391 VLIB_REGISTER_NODE (acl_out_fa_ip6_node) =
1393 .function = acl_out_ip6_fa_node_fn,
1394 .name = "acl-plugin-out-ip6-fa",
1395 .vector_size = sizeof (u32),
1396 .format_trace = format_acl_fa_trace,
1397 .type = VLIB_NODE_TYPE_INTERNAL,
1398 .n_errors = ARRAY_LEN (acl_fa_error_strings),
1399 .error_strings = acl_fa_error_strings,
1400 .n_next_nodes = ACL_FA_N_NEXT,
/* NOTE(review): ACL_FA_ERROR_DROP indexes a next-node slot here despite
   its error-like name - confirm it is a next-index enumerator. */
1403 [ACL_FA_ERROR_DROP] = "error-drop",
/*
 * Feature entry placing node "acl-plugin-out-ip6-fa" on the "ip6-output"
 * arc, ordered to run before "interface-output".
 */
1407 VNET_FEATURE_INIT (acl_out_ip6_fa_feature, static) =
1409 .arc_name = "ip6-output",
1410 .node_name = "acl-plugin-out-ip6-fa",
1411 .runs_before = VNET_FEATURES ("interface-output"),
/*
 * Node registration for "acl-plugin-out-ip4-fa": an internal node whose
 * function is acl_out_ip4_fa_node_fn. Traces are rendered by
 * format_acl_fa_trace and error strings come from acl_fa_error_strings.
 * The same node name is referenced by the ip4-output feature entry.
 */
1414 VLIB_REGISTER_NODE (acl_out_fa_ip4_node) =
1416 .function = acl_out_ip4_fa_node_fn,
1417 .name = "acl-plugin-out-ip4-fa",
1418 .vector_size = sizeof (u32),
1419 .format_trace = format_acl_fa_trace,
1420 .type = VLIB_NODE_TYPE_INTERNAL,
1421 .n_errors = ARRAY_LEN (acl_fa_error_strings),
1422 .error_strings = acl_fa_error_strings,
1423 .n_next_nodes = ACL_FA_N_NEXT,
1424 /* edit / add dispositions here */
/* NOTE(review): ACL_FA_ERROR_DROP indexes a next-node slot here despite
   its error-like name - confirm it is a next-index enumerator. */
1427 [ACL_FA_ERROR_DROP] = "error-drop",
/*
 * Feature entry placing node "acl-plugin-out-ip4-fa" on the "ip4-output"
 * arc, ordered to run before "interface-output".
 */
1431 VNET_FEATURE_INIT (acl_out_ip4_fa_feature, static) =
1433 .arc_name = "ip4-output",
1434 .node_name = "acl-plugin-out-ip4-fa",
1435 .runs_before = VNET_FEATURES ("interface-output"),