/*
 * Copyright (c) 2018 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 #ifndef included_acl_inlines_h
17 #define included_acl_inlines_h
21 #include <vlib/unix/plugin.h>
22 #include <plugins/acl/acl.h>
23 #include <plugins/acl/fa_node.h>
24 #include <plugins/acl/hash_lookup_private.h>
26 #include <plugins/acl/exported_types.h>
/*
 * Look up symbol `s` in plugin `p` with vlib_get_plugin_symbol() and store
 * the result in `st`.
 *
 * NOTE: when the lookup yields NULL the expansion executes a `return` of a
 * clib_error_return() value out of the function that uses the macro, so
 * that function must be able to return such a value (as
 * acl_plugin_exports_init does).
 */
#define LOAD_SYMBOL_FROM_PLUGIN_TO(p, s, st)                              \
({                                                                        \
    (st) = vlib_get_plugin_symbol ((p), #s);                              \
    if (0 == (st))                                                        \
        return clib_error_return (0,                                      \
                "Plugin %s and/or symbol %s not found.", (p), #s);        \
})

/* Load symbol `s` from "acl_plugin.so" into a variable that is also named `s`. */
#define LOAD_SYMBOL(s) LOAD_SYMBOL_FROM_PLUGIN_TO("acl_plugin.so", s, s)
39 static inline clib_error_t * acl_plugin_exports_init (acl_plugin_methods_t *m)
41 acl_plugin_methods_vtable_init_fn_t mvi;
43 LOAD_SYMBOL_FROM_PLUGIN_TO("acl_plugin.so", acl_plugin_methods_vtable_init, mvi);
48 get_ptr_to_offset (vlib_buffer_t * b0, int offset)
50 u8 *p = vlib_buffer_get_current (b0) + offset;
55 offset_within_packet (vlib_buffer_t * b0, int offset)
57 /* For the purposes of this code, "within" means we have at least 8 bytes after it */
58 return (offset <= (b0->current_length - 8));
63 acl_fill_5tuple_l3_data (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
64 int l3_offset, fa_5tuple_t * p5tuple_pkt)
68 clib_memcpy (&p5tuple_pkt->ip6_addr,
69 get_ptr_to_offset (b0,
70 offsetof (ip6_header_t,
71 src_address) + l3_offset),
72 sizeof (p5tuple_pkt->ip6_addr));
76 memset(p5tuple_pkt->l3_zero_pad, 0, sizeof(p5tuple_pkt->l3_zero_pad));
77 clib_memcpy (&p5tuple_pkt->ip4_addr,
78 get_ptr_to_offset (b0,
79 offsetof (ip4_header_t,
80 src_address) + l3_offset),
81 sizeof (p5tuple_pkt->ip4_addr));
86 acl_fill_5tuple_l4_and_pkt_data (acl_main_t * am, u32 sw_if_index0, vlib_buffer_t * b0, int is_ip6, int is_input,
87 int l3_offset, fa_session_l4_key_t *p5tuple_l4, fa_packet_info_t *p5tuple_pkt)
89 /* IP4 and IP6 protocol numbers of ICMP */
90 static u8 icmp_protos_v4v6[] = { IP_PROTOCOL_ICMP, IP_PROTOCOL_ICMP6 };
96 fa_session_l4_key_t tmp_l4 = { .lsb_of_sw_if_index = sw_if_index0 & 0xffff };
97 fa_packet_info_t tmp_pkt = { .is_ip6 = is_ip6, .mask_type_index_lsb = ~0 };
102 *(u8 *) get_ptr_to_offset (b0,
103 offsetof (ip6_header_t,
104 protocol) + l3_offset);
105 l4_offset = l3_offset + sizeof (ip6_header_t);
106 #ifdef FA_NODE_VERBOSE_DEBUG
107 clib_warning ("ACL_FA_NODE_DBG: proto: %d, l4_offset: %d", proto,
110 /* IP6 EH handling is here, increment l4_offset if needs to, update the proto */
111 int need_skip_eh = clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto);
112 if (PREDICT_FALSE (need_skip_eh))
114 while (need_skip_eh && offset_within_packet (b0, l4_offset))
116 /* Fragment header needs special handling */
117 if (PREDICT_FALSE(ACL_EH_FRAGMENT == proto))
119 proto = *(u8 *) get_ptr_to_offset (b0, l4_offset);
121 clib_memcpy (&frag_offset, get_ptr_to_offset (b0, 2 + l4_offset), sizeof(frag_offset));
122 frag_offset = clib_net_to_host_u16(frag_offset) >> 3;
125 tmp_pkt.is_nonfirst_fragment = 1;
126 /* invalidate L4 offset so we don't try to find L4 info */
127 l4_offset += b0->current_length;
131 /* First fragment: skip the frag header and move on. */
137 u8 nwords = *(u8 *) get_ptr_to_offset (b0, 1 + l4_offset);
138 proto = *(u8 *) get_ptr_to_offset (b0, l4_offset);
139 l4_offset += 8 * (1 + (u16) nwords);
141 #ifdef FA_NODE_VERBOSE_DEBUG
142 clib_warning ("ACL_FA_NODE_DBG: new proto: %d, new offset: %d",
146 clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto);
153 *(u8 *) get_ptr_to_offset (b0,
154 offsetof (ip4_header_t,
155 protocol) + l3_offset);
156 l4_offset = l3_offset + sizeof (ip4_header_t);
157 u16 flags_and_fragment_offset;
158 clib_memcpy (&flags_and_fragment_offset,
159 get_ptr_to_offset (b0,
160 offsetof (ip4_header_t,
161 flags_and_fragment_offset)) + l3_offset,
162 sizeof(flags_and_fragment_offset));
163 flags_and_fragment_offset = clib_net_to_host_u16 (flags_and_fragment_offset);
165 /* non-initial fragments have non-zero offset */
166 if ((PREDICT_FALSE(0xfff & flags_and_fragment_offset)))
168 tmp_pkt.is_nonfirst_fragment = 1;
169 /* invalidate L4 offset so we don't try to find L4 info */
170 l4_offset += b0->current_length;
174 tmp_l4.proto = proto;
175 tmp_l4.is_input = is_input;
177 if (PREDICT_TRUE (offset_within_packet (b0, l4_offset)))
179 tmp_pkt.l4_valid = 1;
180 if (icmp_protos_v4v6[is_ip6] == proto)
184 *(u8 *) get_ptr_to_offset (b0,
185 l4_offset + offsetof (icmp46_header_t,
189 *(u8 *) get_ptr_to_offset (b0,
190 l4_offset + offsetof (icmp46_header_t,
192 tmp_l4.is_slowpath = 1;
194 else if ((IP_PROTOCOL_TCP == proto) || (IP_PROTOCOL_UDP == proto))
197 get_ptr_to_offset (b0,
198 l4_offset + offsetof (tcp_header_t,
201 tmp_l4.port[0] = clib_net_to_host_u16 (ports[0]);
202 tmp_l4.port[1] = clib_net_to_host_u16 (ports[1]);
205 *(u8 *) get_ptr_to_offset (b0,
206 l4_offset + offsetof (tcp_header_t,
208 tmp_pkt.tcp_flags_valid = (proto == IP_PROTOCOL_TCP);
209 tmp_l4.is_slowpath = 0;
213 tmp_l4.is_slowpath = 1;
217 p5tuple_pkt->as_u64 = tmp_pkt.as_u64;
218 p5tuple_l4->as_u64 = tmp_l4.as_u64;
222 acl_fill_5tuple (acl_main_t * am, u32 sw_if_index0, vlib_buffer_t * b0, int is_ip6,
223 int is_input, int is_l2_path, fa_5tuple_t * p5tuple_pkt)
229 l3_offset = ethernet_buffer_header_size(b0);
236 l3_offset = vnet_buffer(b0)->ip.save_rewrite_length;
239 /* key[0..3] contains src/dst address and is cleared/set below */
240 /* Remainder of the key and per-packet non-key data */
241 acl_fill_5tuple_l3_data(am, b0, is_ip6, l3_offset, p5tuple_pkt);
242 acl_fill_5tuple_l4_and_pkt_data(am, sw_if_index0, b0, is_ip6, is_input, l3_offset, &p5tuple_pkt->l4, &p5tuple_pkt->pkt);
246 acl_plugin_fill_5tuple_inline (void *p_acl_main, u32 lc_index, vlib_buffer_t * b0, int is_ip6,
247 int is_input, int is_l2_path, fa_5tuple_opaque_t * p5tuple_pkt)
249 acl_main_t *am = p_acl_main;
250 acl_fill_5tuple(am, 0, b0, is_ip6, is_input, is_l2_path, (fa_5tuple_t *)p5tuple_pkt);
256 fa_acl_match_ip4_addr (ip4_address_t * addr1, ip4_address_t * addr2,
261 /* match any always succeeds */
264 uint32_t a1 = clib_net_to_host_u32 (addr1->as_u32);
265 uint32_t a2 = clib_net_to_host_u32 (addr2->as_u32);
266 uint32_t mask0 = 0xffffffff - ((1 << (32 - prefixlen)) - 1);
267 return (a1 & mask0) == a2;
271 fa_acl_match_ip6_addr (ip6_address_t * addr1, ip6_address_t * addr2,
276 /* match any always succeeds */
279 if (memcmp (addr1, addr2, prefixlen / 8))
281 /* If the starting full bytes do not match, no point in bittwidling the thumbs further */
286 u8 b1 = *((u8 *) addr1 + 1 + prefixlen / 8);
287 u8 b2 = *((u8 *) addr2 + 1 + prefixlen / 8);
288 u8 mask0 = (0xff - ((1 << (8 - (prefixlen % 8))) - 1));
289 return (b1 & mask0) == b2;
293 /* The prefix fits into integer number of bytes, so nothing left to do */
299 fa_acl_match_port (u16 port, u16 port_first, u16 port_last, int is_ip6)
301 return ((port >= port_first) && (port <= port_last));
305 single_acl_match_5tuple (acl_main_t * am, u32 acl_index, fa_5tuple_t * pkt_5tuple,
306 int is_ip6, u8 * r_action, u32 * r_acl_match_p,
307 u32 * r_rule_match_p, u32 * trace_bitmap)
313 if (pool_is_free_index (am->acls, acl_index))
316 *r_acl_match_p = acl_index;
318 *r_rule_match_p = -1;
319 /* the ACL does not exist but is used for policy. Block traffic. */
322 a = am->acls + acl_index;
323 for (i = 0; i < a->count; i++)
326 if (is_ip6 != r->is_ipv6)
331 if (!fa_acl_match_ip6_addr
332 (&pkt_5tuple->ip6_addr[1], &r->dst.ip6, r->dst_prefixlen))
334 if (!fa_acl_match_ip6_addr
335 (&pkt_5tuple->ip6_addr[0], &r->src.ip6, r->src_prefixlen))
338 if (!fa_acl_match_ip4_addr
339 (&pkt_5tuple->ip4_addr[1], &r->dst.ip4, r->dst_prefixlen))
341 if (!fa_acl_match_ip4_addr
342 (&pkt_5tuple->ip4_addr[0], &r->src.ip4, r->src_prefixlen))
348 if (pkt_5tuple->l4.proto != r->proto)
351 if (PREDICT_FALSE (pkt_5tuple->pkt.is_nonfirst_fragment &&
352 am->l4_match_nonfirst_fragment))
354 /* non-initial fragment with frag match configured - match this rule */
355 *trace_bitmap |= 0x80000000;
356 *r_action = r->is_permit;
358 *r_acl_match_p = acl_index;
364 /* A sanity check just to ensure we are about to match the ports extracted from the packet */
365 if (PREDICT_FALSE (!pkt_5tuple->pkt.l4_valid))
368 #ifdef FA_NODE_VERBOSE_DEBUG
370 ("ACL_FA_NODE_DBG acl %d rule %d pkt proto %d match rule %d",
371 acl_index, i, pkt_5tuple->l4.proto, r->proto);
374 if (!fa_acl_match_port
375 (pkt_5tuple->l4.port[0], r->src_port_or_type_first,
376 r->src_port_or_type_last, is_ip6))
379 #ifdef FA_NODE_VERBOSE_DEBUG
381 ("ACL_FA_NODE_DBG acl %d rule %d pkt sport %d match rule [%d..%d]",
382 acl_index, i, pkt_5tuple->l4.port[0], r->src_port_or_type_first,
383 r->src_port_or_type_last);
386 if (!fa_acl_match_port
387 (pkt_5tuple->l4.port[1], r->dst_port_or_code_first,
388 r->dst_port_or_code_last, is_ip6))
391 #ifdef FA_NODE_VERBOSE_DEBUG
393 ("ACL_FA_NODE_DBG acl %d rule %d pkt dport %d match rule [%d..%d]",
394 acl_index, i, pkt_5tuple->l4.port[1], r->dst_port_or_code_first,
395 r->dst_port_or_code_last);
397 if (pkt_5tuple->pkt.tcp_flags_valid
398 && ((pkt_5tuple->pkt.tcp_flags & r->tcp_flags_mask) !=
402 /* everything matches! */
403 #ifdef FA_NODE_VERBOSE_DEBUG
404 clib_warning ("ACL_FA_NODE_DBG acl %d rule %d FULL-MATCH, action %d",
405 acl_index, i, r->is_permit);
407 *r_action = r->is_permit;
409 *r_acl_match_p = acl_index;
418 acl_plugin_single_acl_match_5tuple (void *p_acl_main, u32 acl_index, fa_5tuple_t * pkt_5tuple,
419 int is_ip6, u8 * r_action, u32 * r_acl_match_p,
420 u32 * r_rule_match_p, u32 * trace_bitmap)
422 acl_main_t * am = p_acl_main;
423 return single_acl_match_5tuple(am, acl_index, pkt_5tuple, is_ip6, r_action,
424 r_acl_match_p, r_rule_match_p, trace_bitmap);
428 linear_multi_acl_match_5tuple (void *p_acl_main, u32 lc_index, fa_5tuple_t * pkt_5tuple,
429 int is_ip6, u8 *r_action, u32 *acl_pos_p, u32 * acl_match_p,
430 u32 * rule_match_p, u32 * trace_bitmap)
432 acl_main_t *am = p_acl_main;
436 acl_lookup_context_t *acontext = pool_elt_at_index(am->acl_lookup_contexts, lc_index);
438 acl_vector = acontext->acl_indices;
440 for (i = 0; i < vec_len (acl_vector); i++)
442 #ifdef FA_NODE_VERBOSE_DEBUG
443 clib_warning ("ACL_FA_NODE_DBG: Trying to match ACL: %d",
446 if (single_acl_match_5tuple
447 (am, acl_vector[i], pkt_5tuple, is_ip6, &action,
448 acl_match_p, rule_match_p, trace_bitmap))
455 if (vec_len (acl_vector) > 0)
459 #ifdef FA_NODE_VERBOSE_DEBUG
460 clib_warning ("ACL_FA_NODE_DBG: No ACL on lc_index %d", lc_index);
462 /* If there are no ACLs defined we should not be here. */
469 * This returns true if there is indeed a match on the portranges.
470 * With all these levels of indirections, this is not going to be very fast,
471 * so, best use the individual ports or wildcard ports for performance.
474 match_portranges(acl_main_t *am, fa_5tuple_t *match, u32 index)
477 applied_hash_ace_entry_t **applied_hash_aces = vec_elt_at_index(am->hash_entry_vec_by_lc_index, match->pkt.lc_index);
478 applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), index);
480 acl_rule_t *r = &(am->acls[pae->acl_index].rules[pae->ace_index]);
482 #ifdef FA_NODE_VERBOSE_DEBUG
483 clib_warning("PORTMATCH: %d <= %d <= %d && %d <= %d <= %d ?",
484 r->src_port_or_type_first, match->l4.port[0], r->src_port_or_type_last,
485 r->dst_port_or_code_first, match->l4.port[1], r->dst_port_or_code_last);
488 return ( ((r->src_port_or_type_first <= match->l4.port[0]) && r->src_port_or_type_last >= match->l4.port[0]) &&
489 ((r->dst_port_or_code_first <= match->l4.port[1]) && r->dst_port_or_code_last >= match->l4.port[1]) );
493 single_rule_match_5tuple (acl_rule_t * r, int is_ip6, fa_5tuple_t * pkt_5tuple)
495 if (is_ip6 != r->is_ipv6)
502 if (!fa_acl_match_ip6_addr
503 (&pkt_5tuple->ip6_addr[1], &r->dst.ip6, r->dst_prefixlen))
505 if (!fa_acl_match_ip6_addr
506 (&pkt_5tuple->ip6_addr[0], &r->src.ip6, r->src_prefixlen))
511 if (!fa_acl_match_ip4_addr
512 (&pkt_5tuple->ip4_addr[1], &r->dst.ip4, r->dst_prefixlen))
514 if (!fa_acl_match_ip4_addr
515 (&pkt_5tuple->ip4_addr[0], &r->src.ip4, r->src_prefixlen))
521 if (pkt_5tuple->l4.proto != r->proto)
524 /* A sanity check just to ensure we are about to match the ports extracted from the packet */
525 if (PREDICT_FALSE (!pkt_5tuple->pkt.l4_valid))
529 if (!fa_acl_match_port
530 (pkt_5tuple->l4.port[0], r->src_port_or_type_first,
531 r->src_port_or_type_last, pkt_5tuple->pkt.is_ip6))
535 if (!fa_acl_match_port
536 (pkt_5tuple->l4.port[1], r->dst_port_or_code_first,
537 r->dst_port_or_code_last, pkt_5tuple->pkt.is_ip6))
540 if (pkt_5tuple->pkt.tcp_flags_valid
541 && ((pkt_5tuple->pkt.tcp_flags & r->tcp_flags_mask) !=
545 /* everything matches! */
550 multi_acl_match_get_applied_ace_index (acl_main_t * am, int is_ip6, fa_5tuple_t * match)
552 clib_bihash_kv_48_8_t kv;
553 clib_bihash_kv_48_8_t result;
554 fa_5tuple_t *kv_key = (fa_5tuple_t *) kv.key;
555 hash_acl_lookup_value_t *result_val =
556 (hash_acl_lookup_value_t *) & result.value;
557 u64 *pmatch = (u64 *) match;
560 int mask_type_index, order_index;
561 u32 curr_match_index = (~0 - 1);
565 u32 lc_index = match->pkt.lc_index;
566 applied_hash_ace_entry_t **applied_hash_aces =
567 vec_elt_at_index (am->hash_entry_vec_by_lc_index, lc_index);
569 hash_applied_mask_info_t **hash_applied_mask_info_vec =
570 vec_elt_at_index (am->hash_applied_mask_info_vec_by_lc_index, lc_index);
572 hash_applied_mask_info_t *minfo;
574 DBG ("TRYING TO MATCH: %016llx %016llx %016llx %016llx %016llx %016llx",
575 pmatch[0], pmatch[1], pmatch[2], pmatch[3], pmatch[4], pmatch[5]);
577 for (order_index = 0; order_index < vec_len ((*hash_applied_mask_info_vec));
580 minfo = vec_elt_at_index ((*hash_applied_mask_info_vec), order_index);
581 if (minfo->first_rule_index > curr_match_index)
583 /* Index in this and following (by construction) partitions are greater than our candidate, Avoid trying to match! */
587 mask_type_index = minfo->mask_type_index;
588 ace_mask_type_entry_t *mte =
589 vec_elt_at_index (am->ace_mask_type_pool, mask_type_index);
590 pmatch = (u64 *) match;
591 pmask = (u64 *) & mte->mask;
592 pkey = (u64 *) kv.key;
594 * unrolling the below loop results in a noticeable performance increase.
597 kv.key[i] = pmatch[i] & pmask[i];
601 *pkey++ = *pmatch++ & *pmask++;
602 *pkey++ = *pmatch++ & *pmask++;
603 *pkey++ = *pmatch++ & *pmask++;
604 *pkey++ = *pmatch++ & *pmask++;
605 *pkey++ = *pmatch++ & *pmask++;
606 *pkey++ = *pmatch++ & *pmask++;
609 * The use of temporary variable convinces the compiler
610 * to make a u64 write, avoiding the stall on crc32 operation
613 fa_packet_info_t tmp_pkt = kv_key->pkt;
614 tmp_pkt.mask_type_index_lsb = mask_type_index;
615 kv_key->pkt.as_u64 = tmp_pkt.as_u64;
618 clib_bihash_search_inline_2_48_8 (&am->acl_lookup_hash, &kv, &result);
622 /* There is a hit in the hash, so check the collision vector */
623 u32 curr_index = result_val->applied_entry_index;
624 applied_hash_ace_entry_t *pae =
625 vec_elt_at_index ((*applied_hash_aces), curr_index);
626 collision_match_rule_t *crs = pae->colliding_rules;
628 for (i = 0; i < vec_len (crs); i++)
630 if (crs[i].applied_entry_index >= curr_match_index)
634 if (single_rule_match_5tuple (&crs[i].rule, is_ip6, match))
636 curr_match_index = crs[i].applied_entry_index;
641 DBG ("MATCH-RESULT: %d", curr_match_index);
642 return curr_match_index;
646 hash_multi_acl_match_5tuple (void *p_acl_main, u32 lc_index, fa_5tuple_t * pkt_5tuple,
647 int is_ip6, u8 *action, u32 *acl_pos_p, u32 * acl_match_p,
648 u32 * rule_match_p, u32 * trace_bitmap)
650 acl_main_t *am = p_acl_main;
651 applied_hash_ace_entry_t **applied_hash_aces = vec_elt_at_index(am->hash_entry_vec_by_lc_index, lc_index);
652 u32 match_index = multi_acl_match_get_applied_ace_index(am, is_ip6, pkt_5tuple);
653 if (match_index < vec_len((*applied_hash_aces))) {
654 applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), match_index);
656 *acl_pos_p = pae->acl_position;
657 *acl_match_p = pae->acl_index;
658 *rule_match_p = pae->ace_index;
659 *action = pae->action;
668 acl_plugin_match_5tuple_inline (void *p_acl_main, u32 lc_index,
669 fa_5tuple_opaque_t * pkt_5tuple,
670 int is_ip6, u8 * r_action,
673 u32 * r_rule_match_p,
676 acl_main_t *am = p_acl_main;
677 fa_5tuple_t * pkt_5tuple_internal = (fa_5tuple_t *)pkt_5tuple;
678 pkt_5tuple_internal->pkt.lc_index = lc_index;
679 if (PREDICT_TRUE(am->use_hash_acl_matching)) {
680 if (PREDICT_FALSE(pkt_5tuple_internal->pkt.is_nonfirst_fragment)) {
682 * tuplemerge does not take fragments into account,
683 * and in general making fragments first class citizens has
684 * proved more overhead than it's worth - so just fall back to linear
685 * matching in that case.
687 return linear_multi_acl_match_5tuple(p_acl_main, lc_index, pkt_5tuple_internal, is_ip6, r_action,
688 r_acl_pos_p, r_acl_match_p, r_rule_match_p, trace_bitmap);
690 return hash_multi_acl_match_5tuple(p_acl_main, lc_index, pkt_5tuple_internal, is_ip6, r_action,
691 r_acl_pos_p, r_acl_match_p, r_rule_match_p, trace_bitmap);
694 return linear_multi_acl_match_5tuple(p_acl_main, lc_index, pkt_5tuple_internal, is_ip6, r_action,
695 r_acl_pos_p, r_acl_match_p, r_rule_match_p, trace_bitmap);