2 * Copyright (c) 2018 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #ifndef included_acl_inlines_h
17 #define included_acl_inlines_h
21 #include <vlib/unix/plugin.h>
22 #include <plugins/acl/acl.h>
23 #include <plugins/acl/fa_node.h>
24 #include <plugins/acl/hash_lookup_private.h>
26 #include <plugins/acl/exported_types.h>
/*
 * LOAD_SYMBOL_FROM_PLUGIN_TO(p, s, st): look up the symbol whose name is the
 * stringified `s` in plugin `p` with vlib_get_plugin_symbol() and assign the
 * result to `st`. The expansion contains a `return clib_error_return (...)`,
 * so it can only be used inside a function that returns a clib error pointer.
 * NOTE(review): the condition guarding that return is not visible in this
 * excerpt; judging by the message it fires when the lookup fails - confirm.
 *
 * LOAD_SYMBOL(s): shorthand that resolves `s` from "acl_plugin.so" into a
 * variable that is itself named `s`.
 */
28 #define LOAD_SYMBOL_FROM_PLUGIN_TO(p, s, st) \
30 st = vlib_get_plugin_symbol(p, #s); \
32 return clib_error_return(0, \
33 "Plugin %s and/or symbol %s not found.", p, #s); \
36 #define LOAD_SYMBOL(s) LOAD_SYMBOL_FROM_PLUGIN_TO("acl_plugin.so", s, s)
/*
 * Resolve the acl_plugin_methods_vtable_init entry point from "acl_plugin.so"
 * into the local function pointer `mvi`.
 *
 * LOAD_SYMBOL_FROM_PLUGIN_TO expands to code containing a
 * `return clib_error_return (...)`, so this function can return early with
 * the "Plugin ... not found" error straight out of the macro.
 * NOTE(review): the call through `mvi` that would populate `m`, and the
 * final return, are not visible in this excerpt - confirm against the
 * complete file.
 */
39 static inline clib_error_t * acl_plugin_exports_init (acl_plugin_methods_t *m)
41 acl_plugin_methods_vtable_init_fn_t mvi;
43 LOAD_SYMBOL_FROM_PLUGIN_TO("acl_plugin.so", acl_plugin_methods_vtable_init, mvi);
/*
 * Compute the address `offset` bytes past vlib_buffer_get_current (b0).
 * No bounds check is performed here; the IPv6 extension-header walk in this
 * file tests offset_within_packet () before dereferencing the result.
 */
48 get_ptr_to_offset (vlib_buffer_t * b0, int offset)
50 u8 *p = vlib_buffer_get_current (b0) + offset;
/*
 * True when `offset` leaves at least 8 bytes of b0's current_length after it,
 * i.e. offset <= current_length - 8.
 */
55 offset_within_packet (vlib_buffer_t * b0, int offset)
57 /* For the purposes of this code, "within" means we have at least 8 bytes after it */
58 return (offset <= (b0->current_length - 8));
/*
 * Exact negation of offset_within_packet (): true when fewer than 8 bytes of
 * b0's current_length remain after `offset`.
 */
62 offset_beyond_packet (vlib_buffer_t * b0, int offset)
64 /* For the purposes of this code, "beyond" means fewer than 8 bytes are left after the offset */
65 return (offset > (b0->current_length - 8));
/*
 * Copy the L3 source/destination addresses of the packet in b0 into
 * p5tuple_pkt. The L3 header is taken to start l3_offset bytes past
 * vlib_buffer_get_current (b0).
 *
 * IPv6: ip6_addr[0] = source, ip6_addr[1] = destination.
 * IPv4: the first six l3_zero_pad elements are cleared, then
 *       ip4_addr[0] = source, ip4_addr[1] = destination.
 *
 * NOTE(review): the test that selects between the two variants is not
 * visible in this excerpt (presumably is_ip6 - confirm). `am` is not
 * referenced by any visible line.
 */
70 acl_fill_5tuple_l3_data (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
71 int l3_offset, fa_5tuple_t * p5tuple_pkt)
75 ip6_header_t *ip6 = vlib_buffer_get_current (b0) + l3_offset;
76 p5tuple_pkt->ip6_addr[0] = ip6->src_address;
77 p5tuple_pkt->ip6_addr[1] = ip6->dst_address;
/* IPv4 variant: zero the padding before storing the two addresses */
82 for(ii=0; ii<6; ii++) {
83 p5tuple_pkt->l3_zero_pad[ii] = 0;
85 ip4_header_t *ip4 = vlib_buffer_get_current (b0) + l3_offset;
86 p5tuple_pkt->ip4_addr[0] = ip4->src_address;
87 p5tuple_pkt->ip4_addr[1] = ip4->dst_address;
/*
 * Fill the L4 part of the session key (*p5tuple_l4) and the per-packet info
 * (*p5tuple_pkt) for the packet in b0, whose L3 header starts l3_offset bytes
 * past the buffer's current data.
 *
 * Both outputs are assembled in locals (tmp_pkt / tmp_l4) and stored with a
 * single as_u64 assignment each.
 *
 * IPv6: the protocol is read from the fixed header and l4_offset starts right
 *       after it. While the protocol is set in am->fa_ipv6_known_eh_bitmap and
 *       l4_offset is still "within" the packet, the next-header byte is read
 *       and the header is stepped over. The fragment header is handled
 *       separately: its offset field decides first vs. non-first fragment.
 * IPv4: l4_offset = l3_offset + ip4_header_bytes (); a non-zero fragment
 *       offset marks a non-first fragment.
 *
 * For non-first fragments l4_offset is advanced by current_length so that the
 * later offset_within_packet () test fails and no L4 fields are read.
 *
 * When the L4 header is within the packet: ICMP/ICMP6 store type/code in
 * ports[0]/ports[1] and set FA_SK_L4_FLAG_IS_SLOWPATH; TCP stores host-order
 * ports plus the TCP flags; UDP stores host-order ports.
 *
 * NOTE(review): several branch conditions and declarations (ports, proto,
 * l4_offset, tmp_l4_flags, the .proto initialiser) are not visible in this
 * excerpt - confirm against the complete file.
 */
92 acl_fill_5tuple_l4_and_pkt_data (acl_main_t * am, u32 sw_if_index0, vlib_buffer_t * b0, int is_ip6, int is_input,
93 int l3_offset, fa_session_l4_key_t *p5tuple_l4, fa_packet_info_t *p5tuple_pkt)
95 /* IP4 and IP6 protocol numbers of ICMP */
96 static u8 icmp_protos_v4v6[] = { IP_PROTOCOL_ICMP, IP_PROTOCOL_ICMP6 };
/* start with every mask_type_index_lsb bit set; the remaining fields are zero-initialised */
103 fa_packet_info_t tmp_pkt = { .is_ip6 = is_ip6, .mask_type_index_lsb = ~0 };
107 ip6_header_t *ip6 = vlib_buffer_get_current (b0) + l3_offset;
108 proto = ip6->protocol;
110 l4_offset = l3_offset + sizeof (ip6_header_t);
112 /* IP6 EH handling is here, increment l4_offset if needs to, update the proto */
113 int need_skip_eh = clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto);
114 if (PREDICT_FALSE (need_skip_eh))
116 while (need_skip_eh && offset_within_packet (b0, l4_offset))
118 /* Fragment header needs special handling */
119 if (PREDICT_FALSE(ACL_EH_FRAGMENT == proto))
/* byte 0 of the header is the next-header value; bytes 2..3 carry the fragment offset in the upper 13 bits */
121 proto = *(u8 *) get_ptr_to_offset (b0, l4_offset);
122 u16 frag_offset = *(u16 *) get_ptr_to_offset (b0, 2 + l4_offset);
123 frag_offset = clib_net_to_host_u16(frag_offset) >> 3;
126 tmp_pkt.is_nonfirst_fragment = 1;
127 /* invalidate L4 offset so we don't try to find L4 info */
128 l4_offset += b0->current_length;
132 /* First fragment: skip the frag header and move on. */
/* other extension headers: byte 1 is read as nwords and the header is stepped over as 8 * (1 + nwords) bytes */
138 u8 nwords = *(u8 *) get_ptr_to_offset (b0, 1 + l4_offset);
139 proto = *(u8 *) get_ptr_to_offset (b0, l4_offset);
140 l4_offset += 8 * (1 + (u16) nwords);
143 clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto);
149 ip4_header_t *ip4 = vlib_buffer_get_current (b0) + l3_offset;
150 proto = ip4->protocol;
151 l4_offset = l3_offset + ip4_header_bytes(ip4);
153 /* non-initial fragments have non-zero offset */
154 if (PREDICT_FALSE(ip4_get_fragment_offset(ip4)))
156 tmp_pkt.is_nonfirst_fragment = 1;
157 /* invalidate L4 offset so we don't try to find L4 info */
158 l4_offset += b0->current_length;
/* the direction is part of the key flags */
162 tmp_l4_flags |= is_input ? FA_SK_L4_FLAG_IS_INPUT : 0;
164 if (PREDICT_TRUE (offset_within_packet (b0, l4_offset)))
/* both views point at the same L4 header; which one is used depends on proto */
166 tcp_header_t *tcph = vlib_buffer_get_current (b0) + l4_offset;
167 udp_header_t *udph = vlib_buffer_get_current (b0) + l4_offset;
168 tmp_pkt.l4_valid = 1;
/* the table is indexed by is_ip6: entry 0 is ICMP, entry 1 is ICMP6 */
170 if (PREDICT_FALSE(icmp_protos_v4v6[is_ip6] == proto))
172 icmp46_header_t *icmph = vlib_buffer_get_current (b0) + l4_offset;
173 ports[0] = icmph->type;
174 ports[1] = icmph->code;
175 /* ICMP needs special handling */
176 tmp_l4_flags |= FA_SK_L4_FLAG_IS_SLOWPATH;
178 else if (IP_PROTOCOL_TCP == proto)
180 ports[0] = clib_net_to_host_u16(tcph->src_port);
181 ports[1] = clib_net_to_host_u16(tcph->dst_port);
182 tmp_pkt.tcp_flags = tcph->flags;
183 tmp_pkt.tcp_flags_valid = 1;
185 else if (IP_PROTOCOL_UDP == proto)
187 ports[0] = clib_net_to_host_u16(udph->src_port);
188 ports[1] = clib_net_to_host_u16(udph->dst_port);
/* NOTE(review): presumably the fallback for any other protocol - the branch keyword is not visible here */
192 tmp_l4_flags |= FA_SK_L4_FLAG_IS_SLOWPATH;
196 p5tuple_pkt->as_u64 = tmp_pkt.as_u64;
/* only the low 16 bits of sw_if_index0 go into the key */
198 fa_session_l4_key_t tmp_l4 = { .port = { ports[0], ports[1] },
200 .l4_flags = tmp_l4_flags,
201 .lsb_of_sw_if_index = sw_if_index0 & 0xffff };
203 p5tuple_l4->as_u64 = tmp_l4.as_u64;
/*
 * Build the complete 5-tuple for the packet in b0: choose the L3 offset, then
 * fill the address part and the L4/per-packet part of *p5tuple_pkt.
 *
 * Two sources for l3_offset are visible: ethernet_buffer_header_size (b0)
 * and vnet_buffer (b0)->ip.save_rewrite_length.
 * NOTE(review): the conditions choosing between them (and any other value)
 * are not visible in this excerpt; presumably they depend on is_l2_path and
 * is_input - confirm against the complete file.
 */
207 acl_fill_5tuple (acl_main_t * am, u32 sw_if_index0, vlib_buffer_t * b0, int is_ip6,
208 int is_input, int is_l2_path, fa_5tuple_t * p5tuple_pkt)
214 l3_offset = ethernet_buffer_header_size(b0);
221 l3_offset = vnet_buffer(b0)->ip.save_rewrite_length;
224 /* key[0..3] contains src/dst address and is cleared/set below */
225 /* Remainder of the key and per-packet non-key data */
226 acl_fill_5tuple_l3_data(am, b0, is_ip6, l3_offset, p5tuple_pkt);
227 acl_fill_5tuple_l4_and_pkt_data(am, sw_if_index0, b0, is_ip6, is_input, l3_offset, &p5tuple_pkt->l4, &p5tuple_pkt->pkt);
/*
 * Exported wrapper around acl_fill_5tuple () that takes the ACL main struct
 * as an untyped pointer and the 5-tuple as the opaque exported type.
 *
 * The sw_if_index passed down is always 0, and lc_index is not referenced by
 * any visible line of this function.
 */
231 acl_plugin_fill_5tuple_inline (void *p_acl_main, u32 lc_index, vlib_buffer_t * b0, int is_ip6,
232 int is_input, int is_l2_path, fa_5tuple_opaque_t * p5tuple_pkt)
234 acl_main_t *am = p_acl_main;
235 acl_fill_5tuple(am, 0, b0, is_ip6, is_input, is_l2_path, (fa_5tuple_t *)p5tuple_pkt);
241 fa_acl_match_ip4_addr (ip4_address_t * addr1, ip4_address_t * addr2,
246 /* match any always succeeds */
249 uint32_t a1 = clib_net_to_host_u32 (addr1->as_u32);
250 uint32_t a2 = clib_net_to_host_u32 (addr2->as_u32);
251 uint32_t mask0 = 0xffffffff - ((1 << (32 - prefixlen)) - 1);
252 return (a1 & mask0) == a2;
256 fa_acl_match_ip6_addr (ip6_address_t * addr1, ip6_address_t * addr2,
261 /* match any always succeeds */
264 if (memcmp (addr1, addr2, prefixlen / 8))
266 /* If the starting full bytes do not match, no point in bittwidling the thumbs further */
271 u8 b1 = *((u8 *) addr1 + 1 + prefixlen / 8);
272 u8 b2 = *((u8 *) addr2 + 1 + prefixlen / 8);
273 u8 mask0 = (0xff - ((1 << (8 - (prefixlen % 8))) - 1));
274 return (b1 & mask0) == b2;
278 /* The prefix fits into integer number of bytes, so nothing left to do */
/*
 * True when port lies in the inclusive range [port_first, port_last].
 * is_ip6 is accepted but does not take part in the comparison.
 */
284 fa_acl_match_port (u16 port, u16 port_first, u16 port_last, int is_ip6)
286 return ((port >= port_first) && (port <= port_last));
/*
 * Linear match of one packet 5-tuple against the rules of a single ACL.
 *
 * The rules of am->acls[acl_index] are visited in index order. For each rule
 * the address family, destination and source prefixes, protocol, port (or
 * ICMP type/code) ranges and masked TCP flags are tested. On a full match
 * *r_action receives the rule's is_permit and *r_acl_match_p receives
 * acl_index.
 *
 * If acl_index refers to a free pool slot, *r_acl_match_p is set to acl_index
 * and *r_rule_match_p to -1 (see the "Block traffic" comment below).
 *
 * A non-first fragment whose protocol matches is accepted without any port
 * check when am->l4_match_nonfirst_fragment is set; bit 31 of *trace_bitmap
 * records that this happened.
 *
 * NOTE(review): the return statements, the `continue`s after failed tests,
 * the NULL checks on the output pointers and the store of the matching rule
 * index are not visible in this excerpt - confirm against the complete file.
 */
290 single_acl_match_5tuple (acl_main_t * am, u32 acl_index, fa_5tuple_t * pkt_5tuple,
291 int is_ip6, u8 * r_action, u32 * r_acl_match_p,
292 u32 * r_rule_match_p, u32 * trace_bitmap)
298 if (pool_is_free_index (am->acls, acl_index))
301 *r_acl_match_p = acl_index;
303 *r_rule_match_p = -1;
304 /* the ACL does not exist but is used for policy. Block traffic. */
307 a = am->acls + acl_index;
308 for (i = 0; i < a->count; i++)
/* a rule only applies to packets of its own address family */
311 if (is_ip6 != r->is_ipv6)
/* destination is tested before source; element [1] of the tuple is the destination, [0] the source */
316 if (!fa_acl_match_ip6_addr
317 (&pkt_5tuple->ip6_addr[1], &r->dst.ip6, r->dst_prefixlen))
319 if (!fa_acl_match_ip6_addr
320 (&pkt_5tuple->ip6_addr[0], &r->src.ip6, r->src_prefixlen))
323 if (!fa_acl_match_ip4_addr
324 (&pkt_5tuple->ip4_addr[1], &r->dst.ip4, r->dst_prefixlen))
326 if (!fa_acl_match_ip4_addr
327 (&pkt_5tuple->ip4_addr[0], &r->src.ip4, r->src_prefixlen))
333 if (pkt_5tuple->l4.proto != r->proto)
336 if (PREDICT_FALSE (pkt_5tuple->pkt.is_nonfirst_fragment &&
337 am->l4_match_nonfirst_fragment))
339 /* non-initial fragment with frag match configured - match this rule */
340 *trace_bitmap |= 0x80000000;
341 *r_action = r->is_permit;
343 *r_acl_match_p = acl_index;
349 /* A sanity check just to ensure we are about to match the ports extracted from the packet */
350 if (PREDICT_FALSE (!pkt_5tuple->pkt.l4_valid))
353 #ifdef FA_NODE_VERBOSE_DEBUG
355 ("ACL_FA_NODE_DBG acl %d rule %d pkt proto %d match rule %d",
356 acl_index, i, pkt_5tuple->l4.proto, r->proto);
/* port[0] is the source port (or ICMP type), port[1] the destination port (or ICMP code) */
359 if (!fa_acl_match_port
360 (pkt_5tuple->l4.port[0], r->src_port_or_type_first,
361 r->src_port_or_type_last, is_ip6))
364 #ifdef FA_NODE_VERBOSE_DEBUG
366 ("ACL_FA_NODE_DBG acl %d rule %d pkt sport %d match rule [%d..%d]",
367 acl_index, i, pkt_5tuple->l4.port[0], r->src_port_or_type_first,
368 r->src_port_or_type_last);
371 if (!fa_acl_match_port
372 (pkt_5tuple->l4.port[1], r->dst_port_or_code_first,
373 r->dst_port_or_code_last, is_ip6))
376 #ifdef FA_NODE_VERBOSE_DEBUG
378 ("ACL_FA_NODE_DBG acl %d rule %d pkt dport %d match rule [%d..%d]",
379 acl_index, i, pkt_5tuple->l4.port[1], r->dst_port_or_code_first,
380 r->dst_port_or_code_last);
/* TCP flags are only tested when the packet actually carried them */
382 if (pkt_5tuple->pkt.tcp_flags_valid
383 && ((pkt_5tuple->pkt.tcp_flags & r->tcp_flags_mask) !=
387 /* everything matches! */
388 #ifdef FA_NODE_VERBOSE_DEBUG
389 clib_warning ("ACL_FA_NODE_DBG acl %d rule %d FULL-MATCH, action %d",
390 acl_index, i, r->is_permit);
392 *r_action = r->is_permit;
394 *r_acl_match_p = acl_index;
/*
 * Exported wrapper: converts the untyped p_acl_main into acl_main_t * and
 * forwards every argument unchanged to single_acl_match_5tuple (), returning
 * its result.
 */
403 acl_plugin_single_acl_match_5tuple (void *p_acl_main, u32 acl_index, fa_5tuple_t * pkt_5tuple,
404 int is_ip6, u8 * r_action, u32 * r_acl_match_p,
405 u32 * r_rule_match_p, u32 * trace_bitmap)
407 acl_main_t * am = p_acl_main;
408 return single_acl_match_5tuple(am, acl_index, pkt_5tuple, is_ip6, r_action,
409 r_acl_match_p, r_rule_match_p, trace_bitmap);
/*
 * Linear match of a packet 5-tuple against every ACL attached to lookup
 * context lc_index.
 *
 * The ACL indices are taken from the context's acl_indices vector and tried
 * in vector order with single_acl_match_5tuple (). That call is given the
 * address of a local `action`, not r_action.
 *
 * NOTE(review): what happens on a hit (copying action to *r_action, storing
 * the position in *acl_pos_p, the return value) and the results of the two
 * trailing cases (vector non-empty but nothing matched / vector empty) are
 * not visible in this excerpt - confirm against the complete file.
 */
413 linear_multi_acl_match_5tuple (void *p_acl_main, u32 lc_index, fa_5tuple_t * pkt_5tuple,
414 int is_ip6, u8 *r_action, u32 *acl_pos_p, u32 * acl_match_p,
415 u32 * rule_match_p, u32 * trace_bitmap)
417 acl_main_t *am = p_acl_main;
421 acl_lookup_context_t *acontext = pool_elt_at_index(am->acl_lookup_contexts, lc_index);
423 acl_vector = acontext->acl_indices;
425 for (i = 0; i < vec_len (acl_vector); i++)
427 #ifdef FA_NODE_VERBOSE_DEBUG
428 clib_warning ("ACL_FA_NODE_DBG: Trying to match ACL: %d",
431 if (single_acl_match_5tuple
432 (am, acl_vector[i], pkt_5tuple, is_ip6, &action,
433 acl_match_p, rule_match_p, trace_bitmap))
/* reached after the loop: distinguishes "ACLs present" from "no ACLs at all" */
440 if (vec_len (acl_vector) > 0)
444 #ifdef FA_NODE_VERBOSE_DEBUG
445 clib_warning ("ACL_FA_NODE_DBG: No ACL on lc_index %d", lc_index);
447 /* If there are no ACLs defined we should not be here. */
454 * This returns true if there is indeed a match on the portranges.
455 * With all these levels of indirections, this is not going to be very fast,
456 * so, best use the individual ports or wildcard ports for performance.
/*
 * Check the ports of `match` against the port ranges of the rule behind
 * applied hash entry `index`.
 *
 * The entry is looked up in the applied-ACE vector of the lookup context
 * stored in match->pkt.lc_index; its acl_index / ace_index pair then selects
 * the rule in am->acls. The result is true only when both port[0] and
 * port[1] lie inside the rule's inclusive source and destination ranges.
 */
459 match_portranges(acl_main_t *am, fa_5tuple_t *match, u32 index)
462 applied_hash_ace_entry_t **applied_hash_aces = vec_elt_at_index(am->hash_entry_vec_by_lc_index, match->pkt.lc_index);
463 applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), index);
465 acl_rule_t *r = &(am->acls[pae->acl_index].rules[pae->ace_index]);
467 #ifdef FA_NODE_VERBOSE_DEBUG
468 clib_warning("PORTMATCH: %d <= %d <= %d && %d <= %d <= %d ?",
469 r->src_port_or_type_first, match->l4.port[0], r->src_port_or_type_last,
470 r->dst_port_or_code_first, match->l4.port[1], r->dst_port_or_code_last);
473 return ( ((r->src_port_or_type_first <= match->l4.port[0]) && r->src_port_or_type_last >= match->l4.port[0]) &&
474 ((r->dst_port_or_code_first <= match->l4.port[1]) && r->dst_port_or_code_last >= match->l4.port[1]) );
/*
 * Test one rule against a packet 5-tuple: address family, destination and
 * source prefixes, protocol, L4 validity, source and destination port ranges,
 * and masked TCP flags, in that order.
 *
 * Unlike single_acl_match_5tuple (), the is_ip6 argument handed to
 * fa_acl_match_port () comes from pkt_5tuple->pkt.is_ip6 rather than from the
 * is_ip6 parameter.
 *
 * NOTE(review): the return statements and the branch keywords separating the
 * IPv6 and IPv4 address checks are not visible in this excerpt; presumably
 * each failed test returns 0 and falling through returns 1 - confirm.
 */
478 single_rule_match_5tuple (acl_rule_t * r, int is_ip6, fa_5tuple_t * pkt_5tuple)
480 if (is_ip6 != r->is_ipv6)
487 if (!fa_acl_match_ip6_addr
488 (&pkt_5tuple->ip6_addr[1], &r->dst.ip6, r->dst_prefixlen))
490 if (!fa_acl_match_ip6_addr
491 (&pkt_5tuple->ip6_addr[0], &r->src.ip6, r->src_prefixlen))
496 if (!fa_acl_match_ip4_addr
497 (&pkt_5tuple->ip4_addr[1], &r->dst.ip4, r->dst_prefixlen))
499 if (!fa_acl_match_ip4_addr
500 (&pkt_5tuple->ip4_addr[0], &r->src.ip4, r->src_prefixlen))
506 if (pkt_5tuple->l4.proto != r->proto)
509 /* A sanity check just to ensure we are about to match the ports extracted from the packet */
510 if (PREDICT_FALSE (!pkt_5tuple->pkt.l4_valid))
514 if (!fa_acl_match_port
515 (pkt_5tuple->l4.port[0], r->src_port_or_type_first,
516 r->src_port_or_type_last, pkt_5tuple->pkt.is_ip6))
520 if (!fa_acl_match_port
521 (pkt_5tuple->l4.port[1], r->dst_port_or_code_first,
522 r->dst_port_or_code_last, pkt_5tuple->pkt.is_ip6))
525 if (pkt_5tuple->pkt.tcp_flags_valid
526 && ((pkt_5tuple->pkt.tcp_flags & r->tcp_flags_mask) !=
530 /* everything matches! */
/*
 * Hash-based lookup: return the lowest applied-ACE index that matches `match`
 * within the lookup context stored in match->pkt.lc_index.
 *
 * For every entry of the context's applied-mask-info vector, the 5-tuple is
 * ANDed with that mask type's mask (six u64 words) to form a bihash key, the
 * mask type index is written into the key's pkt.mask_type_index_lsb, and
 * am->acl_lookup_hash is searched. On a hit the colliding_rules vector of the
 * found entry is walked and each candidate is verified with
 * single_rule_match_5tuple (); a verified candidate replaces
 * curr_match_index.
 *
 * curr_match_index starts at (~0 - 1) and is returned unchanged when nothing
 * matches; the caller in this file treats any value not below the length of
 * the applied-ACE vector as "no match".
 *
 * NOTE(review): the statements executed when a partition or a candidate is
 * skipped (presumably break / continue) and the test of the bihash search
 * result are not visible in this excerpt - confirm against the complete file.
 */
535 multi_acl_match_get_applied_ace_index (acl_main_t * am, int is_ip6, fa_5tuple_t * match)
537 clib_bihash_kv_48_8_t kv;
538 clib_bihash_kv_48_8_t result;
539 fa_5tuple_t *kv_key = (fa_5tuple_t *) kv.key;
540 hash_acl_lookup_value_t *result_val =
541 (hash_acl_lookup_value_t *) & result.value;
542 u64 *pmatch = (u64 *) match;
545 int mask_type_index, order_index;
546 u32 curr_match_index = (~0 - 1);
550 u32 lc_index = match->pkt.lc_index;
551 applied_hash_ace_entry_t **applied_hash_aces =
552 vec_elt_at_index (am->hash_entry_vec_by_lc_index, lc_index);
554 hash_applied_mask_info_t **hash_applied_mask_info_vec =
555 vec_elt_at_index (am->hash_applied_mask_info_vec_by_lc_index, lc_index);
557 hash_applied_mask_info_t *minfo;
559 DBG ("TRYING TO MATCH: %016llx %016llx %016llx %016llx %016llx %016llx",
560 pmatch[0], pmatch[1], pmatch[2], pmatch[3], pmatch[4], pmatch[5]);
562 for (order_index = 0; order_index < vec_len ((*hash_applied_mask_info_vec));
565 minfo = vec_elt_at_index ((*hash_applied_mask_info_vec), order_index);
566 if (minfo->first_rule_index > curr_match_index)
568 /* Index in this and following (by construction) partitions are greater than our candidate, Avoid trying to match! */
572 mask_type_index = minfo->mask_type_index;
573 ace_mask_type_entry_t *mte =
574 vec_elt_at_index (am->ace_mask_type_pool, mask_type_index);
/* reset the word cursors: the unrolled copy below advances all three */
575 pmatch = (u64 *) match;
576 pmask = (u64 *) & mte->mask;
577 pkey = (u64 *) kv.key;
579 * unrolling the below loop results in a noticeable performance increase.
582 kv.key[i] = pmatch[i] & pmask[i];
/* key = match & mask, one u64 word per statement, six words in total */
586 *pkey++ = *pmatch++ & *pmask++;
587 *pkey++ = *pmatch++ & *pmask++;
588 *pkey++ = *pmatch++ & *pmask++;
589 *pkey++ = *pmatch++ & *pmask++;
590 *pkey++ = *pmatch++ & *pmask++;
591 *pkey++ = *pmatch++ & *pmask++;
594 * The use of temporary variable convinces the compiler
595 * to make a u64 write, avoiding the stall on crc32 operation
598 fa_packet_info_t tmp_pkt = kv_key->pkt;
599 tmp_pkt.mask_type_index_lsb = mask_type_index;
600 kv_key->pkt.as_u64 = tmp_pkt.as_u64;
603 clib_bihash_search_inline_2_48_8 (&am->acl_lookup_hash, &kv, &result);
607 /* There is a hit in the hash, so check the collision vector */
608 u32 curr_index = result_val->applied_entry_index;
609 applied_hash_ace_entry_t *pae =
610 vec_elt_at_index ((*applied_hash_aces), curr_index);
611 collision_match_rule_t *crs = pae->colliding_rules;
613 for (i = 0; i < vec_len (crs); i++)
/* a candidate that is not better (lower) than the current best is not evaluated */
615 if (crs[i].applied_entry_index >= curr_match_index)
619 if (single_rule_match_5tuple (&crs[i].rule, is_ip6, match))
621 curr_match_index = crs[i].applied_entry_index;
626 DBG ("MATCH-RESULT: %d", curr_match_index);
627 return curr_match_index;
/*
 * Hash-based multi-ACL match for lookup context lc_index.
 *
 * Obtains the best applied-ACE index from
 * multi_acl_match_get_applied_ace_index (); when that index is inside the
 * context's applied-ACE vector, the ACL position, ACL index, rule index and
 * action of that entry are copied to the output pointers.
 *
 * trace_bitmap is not referenced by any visible line.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably 1 on a hit and 0 otherwise - confirm.
 */
631 hash_multi_acl_match_5tuple (void *p_acl_main, u32 lc_index, fa_5tuple_t * pkt_5tuple,
632 int is_ip6, u8 *action, u32 *acl_pos_p, u32 * acl_match_p,
633 u32 * rule_match_p, u32 * trace_bitmap)
635 acl_main_t *am = p_acl_main;
636 applied_hash_ace_entry_t **applied_hash_aces = vec_elt_at_index(am->hash_entry_vec_by_lc_index, lc_index);
637 u32 match_index = multi_acl_match_get_applied_ace_index(am, is_ip6, pkt_5tuple);
/* an index at or beyond the vector length means "no match" */
638 if (match_index < vec_len((*applied_hash_aces))) {
639 applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), match_index);
641 *acl_pos_p = pae->acl_position;
642 *acl_match_p = pae->acl_index;
643 *rule_match_p = pae->ace_index;
644 *action = pae->action;
/*
 * Exported entry point: match a previously filled 5-tuple against the ACLs of
 * lookup context lc_index.
 *
 * lc_index is first written into the tuple's pkt.lc_index. When
 * am->use_hash_acl_matching is set, the hash matcher is used, except for
 * non-first fragments, which take the linear matcher. When it is not set,
 * the linear matcher is used for every packet.
 *
 * NOTE(review): the declarations of r_acl_pos_p, r_acl_match_p and
 * trace_bitmap are not visible in this excerpt; they are forwarded unchanged
 * to the selected matcher.
 */
653 acl_plugin_match_5tuple_inline (void *p_acl_main, u32 lc_index,
654 fa_5tuple_opaque_t * pkt_5tuple,
655 int is_ip6, u8 * r_action,
658 u32 * r_rule_match_p,
661 acl_main_t *am = p_acl_main;
662 fa_5tuple_t * pkt_5tuple_internal = (fa_5tuple_t *)pkt_5tuple;
663 pkt_5tuple_internal->pkt.lc_index = lc_index;
664 if (PREDICT_TRUE(am->use_hash_acl_matching)) {
665 if (PREDICT_FALSE(pkt_5tuple_internal->pkt.is_nonfirst_fragment)) {
667 * tuplemerge does not take fragments into account,
668 * and in general making fragments first class citizens has
669 * proved more overhead than it's worth - so just fall back to linear
670 * matching in that case.
672 return linear_multi_acl_match_5tuple(p_acl_main, lc_index, pkt_5tuple_internal, is_ip6, r_action,
673 r_acl_pos_p, r_acl_match_p, r_rule_match_p, trace_bitmap);
675 return hash_multi_acl_match_5tuple(p_acl_main, lc_index, pkt_5tuple_internal, is_ip6, r_action,
676 r_acl_pos_p, r_acl_match_p, r_rule_match_p, trace_bitmap);
/* hash matching disabled: always use the linear matcher */
679 return linear_multi_acl_match_5tuple(p_acl_main, lc_index, pkt_5tuple_internal, is_ip6, r_action,
680 r_acl_pos_p, r_acl_match_p, r_rule_match_p, trace_bitmap);