/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#include <math.h>
#include <string.h>

#include <rte_malloc.h>
#include <rte_memory.h>
#include <rte_errno.h>
#include <rte_log.h>

#include "rte_member.h"
#include "rte_member_vbf.h"

/*
 * The vBF is currently implemented as one big array.
 * The BFs have a vertical layout: the bits at the same location of all BFs
 * stay in the same cache line.
 * For example, if we have 32 bloom filters, we use a uint32_t array to
 * represent all of them. array[0] represents the first location of all the
 * bloom filters, array[1] represents the second location of all the
 * bloom filters, etc. The advantage of this layout is that it minimizes the
 * average number of memory accesses needed to test all bloom filters.
 *
 * Currently the implementation supports a vBF containing 1, 2, 4, 8, 16 or
 * 32 BFs.
 */
int
rte_member_create_vbf(struct rte_member_setsum *ss,
		const struct rte_member_parameters *params)
{
	if (params->num_set > RTE_MEMBER_MAX_BF ||
			!rte_is_power_of_2(params->num_set) ||
			params->num_keys == 0 ||
			params->false_positive_rate == 0 ||
			params->false_positive_rate > 1) {
		rte_errno = EINVAL;
		RTE_MEMBER_LOG(ERR, "Membership vBF create with invalid parameters\n");
		return -EINVAL;
	}

	/* We assume the expected keys are evenly distributed across all BFs */
	uint32_t num_keys_per_bf = 1 + (params->num_keys - 1) / ss->num_set;

	/*
	 * Note that the false positive rate is for the whole vBF, so the
	 * false positive rate of a single BF needs to be derived from it.
	 * Assume each BF's false positive rate is fp_one_bf. The total false
	 * positive rate is fp = 1 - (1 - fp_one_bf)^n
	 * => fp_one_bf = 1 - (1 - fp)^(1/n)
	 */
	float fp_one_bf = 1 - pow((1 - params->false_positive_rate),
					1.0 / ss->num_set);

	if (fp_one_bf == 0) {
		rte_errno = EINVAL;
		RTE_MEMBER_LOG(ERR, "Membership BF false positive rate is too small\n");
		return -EINVAL;
	}

	uint32_t bits = ceil((num_keys_per_bf * log(fp_one_bf)) /
					log(1.0 / (pow(2.0, log(2.0)))));

	/* We round to a power of 2 for lookup performance */
	ss->bits = rte_align32pow2(bits);
	ss->num_hashes = (uint32_t)(log(2.0) * bits / num_keys_per_bf);
	ss->bit_mask = ss->bits - 1;

	/*
	 * Since we round the bits up to a power of 2, the final false
	 * positive rate will probably not be the same as the user specified.
	 * We log the new value as a debug message.
	 */
	float new_fp = pow((1 - pow((1 - 1.0 / ss->bits), num_keys_per_bf *
					ss->num_hashes)), ss->num_hashes);
	new_fp = 1 - pow((1 - new_fp), ss->num_set);

	/*
	 * Reduce the number of hash functions until we approach the
	 * user-specified false-positive rate; otherwise it is too
	 * conservative.
	 */
	int tmp_num_hash = ss->num_hashes;

	while (tmp_num_hash > 1) {
		float tmp_fp = new_fp;

		tmp_num_hash--;
		new_fp = pow((1 - pow((1 - 1.0 / ss->bits), num_keys_per_bf *
					tmp_num_hash)), tmp_num_hash);
		new_fp = 1 - pow((1 - new_fp), ss->num_set);

		if (new_fp > params->false_positive_rate) {
			new_fp = tmp_fp;
			tmp_num_hash++;
			break;
		}
	}

	ss->num_hashes = tmp_num_hash;

	/*
	 * To avoid multiplication and division:
	 * mul_shift is used as the multiplication shift during the bit test,
	 * div_shift is used as the division shift, to divide by the number
	 * of bits represented by one uint32_t variable.
	 */
	ss->mul_shift = __builtin_ctzl(ss->num_set);
	ss->div_shift = __builtin_ctzl(32 >> ss->mul_shift);

	RTE_MEMBER_LOG(DEBUG, "vector bloom filter created, "
		"each bloom filter expects %u keys, needs %u bits, %u hashes, "
		"with false positive rate set as %.5f, "
		"the new calculated vBF false positive rate is %.5f\n",
		num_keys_per_bf, ss->bits, ss->num_hashes, fp_one_bf, new_fp);

	ss->table = rte_zmalloc_socket(NULL, ss->num_set * (ss->bits >> 3),
					RTE_CACHE_LINE_SIZE, ss->socket_id);
	if (ss->table == NULL)
		return -ENOMEM;

	return 0;
}
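/*
 * Worked sizing example (illustrative numbers, not taken from the code
 * above): for num_set = 32, num_keys = 100000 and
 * false_positive_rate = 0.03, we get
 * num_keys_per_bf = 1 + 99999 / 32 = 3125 and
 * fp_one_bf = 1 - (1 - 0.03)^(1/32) ~= 0.00095.
 * The per-BF size is bits = ceil(3125 * -ln(0.00095) / (ln 2)^2) ~= 45255,
 * rounded up to ss->bits = 65536 (2^16) so bit_mask works as a plain AND,
 * and the initial num_hashes = ln(2) * 45255 / 3125 ~= 10. Because the
 * rounding makes each BF larger than strictly necessary, the reduction
 * loop above then walks num_hashes back down (to roughly 4 in this
 * example) while the recomputed vBF false positive rate stays below 0.03.
 * The table occupies num_set * bits / 8 = 32 * 65536 / 8 bytes = 256 KiB.
 */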
static inline uint32_t
test_bit(uint32_t bit_loc, const struct rte_member_setsum *ss)
{
	uint32_t *vbf = ss->table;
	uint32_t n = ss->num_set;
	uint32_t div_shift = ss->div_shift;
	uint32_t mul_shift = ss->mul_shift;
	/*
	 * a is how many bits of one BF are represented by one 32-bit
	 * variable.
	 */
	uint32_t a = 32 >> mul_shift;
	/*
	 * bit_loc >> div_shift is the divide, bit_loc & (a - 1) is the mod,
	 * and & ((1 << n) - 1) extracts the bit of every BF at this location.
	 */
	return (vbf[bit_loc >> div_shift] >>
			((bit_loc & (a - 1)) << mul_shift)) & ((1ULL << n) - 1);
}

static inline void
set_bit(uint32_t bit_loc, const struct rte_member_setsum *ss, int32_t set)
{
	uint32_t *vbf = ss->table;
	uint32_t div_shift = ss->div_shift;
	uint32_t mul_shift = ss->mul_shift;
	uint32_t a = 32 >> mul_shift;

	vbf[bit_loc >> div_shift] |=
			1UL << (((bit_loc & (a - 1)) << mul_shift) + set - 1);
}
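/*
 * Layout illustration (hypothetical values, not part of the original
 * code): with num_set = 4 we get mul_shift = 2, a = 32 >> 2 = 8 and
 * div_shift = 3, so each uint32_t word packs 8 consecutive bit locations,
 * 4 BF bits per location:
 *
 *	bits 31..28   | ... | bits 7..4    | bits 3..0
 *	loc 7: s4..s1 | ... | loc 1: s4..s1 | loc 0: s4..s1
 *
 * test_bit(loc, ss) then returns a 4-bit mask; e.g. 0x5 (0b0101) means
 * the bit at this location is set in the BFs of set 1 and set 3. The
 * lookup routines below AND such masks over all num_hashes probe
 * locations, so a bit survives only for sets whose BF has every probed
 * bit set; __builtin_ctzl() on the surviving mask yields the
 * lowest-numbered matching set (set IDs are 1-based, hence the "+ 1"
 * there and the "set - 1" in set_bit()).
 */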
int
rte_member_lookup_vbf(const struct rte_member_setsum *ss, const void *key,
		member_set_t *set_id)
{
	uint32_t j;
	uint32_t h1 = MEMBER_HASH_FUNC(key, ss->key_len, ss->prim_hash_seed);
	uint32_t h2 = MEMBER_HASH_FUNC(&h1, sizeof(uint32_t),
						ss->sec_hash_seed);
	uint32_t mask = ~0;
	uint32_t bit_loc;

	/* Double hashing: the j-th probe location is (h1 + j * h2) mod bits */
	for (j = 0; j < ss->num_hashes; j++) {
		bit_loc = (h1 + j * h2) & ss->bit_mask;
		mask &= test_bit(bit_loc, ss);
	}

	if (mask) {
		*set_id = __builtin_ctzl(mask) + 1;
		return 1;
	}

	*set_id = RTE_MEMBER_NO_MATCH;
	return 0;
}

uint32_t
rte_member_lookup_bulk_vbf(const struct rte_member_setsum *ss,
		const void **keys, uint32_t num_keys, member_set_t *set_ids)
{
	uint32_t i, k;
	uint32_t num_matches = 0;
	uint32_t mask[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t h1[RTE_MEMBER_LOOKUP_BULK_MAX], h2[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t bit_loc;

	for (i = 0; i < num_keys; i++)
		h1[i] = MEMBER_HASH_FUNC(keys[i], ss->key_len,
				ss->prim_hash_seed);
	for (i = 0; i < num_keys; i++)
		h2[i] = MEMBER_HASH_FUNC(&h1[i], sizeof(uint32_t),
				ss->sec_hash_seed);
	for (i = 0; i < num_keys; i++) {
		mask[i] = ~0;
		for (k = 0; k < ss->num_hashes; k++) {
			bit_loc = (h1[i] + k * h2[i]) & ss->bit_mask;
			mask[i] &= test_bit(bit_loc, ss);
		}
	}
	for (i = 0; i < num_keys; i++) {
		if (mask[i]) {
			set_ids[i] = __builtin_ctzl(mask[i]) + 1;
			num_matches++;
		} else
			set_ids[i] = RTE_MEMBER_NO_MATCH;
	}
	return num_matches;
}

uint32_t
rte_member_lookup_multi_vbf(const struct rte_member_setsum *ss,
		const void *key, uint32_t match_per_key,
		member_set_t *set_id)
{
	uint32_t num_matches = 0;
	uint32_t j;
	uint32_t h1 = MEMBER_HASH_FUNC(key, ss->key_len, ss->prim_hash_seed);
	uint32_t h2 = MEMBER_HASH_FUNC(&h1, sizeof(uint32_t),
						ss->sec_hash_seed);
	uint32_t mask = ~0;
	uint32_t bit_loc;

	for (j = 0; j < ss->num_hashes; j++) {
		bit_loc = (h1 + j * h2) & ss->bit_mask;
		mask &= test_bit(bit_loc, ss);
	}
	/* Report matches from the lowest set bit upward */
	while (mask) {
		uint32_t loc = __builtin_ctzl(mask);
		set_id[num_matches] = loc + 1;
		num_matches++;
		if (num_matches >= match_per_key)
			return num_matches;
		mask &= ~(1UL << loc);
	}
	return num_matches;
}

uint32_t
rte_member_lookup_multi_bulk_vbf(const struct rte_member_setsum *ss,
		const void **keys, uint32_t num_keys, uint32_t match_per_key,
		uint32_t *match_count, member_set_t *set_ids)
{
	uint32_t i, k;
	uint32_t num_matches = 0;
	uint32_t match_cnt_t;
	uint32_t mask[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t h1[RTE_MEMBER_LOOKUP_BULK_MAX], h2[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t bit_loc;

	for (i = 0; i < num_keys; i++)
		h1[i] = MEMBER_HASH_FUNC(keys[i], ss->key_len,
				ss->prim_hash_seed);
	for (i = 0; i < num_keys; i++)
		h2[i] = MEMBER_HASH_FUNC(&h1[i], sizeof(uint32_t),
				ss->sec_hash_seed);
	for (i = 0; i < num_keys; i++) {
		mask[i] = ~0;
		for (k = 0; k < ss->num_hashes; k++) {
			bit_loc = (h1[i] + k * h2[i]) & ss->bit_mask;
			mask[i] &= test_bit(bit_loc, ss);
		}
	}
	for (i = 0; i < num_keys; i++) {
		match_cnt_t = 0;
		while (mask[i]) {
			uint32_t loc = __builtin_ctzl(mask[i]);
			set_ids[i * match_per_key + match_cnt_t] = loc + 1;
			match_cnt_t++;
			if (match_cnt_t >= match_per_key)
				break;
			mask[i] &= ~(1UL << loc);
		}
		match_count[i] = match_cnt_t;
		if (match_cnt_t != 0)
			num_matches++;
	}
	return num_matches;
}

int
rte_member_add_vbf(const struct rte_member_setsum *ss,
		const void *key, member_set_t set_id)
{
	uint32_t i, h1, h2;
	uint32_t bit_loc;

	if (set_id > ss->num_set || set_id == RTE_MEMBER_NO_MATCH)
		return -EINVAL;

	h1 = MEMBER_HASH_FUNC(key, ss->key_len, ss->prim_hash_seed);
	h2 = MEMBER_HASH_FUNC(&h1, sizeof(uint32_t), ss->sec_hash_seed);

	/* Set the key's bit in the BF of set_id at every probe location */
	for (i = 0; i < ss->num_hashes; i++) {
		bit_loc = (h1 + i * h2) & ss->bit_mask;
		set_bit(bit_loc, ss, set_id);
	}
	return 0;
}

void
rte_member_free_vbf(struct rte_member_setsum *ss)
{
	rte_free(ss->table);
}

void
rte_member_reset_vbf(const struct rte_member_setsum *ss)
{
	uint32_t *vbf = ss->table;

	memset(vbf, 0, (ss->num_set * ss->bits) >> 3);
}
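/*
 * Usage sketch (assumption: applications use the generic rte_member API
 * from rte_member.h, which dispatches to the _vbf functions above when
 * the set-summary type is RTE_MEMBER_TYPE_VBF; all parameter values and
 * the "example_vbf" name are illustrative):
 *
 *	struct rte_member_parameters params = {
 *		.name = "example_vbf",
 *		.type = RTE_MEMBER_TYPE_VBF,
 *		.num_set = 32,
 *		.num_keys = 100000,
 *		.key_len = 16,
 *		.false_positive_rate = 0.03,
 *		.prim_hash_seed = 1,
 *		.sec_hash_seed = 2,
 *		.socket_id = SOCKET_ID_ANY,
 *	};
 *	struct rte_member_setsum *setsum = rte_member_create(&params);
 *	uint8_t key[16] = { 0 };
 *	member_set_t set;
 *
 *	rte_member_add(setsum, key, 3);		// insert key into set 3
 *	if (rte_member_lookup(setsum, key, &set) == 1)
 *		printf("key found in set %u\n", set);
 *	rte_member_free(setsum);
 *
 * Note that unrelated keys may still report a match at roughly the
 * configured false positive rate; a vBF reports membership, it does not
 * store the keys themselves.
 */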