- CLIB_PREFETCH (&(e[1]), sizeof(lb_hash_entry_t), STORE);
- for (i=0; i<LBHASH_ENTRY_PER_BUCKET; i++) {
- CLIB_PREFETCH (&(e[i+2]), sizeof(lb_hash_entry_t), STORE); //+2 somehow performs best
- u64 cmp =
- (e[i].key[0] ^ k[0]) |
- (e[i].key[1] ^ k[1]) |
- (e[i].key[2] ^ k[2]) |
- (e[i].key[3] ^ k[3]) |
- (e[i].key[4] ^ k[4]);
-
- u8 timeouted = clib_u32_loop_gt(time_now, e[i].last_seen + h->timeout);
-
- *value = (cmp || timeouted)?*value:e[i].value;
- e[i].last_seen = (cmp || timeouted)?e[i].last_seen:time_now;
- *available_index = (timeouted && (*available_index == ~0))?(&e[i] - h->entries):*available_index;
-
- if (!cmp)
- return;
+#if __SSE4_2__ && LB_HASH_DO_NOT_USE_SSE_BUCKETS == 0
+ u32 bitmask, found_index;
+ __m128i mask;
+
+ // mask[*] = timeout[*] > now
+ mask = _mm_cmpgt_epi32(_mm_loadu_si128 ((__m128i *) bucket->timeout),
+ _mm_set1_epi32 (time_now));
+ // bitmask[*] = now <= timeout[*/4]
+ bitmask = (~_mm_movemask_epi8(mask)) & 0xffff;
+ // Get first index with now <= timeout[*], if any.
+ *available_index = (bitmask)?__builtin_ctz(bitmask)/4:*available_index;
+
+ // mask[*] = (timeout[*] > now) && (hash[*] == hash)
+ mask = _mm_and_si128(mask,
+ _mm_cmpeq_epi32(
+ _mm_loadu_si128 ((__m128i *) bucket->hash),
+ _mm_set1_epi32 (hash)));
+
+ // Load the array of vip values
+ // mask[*] = (timeout[*] > now) && (hash[*] == hash) && (vip[*] == vip)
+ mask = _mm_and_si128(mask,
+ _mm_cmpeq_epi32(
+ _mm_loadu_si128 ((__m128i *) bucket->vip),
+ _mm_set1_epi32 (vip)));
+
+ // mask[*] = (timeout[*x4] > now) && (hash[*x4] == hash) && (vip[*x4] == vip)
+ bitmask = _mm_movemask_epi8(mask);
+ // Get first index, if any
+ found_index = (bitmask)?__builtin_ctzll(bitmask)/4:0;
+ ASSERT(found_index < 4);
+ *found_value = (bitmask)?bucket->value[found_index]:*found_value;
+ bucket->timeout[found_index] =
+ (bitmask)?time_now + ht->timeout:bucket->timeout[found_index];
+#else
+ u32 i;
+ for (i = 0; i < LBHASH_ENTRY_PER_BUCKET; i++) {
+ u8 cmp = (bucket->hash[i] == hash && bucket->vip[i] == vip);
+ u8 timeouted = clib_u32_loop_gt(time_now, bucket->timeout[i]);
+ *found_value = (cmp || timeouted)?*found_value:bucket->value[i];
+ bucket->timeout[i] = (cmp || timeouted)?time_now + ht->timeout:bucket->timeout[i];
+ *available_index = (timeouted && (*available_index == ~0))?i:*available_index;
+
+ if (!cmp)
+ return;