From cb656303dfae0497c06ac925a08bda9f2b9d4d91 Mon Sep 17 00:00:00 2001 From: Pierre Pfister Date: Wed, 16 Mar 2016 09:14:28 +0000 Subject: [PATCH] vnet_classify: Handle non-aligned vectors and various bugfixes classifier used to crash when packet data was not aligned. This commit also includes: - writer lock initialization bugfix - CLI help was missing opaque-index - ip_input_acl was applying the mask on buffer->data instead of packet head On a side note, my tests failed to show any improvements when using SSE. Further tests might be interesting to see if they actually perform better. Change-Id: Ic34eecd6a2226919121ffce2fe4324506deee52f Signed-off-by: Pierre Pfister --- vnet/vnet/classify/vnet_classify.c | 3 +- vnet/vnet/classify/vnet_classify.h | 231 +++++++++++++++++++++---------------- 2 files changed, 136 insertions(+), 98 deletions(-) diff --git a/vnet/vnet/classify/vnet_classify.c b/vnet/vnet/classify/vnet_classify.c index 43acb024033..7f7138af2af 100644 --- a/vnet/vnet/classify/vnet_classify.c +++ b/vnet/vnet/classify/vnet_classify.c @@ -95,6 +95,7 @@ vnet_classify_new_table (vnet_classify_main_t *cm, t->writer_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); + t->writer_lock[0] = 0; clib_mem_set_heap (oldheap); return (t); @@ -1735,7 +1736,7 @@ VLIB_CLI_COMMAND (classify_session_command, static) = { .path = "classify session", .short_help = "classify session [hit-next|l2-hit-next|acl-hit-next ]" - "\n table-index match [hex] [l2] [l3 ip4]", + "\n table-index match [hex] [l2] [l3 ip4] [opaque-index ]", .function = classify_session_command_fn, }; diff --git a/vnet/vnet/classify/vnet_classify.h b/vnet/vnet/classify/vnet_classify.h index 0c93e90d020..e33e6dac201 100644 --- a/vnet/vnet/classify/vnet_classify.h +++ b/vnet/vnet/classify/vnet_classify.h @@ -40,6 +40,12 @@ extern vlib_node_registration_t ip6_classify_node; #define CLASSIFY_TRACE 0 +#ifndef __aarch64__ +#define CLASSIFY_USE_SSE //Allow usage of SSE operations +#endif + 
+#define U32X4_ALIGNED(p) PREDICT_TRUE((((u64)p) & 0xf) == 0) + struct _vnet_classify_main; typedef struct _vnet_classify_main vnet_classify_main_t; @@ -167,42 +173,70 @@ static inline u64 vnet_classify_hash_packet_inline (vnet_classify_table_t * t, u8 * h) { - u32x4 *data, *mask; - + u32x4 *mask; + union { u32x4 as_u32x4; u64 as_u64[2]; } xor_sum __attribute__((aligned(sizeof(u32x4)))); - + ASSERT(t); - - data = (u32x4 *)h; mask = t->mask; - - ASSERT ((((u64)h) & 0xf) == 0); - - xor_sum.as_u32x4 = data[0 + t->skip_n_vectors] & mask[0]; - - switch (t->match_n_vectors) +#ifdef CLASSIFY_USE_SSE + if (U32X4_ALIGNED(h)) { //SSE can't handle unaligned data + u32x4 *data = (u32x4 *)h; + xor_sum.as_u32x4 = data[0 + t->skip_n_vectors] & mask[0]; + switch (t->match_n_vectors) { - case 5: - xor_sum.as_u32x4 ^= data[4 + t->skip_n_vectors] & mask[4]; - /* FALLTHROUGH */ - case 4: - xor_sum.as_u32x4 ^= data[3 + t->skip_n_vectors] & mask[3]; - /* FALLTHROUGH */ - case 3: - xor_sum.as_u32x4 ^= data[2 + t->skip_n_vectors] & mask[2]; - /* FALLTHROUGH */ - case 2: - xor_sum.as_u32x4 ^= data[1 + t->skip_n_vectors] & mask[1]; - /* FALLTHROUGH */ - case 1: - break; - - default: - abort(); + case 5: + xor_sum.as_u32x4 ^= data[4 + t->skip_n_vectors] & mask[4]; + /* FALLTHROUGH */ + case 4: + xor_sum.as_u32x4 ^= data[3 + t->skip_n_vectors] & mask[3]; + /* FALLTHROUGH */ + case 3: + xor_sum.as_u32x4 ^= data[2 + t->skip_n_vectors] & mask[2]; + /* FALLTHROUGH */ + case 2: + xor_sum.as_u32x4 ^= data[1 + t->skip_n_vectors] & mask[1]; + /* FALLTHROUGH */ + case 1: + break; + default: + abort(); } + } else +#endif /* CLASSIFY_USE_SSE */ + { + u32 skip_u64 = t->skip_n_vectors * 2; + u64 *data64 = (u64 *)h; + xor_sum.as_u64[0] = data64[0 + skip_u64] & ((u64 *)mask)[0]; + xor_sum.as_u64[1] = data64[1 + skip_u64] & ((u64 *)mask)[1]; + switch (t->match_n_vectors) + { + case 5: + xor_sum.as_u64[0] ^= data64[8 + skip_u64] & ((u64 *)mask)[8]; + xor_sum.as_u64[1] ^= data64[9 + skip_u64] & ((u64 
*)mask)[9]; + /* FALLTHROUGH */ + case 4: + xor_sum.as_u64[0] ^= data64[6 + skip_u64] & ((u64 *)mask)[6]; + xor_sum.as_u64[1] ^= data64[7 + skip_u64] & ((u64 *)mask)[7]; + /* FALLTHROUGH */ + case 3: + xor_sum.as_u64[0] ^= data64[4 + skip_u64] & ((u64 *)mask)[4]; + xor_sum.as_u64[1] ^= data64[5 + skip_u64] & ((u64 *)mask)[5]; + /* FALLTHROUGH */ + case 2: + xor_sum.as_u64[0] ^= data64[2 + skip_u64] & ((u64 *)mask)[2]; + xor_sum.as_u64[1] ^= data64[3 + skip_u64] & ((u64 *)mask)[3]; + /* FALLTHROUGH */ + case 1: + break; + + default: + abort(); + } + } return clib_xxhash (xor_sum.as_u64[0] ^ xor_sum.as_u64[1]); } @@ -290,22 +324,19 @@ vnet_classify_find_entry_inline (vnet_classify_table_t * t, u8 * h, u64 hash, f64 now) { vnet_classify_entry_t * v; - u32x4 * mask, * data, *data_start, * key; - u32x4 result __attribute__((aligned(sizeof(u32x4)))); + u32x4 *mask, *key; + union { + u32x4 as_u32x4; + u64 as_u64[2]; + } result __attribute__((aligned(sizeof(u32x4)))); vnet_classify_bucket_t * b; u32 value_index; -#ifndef __aarch64__ - u32 result_mask; -#endif u32 bucket_index; int i; - ASSERT ((((u64)h) & 0xf) == 0); - - data_start = (u32x4 *) h; - bucket_index = hash & (t->nbuckets-1); b = &t->buckets[bucket_index]; + mask = t->mask; if (b->offset == 0) return 0; @@ -314,84 +345,90 @@ vnet_classify_find_entry_inline (vnet_classify_table_t * t, v = vnet_classify_get_entry (t, b->offset); value_index = hash & ((1<<b->log2_pages)-1); - v = vnet_classify_entry_at_index (t, v, value_index); - for (i = 0; i < t->entries_per_page; i++) - { - mask = t->mask; - data = data_start; +#ifdef CLASSIFY_USE_SSE + if (U32X4_ALIGNED(h)) { + u32x4 *data = (u32x4 *) h; + for (i = 0; i < t->entries_per_page; i++) { key = v->key; - + result.as_u32x4 = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0]; switch (t->match_n_vectors) - { - case 1: - result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0]; - break; - - case 2: - result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0]; - result |= 
(data[1 + t->skip_n_vectors] & mask[1]) ^ key[1]; - break; - + { + case 5: + result.as_u32x4 |= (data[4 + t->skip_n_vectors] & mask[4]) ^ key[4]; + /* FALLTHROUGH */ + case 4: + result.as_u32x4 |= (data[3 + t->skip_n_vectors] & mask[3]) ^ key[3]; + /* FALLTHROUGH */ case 3: - result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0]; - result |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1]; - result |= (data[2 + t->skip_n_vectors] & mask[2]) ^ key[2]; + result.as_u32x4 |= (data[2 + t->skip_n_vectors] & mask[2]) ^ key[2]; + /* FALLTHROUGH */ + case 2: + result.as_u32x4 |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1]; + /* FALLTHROUGH */ + case 1: break; + default: + abort(); + } - case 4: - result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0]; - result |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1]; - result |= (data[2 + t->skip_n_vectors] & mask[2]) ^ key[2]; - result |= (data[3 + t->skip_n_vectors] & mask[3]) ^ key[3]; - break; + if (u32x4_zero_byte_mask (result.as_u32x4) == 0xffff) { + if (PREDICT_TRUE(now)) { + v->hits++; + v->last_heard = now; + } + return (v); + } + v = vnet_classify_entry_at_index (t, v, 1); + } + } else +#endif /* CLASSIFY_USE_SSE */ + { + u32 skip_u64 = t->skip_n_vectors * 2; + u64 *data64 = (u64 *)h; + for (i = 0; i < t->entries_per_page; i++) { + key = v->key; + result.as_u64[0] = (data64[0 + skip_u64] & ((u64 *)mask)[0]) ^ ((u64 *)key)[0]; + result.as_u64[1] = (data64[1 + skip_u64] & ((u64 *)mask)[1]) ^ ((u64 *)key)[1]; + switch (t->match_n_vectors) + { case 5: - result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0]; - result |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1]; - result |= (data[2 + t->skip_n_vectors] & mask[2]) ^ key[2]; - result |= (data[3 + t->skip_n_vectors] & mask[3]) ^ key[3]; - result |= (data[4 + t->skip_n_vectors] & mask[4]) ^ key[4]; + result.as_u64[0] |= (data64[8 + skip_u64] & ((u64 *)mask)[8]) ^ ((u64 *)key)[8]; + result.as_u64[1] |= (data64[9 + skip_u64] & ((u64 *)mask)[9]) ^ ((u64 
*)key)[9]; + /* FALLTHROUGH */ + case 4: + result.as_u64[0] |= (data64[6 + skip_u64] & ((u64 *)mask)[6]) ^ ((u64 *)key)[6]; + result.as_u64[1] |= (data64[7 + skip_u64] & ((u64 *)mask)[7]) ^ ((u64 *)key)[7]; + /* FALLTHROUGH */ + case 3: + result.as_u64[0] |= (data64[4 + skip_u64] & ((u64 *)mask)[4]) ^ ((u64 *)key)[4]; + result.as_u64[1] |= (data64[5 + skip_u64] & ((u64 *)mask)[5]) ^ ((u64 *)key)[5]; + /* FALLTHROUGH */ + case 2: + result.as_u64[0] |= (data64[2 + skip_u64] & ((u64 *)mask)[2]) ^ ((u64 *)key)[2]; + result.as_u64[1] |= (data64[3 + skip_u64] & ((u64 *)mask)[3]) ^ ((u64 *)key)[3]; + /* FALLTHROUGH */ + case 1: break; - default: abort(); - } + } -#ifndef __aarch64__ - result_mask = u32x4_zero_byte_mask (result); - if (result_mask == 0xffff) - { - if (PREDICT_TRUE(now)) - { - v->hits++; - v->last_heard = now; - } - return (v); + if (result.as_u64[0] == 0 && result.as_u64[1] == 0) { + if (PREDICT_TRUE(now)) { + v->hits++; + v->last_heard = now; } -#else - { - typedef union {u32x4 as_u32x4; u64 as_u64[2];} u64u_t; - u64u_t u; - u.as_u32x4 = result; - - if (u.as_u64[0] == 0 && u.as_u64[1] == 0) - { - if (PREDICT_TRUE(now)) - { - v->hits++; - v->last_heard = now; - } - return (v); - } + return (v); } -#endif - + v = vnet_classify_entry_at_index (t, v, 1); } + } return 0; -} + } vnet_classify_table_t * vnet_classify_new_table (vnet_classify_main_t *cm, -- 2.16.6