Code Review
/
vpp.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
review
|
tree
raw
|
patch
|
inline
| side by side (parent:
4e96dda
)
crypto-native: add AArch64 AES-GCM native implementation
94/24994/8
author
Damjan Marion
<dmarion@me.com>
Wed, 12 Feb 2020 09:59:14 +0000
(10:59 +0100)
committer
Neale Ranns
<nranns@cisco.com>
Thu, 13 Feb 2020 12:47:22 +0000
(12:47 +0000)
Type: feature
Change-Id: I4f96b0af13b875d491704b010328a1814e1dbda1
Signed-off-by: Damjan Marion <dmarion@me.com>
src/plugins/crypto_native/CMakeLists.txt
patch
|
blob
|
history
src/plugins/crypto_native/aes.h
patch
|
blob
|
history
src/plugins/crypto_native/aes_gcm.c
patch
|
blob
|
history
src/plugins/crypto_native/crypto_native.h
patch
|
blob
|
history
src/plugins/crypto_native/main.c
patch
|
blob
|
history
src/vppinfra/vector_neon.h
patch
|
blob
|
history
src/vppinfra/vector_sse42.h
patch
|
blob
|
history
diff --git
a/src/plugins/crypto_native/CMakeLists.txt
b/src/plugins/crypto_native/CMakeLists.txt
index
9ac2946
..
c6d916d
100644
(file)
--- a/
src/plugins/crypto_native/CMakeLists.txt
+++ b/
src/plugins/crypto_native/CMakeLists.txt
@@
-26,7
+26,7
@@
endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
list(APPEND VARIANTS "armv8\;-march=armv8.1-a+crc+crypto")
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
list(APPEND VARIANTS "armv8\;-march=armv8.1-a+crc+crypto")
- set (COMPILE_FILES aes_cbc.c)
+ set (COMPILE_FILES aes_cbc.c
aes_gcm.c
)
set (COMPILE_OPTS -Wall -fno-common)
endif()
set (COMPILE_OPTS -Wall -fno-common)
endif()
diff --git
a/src/plugins/crypto_native/aes.h
b/src/plugins/crypto_native/aes.h
index
aacbf8a
..
b914088
100644
(file)
--- a/
src/plugins/crypto_native/aes.h
+++ b/
src/plugins/crypto_native/aes.h
@@
-28,8
+28,6
@@
typedef enum
#define AES_KEY_ROUNDS(x) (10 + x * 2)
#define AES_KEY_BYTES(x) (16 + x * 8)
#define AES_KEY_ROUNDS(x) (10 + x * 2)
#define AES_KEY_BYTES(x) (16 + x * 8)
-#ifdef __x86_64__
-
static const u8x16 byte_mask_scale = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
static const u8x16 byte_mask_scale = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
@@
-37,21
+35,31
@@
static const u8x16 byte_mask_scale = {
static_always_inline u8x16
aes_block_load (u8 * p)
{
static_always_inline u8x16
aes_block_load (u8 * p)
{
-  return (u8x16) _mm_loadu_si128 ((__m128i *) p);
+  return *(u8x16u *) p;
}
static_always_inline u8x16
aes_enc_round (u8x16 a, u8x16 k)
{
}
static_always_inline u8x16
aes_enc_round (u8x16 a, u8x16 k)
{
+#if defined (__AES__)
return (u8x16) _mm_aesenc_si128 ((__m128i) a, (__m128i) k);
return (u8x16) _mm_aesenc_si128 ((__m128i) a, (__m128i) k);
+#elif defined (__ARM_FEATURE_AES)
+ return vaesmcq_u8 (vaeseq_u8 (a, u8x16_splat (0))) ^ k;
+#endif
}
static_always_inline u8x16
aes_enc_last_round (u8x16 a, u8x16 k)
{
}
static_always_inline u8x16
aes_enc_last_round (u8x16 a, u8x16 k)
{
+#if defined (__AES__)
return (u8x16) _mm_aesenclast_si128 ((__m128i) a, (__m128i) k);
return (u8x16) _mm_aesenclast_si128 ((__m128i) a, (__m128i) k);
+#elif defined (__ARM_FEATURE_AES)
+ return vaeseq_u8 (a, u8x16_splat (0)) ^ k;
+#endif
}
}
+#ifdef __x86_64__
+
static_always_inline u8x16
aes_dec_round (u8x16 a, u8x16 k)
{
static_always_inline u8x16
aes_dec_round (u8x16 a, u8x16 k)
{
@@
-63,11
+71,12
@@
aes_dec_last_round (u8x16 a, u8x16 k)
{
return (u8x16) _mm_aesdeclast_si128 ((__m128i) a, (__m128i) k);
}
{
return (u8x16) _mm_aesdeclast_si128 ((__m128i) a, (__m128i) k);
}
+#endif
static_always_inline void
aes_block_store (u8 * p, u8x16 r)
{
static_always_inline void
aes_block_store (u8 * p, u8x16 r)
{
-  _mm_storeu_si128 ((__m128i *) p, (__m128i) r);
+  *(u8x16u *) p = r;
}
static_always_inline u8x16
}
static_always_inline u8x16
@@
-91,31
+100,40
@@
aes_load_partial (u8x16u * p, int n_bytes)
static_always_inline void
aes_store_partial (void *p, u8x16 r, int n_bytes)
{
static_always_inline void
aes_store_partial (void *p, u8x16 r, int n_bytes)
{
+#if __aarch64__
+ clib_memcpy_fast (p, &r, n_bytes);
+#else
#ifdef __AVX512F__
_mm_mask_storeu_epi8 (p, (1 << n_bytes) - 1, (__m128i) r);
#else
u8x16 mask = u8x16_is_greater (u8x16_splat (n_bytes), byte_mask_scale);
_mm_maskmoveu_si128 ((__m128i) r, (__m128i) mask, p);
#endif
#ifdef __AVX512F__
_mm_mask_storeu_epi8 (p, (1 << n_bytes) - 1, (__m128i) r);
#else
u8x16 mask = u8x16_is_greater (u8x16_splat (n_bytes), byte_mask_scale);
_mm_maskmoveu_si128 ((__m128i) r, (__m128i) mask, p);
#endif
+#endif
}
static_always_inline u8x16
aes_encrypt_block (u8x16 block, const u8x16 * round_keys, aes_key_size_t ks)
{
}
static_always_inline u8x16
aes_encrypt_block (u8x16 block, const u8x16 * round_keys, aes_key_size_t ks)
{
-  int i;
+  int rounds = AES_KEY_ROUNDS (ks);
block ^= round_keys[0];
block ^= round_keys[0];
-  for (i = 1; i < AES_KEY_ROUNDS (ks); i += 1)
+  for (int i = 1; i < rounds; i += 1)
block = aes_enc_round (block, round_keys[i]);
block = aes_enc_round (block, round_keys[i]);
-  return aes_enc_last_round (block, round_keys[i]);
+  return aes_enc_last_round (block, round_keys[rounds]);
}
static_always_inline u8x16
aes_inv_mix_column (u8x16 a)
{
}
static_always_inline u8x16
aes_inv_mix_column (u8x16 a)
{
+#if defined (__AES__)
return (u8x16) _mm_aesimc_si128 ((__m128i) a);
return (u8x16) _mm_aesimc_si128 ((__m128i) a);
+#elif defined (__ARM_FEATURE_AES)
+ return vaesimcq_u8 (a);
+#endif
}
}
+#ifdef __x86_64__
#define aes_keygen_assist(a, b) \
(u8x16) _mm_aeskeygenassist_si128((__m128i) a, b)
#define aes_keygen_assist(a, b) \
(u8x16) _mm_aeskeygenassist_si128((__m128i) a, b)
@@
-244,12
+262,6
@@
aes256_key_expand (u8x16 * rk, u8x16u const *k)
#ifdef __aarch64__
#ifdef __aarch64__
-static_always_inline u8x16
-aes_inv_mix_column (u8x16 a)
-{
- return vaesimcq_u8 (a);
-}
-
static const u8x16 aese_prep_mask1 =
{ 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12 };
static const u8x16 aese_prep_mask2 =
static const u8x16 aese_prep_mask1 =
{ 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12 };
static const u8x16 aese_prep_mask2 =
diff --git
a/src/plugins/crypto_native/aes_gcm.c
b/src/plugins/crypto_native/aes_gcm.c
index
f2dec62
..
41a9d48
100644
(file)
--- a/
src/plugins/crypto_native/aes_gcm.c
+++ b/
src/plugins/crypto_native/aes_gcm.c
@@
-18,7
+18,6
@@
#include <vlib/vlib.h>
#include <vnet/plugin/plugin.h>
#include <vnet/crypto/crypto.h>
#include <vlib/vlib.h>
#include <vnet/plugin/plugin.h>
#include <vnet/crypto/crypto.h>
-#include <x86intrin.h>
#include <crypto_native/crypto_native.h>
#include <crypto_native/aes.h>
#include <crypto_native/ghash.h>
#include <crypto_native/crypto_native.h>
#include <crypto_native/aes.h>
#include <crypto_native/ghash.h>
@@
-35,18
+34,6
@@
typedef struct
const u8x16 Ke[15];
} aes_gcm_key_data_t;
const u8x16 Ke[15];
} aes_gcm_key_data_t;
-static const u32x4 last_byte_one = { 0, 0, 0, 1 << 24 };
-
-static const u8x16 bswap_mask = {
- 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
-};
-
-static_always_inline u8x16
-aesni_gcm_bswap (u8x16 x)
-{
- return (u8x16) _mm_shuffle_epi8 ((__m128i) x, (__m128i) bswap_mask);
-}
-
static_always_inline void
aesni_gcm_load (u8x16 * d, u8x16u * inv, int n, int n_bytes)
{
static_always_inline void
aesni_gcm_load (u8x16 * d, u8x16u * inv, int n, int n_bytes)
{
@@
-70,6
+57,8
@@
static_always_inline void
aesni_gcm_enc_first_round (u8x16 * r, u32x4 * Y, u32 * ctr, u8x16 k,
int n_blocks)
{
aesni_gcm_enc_first_round (u8x16 * r, u32x4 * Y, u32 * ctr, u8x16 k,
int n_blocks)
{
+ static const u32x4 last_byte_one = { 0, 0, 0, 1 << 24 };
+
if (PREDICT_TRUE ((u8) ctr[0] < (256 - n_blocks)))
{
for (int i = 0; i < n_blocks; i++)
if (PREDICT_TRUE ((u8) ctr[0] < (256 - n_blocks)))
{
for (int i = 0; i < n_blocks; i++)
@@
-115,9
+104,9
@@
aesni_gcm_ghash_blocks (u8x16 T, aes_gcm_key_data_t * kd,
{
ghash_data_t _gd, *gd = &_gd;
const u8x16 *Hi = kd->Hi + n_blocks - 1;
{
ghash_data_t _gd, *gd = &_gd;
const u8x16 *Hi = kd->Hi + n_blocks - 1;
- ghash_mul_first (gd,
aesni_gcm_bswap
(in[0]) ^ T, Hi[0]);
+ ghash_mul_first (gd,
u8x16_reflect
(in[0]) ^ T, Hi[0]);
for (int i = 1; i < n_blocks; i++)
for (int i = 1; i < n_blocks; i++)
- ghash_mul_next (gd,
aesni_gcm_bswap
((in[i])), Hi[-i]);
+ ghash_mul_next (gd,
u8x16_reflect
((in[i])), Hi[-i]);
ghash_reduce (gd);
ghash_reduce2 (gd);
return ghash_final (gd);
ghash_reduce (gd);
ghash_reduce2 (gd);
return ghash_final (gd);
@@
-158,7
+147,7
@@
aesni_gcm_ghash (u8x16 T, aes_gcm_key_data_t * kd, u8x16u * in, u32 n_left)
if (n_left)
{
u8x16 r = aes_load_partial (in, n_left);
if (n_left)
{
u8x16 r = aes_load_partial (in, n_left);
- T = ghash_mul (
aesni_gcm_bswap
(r) ^ T, kd->Hi[0]);
+ T = ghash_mul (
u8x16_reflect
(r) ^ T, kd->Hi[0]);
}
return T;
}
}
return T;
}
@@
-174,7
+163,7
@@
aesni_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
const u8x16 *rk = (u8x16 *) kd->Ke;
int hidx = is_encrypt ? 4 : n, didx = 0;
const u8x16 *rk = (u8x16 *) kd->Ke;
int hidx = is_encrypt ? 4 : n, didx = 0;
-
_mm_prefetch (inv + 4, _MM_HINT_T0
);
+
clib_prefetch_load (inv + 4
);
/* AES rounds 0 and 1 */
aesni_gcm_enc_first_round (r, Y, ctr, rk[0], n);
/* AES rounds 0 and 1 */
aesni_gcm_enc_first_round (r, Y, ctr, rk[0], n);
@@
-186,7
+175,7
@@
aesni_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
/* GHASH multiply block 1 */
if (with_ghash)
/* GHASH multiply block 1 */
if (with_ghash)
- ghash_mul_first (gd,
aesni_gcm_bswap
(d[didx++]) ^ T, kd->Hi[--hidx]);
+ ghash_mul_first (gd,
u8x16_reflect
(d[didx++]) ^ T, kd->Hi[--hidx]);
/* AES rounds 2 and 3 */
aesni_gcm_enc_round (r, rk[2], n);
/* AES rounds 2 and 3 */
aesni_gcm_enc_round (r, rk[2], n);
@@
-194,7
+183,7
@@
aesni_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
/* GHASH multiply block 2 */
if (with_ghash && hidx)
/* GHASH multiply block 2 */
if (with_ghash && hidx)
- ghash_mul_next (gd,
aesni_gcm_bswap
(d[didx++]), kd->Hi[--hidx]);
+ ghash_mul_next (gd,
u8x16_reflect
(d[didx++]), kd->Hi[--hidx]);
/* AES rounds 4 and 5 */
aesni_gcm_enc_round (r, rk[4], n);
/* AES rounds 4 and 5 */
aesni_gcm_enc_round (r, rk[4], n);
@@
-202,7
+191,7
@@
aesni_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
/* GHASH multiply block 3 */
if (with_ghash && hidx)
/* GHASH multiply block 3 */
if (with_ghash && hidx)
- ghash_mul_next (gd,
aesni_gcm_bswap
(d[didx++]), kd->Hi[--hidx]);
+ ghash_mul_next (gd,
u8x16_reflect
(d[didx++]), kd->Hi[--hidx]);
/* AES rounds 6 and 7 */
aesni_gcm_enc_round (r, rk[6], n);
/* AES rounds 6 and 7 */
aesni_gcm_enc_round (r, rk[6], n);
@@
-210,7
+199,7
@@
aesni_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
/* GHASH multiply block 4 */
if (with_ghash && hidx)
/* GHASH multiply block 4 */
if (with_ghash && hidx)
- ghash_mul_next (gd,
aesni_gcm_bswap
(d[didx++]), kd->Hi[--hidx]);
+ ghash_mul_next (gd,
u8x16_reflect
(d[didx++]), kd->Hi[--hidx]);
/* AES rounds 8 and 9 */
aesni_gcm_enc_round (r, rk[8], n);
/* AES rounds 8 and 9 */
aesni_gcm_enc_round (r, rk[8], n);
@@
-259,28
+248,28
@@
aesni_gcm_calc_double (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
aesni_gcm_load (d, inv, 4, 0);
/* GHASH multiply block 0 */
aesni_gcm_load (d, inv, 4, 0);
/* GHASH multiply block 0 */
- ghash_mul_first (gd,
aesni_gcm_bswap
(d[0]) ^ T, kd->Hi[7]);
+ ghash_mul_first (gd,
u8x16_reflect
(d[0]) ^ T, kd->Hi[7]);
/* AES rounds 2 and 3 */
aesni_gcm_enc_round (r, rk[2], 4);
aesni_gcm_enc_round (r, rk[3], 4);
/* GHASH multiply block 1 */
/* AES rounds 2 and 3 */
aesni_gcm_enc_round (r, rk[2], 4);
aesni_gcm_enc_round (r, rk[3], 4);
/* GHASH multiply block 1 */
- ghash_mul_next (gd,
aesni_gcm_bswap
(d[1]), kd->Hi[6]);
+ ghash_mul_next (gd,
u8x16_reflect
(d[1]), kd->Hi[6]);
/* AES rounds 4 and 5 */
aesni_gcm_enc_round (r, rk[4], 4);
aesni_gcm_enc_round (r, rk[5], 4);
/* GHASH multiply block 2 */
/* AES rounds 4 and 5 */
aesni_gcm_enc_round (r, rk[4], 4);
aesni_gcm_enc_round (r, rk[5], 4);
/* GHASH multiply block 2 */
- ghash_mul_next (gd,
aesni_gcm_bswap
(d[2]), kd->Hi[5]);
+ ghash_mul_next (gd,
u8x16_reflect
(d[2]), kd->Hi[5]);
/* AES rounds 6 and 7 */
aesni_gcm_enc_round (r, rk[6], 4);
aesni_gcm_enc_round (r, rk[7], 4);
/* GHASH multiply block 3 */
/* AES rounds 6 and 7 */
aesni_gcm_enc_round (r, rk[6], 4);
aesni_gcm_enc_round (r, rk[7], 4);
/* GHASH multiply block 3 */
- ghash_mul_next (gd,
aesni_gcm_bswap
(d[3]), kd->Hi[4]);
+ ghash_mul_next (gd,
u8x16_reflect
(d[3]), kd->Hi[4]);
/* AES rounds 8 and 9 */
aesni_gcm_enc_round (r, rk[8], 4);
/* AES rounds 8 and 9 */
aesni_gcm_enc_round (r, rk[8], 4);
@@
-301,7
+290,7
@@
aesni_gcm_calc_double (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
aesni_gcm_load (d, inv + 4, 4, 0);
/* GHASH multiply block 4 */
aesni_gcm_load (d, inv + 4, 4, 0);
/* GHASH multiply block 4 */
- ghash_mul_next (gd,
aesni_gcm_bswap
(d[0]), kd->Hi[3]);
+ ghash_mul_next (gd,
u8x16_reflect
(d[0]), kd->Hi[3]);
/* AES rounds 0, 1 and 2 */
aesni_gcm_enc_first_round (r, Y, ctr, rk[0], 4);
/* AES rounds 0, 1 and 2 */
aesni_gcm_enc_first_round (r, Y, ctr, rk[0], 4);
@@
-309,21
+298,21
@@
aesni_gcm_calc_double (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
aesni_gcm_enc_round (r, rk[2], 4);
/* GHASH multiply block 5 */
aesni_gcm_enc_round (r, rk[2], 4);
/* GHASH multiply block 5 */
- ghash_mul_next (gd,
aesni_gcm_bswap
(d[1]), kd->Hi[2]);
+ ghash_mul_next (gd,
u8x16_reflect
(d[1]), kd->Hi[2]);
/* AES rounds 3 and 4 */
aesni_gcm_enc_round (r, rk[3], 4);
aesni_gcm_enc_round (r, rk[4], 4);
/* GHASH multiply block 6 */
/* AES rounds 3 and 4 */
aesni_gcm_enc_round (r, rk[3], 4);
aesni_gcm_enc_round (r, rk[4], 4);
/* GHASH multiply block 6 */
- ghash_mul_next (gd,
aesni_gcm_bswap
(d[2]), kd->Hi[1]);
+ ghash_mul_next (gd,
u8x16_reflect
(d[2]), kd->Hi[1]);
/* AES rounds 5 and 6 */
aesni_gcm_enc_round (r, rk[5], 4);
aesni_gcm_enc_round (r, rk[6], 4);
/* GHASH multiply block 7 */
/* AES rounds 5 and 6 */
aesni_gcm_enc_round (r, rk[5], 4);
aesni_gcm_enc_round (r, rk[6], 4);
/* GHASH multiply block 7 */
- ghash_mul_next (gd,
aesni_gcm_bswap
(d[3]), kd->Hi[0]);
+ ghash_mul_next (gd,
u8x16_reflect
(d[3]), kd->Hi[0]);
/* AES rounds 7 and 8 */
aesni_gcm_enc_round (r, rk[7], 4);
/* AES rounds 7 and 8 */
aesni_gcm_enc_round (r, rk[7], 4);
@@
-361,13
+350,13
@@
aesni_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
if (n_bytes)
d[n_blocks - 1] = aes_byte_mask (d[n_blocks - 1], n_bytes);
if (n_bytes)
d[n_blocks - 1] = aes_byte_mask (d[n_blocks - 1], n_bytes);
- ghash_mul_first (gd,
aesni_gcm_bswap
(d[0]) ^ T, kd->Hi[n_blocks - 1]);
+ ghash_mul_first (gd,
u8x16_reflect
(d[0]) ^ T, kd->Hi[n_blocks - 1]);
if (n_blocks > 1)
if (n_blocks > 1)
- ghash_mul_next (gd,
aesni_gcm_bswap
(d[1]), kd->Hi[n_blocks - 2]);
+ ghash_mul_next (gd,
u8x16_reflect
(d[1]), kd->Hi[n_blocks - 2]);
if (n_blocks > 2)
if (n_blocks > 2)
- ghash_mul_next (gd,
aesni_gcm_bswap
(d[2]), kd->Hi[n_blocks - 3]);
+ ghash_mul_next (gd,
u8x16_reflect
(d[2]), kd->Hi[n_blocks - 3]);
if (n_blocks > 3)
if (n_blocks > 3)
- ghash_mul_next (gd,
aesni_gcm_bswap
(d[3]), kd->Hi[n_blocks - 4]);
+ ghash_mul_next (gd,
u8x16_reflect
(d[3]), kd->Hi[n_blocks - 4]);
ghash_reduce (gd);
ghash_reduce2 (gd);
return ghash_final (gd);
ghash_reduce (gd);
ghash_reduce2 (gd);
return ghash_final (gd);
@@
-539,9
+528,9
@@
aes_gcm (u8x16u * in, u8x16u * out, u8x16u * addt, u8x16u * iv, u8x16u * tag,
u32x4 Y0;
ghash_data_t _gd, *gd = &_gd;
u32x4 Y0;
ghash_data_t _gd, *gd = &_gd;
-  _mm_prefetch (iv, _MM_HINT_T0);
-  _mm_prefetch (in, _MM_HINT_T0);
-  _mm_prefetch (in + CLIB_CACHE_LINE_BYTES, _MM_HINT_T0);
+  clib_prefetch_load (iv);
+  clib_prefetch_load (in);
+  clib_prefetch_load (in + 4);
/* calculate ghash for AAD - optimized for ipsec common cases */
if (aad_bytes == 8)
/* calculate ghash for AAD - optimized for ipsec common cases */
if (aad_bytes == 8)
@@
-561,7
+550,7
@@
aes_gcm (u8x16u * in, u8x16u * out, u8x16u * addt, u8x16u * iv, u8x16u * tag,
else
T = aesni_gcm_dec (T, kd, Y0, in, out, data_bytes, aes_rounds);
else
T = aesni_gcm_dec (T, kd, Y0, in, out, data_bytes, aes_rounds);
-
_mm_prefetch (tag, _MM_HINT_T0
);
+
clib_prefetch_load (tag
);
/* Finalize ghash - data bytes and aad bytes converted to bits */
/* *INDENT-OFF* */
/* Finalize ghash - data bytes and aad bytes converted to bits */
/* *INDENT-OFF* */
@@
-581,7
+570,7
@@
aes_gcm (u8x16u * in, u8x16u * out, u8x16u * addt, u8x16u * iv, u8x16u * tag,
for (; i < aes_rounds; i += 1)
r = aes_enc_round (r, kd->Ke[i]);
r = aes_enc_last_round (r, kd->Ke[aes_rounds]);
for (; i < aes_rounds; i += 1)
r = aes_enc_round (r, kd->Ke[i]);
r = aes_enc_last_round (r, kd->Ke[aes_rounds]);
- T =
aesni_gcm_bswap
(T) ^ r;
+ T =
u8x16_reflect
(T) ^ r;
/* tag_len 16 -> 0 */
tag_len &= 0xf;
/* tag_len 16 -> 0 */
tag_len &= 0xf;
@@
-679,7
+668,7
@@
aesni_gcm_key_exp (vnet_crypto_key_t * key, aes_key_size_t ks)
/* pre-calculate H */
H = aes_encrypt_block (u8x16_splat (0), kd->Ke, ks);
/* pre-calculate H */
H = aes_encrypt_block (u8x16_splat (0), kd->Ke, ks);
- H =
aesni_gcm_bswap
(H);
+ H =
u8x16_reflect
(H);
ghash_precompute (H, (u8x16 *) kd->Hi, 8);
return kd;
}
ghash_precompute (H, (u8x16 *) kd->Hi, 8);
return kd;
}
@@
-706,6
+695,8
@@
crypto_native_aes_gcm_init_vaes (vlib_main_t * vm)
crypto_native_aes_gcm_init_avx512 (vlib_main_t * vm)
#elif __AVX2__
crypto_native_aes_gcm_init_avx2 (vlib_main_t * vm)
crypto_native_aes_gcm_init_avx512 (vlib_main_t * vm)
#elif __AVX2__
crypto_native_aes_gcm_init_avx2 (vlib_main_t * vm)
+#elif __aarch64__
+crypto_native_aes_gcm_init_neon (vlib_main_t * vm)
#else
crypto_native_aes_gcm_init_sse42 (vlib_main_t * vm)
#endif
#else
crypto_native_aes_gcm_init_sse42 (vlib_main_t * vm)
#endif
diff --git
a/src/plugins/crypto_native/crypto_native.h
b/src/plugins/crypto_native/crypto_native.h
index
0b6116a
..
279684b
100644
(file)
--- a/
src/plugins/crypto_native/crypto_native.h
+++ b/
src/plugins/crypto_native/crypto_native.h
@@
-45,6
+45,7
@@
clib_error_t *crypto_native_aes_gcm_init_sse42 (vlib_main_t * vm);
clib_error_t *crypto_native_aes_gcm_init_avx2 (vlib_main_t * vm);
clib_error_t *crypto_native_aes_gcm_init_avx512 (vlib_main_t * vm);
clib_error_t *crypto_native_aes_gcm_init_vaes (vlib_main_t * vm);
clib_error_t *crypto_native_aes_gcm_init_avx2 (vlib_main_t * vm);
clib_error_t *crypto_native_aes_gcm_init_avx512 (vlib_main_t * vm);
clib_error_t *crypto_native_aes_gcm_init_vaes (vlib_main_t * vm);
+clib_error_t *crypto_native_aes_gcm_init_neon (vlib_main_t * vm);
#endif /* __crypto_native_h__ */
/*
#endif /* __crypto_native_h__ */
/*
diff --git
a/src/plugins/crypto_native/main.c
b/src/plugins/crypto_native/main.c
index
d338ab6
..
45d3d8d
100644
(file)
--- a/
src/plugins/crypto_native/main.c
+++ b/
src/plugins/crypto_native/main.c
@@
-102,9
+102,10
@@
crypto_native_init (vlib_main_t * vm)
}
#endif
#if __aarch64__
}
#endif
#if __aarch64__
- error = crypto_native_aes_cbc_init_neon (vm);
+ if ((error = crypto_native_aes_cbc_init_neon (vm)))
+ goto error;
-  if (error)
+  if ((error = crypto_native_aes_gcm_init_neon (vm)))
goto error;
#endif
goto error;
#endif
diff --git
a/src/vppinfra/vector_neon.h
b/src/vppinfra/vector_neon.h
index
307fbc5
..
81d99a6
100644
(file)
--- a/
src/vppinfra/vector_neon.h
+++ b/
src/vppinfra/vector_neon.h
@@
-194,6
+194,15
@@
u8x16_word_shift_right (u8x16 x, const int n)
return vextq_u8 (x, u8x16_splat (0), n);
}
return vextq_u8 (x, u8x16_splat (0), n);
}
+static_always_inline u8x16
+u8x16_reflect (u8x16 v)
+{
+ u8x16 mask = {
+ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+ };
+ return (u8x16) vqtbl1q_u8 (v, mask);
+}
+
#define CLIB_HAVE_VEC128_MSB_MASK
#define CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE
#define CLIB_HAVE_VEC128_MSB_MASK
#define CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE
diff --git
a/src/vppinfra/vector_sse42.h
b/src/vppinfra/vector_sse42.h
index
0c1b2f0
..
c22e86e
100644
(file)
--- a/
src/vppinfra/vector_sse42.h
+++ b/
src/vppinfra/vector_sse42.h
@@
-635,6
+635,15
@@
u16x8_byte_swap (u16x8 v)
return (u16x8) _mm_shuffle_epi8 ((__m128i) v, (__m128i) swap);
}
return (u16x8) _mm_shuffle_epi8 ((__m128i) v, (__m128i) swap);
}
+static_always_inline u8x16
+u8x16_reflect (u8x16 v)
+{
+ u8x16 mask = {
+ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+ };
+ return (u8x16) _mm_shuffle_epi8 ((__m128i) v, (__m128i) mask);
+}
+
static_always_inline u32x4
u32x4_hadd (u32x4 v1, u32x4 v2)
{
static_always_inline u32x4
u32x4_hadd (u32x4 v1, u32x4 v2)
{