#include <vlib/vlib.h>
#include <vnet/plugin/plugin.h>
#include <vnet/crypto/crypto.h>
-#include <x86intrin.h>
#include <crypto_native/crypto_native.h>
#include <crypto_native/aes.h>
#include <crypto_native/ghash.h>
#pragma GCC optimize ("O3")
#endif
/* Number of pre-computed GHASH key values stored per key. */
+#define NUM_HI 8
/* Per-key data: pre-computed GHASH key values plus the AES round keys. */
typedef struct
{
/* pre-calculated hash key values; after this change the GHASH helpers in
   this file read the last n entries for an n-block multiply
   (Hi + NUM_HI - n), and Hi[NUM_HI - 1] for a single block */
- const u8x16 Hi[8];
+ const u8x16 Hi[NUM_HI];
/* extracted AES key; the round helpers index it as Ke[0] .. Ke[rounds] */
const u8x16 Ke[15];
} aes_gcm_key_data_t;
-static const u32x4 last_byte_one = { 0, 0, 0, 1 << 24 };
-
-static const u8x16 bswap_mask = {
- 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
-};
-
-static_always_inline u8x16
-aesni_gcm_bswap (u8x16 x)
/* Counter state threaded through the encrypt/decrypt helpers (replaces the
   separate Y / ctr pointer arguments of the removed signatures). */
+typedef struct
{
- return (u8x16) _mm_shuffle_epi8 ((__m128i) x, (__m128i) bswap_mask);
-}
/* block counter kept as a plain integer */
+ u32 counter;
/* counter block fed to the first AES round; on the slow path of
   aes_gcm_enc_first_round lane 3 is rewritten as counter + 1 in network
   byte order */
+ u32x4 Y;
+} aes_gcm_counter_t;
+
/* Added to Y to step the counter block: only lane 3 changes, by 1 << 24.
   NOTE(review): presumably this equals +1 in the last byte of the block on
   a little-endian host; confirm for any big-endian target. */
+static const u32x4 ctr_inv_1 = { 0, 0, 0, 1 << 24 };
/* Copy input vectors from inv (unaligned vector type) into the work
   buffer d.
   The lines shown here copy inv[0] .. inv[n - 2] only; d[n - 1] is not
   written and n_bytes is not referenced.
   NOTE(review): callers pass last_block_bytes as n_bytes, so handling of
   the last (possibly partial) block is expected; confirm it has not been
   dropped from this excerpt. */
static_always_inline void
-aesni_gcm_load (u8x16 * d, u8x16u * inv, int n, int n_bytes)
+aes_gcm_load (u8x16 * d, u8x16u * inv, int n, int n_bytes)
{
for (int i = 0; i < n - 1; i++)
d[i] = inv[i];
}
/* Copy vectors from the work buffer d to outv (unaligned vector type).
   The lines shown here store d[0] .. d[n - 2] only; d[n - 1] is not
   written out and n_bytes is not referenced.
   NOTE(review): callers pass last_block_bytes as n_bytes, so a store of
   the last (possibly partial) block is expected; confirm it has not been
   dropped from this excerpt. */
static_always_inline void
-aesni_gcm_store (u8x16 * d, u8x16u * outv, int n, int n_bytes)
+aes_gcm_store (u8x16 * d, u8x16u * outv, int n, int n_bytes)
{
for (int i = 0; i < n - 1; i++)
outv[i] = d[i];
}
/* Start AES encryption of n_blocks counter blocks.
   For each block, r[i] is set to the current counter block ctr->Y XORed
   with round key k, and the counter state is then advanced by one block.
   After this change Y is consumed before it is incremented (the removed
   lines incremented first), so Y is expected to already hold the next
   counter value on entry. */
static_always_inline void
-aesni_gcm_enc_first_round (u8x16 * r, u32x4 * Y, u32 * ctr, u8x16 k,
- int n_blocks)
+aes_gcm_enc_first_round (u8x16 * r, aes_gcm_counter_t * ctr, u8x16 k,
+ int n_blocks)
{
/* fast path, taken while the low byte of the integer counter is well
   below 256: Y is stepped by adding ctr_inv_1 and the integer counter is
   bumped once after the loop.
   NOTE(review): the new bound uses 2 * n_blocks where the removed line
   used n_blocks; presumably extra margin because Y now runs one ahead of
   counter. Confirm. */
- if (PREDICT_TRUE ((u8) ctr[0] < (256 - n_blocks)))
+ if (PREDICT_TRUE ((u8) ctr->counter < (256 - 2 * n_blocks)))
{
for (int i = 0; i < n_blocks; i++)
{
- Y[0] += last_byte_one;
- r[i] = k ^ (u8x16) Y[0];
+ r[i] = k ^ (u8x16) ctr->Y;
+ ctr->Y += ctr_inv_1;
}
- ctr[0] += n_blocks;
+ ctr->counter += n_blocks;
}
else
{
/* slow path: rebuild lane 3 of Y from the integer counter for every
   block, in network byte order, keeping Y one ahead of counter */
for (int i = 0; i < n_blocks; i++)
{
- Y[0][3] = clib_host_to_net_u32 (++ctr[0]);
- r[i] = k ^ (u8x16) Y[0];
+ r[i] = k ^ (u8x16) ctr->Y;
+ ctr->counter++;
+ ctr->Y[3] = clib_host_to_net_u32 (ctr->counter + 1);
}
}
}
/* Run aes_enc_round () with round key k over r[0] .. r[n_blocks - 1],
   updating each state in place. */
static_always_inline void
-aesni_gcm_enc_round (u8x16 * r, u8x16 k, int n_blocks)
+aes_gcm_enc_round (u8x16 * r, u8x16 k, int n_blocks)
{
for (int i = 0; i < n_blocks; i++)
r[i] = aes_enc_round (r[i], k);
}
/* Finish the AES computation for n_blocks states and apply the result to
   the data.
   Rounds 10 .. rounds - 1 are run first (a no-op when rounds == 10), then
   aes_enc_last_round () with k[rounds] is applied to each state and the
   result is XORed into d[i] in place. */
static_always_inline void
-aesni_gcm_enc_last_round (u8x16 * r, u8x16 * d, u8x16 const *k,
- int rounds, int n_blocks)
+aes_gcm_enc_last_round (u8x16 * r, u8x16 * d, u8x16 const *k,
+ int rounds, int n_blocks)
{
/* additional rounds for AES-192 and AES-256 */
for (int i = 10; i < rounds; i++)
- aesni_gcm_enc_round (r, k[i], n_blocks);
+ aes_gcm_enc_round (r, k[i], n_blocks);
/* last round, XORed into the data block */
for (int i = 0; i < n_blocks; i++)
d[i] ^= aes_enc_last_round (r[i], k[rounds]);
}
/* Fold n_blocks 16-byte blocks from in into the running hash T and return
   the new hash value.
   Each input block is passed through u8x16_reflect () before being
   multiplied; T is XORed into the first block only.
   n_blocks must be in 1 .. NUM_HI so that Hi[0] .. Hi[n_blocks - 1] stay
   inside kd->Hi. */
static_always_inline u8x16
-aesni_gcm_ghash_blocks (u8x16 T, aes_gcm_key_data_t * kd,
- u8x16u * in, int n_blocks)
+aes_gcm_ghash_blocks (u8x16 T, aes_gcm_key_data_t * kd,
+ u8x16u * in, int n_blocks)
{
ghash_data_t _gd, *gd = &_gd;
/* after this change Hi points at the last n_blocks key values and is
   walked upwards (the removed lines started at Hi[n_blocks - 1] and walked
   downwards); the cast drops the const qualifier of the member, Hi is only
   read here */
- const u8x16 *Hi = kd->Hi + n_blocks - 1;
- ghash_mul_first (gd, aesni_gcm_bswap (in[0]) ^ T, Hi[0]);
+ u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks;
+ ghash_mul_first (gd, u8x16_reflect (in[0]) ^ T, Hi[0]);
for (int i = 1; i < n_blocks; i++)
- ghash_mul_next (gd, aesni_gcm_bswap ((in[i])), Hi[-i]);
+ ghash_mul_next (gd, u8x16_reflect ((in[i])), Hi[i]);
/* a single two-step reduction covers all accumulated products */
ghash_reduce (gd);
ghash_reduce2 (gd);
return ghash_final (gd);
}
/* Hash n_left bytes starting at in into T and return the new hash value.
   The input is consumed in chunks of 8 blocks (repeated), then at most one
   chunk each of 4, 2 and 1 blocks, and finally a partial block of fewer
   than 16 bytes if any bytes remain. */
static_always_inline u8x16
-aesni_gcm_ghash (u8x16 T, aes_gcm_key_data_t * kd, u8x16u * in, u32 n_left)
+aes_gcm_ghash (u8x16 T, aes_gcm_key_data_t * kd, u8x16u * in, u32 n_left)
{
/* 128 bytes (8 blocks) at a time */
while (n_left >= 128)
{
- T = aesni_gcm_ghash_blocks (T, kd, in, 8);
+ T = aes_gcm_ghash_blocks (T, kd, in, 8);
n_left -= 128;
in += 8;
}
if (n_left >= 64)
{
- T = aesni_gcm_ghash_blocks (T, kd, in, 4);
+ T = aes_gcm_ghash_blocks (T, kd, in, 4);
n_left -= 64;
in += 4;
}
if (n_left >= 32)
{
- T = aesni_gcm_ghash_blocks (T, kd, in, 2);
+ T = aes_gcm_ghash_blocks (T, kd, in, 2);
n_left -= 32;
in += 2;
}
if (n_left >= 16)
{
- T = aesni_gcm_ghash_blocks (T, kd, in, 1);
+ T = aes_gcm_ghash_blocks (T, kd, in, 1);
n_left -= 16;
in += 1;
}
/* trailing 1 .. 15 bytes: loaded with aes_load_partial () and multiplied
   by the last entry of Hi (previously Hi[0]).
   NOTE(review): presumably aes_load_partial () zero-fills the unused
   bytes; confirm against its definition. */
if (n_left)
{
u8x16 r = aes_load_partial (in, n_left);
- T = ghash_mul (aesni_gcm_bswap (r) ^ T, kd->Hi[0]);
+ T = ghash_mul (u8x16_reflect (r) ^ T, kd->Hi[NUM_HI - 1]);
}
return T;
}
static_always_inline u8x16
-aesni_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
- u32x4 * Y, u32 * ctr, u8x16u * inv, u8x16u * outv,
- int rounds, int n, int last_block_bytes, int with_ghash,
- int is_encrypt)
+aes_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
+ aes_gcm_counter_t * ctr, u8x16u * inv, u8x16u * outv,
+ int rounds, int n, int last_block_bytes, int with_ghash,
+ int is_encrypt)
{
u8x16 r[n];
ghash_data_t _gd = { }, *gd = &_gd;
const u8x16 *rk = (u8x16 *) kd->Ke;
- int hidx = is_encrypt ? 4 : n, didx = 0;
+ int ghash_blocks = is_encrypt ? 4 : n, gc = 1;
+ u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - ghash_blocks;
- _mm_prefetch (inv + 4, _MM_HINT_T0);
+ clib_prefetch_load (inv + 4);
/* AES rounds 0 and 1 */
- aesni_gcm_enc_first_round (r, Y, ctr, rk[0], n);
- aesni_gcm_enc_round (r, rk[1], n);
+ aes_gcm_enc_first_round (r, ctr, rk[0], n);
+ aes_gcm_enc_round (r, rk[1], n);
/* load data - decrypt round */
if (is_encrypt == 0)
- aesni_gcm_load (d, inv, n, last_block_bytes);
+ aes_gcm_load (d, inv, n, last_block_bytes);
/* GHASH multiply block 1 */
if (with_ghash)
- ghash_mul_first (gd, aesni_gcm_bswap (d[didx++]) ^ T, kd->Hi[--hidx]);
+ ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]);
/* AES rounds 2 and 3 */
- aesni_gcm_enc_round (r, rk[2], n);
- aesni_gcm_enc_round (r, rk[3], n);
+ aes_gcm_enc_round (r, rk[2], n);
+ aes_gcm_enc_round (r, rk[3], n);
/* GHASH multiply block 2 */
- if (with_ghash && hidx)
- ghash_mul_next (gd, aesni_gcm_bswap (d[didx++]), kd->Hi[--hidx]);
+ if (with_ghash && gc++ < ghash_blocks)
+ ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]);
/* AES rounds 4 and 5 */
- aesni_gcm_enc_round (r, rk[4], n);
- aesni_gcm_enc_round (r, rk[5], n);
+ aes_gcm_enc_round (r, rk[4], n);
+ aes_gcm_enc_round (r, rk[5], n);
/* GHASH multiply block 3 */
- if (with_ghash && hidx)
- ghash_mul_next (gd, aesni_gcm_bswap (d[didx++]), kd->Hi[--hidx]);
+ if (with_ghash && gc++ < ghash_blocks)
+ ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]);
/* AES rounds 6 and 7 */
- aesni_gcm_enc_round (r, rk[6], n);
- aesni_gcm_enc_round (r, rk[7], n);
+ aes_gcm_enc_round (r, rk[6], n);
+ aes_gcm_enc_round (r, rk[7], n);
/* GHASH multiply block 4 */
- if (with_ghash && hidx)
- ghash_mul_next (gd, aesni_gcm_bswap (d[didx++]), kd->Hi[--hidx]);
+ if (with_ghash && gc++ < ghash_blocks)
+ ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]);
/* AES rounds 8 and 9 */
- aesni_gcm_enc_round (r, rk[8], n);
- aesni_gcm_enc_round (r, rk[9], n);
+ aes_gcm_enc_round (r, rk[8], n);
+ aes_gcm_enc_round (r, rk[9], n);
/* GHASH reduce 1st step */
if (with_ghash)
/* load data - encrypt round */
if (is_encrypt)
- aesni_gcm_load (d, inv, n, last_block_bytes);
+ aes_gcm_load (d, inv, n, last_block_bytes);
/* GHASH reduce 2nd step */
if (with_ghash)
ghash_reduce2 (gd);
/* AES last round(s) */
- aesni_gcm_enc_last_round (r, d, rk, rounds, n);
+ aes_gcm_enc_last_round (r, d, rk, rounds, n);
/* store data */
- aesni_gcm_store (d, outv, n, last_block_bytes);
+ aes_gcm_store (d, outv, n, last_block_bytes);
/* GHASH final step */
if (with_ghash)
}
/* Process eight 16-byte blocks as two groups of four, interleaving the AES
   rounds of each group with a single eight-block GHASH (ghash_mul_first,
   seven ghash_mul_next, then reduce / reduce2 / final).
   Returns the updated hash T.

   Only d[0] .. d[3] are used as work buffer.
   Decrypt (is_encrypt == 0): each group is loaded from inv before it is
   hashed.
   Encrypt: the contents of d on entry are hashed first (input is loaded
   only after those four multiplies), then the output of the first group is
   hashed while the second group is being encrypted; the output of the
   second group is left in d on return and is not hashed here.

   The first-round helper is called twice, so the counter advances by
   eight blocks. */
static_always_inline u8x16
-aesni_gcm_calc_double (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
- u32x4 * Y, u32 * ctr, u8x16u * inv, u8x16u * outv,
- int rounds, int is_encrypt)
+aes_gcm_calc_double (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
+ aes_gcm_counter_t * ctr, u8x16u * inv, u8x16u * outv,
+ int rounds, int is_encrypt)
{
u8x16 r[4];
ghash_data_t _gd, *gd = &_gd;
const u8x16 *rk = (u8x16 *) kd->Ke;
/* eight key values are used, so with NUM_HI == 8 this is kd->Hi itself;
   block i of the eight is multiplied by Hi[i] */
+ u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - 8;
/* AES rounds 0 and 1 */
- aesni_gcm_enc_first_round (r, Y, ctr, rk[0], 4);
- aesni_gcm_enc_round (r, rk[1], 4);
+ aes_gcm_enc_first_round (r, ctr, rk[0], 4);
+ aes_gcm_enc_round (r, rk[1], 4);
/* load 4 blocks of data - decrypt round */
if (is_encrypt == 0)
- aesni_gcm_load (d, inv, 4, 0);
+ aes_gcm_load (d, inv, 4, 0);
/* GHASH multiply block 0 */
- ghash_mul_first (gd, aesni_gcm_bswap (d[0]) ^ T, kd->Hi[7]);
+ ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]);
/* AES rounds 2 and 3 */
- aesni_gcm_enc_round (r, rk[2], 4);
- aesni_gcm_enc_round (r, rk[3], 4);
+ aes_gcm_enc_round (r, rk[2], 4);
+ aes_gcm_enc_round (r, rk[3], 4);
/* GHASH multiply block 1 */
- ghash_mul_next (gd, aesni_gcm_bswap (d[1]), kd->Hi[6]);
+ ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]);
/* AES rounds 4 and 5 */
- aesni_gcm_enc_round (r, rk[4], 4);
- aesni_gcm_enc_round (r, rk[5], 4);
+ aes_gcm_enc_round (r, rk[4], 4);
+ aes_gcm_enc_round (r, rk[5], 4);
/* GHASH multiply block 2 */
- ghash_mul_next (gd, aesni_gcm_bswap (d[2]), kd->Hi[5]);
+ ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]);
/* AES rounds 6 and 7 */
- aesni_gcm_enc_round (r, rk[6], 4);
- aesni_gcm_enc_round (r, rk[7], 4);
+ aes_gcm_enc_round (r, rk[6], 4);
+ aes_gcm_enc_round (r, rk[7], 4);
/* GHASH multiply block 3 */
- ghash_mul_next (gd, aesni_gcm_bswap (d[3]), kd->Hi[4]);
+ ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]);
/* AES rounds 8 and 9 */
- aesni_gcm_enc_round (r, rk[8], 4);
- aesni_gcm_enc_round (r, rk[9], 4);
+ aes_gcm_enc_round (r, rk[8], 4);
+ aes_gcm_enc_round (r, rk[9], 4);
/* load 4 blocks of data - encrypt round */
if (is_encrypt)
- aesni_gcm_load (d, inv, 4, 0);
+ aes_gcm_load (d, inv, 4, 0);
/* AES last round(s) */
- aesni_gcm_enc_last_round (r, d, rk, rounds, 4);
+ aes_gcm_enc_last_round (r, d, rk, rounds, 4);
/* store 4 blocks of data */
- aesni_gcm_store (d, outv, 4, 0);
+ aes_gcm_store (d, outv, 4, 0);
/* load next 4 blocks of data - decrypt round */
if (is_encrypt == 0)
- aesni_gcm_load (d, inv + 4, 4, 0);
+ aes_gcm_load (d, inv + 4, 4, 0);
/* GHASH multiply block 4 */
- ghash_mul_next (gd, aesni_gcm_bswap (d[0]), kd->Hi[3]);
+ ghash_mul_next (gd, u8x16_reflect (d[0]), Hi[4]);
/* AES rounds 0, 1 and 2 */
- aesni_gcm_enc_first_round (r, Y, ctr, rk[0], 4);
- aesni_gcm_enc_round (r, rk[1], 4);
- aesni_gcm_enc_round (r, rk[2], 4);
+ aes_gcm_enc_first_round (r, ctr, rk[0], 4);
+ aes_gcm_enc_round (r, rk[1], 4);
+ aes_gcm_enc_round (r, rk[2], 4);
/* GHASH multiply block 5 */
- ghash_mul_next (gd, aesni_gcm_bswap (d[1]), kd->Hi[2]);
+ ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[5]);
/* AES rounds 3 and 4 */
- aesni_gcm_enc_round (r, rk[3], 4);
- aesni_gcm_enc_round (r, rk[4], 4);
+ aes_gcm_enc_round (r, rk[3], 4);
+ aes_gcm_enc_round (r, rk[4], 4);
/* GHASH multiply block 6 */
- ghash_mul_next (gd, aesni_gcm_bswap (d[2]), kd->Hi[1]);
+ ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[6]);
/* AES rounds 5 and 6 */
- aesni_gcm_enc_round (r, rk[5], 4);
- aesni_gcm_enc_round (r, rk[6], 4);
+ aes_gcm_enc_round (r, rk[5], 4);
+ aes_gcm_enc_round (r, rk[6], 4);
/* GHASH multiply block 7 */
- ghash_mul_next (gd, aesni_gcm_bswap (d[3]), kd->Hi[0]);
+ ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[7]);
/* AES rounds 7 and 8 */
- aesni_gcm_enc_round (r, rk[7], 4);
- aesni_gcm_enc_round (r, rk[8], 4);
+ aes_gcm_enc_round (r, rk[7], 4);
+ aes_gcm_enc_round (r, rk[8], 4);
/* GHASH reduce 1st step */
ghash_reduce (gd);
/* AES round 9 */
- aesni_gcm_enc_round (r, rk[9], 4);
+ aes_gcm_enc_round (r, rk[9], 4);
/* load data - encrypt round */
if (is_encrypt)
- aesni_gcm_load (d, inv + 4, 4, 0);
+ aes_gcm_load (d, inv + 4, 4, 0);
/* GHASH reduce 2nd step */
ghash_reduce2 (gd);
/* AES last round(s) */
- aesni_gcm_enc_last_round (r, d, rk, rounds, 4);
+ aes_gcm_enc_last_round (r, d, rk, rounds, 4);
/* store data */
- aesni_gcm_store (d, outv + 4, 4, 0);
+ aes_gcm_store (d, outv + 4, 4, 0);
/* GHASH final step */
return ghash_final (gd);
}
static_always_inline u8x16
-aesni_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
- int n_blocks, int n_bytes)
+aes_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
+ int n_blocks, int n_bytes)
{
ghash_data_t _gd, *gd = &_gd;
+ u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks;
if (n_bytes)
d[n_blocks - 1] = aes_byte_mask (d[n_blocks - 1], n_bytes);
- ghash_mul_first (gd, aesni_gcm_bswap (d[0]) ^ T, kd->Hi[n_blocks - 1]);
+ ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]);
if (n_blocks > 1)
- ghash_mul_next (gd, aesni_gcm_bswap (d[1]), kd->Hi[n_blocks - 2]);
+ ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]);
if (n_blocks > 2)
- ghash_mul_next (gd, aesni_gcm_bswap (d[2]), kd->Hi[n_blocks - 3]);
+ ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]);
if (n_blocks > 3)
- ghash_mul_next (gd, aesni_gcm_bswap (d[3]), kd->Hi[n_blocks - 4]);
+ ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]);
ghash_reduce (gd);
ghash_reduce2 (gd);
return ghash_final (gd);
static_always_inline u8x16
-aesni_gcm_enc (u8x16 T, aes_gcm_key_data_t * kd, u32x4 Y, u8x16u * inv,
- u8x16u * outv, u32 n_left, int rounds)
+aes_gcm_enc (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr,
+ u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)
{
u8x16 d[4];
- u32 ctr = 1;
if (n_left == 0)
return T;
if (n_left > 48)
{
n_left &= 0x0f;
- aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 4, n_left,
- /* with_ghash */ 0, /* is_encrypt */ 1);
- return aesni_gcm_ghash_last (T, kd, d, 4, n_left);
+ aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left,
+ /* with_ghash */ 0, /* is_encrypt */ 1);
+ return aes_gcm_ghash_last (T, kd, d, 4, n_left);
}
else if (n_left > 32)
{
n_left &= 0x0f;
- aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 3, n_left,
- /* with_ghash */ 0, /* is_encrypt */ 1);
- return aesni_gcm_ghash_last (T, kd, d, 3, n_left);
+ aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left,
+ /* with_ghash */ 0, /* is_encrypt */ 1);
+ return aes_gcm_ghash_last (T, kd, d, 3, n_left);
}
else if (n_left > 16)
{
n_left &= 0x0f;
- aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 2, n_left,
- /* with_ghash */ 0, /* is_encrypt */ 1);
- return aesni_gcm_ghash_last (T, kd, d, 2, n_left);
+ aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left,
+ /* with_ghash */ 0, /* is_encrypt */ 1);
+ return aes_gcm_ghash_last (T, kd, d, 2, n_left);
}
else
{
n_left &= 0x0f;
- aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 1, n_left,
- /* with_ghash */ 0, /* is_encrypt */ 1);
- return aesni_gcm_ghash_last (T, kd, d, 1, n_left);
+ aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left,
+ /* with_ghash */ 0, /* is_encrypt */ 1);
+ return aes_gcm_ghash_last (T, kd, d, 1, n_left);
}
}
- aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 4, 0,
- /* with_ghash */ 0, /* is_encrypt */ 1);
+ aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0,
+ /* with_ghash */ 0, /* is_encrypt */ 1);
/* next */
n_left -= 64;
while (n_left >= 128)
{
- T = aesni_gcm_calc_double (T, kd, d, &Y, &ctr, inv, outv, rounds,
- /* is_encrypt */ 1);
+ T = aes_gcm_calc_double (T, kd, d, ctr, inv, outv, rounds,
+ /* is_encrypt */ 1);
/* next */
n_left -= 128;
if (n_left >= 64)
{
- T = aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 4, 0,
- /* with_ghash */ 1, /* is_encrypt */ 1);
+ T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0,
+ /* with_ghash */ 1, /* is_encrypt */ 1);
/* next */
n_left -= 64;
}
if (n_left == 0)
- return aesni_gcm_ghash_last (T, kd, d, 4, 0);
+ return aes_gcm_ghash_last (T, kd, d, 4, 0);
if (n_left > 48)
{
n_left &= 0x0f;
- T = aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 4, n_left,
- /* with_ghash */ 1, /* is_encrypt */ 1);
- return aesni_gcm_ghash_last (T, kd, d, 4, n_left);
+ T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left,
+ /* with_ghash */ 1, /* is_encrypt */ 1);
+ return aes_gcm_ghash_last (T, kd, d, 4, n_left);
}
if (n_left > 32)
{
n_left &= 0x0f;
- T = aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 3, n_left,
- /* with_ghash */ 1, /* is_encrypt */ 1);
- return aesni_gcm_ghash_last (T, kd, d, 3, n_left);
+ T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left,
+ /* with_ghash */ 1, /* is_encrypt */ 1);
+ return aes_gcm_ghash_last (T, kd, d, 3, n_left);
}
if (n_left > 16)
{
n_left &= 0x0f;
- T = aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 2, n_left,
- /* with_ghash */ 1, /* is_encrypt */ 1);
- return aesni_gcm_ghash_last (T, kd, d, 2, n_left);
+ T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left,
+ /* with_ghash */ 1, /* is_encrypt */ 1);
+ return aes_gcm_ghash_last (T, kd, d, 2, n_left);
}
n_left &= 0x0f;
- T = aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 1, n_left,
- /* with_ghash */ 1, /* is_encrypt */ 1);
- return aesni_gcm_ghash_last (T, kd, d, 1, n_left);
+ T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left,
+ /* with_ghash */ 1, /* is_encrypt */ 1);
+ return aes_gcm_ghash_last (T, kd, d, 1, n_left);
}
static_always_inline u8x16
-aesni_gcm_dec (u8x16 T, aes_gcm_key_data_t * kd, u32x4 Y, u8x16u * inv,
- u8x16u * outv, u32 n_left, int rounds)
+aes_gcm_dec (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr,
+ u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)
{
u8x16 d[8];
- u32 ctr = 1;
while (n_left >= 128)
{
- T = aesni_gcm_calc_double (T, kd, d, &Y, &ctr, inv, outv, rounds,
- /* is_encrypt */ 0);
+ T = aes_gcm_calc_double (T, kd, d, ctr, inv, outv, rounds,
+ /* is_encrypt */ 0);
/* next */
n_left -= 128;
if (n_left >= 64)
{
- T = aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 4, 0, 1, 0);
+ T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, 1, 0);
/* next */
n_left -= 64;
return T;
if (n_left > 48)
- return aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 4,
- n_left - 48,
- /* with_ghash */ 1, /* is_encrypt */ 0);
+ return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left - 48,
+ /* with_ghash */ 1, /* is_encrypt */ 0);
if (n_left > 32)
- return aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 3,
- n_left - 32,
- /* with_ghash */ 1, /* is_encrypt */ 0);
+ return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left - 32,
+ /* with_ghash */ 1, /* is_encrypt */ 0);
if (n_left > 16)
- return aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 2,
- n_left - 16,
- /* with_ghash */ 1, /* is_encrypt */ 0);
-
- return aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 1, n_left,
+ return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left - 16,
/* with_ghash */ 1, /* is_encrypt */ 0);
+
+ return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left,
+ /* with_ghash */ 1, /* is_encrypt */ 0);
}
static_always_inline int
u8x16 r, T = { };
u32x4 Y0;
ghash_data_t _gd, *gd = &_gd;
+ aes_gcm_counter_t _ctr, *ctr = &_ctr;
- _mm_prefetch (iv, _MM_HINT_T0);
- _mm_prefetch (in, _MM_HINT_T0);
- _mm_prefetch (in + CLIB_CACHE_LINE_BYTES, _MM_HINT_T0);
+ clib_prefetch_load (iv);
+ clib_prefetch_load (in);
+ clib_prefetch_load (in + 4);
/* calculate ghash for AAD - optimized for ipsec common cases */
if (aad_bytes == 8)
- T = aesni_gcm_ghash (T, kd, addt, 8);
+ T = aes_gcm_ghash (T, kd, addt, 8);
else if (aad_bytes == 12)
- T = aesni_gcm_ghash (T, kd, addt, 12);
+ T = aes_gcm_ghash (T, kd, addt, 12);
else
- T = aesni_gcm_ghash (T, kd, addt, aad_bytes);
+ T = aes_gcm_ghash (T, kd, addt, aad_bytes);
/* initialize counter */
- Y0 = (u32x4) aes_load_partial (iv, 12);
- Y0[3] = clib_host_to_net_u32 (1);
+ ctr->counter = 1;
+ Y0 = (u32x4) aes_load_partial (iv, 12) + ctr_inv_1;
+ ctr->Y = Y0 + ctr_inv_1;
/* ghash and encrypt/decrypt */
if (is_encrypt)
- T = aesni_gcm_enc (T, kd, Y0, in, out, data_bytes, aes_rounds);
+ T = aes_gcm_enc (T, kd, ctr, in, out, data_bytes, aes_rounds);
else
- T = aesni_gcm_dec (T, kd, Y0, in, out, data_bytes, aes_rounds);
+ T = aes_gcm_dec (T, kd, ctr, in, out, data_bytes, aes_rounds);
- _mm_prefetch (tag, _MM_HINT_T0);
+ clib_prefetch_load (tag);
/* Finalize ghash - data bytes and aad bytes converted to bits */
/* *INDENT-OFF* */
/* *INDENT-ON* */
/* interleaved computation of final ghash and E(Y0, k) */
- ghash_mul_first (gd, r ^ T, kd->Hi[0]);
+ ghash_mul_first (gd, r ^ T, kd->Hi[NUM_HI - 1]);
r = kd->Ke[0] ^ (u8x16) Y0;
for (i = 1; i < 5; i += 1)
r = aes_enc_round (r, kd->Ke[i]);
for (; i < aes_rounds; i += 1)
r = aes_enc_round (r, kd->Ke[i]);
r = aes_enc_last_round (r, kd->Ke[aes_rounds]);
- T = aesni_gcm_bswap (T) ^ r;
+ T = u8x16_reflect (T) ^ r;
/* tag_len 16 -> 0 */
tag_len &= 0xf;
{
/* store tag */
if (tag_len)
- aes_store_partial (tag, T, (1 << tag_len) - 1);
+ aes_store_partial (tag, T, tag_len);
else
tag[0] = T;
}
}
static_always_inline u32
-aesni_ops_enc_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[],
- u32 n_ops, aes_key_size_t ks)
+aes_ops_enc_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[],
+ u32 n_ops, aes_key_size_t ks)
{
crypto_native_main_t *cm = &crypto_native_main;
vnet_crypto_op_t *op = ops[0];
}
static_always_inline u32
-aesni_ops_dec_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[],
- u32 n_ops, aes_key_size_t ks)
+aes_ops_dec_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops,
+ aes_key_size_t ks)
{
crypto_native_main_t *cm = &crypto_native_main;
vnet_crypto_op_t *op = ops[0];
}
static_always_inline void *
-aesni_gcm_key_exp (vnet_crypto_key_t * key, aes_key_size_t ks)
+aes_gcm_key_exp (vnet_crypto_key_t * key, aes_key_size_t ks)
{
aes_gcm_key_data_t *kd;
u8x16 H;
/* pre-calculate H */
H = aes_encrypt_block (u8x16_splat (0), kd->Ke, ks);
- H = aesni_gcm_bswap (H);
- ghash_precompute (H, (u8x16 *) kd->Hi, 8);
+ H = u8x16_reflect (H);
+ ghash_precompute (H, (u8x16 *) kd->Hi, NUM_HI);
return kd;
}
/* Key sizes (in bits) for which handlers are generated; each entry expands
   the current definition of _(x) once. */
-#define foreach_aesni_gcm_handler_type _(128) _(192) _(256)
+#define foreach_aes_gcm_handler_type _(128) _(192) _(256)
/* For key size x, define three thin static wrappers that bind AES_KEY_##x
   as the key-size argument: the decrypt ops handler, the encrypt ops
   handler and the key expansion function. The rename drops the "ni" from
   the aesni_ prefix; the wrapper bodies are otherwise unchanged. */
#define _(x) \
-static u32 aesni_ops_dec_aes_gcm_##x \
+static u32 aes_ops_dec_aes_gcm_##x \
(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
-{ return aesni_ops_dec_aes_gcm (vm, ops, n_ops, AES_KEY_##x); } \
-static u32 aesni_ops_enc_aes_gcm_##x \
+{ return aes_ops_dec_aes_gcm (vm, ops, n_ops, AES_KEY_##x); } \
+static u32 aes_ops_enc_aes_gcm_##x \
(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
-{ return aesni_ops_enc_aes_gcm (vm, ops, n_ops, AES_KEY_##x); } \
-static void * aesni_gcm_key_exp_##x (vnet_crypto_key_t *key) \
-{ return aesni_gcm_key_exp (key, AES_KEY_##x); }
+{ return aes_ops_enc_aes_gcm (vm, ops, n_ops, AES_KEY_##x); } \
+static void * aes_gcm_key_exp_##x (vnet_crypto_key_t *key) \
+{ return aes_gcm_key_exp (key, AES_KEY_##x); }
/* instantiate the wrappers for 128, 192 and 256 bit keys */
-foreach_aesni_gcm_handler_type;
+foreach_aes_gcm_handler_type;
#undef _
clib_error_t *
crypto_native_aes_gcm_init_avx512 (vlib_main_t * vm)
#elif __AVX2__
crypto_native_aes_gcm_init_avx2 (vlib_main_t * vm)
+#elif __aarch64__
+crypto_native_aes_gcm_init_neon (vlib_main_t * vm)
#else
crypto_native_aes_gcm_init_sse42 (vlib_main_t * vm)
#endif
#define _(x) \
vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
VNET_CRYPTO_OP_AES_##x##_GCM_ENC, \
- aesni_ops_enc_aes_gcm_##x); \
+ aes_ops_enc_aes_gcm_##x); \
vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
VNET_CRYPTO_OP_AES_##x##_GCM_DEC, \
- aesni_ops_dec_aes_gcm_##x); \
- cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_GCM] = aesni_gcm_key_exp_##x;
- foreach_aesni_gcm_handler_type;
+ aes_ops_dec_aes_gcm_##x); \
+ cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_GCM] = aes_gcm_key_exp_##x;
+ foreach_aes_gcm_handler_type;
#undef _
return 0;
}