/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2023 Cisco Systems, Inc.
 */

#ifndef __crypto_aes_gcm_h__
#define __crypto_aes_gcm_h__

#include <vppinfra/clib.h>
#include <vppinfra/vector.h>
#include <vppinfra/cache.h>
#include <vppinfra/string.h>
#include <vppinfra/crypto/aes.h>
#include <vppinfra/crypto/ghash.h>
#if defined(__VAES__) && defined(__AVX512F__)
typedef u8x64 aes_data_t;
typedef u8x64u aes_ghash_t;
typedef u8x64u aes_mem_t;
typedef u32x16 aes_gcm_counter_t;
#define N 64
#define aes_gcm_load_partial(p, n) u8x64_load_partial ((u8 *) (p), n)
#define aes_gcm_store_partial(v, p, n) u8x64_store_partial (v, (u8 *) (p), n)
#define aes_gcm_splat(v) u8x64_splat (v)
#define aes_gcm_reflect(r) u8x64_reflect_u8x16 (r)
#define aes_gcm_ghash_reduce(c) ghash4_reduce (&(c)->gd)
#define aes_gcm_ghash_reduce2(c) ghash4_reduce2 (&(c)->gd)
#define aes_gcm_ghash_final(c) (c)->T = ghash4_final (&(c)->gd)
#elif defined(__VAES__)
typedef u8x32 aes_data_t;
typedef u8x32u aes_ghash_t;
typedef u8x32u aes_mem_t;
typedef u32x8 aes_gcm_counter_t;
#define N 32
#define aes_gcm_load_partial(p, n) u8x32_load_partial ((u8 *) (p), n)
#define aes_gcm_store_partial(v, p, n) u8x32_store_partial (v, (u8 *) (p), n)
#define aes_gcm_splat(v) u8x32_splat (v)
#define aes_gcm_reflect(r) u8x32_reflect_u8x16 (r)
#define aes_gcm_ghash_reduce(c) ghash2_reduce (&(c)->gd)
#define aes_gcm_ghash_reduce2(c) ghash2_reduce2 (&(c)->gd)
#define aes_gcm_ghash_final(c) (c)->T = ghash2_final (&(c)->gd)
#else
typedef u8x16 aes_data_t;
typedef u8x16 aes_ghash_t;
typedef u8x16u aes_mem_t;
typedef u32x4 aes_gcm_counter_t;
#define N 16
#define aes_gcm_load_partial(p, n) u8x16_load_partial ((u8 *) (p), n)
#define aes_gcm_store_partial(v, p, n) u8x16_store_partial (v, (u8 *) (p), n)
#define aes_gcm_splat(v) u8x16_splat (v)
#define aes_gcm_reflect(r) u8x16_reflect (r)
#define aes_gcm_ghash_reduce(c) ghash_reduce (&(c)->gd)
#define aes_gcm_ghash_reduce2(c) ghash_reduce2 (&(c)->gd)
#define aes_gcm_ghash_final(c) (c)->T = ghash_final (&(c)->gd)
#endif
#define N_LANES (N / 16)
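/* N is the vector register width in bytes (64 with VAES + AVX512, 32 with
   VAES only, 16 otherwise), so N_LANES is the number of 16-byte AES blocks
   carried per register: 4, 2 or 1 */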
typedef enum
{
  AES_GCM_OP_UNKNOWN = 0,
  AES_GCM_OP_ENCRYPT,
  AES_GCM_OP_DECRYPT,
  AES_GCM_OP_GMAC,
} aes_gcm_op_t;

typedef union
{
  u8x16 x1;
  u8x32 x2;
  u8x64 x4;
  u8x16 lanes[4];
} aes_gcm_expanded_key_t;
typedef struct
{
  /* pre-calculated hash key values */
  const u8x16 Hi[NUM_HI];
  /* extracted AES key */
  const aes_gcm_expanded_key_t Ke[AES_KEY_ROUNDS (AES_KEY_256) + 1];
} aes_gcm_key_data_t;

typedef struct
{
  aes_gcm_op_t operation;
  int last;
  int rounds;
  uword data_bytes;
  uword aad_bytes;

  u8x16 T;

  /* hash */
  const u8x16 *Hi;
  const aes_ghash_t *next_Hi;

  /* expanded keys */
  const aes_gcm_expanded_key_t *Ke;

  /* counter */
  u32 counter;
  u8x16 EY0;
  aes_gcm_counter_t Y;

  /* ghash */
  ghash_ctx_t gd;
} aes_gcm_ctx_t;
static_always_inline void
aes_gcm_ghash_mul_first (aes_gcm_ctx_t *ctx, aes_data_t data, u32 n_lanes)
{
  uword hash_offset = NUM_HI - n_lanes;
  ctx->next_Hi = (aes_ghash_t *) (ctx->Hi + hash_offset);
#if N_LANES == 4
  u8x64 tag4 = {};
  tag4 = u8x64_insert_u8x16 (tag4, ctx->T, 0);
  ghash4_mul_first (&ctx->gd, aes_gcm_reflect (data) ^ tag4, *ctx->next_Hi++);
#elif N_LANES == 2
  u8x32 tag2 = {};
  tag2 = u8x32_insert_lo (tag2, ctx->T);
  ghash2_mul_first (&ctx->gd, aes_gcm_reflect (data) ^ tag2, *ctx->next_Hi++);
#else
  ghash_mul_first (&ctx->gd, aes_gcm_reflect (data) ^ ctx->T, *ctx->next_Hi++);
#endif
}
static_always_inline void
aes_gcm_ghash_mul_next (aes_gcm_ctx_t *ctx, aes_data_t data)
{
#if N_LANES == 4
  ghash4_mul_next (&ctx->gd, aes_gcm_reflect (data), *ctx->next_Hi++);
#elif N_LANES == 2
  ghash2_mul_next (&ctx->gd, aes_gcm_reflect (data), *ctx->next_Hi++);
#else
  ghash_mul_next (&ctx->gd, aes_gcm_reflect (data), *ctx->next_Hi++);
#endif
}
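/* multiply in the final GHASH block defined by the GCM spec: the bit
   lengths of the data and of the AAD (hence the shift left by 3), using
   the last entry of the precomputed hash subkey table */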
static_always_inline void
aes_gcm_ghash_mul_bit_len (aes_gcm_ctx_t *ctx)
{
  u8x16 r = (u8x16) ((u64x2){ ctx->data_bytes, ctx->aad_bytes } << 3);
#if N_LANES == 4
  u8x64 h = u8x64_insert_u8x16 (u8x64_zero (), ctx->Hi[NUM_HI - 1], 0);
  u8x64 r4 = u8x64_insert_u8x16 (u8x64_zero (), r, 0);
  ghash4_mul_next (&ctx->gd, r4, h);
#elif N_LANES == 2
  u8x32 h = u8x32_insert_lo (u8x32_zero (), ctx->Hi[NUM_HI - 1]);
  u8x32 r2 = u8x32_insert_lo (u8x32_zero (), r);
  ghash2_mul_next (&ctx->gd, r2, h);
#else
  ghash_mul_next (&ctx->gd, r, ctx->Hi[NUM_HI - 1]);
#endif
}
static_always_inline void
aes_gcm_enc_ctr0_round (aes_gcm_ctx_t *ctx, int aes_round)
{
  if (aes_round == 0)
    ctx->EY0 ^= ctx->Ke[0].x1;
  else if (aes_round == ctx->rounds)
    ctx->EY0 = aes_enc_last_round (ctx->EY0, ctx->Ke[aes_round].x1);
  else
    ctx->EY0 = aes_enc_round (ctx->EY0, ctx->Ke[aes_round].x1);
}
static_always_inline void
aes_gcm_ghash (aes_gcm_ctx_t *ctx, u8 *data, u32 n_left)
{
  uword i;
  aes_data_t r = {};
  const aes_mem_t *d = (aes_mem_t *) data;

  for (; n_left >= 8 * N; n_left -= 8 * N, d += 8)
    {
      if (ctx->operation == AES_GCM_OP_GMAC && n_left == N * 8)
	{
	  aes_gcm_ghash_mul_first (ctx, d[0], 8 * N_LANES + 1);
	  for (i = 1; i < 8; i++)
	    aes_gcm_ghash_mul_next (ctx, d[i]);
	  aes_gcm_ghash_mul_bit_len (ctx);
	  aes_gcm_ghash_reduce (ctx);
	  aes_gcm_ghash_reduce2 (ctx);
	  aes_gcm_ghash_final (ctx);
	  goto done;
	}

      aes_gcm_ghash_mul_first (ctx, d[0], 8 * N_LANES);
      for (i = 1; i < 8; i++)
	aes_gcm_ghash_mul_next (ctx, d[i]);
      aes_gcm_ghash_reduce (ctx);
      aes_gcm_ghash_reduce2 (ctx);
      aes_gcm_ghash_final (ctx);
    }

  if (n_left)
    {
      int n_lanes = (n_left + 15) / 16;

      if (ctx->operation == AES_GCM_OP_GMAC)
	n_lanes++;

      if (n_left < N)
	{
	  clib_memcpy_fast (&r, d, n_left);
	  aes_gcm_ghash_mul_first (ctx, r, n_lanes);
	  goto reduce;
	}

      aes_gcm_ghash_mul_first (ctx, d[0], n_lanes);
      n_left -= N;
      i = 1;

      if (n_left >= 4 * N)
	{
	  aes_gcm_ghash_mul_next (ctx, d[i]);
	  aes_gcm_ghash_mul_next (ctx, d[i + 1]);
	  aes_gcm_ghash_mul_next (ctx, d[i + 2]);
	  aes_gcm_ghash_mul_next (ctx, d[i + 3]);
	  n_left -= 4 * N;
	  i += 4;
	}
      if (n_left >= 2 * N)
	{
	  aes_gcm_ghash_mul_next (ctx, d[i]);
	  aes_gcm_ghash_mul_next (ctx, d[i + 1]);
	  n_left -= 2 * N;
	  i += 2;
	}

      if (n_left >= N)
	{
	  aes_gcm_ghash_mul_next (ctx, d[i]);
	  n_left -= N;
	  i += 1;
	}

      if (n_left)
	{
	  clib_memcpy_fast (&r, d + i, n_left);
	  aes_gcm_ghash_mul_next (ctx, r);
	}

    reduce:
      if (ctx->operation == AES_GCM_OP_GMAC)
	aes_gcm_ghash_mul_bit_len (ctx);
      aes_gcm_ghash_reduce (ctx);
      aes_gcm_ghash_reduce2 (ctx);
      aes_gcm_ghash_final (ctx);
    }
  else if (ctx->operation == AES_GCM_OP_GMAC)
    {
      u8x16 r = (u8x16) ((u64x2){ ctx->data_bytes, ctx->aad_bytes } << 3);
      ctx->T = ghash_mul (r ^ ctx->T, ctx->Hi[NUM_HI - 1]);
    }

done:
  /* encrypt counter 0 E(Y0, k) */
  if (ctx->operation == AES_GCM_OP_GMAC)
    for (int i = 0; i < ctx->rounds + 1; i += 1)
      aes_gcm_enc_ctr0_round (ctx, i);
}
static_always_inline void
aes_gcm_enc_first_round (aes_gcm_ctx_t *ctx, aes_data_t *r, uword n_blocks)
{
  const aes_gcm_expanded_key_t Ke0 = ctx->Ke[0];
  uword i = 0;

#if N_LANES == 4
  const u32x16 ctr_inv_4444 = { 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24,
				0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24 };

  const u32x16 ctr_4444 = {
    4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0,
  };

  /* As the counter is stored in network byte order for performance reasons,
     only its least significant byte is incremented, except in the case
     where it would overflow. As four 512-bit registers are processed in
     parallel except in the last round, overflow can happen only when
     n == 4 */
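  /* when n_blocks == 4 the counter has only ever been advanced in steps of
     16 (four u8x64 registers holding four counters each) from its initial
     value of 2, so its low byte stays congruent to 2 mod 16; 242 is the
     only such value from which adding 16 wraps past 255, hence the single
     equality check below */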
  if (n_blocks == 4)
    for (; i < 2; i++)
      {
	r[i] = Ke0.x4 ^ (u8x64) ctx->Y;
	ctx->Y += ctr_inv_4444;
      }

  if (n_blocks == 4 && PREDICT_FALSE ((u8) ctx->counter == 242))
    {
      u32x16 Yr = (u32x16) aes_gcm_reflect ((u8x64) ctx->Y);

      for (; i < n_blocks; i++)
	{
	  r[i] = Ke0.x4 ^ (u8x64) ctx->Y;
	  Yr += ctr_4444;
	  ctx->Y = (u32x16) aes_gcm_reflect ((u8x64) Yr);
	}
    }
  else
    {
      for (; i < n_blocks; i++)
	{
	  r[i] = Ke0.x4 ^ (u8x64) ctx->Y;
	  ctx->Y += ctr_inv_4444;
	}
    }
  ctx->counter += n_blocks * 4;
#elif N_LANES == 2
  const u32x8 ctr_inv_22 = { 0, 0, 0, 2 << 24, 0, 0, 0, 2 << 24 };
  const u32x8 ctr_22 = { 2, 0, 0, 0, 2, 0, 0, 0 };

  /* As the counter is stored in network byte order for performance reasons,
     only its least significant byte is incremented, except in the case
     where it would overflow. As four 256-bit registers are processed in
     parallel except in the last round, overflow can happen only when
     n == 4 */
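  /* analogous to the 512-bit path: here the counter advances in steps of 8
     (four u8x32 registers, two counters each), so its low byte stays
     congruent to 2 mod 8; 250 is the only such value from which adding 8
     wraps past 255 */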
  if (n_blocks == 4)
    for (; i < 2; i++)
      {
	r[i] = Ke0.x2 ^ (u8x32) ctx->Y;
	ctx->Y += ctr_inv_22;
      }

  if (n_blocks == 4 && PREDICT_FALSE ((u8) ctx->counter == 250))
    {
      u32x8 Yr = (u32x8) aes_gcm_reflect ((u8x32) ctx->Y);

      for (; i < n_blocks; i++)
	{
	  r[i] = Ke0.x2 ^ (u8x32) ctx->Y;
	  Yr += ctr_22;
	  ctx->Y = (u32x8) aes_gcm_reflect ((u8x32) Yr);
	}
    }
  else
    {
      for (; i < n_blocks; i++)
	{
	  r[i] = Ke0.x2 ^ (u8x32) ctx->Y;
	  ctx->Y += ctr_inv_22;
	}
    }
  ctx->counter += n_blocks * 2;
#else
  const u32x4 ctr_inv_1 = { 0, 0, 0, 1 << 24 };
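  /* the counter occupies the last u32 lane of Y in big-endian form; adding
     1 << 24 to that little-endian lane increments its top byte, which is
     the least significant byte of the big-endian counter */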
  if (PREDICT_TRUE ((u8) ctx->counter < 0xfe) || n_blocks < 3)
    {
      for (; i < n_blocks; i++)
	{
	  r[i] = Ke0.x1 ^ (u8x16) ctx->Y;
	  ctx->Y += ctr_inv_1;
	}
      ctx->counter += n_blocks;
    }
  else
    {
      r[i++] = Ke0.x1 ^ (u8x16) ctx->Y;
      ctx->Y += ctr_inv_1;
      ctx->counter += 1;

      for (; i < n_blocks; i++)
	{
	  r[i] = Ke0.x1 ^ (u8x16) ctx->Y;
	  ctx->counter++;
	  ctx->Y[3] = clib_host_to_net_u32 (ctx->counter);
	}
    }
#endif
}
static_always_inline void
aes_gcm_enc_round (aes_data_t *r, const aes_gcm_expanded_key_t *Ke,
		   uword n_blocks)
{
  for (int i = 0; i < n_blocks; i++)
#if N_LANES == 4
    r[i] = aes_enc_round_x4 (r[i], Ke->x4);
#elif N_LANES == 2
    r[i] = aes_enc_round_x2 (r[i], Ke->x2);
#else
    r[i] = aes_enc_round (r[i], Ke->x1);
#endif
}
static_always_inline void
aes_gcm_enc_last_round (aes_gcm_ctx_t *ctx, aes_data_t *r, aes_data_t *d,
			const aes_gcm_expanded_key_t *Ke, uword n_blocks)
{
  /* additional rounds for AES-192 and AES-256 */
  for (int i = 10; i < ctx->rounds; i++)
    aes_gcm_enc_round (r, Ke + i, n_blocks);

  for (int i = 0; i < n_blocks; i++)
#if N_LANES == 4
    d[i] ^= aes_enc_last_round_x4 (r[i], Ke[ctx->rounds].x4);
#elif N_LANES == 2
    d[i] ^= aes_enc_last_round_x2 (r[i], Ke[ctx->rounds].x2);
#else
    d[i] ^= aes_enc_last_round (r[i], Ke[ctx->rounds].x1);
#endif
}
static_always_inline void
aes_gcm_calc (aes_gcm_ctx_t *ctx, aes_data_t *d, const u8 *src, u8 *dst, u32 n,
	      u32 n_bytes, int with_ghash)
{
  const aes_gcm_expanded_key_t *k = ctx->Ke;
  const aes_mem_t *sv = (aes_mem_t *) src;
  aes_mem_t *dv = (aes_mem_t *) dst;
  uword ghash_blocks, gc = 1;
  aes_data_t r[4];
  u32 i, n_lanes;

  if (ctx->operation == AES_GCM_OP_ENCRYPT)
    {
      ghash_blocks = 4;
      n_lanes = N_LANES * 4;
    }
  else
    {
      ghash_blocks = n;
      n_lanes = n * N_LANES;
#if N_LANES == 1
      if (ctx->last)
	n_lanes = (n_bytes + 15) / 16;
#endif
    }

  n_bytes -= (n - 1) * N;

  /* AES rounds 0 and 1 */
  aes_gcm_enc_first_round (ctx, r, n);
  aes_gcm_enc_round (r, k + 1, n);

  /* load data - decrypt round */
  if (ctx->operation == AES_GCM_OP_DECRYPT)
    {
      for (i = 0; i < n - ctx->last; i++)
	d[i] = sv[i];

      if (ctx->last)
	d[n - 1] = aes_gcm_load_partial ((u8 *) (sv + n - 1), n_bytes);
    }

  /* GHASH multiply block 0 */
  if (with_ghash)
    aes_gcm_ghash_mul_first (ctx, d[0], n_lanes);

  /* AES rounds 2 and 3 */
  aes_gcm_enc_round (r, k + 2, n);
  aes_gcm_enc_round (r, k + 3, n);

  /* GHASH multiply block 1 */
  if (with_ghash && gc++ < ghash_blocks)
    aes_gcm_ghash_mul_next (ctx, (d[1]));

  /* AES rounds 4 and 5 */
  aes_gcm_enc_round (r, k + 4, n);
  aes_gcm_enc_round (r, k + 5, n);

  /* GHASH multiply block 2 */
  if (with_ghash && gc++ < ghash_blocks)
    aes_gcm_ghash_mul_next (ctx, (d[2]));

  /* AES rounds 6 and 7 */
  aes_gcm_enc_round (r, k + 6, n);
  aes_gcm_enc_round (r, k + 7, n);

  /* GHASH multiply block 3 */
  if (with_ghash && gc++ < ghash_blocks)
    aes_gcm_ghash_mul_next (ctx, (d[3]));

  /* load data - encrypt round */
  if (ctx->operation == AES_GCM_OP_ENCRYPT)
    {
      for (i = 0; i < n - ctx->last; i++)
	d[i] = sv[i];

      if (ctx->last)
	d[n - 1] = aes_gcm_load_partial (sv + n - 1, n_bytes);
    }

  /* AES rounds 8 and 9 */
  aes_gcm_enc_round (r, k + 8, n);
  aes_gcm_enc_round (r, k + 9, n);

  /* AES last round(s) */
  aes_gcm_enc_last_round (ctx, r, d, k, n);

  /* store data */
  for (i = 0; i < n - ctx->last; i++)
    dv[i] = d[i];

  if (ctx->last)
    aes_gcm_store_partial (d[n - 1], dv + n - 1, n_bytes);

  /* GHASH reduce 1st step */
  aes_gcm_ghash_reduce (ctx);

  /* GHASH reduce 2nd step */
  if (with_ghash)
    aes_gcm_ghash_reduce2 (ctx);

  /* GHASH final step */
  if (with_ghash)
    aes_gcm_ghash_final (ctx);
}
static_always_inline void
aes_gcm_calc_double (aes_gcm_ctx_t *ctx, aes_data_t *d, const u8 *src, u8 *dst,
		     int with_ghash)
{
  const aes_gcm_expanded_key_t *k = ctx->Ke;
  const aes_mem_t *sv = (aes_mem_t *) src;
  aes_mem_t *dv = (aes_mem_t *) dst;
  aes_data_t r[4];

  /* AES rounds 0 and 1 */
  aes_gcm_enc_first_round (ctx, r, 4);
  aes_gcm_enc_round (r, k + 1, 4);

  /* load 4 blocks of data - decrypt round */
  if (ctx->operation == AES_GCM_OP_DECRYPT)
    for (int i = 0; i < 4; i++)
      d[i] = sv[i];

  /* GHASH multiply block 0 */
  aes_gcm_ghash_mul_first (ctx, d[0], N_LANES * 8);

  /* AES rounds 2 and 3 */
  aes_gcm_enc_round (r, k + 2, 4);
  aes_gcm_enc_round (r, k + 3, 4);

  /* GHASH multiply block 1 */
  aes_gcm_ghash_mul_next (ctx, (d[1]));

  /* AES rounds 4 and 5 */
  aes_gcm_enc_round (r, k + 4, 4);
  aes_gcm_enc_round (r, k + 5, 4);

  /* GHASH multiply block 2 */
  aes_gcm_ghash_mul_next (ctx, (d[2]));

  /* AES rounds 6 and 7 */
  aes_gcm_enc_round (r, k + 6, 4);
  aes_gcm_enc_round (r, k + 7, 4);

  /* GHASH multiply block 3 */
  aes_gcm_ghash_mul_next (ctx, (d[3]));

  /* AES rounds 8 and 9 */
  aes_gcm_enc_round (r, k + 8, 4);
  aes_gcm_enc_round (r, k + 9, 4);

  /* load 4 blocks of data - encrypt round */
  if (ctx->operation == AES_GCM_OP_ENCRYPT)
    for (int i = 0; i < 4; i++)
      d[i] = sv[i];

  /* AES last round(s) */
  aes_gcm_enc_last_round (ctx, r, d, k, 4);

  /* store 4 blocks of data */
  for (int i = 0; i < 4; i++)
    dv[i] = d[i];

  /* load next 4 blocks of data - decrypt round */
  if (ctx->operation == AES_GCM_OP_DECRYPT)
    for (int i = 0; i < 4; i++)
      d[i] = sv[i + 4];

  /* GHASH multiply block 4 */
  aes_gcm_ghash_mul_next (ctx, (d[0]));

  /* AES rounds 0 and 1 */
  aes_gcm_enc_first_round (ctx, r, 4);
  aes_gcm_enc_round (r, k + 1, 4);

  /* GHASH multiply block 5 */
  aes_gcm_ghash_mul_next (ctx, (d[1]));

  /* AES rounds 2 and 3 */
  aes_gcm_enc_round (r, k + 2, 4);
  aes_gcm_enc_round (r, k + 3, 4);

  /* GHASH multiply block 6 */
  aes_gcm_ghash_mul_next (ctx, (d[2]));

  /* AES rounds 4 and 5 */
  aes_gcm_enc_round (r, k + 4, 4);
  aes_gcm_enc_round (r, k + 5, 4);

  /* GHASH multiply block 7 */
  aes_gcm_ghash_mul_next (ctx, (d[3]));

  /* AES rounds 6 and 7 */
  aes_gcm_enc_round (r, k + 6, 4);
  aes_gcm_enc_round (r, k + 7, 4);

  /* GHASH reduce 1st step */
  aes_gcm_ghash_reduce (ctx);

  /* AES rounds 8 and 9 */
  aes_gcm_enc_round (r, k + 8, 4);
  aes_gcm_enc_round (r, k + 9, 4);

  /* GHASH reduce 2nd step */
  aes_gcm_ghash_reduce2 (ctx);

  /* load next 4 blocks of data - encrypt round */
  if (ctx->operation == AES_GCM_OP_ENCRYPT)
    for (int i = 0; i < 4; i++)
      d[i] = sv[i + 4];

  /* AES last round(s) */
  aes_gcm_enc_last_round (ctx, r, d, k, 4);

  /* store next 4 blocks of data */
  for (int i = 0; i < 4; i++)
    dv[i + 4] = d[i];

  /* GHASH final step */
  aes_gcm_ghash_final (ctx);
}
static_always_inline void
aes_gcm_mask_bytes (aes_data_t *d, uword n_bytes)
{
  const union
  {
    u8 b[64];
    aes_data_t r;
  } scale = {
    .b = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
	   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
	   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
	   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 },
  };
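  /* a bytewise compare of splat (n_bytes) against the 0..63 index vector
     yields 0xff exactly in the positions below n_bytes; e.g. n_bytes == 3
     keeps bytes 0..2 of the last block and zeroes the rest */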
  d[0] &= (aes_gcm_splat (n_bytes) > scale.r);
}
static_always_inline void
aes_gcm_calc_last (aes_gcm_ctx_t *ctx, aes_data_t *d, int n_blocks,
		   u32 n_bytes)
{
  int n_lanes = (N_LANES == 1 ? n_blocks : (n_bytes + 15) / 16) + 1;
  n_bytes -= (n_blocks - 1) * N;
  int i;

  aes_gcm_enc_ctr0_round (ctx, 0);
  aes_gcm_enc_ctr0_round (ctx, 1);

  if (n_bytes != N)
    aes_gcm_mask_bytes (d + n_blocks - 1, n_bytes);

  aes_gcm_ghash_mul_first (ctx, d[0], n_lanes);

  aes_gcm_enc_ctr0_round (ctx, 2);
  aes_gcm_enc_ctr0_round (ctx, 3);

  if (n_blocks > 1)
    aes_gcm_ghash_mul_next (ctx, d[1]);

  aes_gcm_enc_ctr0_round (ctx, 4);
  aes_gcm_enc_ctr0_round (ctx, 5);

  if (n_blocks > 2)
    aes_gcm_ghash_mul_next (ctx, d[2]);

  aes_gcm_enc_ctr0_round (ctx, 6);
  aes_gcm_enc_ctr0_round (ctx, 7);

  if (n_blocks > 3)
    aes_gcm_ghash_mul_next (ctx, d[3]);

  aes_gcm_enc_ctr0_round (ctx, 8);
  aes_gcm_enc_ctr0_round (ctx, 9);

  aes_gcm_ghash_mul_bit_len (ctx);
  aes_gcm_ghash_reduce (ctx);

  for (i = 10; i < ctx->rounds; i++)
    aes_gcm_enc_ctr0_round (ctx, i);

  aes_gcm_ghash_reduce2 (ctx);

  aes_gcm_ghash_final (ctx);

  aes_gcm_enc_ctr0_round (ctx, i);
}
static_always_inline void
aes_gcm_enc (aes_gcm_ctx_t *ctx, const u8 *src, u8 *dst, u32 n_left)
{
  aes_data_t d[4];

  if (PREDICT_FALSE (n_left == 0))
    {
      int i;
      for (i = 0; i < ctx->rounds + 1; i++)
	aes_gcm_enc_ctr0_round (ctx, i);
      return;
    }

  if (n_left < 4 * N)
    {
      ctx->last = 1;
      if (n_left > 3 * N)
	{
	  aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 0);
	  aes_gcm_calc_last (ctx, d, 4, n_left);
	}
      else if (n_left > 2 * N)
	{
	  aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 0);
	  aes_gcm_calc_last (ctx, d, 3, n_left);
	}
      else if (n_left > N)
	{
	  aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 0);
	  aes_gcm_calc_last (ctx, d, 2, n_left);
	}
      else
	{
	  aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 0);
	  aes_gcm_calc_last (ctx, d, 1, n_left);
	}
      return;
    }

  aes_gcm_calc (ctx, d, src, dst, 4, 4 * N, /* with_ghash */ 0);

  /* next */
  n_left -= 4 * N;
  dst += 4 * N;
  src += 4 * N;

  for (; n_left >= 8 * N; n_left -= 8 * N, src += 8 * N, dst += 8 * N)
    aes_gcm_calc_double (ctx, d, src, dst, /* with_ghash */ 1);

  if (n_left >= 4 * N)
    {
      aes_gcm_calc (ctx, d, src, dst, 4, 4 * N, /* with_ghash */ 1);

      /* next */
      n_left -= 4 * N;
      dst += 4 * N;
      src += 4 * N;
    }

  if (n_left == 0)
    {
      aes_gcm_calc_last (ctx, d, 4, 4 * N);
      return;
    }

  ctx->last = 1;

  if (n_left > 3 * N)
    {
      aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 1);
      aes_gcm_calc_last (ctx, d, 4, n_left);
    }
  else if (n_left > 2 * N)
    {
      aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 1);
      aes_gcm_calc_last (ctx, d, 3, n_left);
    }
  else if (n_left > N)
    {
      aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 1);
      aes_gcm_calc_last (ctx, d, 2, n_left);
    }
  else
    {
      aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 1);
      aes_gcm_calc_last (ctx, d, 1, n_left);
    }
}
static_always_inline void
aes_gcm_dec (aes_gcm_ctx_t *ctx, const u8 *src, u8 *dst, uword n_left)
{
  aes_data_t d[4] = {};
  for (; n_left >= 8 * N; n_left -= 8 * N, dst += 8 * N, src += 8 * N)
    aes_gcm_calc_double (ctx, d, src, dst, /* with_ghash */ 1);

  if (n_left >= 4 * N)
    {
      aes_gcm_calc (ctx, d, src, dst, 4, 4 * N, /* with_ghash */ 1);

      /* next */
      n_left -= 4 * N;
      dst += 4 * N;
      src += 4 * N;
    }

  if (n_left)
    {
      ctx->last = 1;

      if (n_left > 3 * N)
	aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 1);
      else if (n_left > 2 * N)
	aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 1);
      else if (n_left > N)
	aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 1);
      else
	aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 1);
    }

  u8x16 r;
  r = (u8x16) ((u64x2){ ctx->data_bytes, ctx->aad_bytes } << 3);
  ctx->T = ghash_mul (r ^ ctx->T, ctx->Hi[NUM_HI - 1]);

  /* encrypt counter 0 E(Y0, k) */
  for (int i = 0; i < ctx->rounds + 1; i += 1)
    aes_gcm_enc_ctr0_round (ctx, i);
}
static_always_inline int
aes_gcm (const u8 *src, u8 *dst, const u8 *aad, u8 *ivp, u8 *tag,
	 u32 data_bytes, u32 aad_bytes, u8 tag_len,
	 const aes_gcm_key_data_t *kd, int aes_rounds, aes_gcm_op_t op)
{
  u8 *addt = (u8 *) aad;
  u32x4 Y0;

  aes_gcm_ctx_t _ctx = { .counter = 2,
			 .rounds = aes_rounds,
			 .operation = op,
			 .data_bytes = data_bytes,
			 .aad_bytes = aad_bytes,
			 .Ke = kd->Ke,
			 .Hi = kd->Hi },
		*ctx = &_ctx;

  /* initialize counter */
  Y0 = (u32x4) (u64x2){ *(u64u *) ivp, 0 };
  Y0[2] = *(u32u *) (ivp + 8);
  Y0[3] = 1 << 24;
  ctx->EY0 = (u8x16) Y0;

#if N_LANES == 4
  ctx->Y = u32x16_splat_u32x4 (Y0) + (u32x16){
    0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24, 0, 0, 0, 3 << 24, 0, 0, 0, 4 << 24,
  };
#elif N_LANES == 2
  ctx->Y =
    u32x8_splat_u32x4 (Y0) + (u32x8){ 0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24 };
#else
  ctx->Y = Y0 + (u32x4){ 0, 0, 0, 1 << 24 };
#endif

  /* calculate ghash for AAD */
  aes_gcm_ghash (ctx, addt, aad_bytes);

  clib_prefetch_load (tag);

  /* ghash and encrypt/decrypt */
  if (op == AES_GCM_OP_ENCRYPT)
    aes_gcm_enc (ctx, src, dst, data_bytes);
  else if (op == AES_GCM_OP_DECRYPT)
    aes_gcm_dec (ctx, src, dst, data_bytes);

  /* final tag */
  ctx->T = u8x16_reflect (ctx->T) ^ ctx->EY0;

  /* tag_len 16 -> 0 */
  tag_len &= 0xf;

  if (op == AES_GCM_OP_ENCRYPT || op == AES_GCM_OP_GMAC)
    {
      /* store tag */
      if (tag_len)
	u8x16_store_partial (ctx->T, tag, tag_len);
      else
	((u8x16u *) tag)[0] = ctx->T;
    }
  else
    {
      /* check tag */
      if (tag_len)
	{
	  u16 mask = pow2_mask (tag_len);
	  u8x16 expected = u8x16_load_partial (tag, tag_len);
	  if ((u8x16_msb_mask (expected == ctx->T) & mask) == mask)
	    return 1;
	}
      else
	{
	  if (u8x16_is_equal (ctx->T, *(u8x16u *) tag))
	    return 1;
	}
    }
  return 0;
}
static_always_inline void
clib_aes_gcm_key_expand (aes_gcm_key_data_t *kd, const u8 *key,
			 aes_key_size_t ks)
{
  u8x16 H;
  u8x16 ek[AES_KEY_ROUNDS (AES_KEY_256) + 1];
  aes_gcm_expanded_key_t *Ke = (aes_gcm_expanded_key_t *) kd->Ke;

  /* expand AES key */
  aes_key_expand (ek, key, ks);
  for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
    Ke[i].lanes[0] = Ke[i].lanes[1] = Ke[i].lanes[2] = Ke[i].lanes[3] = ek[i];

  /* pre-calculate H */
  H = aes_encrypt_block (u8x16_zero (), ek, ks);
  H = u8x16_reflect (H);
  ghash_precompute (H, (u8x16 *) kd->Hi, ARRAY_LEN (kd->Hi));
}
static_always_inline void
clib_aes128_gcm_enc (const aes_gcm_key_data_t *kd, const u8 *plaintext,
		     u32 data_bytes, const u8 *aad, u32 aad_bytes,
		     const u8 *iv, u32 tag_bytes, u8 *cyphertext, u8 *tag)
{
  aes_gcm (plaintext, cyphertext, aad, (u8 *) iv, tag, data_bytes, aad_bytes,
	   tag_bytes, kd, AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_ENCRYPT);
}
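/*
 * Minimal usage sketch (illustrative only; the key, IV and buffer names
 * below are hypothetical). The IV is expected to be 12 bytes (a 96-bit GCM
 * nonce) and the tag up to 16 bytes:
 *
 *   aes_gcm_key_data_t kd;
 *   u8 key[16] = { 0 };				(hypothetical key bytes)
 *   u8 iv[12] = { 0 };					(hypothetical nonce)
 *   u8 pt[64] = { 0 }, ct[64], tag[16];
 *
 *   clib_aes_gcm_key_expand (&kd, key, AES_KEY_128);
 *   clib_aes128_gcm_enc (&kd, pt, sizeof (pt), 0, 0, iv, sizeof (tag), ct,
 *			  tag);
 */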
static_always_inline void
clib_aes256_gcm_enc (const aes_gcm_key_data_t *kd, const u8 *plaintext,
		     u32 data_bytes, const u8 *aad, u32 aad_bytes,
		     const u8 *iv, u32 tag_bytes, u8 *cyphertext, u8 *tag)
{
  aes_gcm (plaintext, cyphertext, aad, (u8 *) iv, tag, data_bytes, aad_bytes,
	   tag_bytes, kd, AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_ENCRYPT);
}
static_always_inline int
clib_aes128_gcm_dec (const aes_gcm_key_data_t *kd, const u8 *cyphertext,
		     u32 data_bytes, const u8 *aad, u32 aad_bytes,
		     const u8 *iv, const u8 *tag, u32 tag_bytes, u8 *plaintext)
{
  return aes_gcm (cyphertext, plaintext, aad, (u8 *) iv, (u8 *) tag,
		  data_bytes, aad_bytes, tag_bytes, kd,
		  AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_DECRYPT);
}
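/*
 * Companion sketch to the encryption example above (same hypothetical
 * buffers): the return value is nonzero only when the computed tag matches
 * the supplied one, so it must be checked before the recovered plaintext
 * is used:
 *
 *   if (!clib_aes128_gcm_dec (&kd, ct, sizeof (ct), 0, 0, iv, tag,
 *			       sizeof (tag), pt))
 *     return -1;				(authentication failure)
 */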
static_always_inline int
clib_aes256_gcm_dec (const aes_gcm_key_data_t *kd, const u8 *cyphertext,
		     u32 data_bytes, const u8 *aad, u32 aad_bytes,
		     const u8 *iv, const u8 *tag, u32 tag_bytes, u8 *plaintext)
{
  return aes_gcm (cyphertext, plaintext, aad, (u8 *) iv, (u8 *) tag,
		  data_bytes, aad_bytes, tag_bytes, kd,
		  AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_DECRYPT);
}
static_always_inline void
clib_aes128_gmac (const aes_gcm_key_data_t *kd, const u8 *data, u32 data_bytes,
		  const u8 *iv, u32 tag_bytes, u8 *tag)
{
  aes_gcm (0, 0, data, (u8 *) iv, tag, 0, data_bytes, tag_bytes, kd,
	   AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_GMAC);
}
static_always_inline void
clib_aes256_gmac (const aes_gcm_key_data_t *kd, const u8 *data, u32 data_bytes,
		  const u8 *iv, u32 tag_bytes, u8 *tag)
{
  aes_gcm (0, 0, data, (u8 *) iv, tag, 0, data_bytes, tag_bytes, kd,
	   AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_GMAC);
}
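/*
 * GMAC-only usage sketch (authentication without encryption; hypothetical
 * buffers as in the examples above). The message is passed through the AAD
 * path, so no ciphertext is produced:
 *
 *   u8 msg[128] = { 0 }, gtag[16];
 *
 *   clib_aes_gcm_key_expand (&kd, key, AES_KEY_128);
 *   clib_aes128_gmac (&kd, msg, sizeof (msg), iv, sizeof (gtag), gtag);
 */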
#endif /* __crypto_aes_gcm_h__ */