/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2023 Cisco Systems, Inc.
 */

#ifndef __crypto_aes_gcm_h__
#define __crypto_aes_gcm_h__

#include <vppinfra/clib.h>
#include <vppinfra/vector.h>
#include <vppinfra/cache.h>
#include <vppinfra/string.h>
#include <vppinfra/crypto/aes.h>
#include <vppinfra/crypto/ghash.h>
#if defined(__VAES__) && defined(__AVX512F__)
typedef u8x64 aes_data_t;
typedef u8x64u aes_ghash_t;
typedef u8x64u aes_mem_t;
typedef u32x16 aes_gcm_counter_t;

#define aes_gcm_load_partial(p, n) u8x64_load_partial ((u8 *) (p), n)
#define aes_gcm_store_partial(v, p, n) u8x64_store_partial (v, (u8 *) (p), n)
#define aes_gcm_splat(v) u8x64_splat (v)
#define aes_gcm_reflect(r) u8x64_reflect_u8x16 (r)
#define aes_gcm_ghash_reduce(c) ghash4_reduce (&(c)->gd)
#define aes_gcm_ghash_reduce2(c) ghash4_reduce2 (&(c)->gd)
#define aes_gcm_ghash_final(c) (c)->T = ghash4_final (&(c)->gd)
#elif defined(__VAES__)
typedef u8x32 aes_data_t;
typedef u8x32u aes_ghash_t;
typedef u8x32u aes_mem_t;
typedef u32x8 aes_gcm_counter_t;

#define aes_gcm_load_partial(p, n) u8x32_load_partial ((u8 *) (p), n)
#define aes_gcm_store_partial(v, p, n) u8x32_store_partial (v, (u8 *) (p), n)
#define aes_gcm_splat(v) u8x32_splat (v)
#define aes_gcm_reflect(r) u8x32_reflect_u8x16 (r)
#define aes_gcm_ghash_reduce(c) ghash2_reduce (&(c)->gd)
#define aes_gcm_ghash_reduce2(c) ghash2_reduce2 (&(c)->gd)
#define aes_gcm_ghash_final(c) (c)->T = ghash2_final (&(c)->gd)
#else
typedef u8x16 aes_data_t;
typedef u8x16 aes_ghash_t;
typedef u8x16u aes_mem_t;
typedef u32x4 aes_gcm_counter_t;

#define aes_gcm_load_partial(p, n) u8x16_load_partial ((u8 *) (p), n)
#define aes_gcm_store_partial(v, p, n) u8x16_store_partial (v, (u8 *) (p), n)
#define aes_gcm_splat(v) u8x16_splat (v)
#define aes_gcm_reflect(r) u8x16_reflect (r)
#define aes_gcm_ghash_reduce(c) ghash_reduce (&(c)->gd)
#define aes_gcm_ghash_reduce2(c) ghash_reduce2 (&(c)->gd)
#define aes_gcm_ghash_final(c) (c)->T = ghash_final (&(c)->gd)
#endif
#define N_LANES (N / 16)
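
/* N (defined per branch above) is the data-vector width in bytes, so N_LANES
 * is the number of 128-bit AES/GHASH blocks carried in one vector register
 * (4 with AVX512 + VAES, 2 with VAES, otherwise 1). */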
AES_GCM_OP_UNKNOWN = 0,

aes_gcm_expanded_key_t;
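
/* Each expanded AES round key is kept broadcast across all vector lanes (the
 * x1 / x2 / x4 views used below), so a single XOR or AES round instruction
 * applies the round key to every block held in one vector register. */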
/* pre-calculated hash key values */
const u8x16 Hi[NUM_HI];
/* extracted AES key */
const aes_gcm_expanded_key_t Ke[AES_KEY_ROUNDS (AES_KEY_256) + 1];

aes_gcm_op_t operation;

const aes_ghash_t *next_Hi;

const aes_gcm_expanded_key_t *Ke;
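
/* Final GHASH block mandated by GCM: the AAD length and the data length,
 * each expressed in bits (hence the << 3), packed into a single 128-bit
 * block. */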
static_always_inline u8x16
aes_gcm_final_block (aes_gcm_ctx_t *ctx)
return (u8x16) ((u64x2){ ctx->data_bytes, ctx->aad_bytes } << 3);
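
/* aes_gcm_ghash_mul_first starts a GHASH accumulation over n_lanes 128-bit
 * blocks: next_Hi is pointed at Hi[NUM_HI - n_lanes] so the first block is
 * multiplied by the highest power of H that will be needed and the last by
 * H^1, letting all partial products be summed before a single reduction.
 * The running tag T is folded into the first block. */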
static_always_inline void
aes_gcm_ghash_mul_first (aes_gcm_ctx_t *ctx, aes_data_t data, u32 n_lanes)
uword hash_offset = NUM_HI - n_lanes;
ctx->next_Hi = (aes_ghash_t *) (ctx->Hi + hash_offset);
tag4 = u8x64_insert_u8x16 (tag4, ctx->T, 0);
ghash4_mul_first (&ctx->gd, aes_gcm_reflect (data) ^ tag4, *ctx->next_Hi++);
tag2 = u8x32_insert_lo (tag2, ctx->T);
ghash2_mul_first (&ctx->gd, aes_gcm_reflect (data) ^ tag2, *ctx->next_Hi++);
ghash_mul_first (&ctx->gd, aes_gcm_reflect (data) ^ ctx->T, *ctx->next_Hi++);
static_always_inline void
aes_gcm_ghash_mul_next (aes_gcm_ctx_t *ctx, aes_data_t data)
ghash4_mul_next (&ctx->gd, aes_gcm_reflect (data), *ctx->next_Hi++);
ghash2_mul_next (&ctx->gd, aes_gcm_reflect (data), *ctx->next_Hi++);
ghash_mul_next (&ctx->gd, aes_gcm_reflect (data), *ctx->next_Hi++);

static_always_inline void
aes_gcm_ghash_mul_final_block (aes_gcm_ctx_t *ctx)
u8x64 h = u8x64_insert_u8x16 (u8x64_zero (), ctx->Hi[NUM_HI - 1], 0);
u8x64 r4 = u8x64_insert_u8x16 (u8x64_zero (), aes_gcm_final_block (ctx), 0);
ghash4_mul_next (&ctx->gd, r4, h);
u8x32 h = u8x32_insert_lo (u8x32_zero (), ctx->Hi[NUM_HI - 1]);
u8x32 r2 = u8x32_insert_lo (u8x32_zero (), aes_gcm_final_block (ctx));
ghash2_mul_next (&ctx->gd, r2, h);
ghash_mul_next (&ctx->gd, aes_gcm_final_block (ctx), ctx->Hi[NUM_HI - 1]);
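
/* aes_gcm_enc_ctr0_round performs one AES round of E(K, Y0), the encryption
 * of counter block 0 that later masks the authentication tag, one round per
 * call so it can be interleaved with GHASH work. */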
static_always_inline void
aes_gcm_enc_ctr0_round (aes_gcm_ctx_t *ctx, int aes_round)
ctx->EY0 ^= ctx->Ke[0].x1;
else if (aes_round == ctx->rounds)
ctx->EY0 = aes_enc_last_round (ctx->EY0, ctx->Ke[aes_round].x1);
ctx->EY0 = aes_enc_round (ctx->EY0, ctx->Ke[aes_round].x1);
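
/* aes_gcm_ghash authenticates the AAD (and, in GMAC mode, the whole message)
 * in chunks of eight data vectors. When GMAC input ends exactly on a chunk
 * boundary the trailing length block is folded into the same reduction; in
 * GMAC mode E(K, Y0) is also computed here, since there is no encryption
 * pass to do it. */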
static_always_inline void
aes_gcm_ghash (aes_gcm_ctx_t *ctx, u8 *data, u32 n_left)
const aes_mem_t *d = (aes_mem_t *) data;
for (; n_left >= 8 * N; n_left -= 8 * N, d += 8)
if (ctx->operation == AES_GCM_OP_GMAC && n_left == N * 8)
aes_gcm_ghash_mul_first (ctx, d[0], 8 * N_LANES + 1);
for (i = 1; i < 8; i++)
aes_gcm_ghash_mul_next (ctx, d[i]);
aes_gcm_ghash_mul_final_block (ctx);
aes_gcm_ghash_reduce (ctx);
aes_gcm_ghash_reduce2 (ctx);
aes_gcm_ghash_final (ctx);
aes_gcm_ghash_mul_first (ctx, d[0], 8 * N_LANES);
for (i = 1; i < 8; i++)
aes_gcm_ghash_mul_next (ctx, d[i]);
aes_gcm_ghash_reduce (ctx);
aes_gcm_ghash_reduce2 (ctx);
aes_gcm_ghash_final (ctx);
int n_lanes = (n_left + 15) / 16;
if (ctx->operation == AES_GCM_OP_GMAC)
clib_memcpy_fast (&r, d, n_left);
aes_gcm_ghash_mul_first (ctx, r, n_lanes);
aes_gcm_ghash_mul_first (ctx, d[0], n_lanes);
aes_gcm_ghash_mul_next (ctx, d[i]);
aes_gcm_ghash_mul_next (ctx, d[i + 1]);
aes_gcm_ghash_mul_next (ctx, d[i + 2]);
aes_gcm_ghash_mul_next (ctx, d[i + 3]);
aes_gcm_ghash_mul_next (ctx, d[i]);
aes_gcm_ghash_mul_next (ctx, d[i + 1]);
aes_gcm_ghash_mul_next (ctx, d[i]);
clib_memcpy_fast (&r, d + i, n_left);
aes_gcm_ghash_mul_next (ctx, r);
if (ctx->operation == AES_GCM_OP_GMAC)
aes_gcm_ghash_mul_final_block (ctx);
aes_gcm_ghash_reduce (ctx);
aes_gcm_ghash_reduce2 (ctx);
aes_gcm_ghash_final (ctx);
else if (ctx->operation == AES_GCM_OP_GMAC)
ghash_mul (aes_gcm_final_block (ctx) ^ ctx->T, ctx->Hi[NUM_HI - 1]);
/* encrypt counter 0 E(Y0, k) */
if (ctx->operation == AES_GCM_OP_GMAC)
for (int i = 0; i < ctx->rounds + 1; i += 1)
aes_gcm_enc_ctr0_round (ctx, i);
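
/* aes_gcm_enc_first_round generates n_blocks fresh counter blocks, applies
 * the initial AddRoundKey (XOR with round key 0) and advances the counter
 * kept in ctx->Y. */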
static_always_inline void
aes_gcm_enc_first_round (aes_gcm_ctx_t *ctx, aes_data_t *r, uword n_blocks)
const aes_gcm_expanded_key_t Ke0 = ctx->Ke[0];
/* As the counter is stored in network byte order for performance reasons, we
   increment only its least significant byte, except when that byte would
   overflow. As we process four 128-, 256- or 512-bit vectors in parallel
   except in the last round, overflow can happen only when n_blocks == 4 */
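/* For example, in the 512-bit branch four blocks times four lanes advance the
   counter by 16, so the == 242 check below catches the point where the low
   byte would wrap (242 + 16 > 255); Y is then reflected to host byte order,
   incremented as a full 32-bit value and reflected back. */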
const u32x16 ctr_inv_4444 = { 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24,
0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24 };
const u32x16 ctr_4444 = {
4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0,
r[i] = Ke0.x4 ^ (u8x64) ctx->Y; /* Initial AES round */
ctx->Y += ctr_inv_4444;
if (n_blocks == 4 && PREDICT_FALSE ((u8) ctx->counter == 242))
u32x16 Yr = (u32x16) aes_gcm_reflect ((u8x64) ctx->Y);
for (; i < n_blocks; i++)
r[i] = Ke0.x4 ^ (u8x64) ctx->Y; /* Initial AES round */
ctx->Y = (u32x16) aes_gcm_reflect ((u8x64) Yr);
for (; i < n_blocks; i++)
r[i] = Ke0.x4 ^ (u8x64) ctx->Y; /* Initial AES round */
ctx->Y += ctr_inv_4444;
ctx->counter += n_blocks * 4;
const u32x8 ctr_inv_22 = { 0, 0, 0, 2 << 24, 0, 0, 0, 2 << 24 };
const u32x8 ctr_22 = { 2, 0, 0, 0, 2, 0, 0, 0 };
r[i] = Ke0.x2 ^ (u8x32) ctx->Y; /* Initial AES round */
ctx->Y += ctr_inv_22;
if (n_blocks == 4 && PREDICT_FALSE ((u8) ctx->counter == 250))
u32x8 Yr = (u32x8) aes_gcm_reflect ((u8x32) ctx->Y);
for (; i < n_blocks; i++)
r[i] = Ke0.x2 ^ (u8x32) ctx->Y; /* Initial AES round */
ctx->Y = (u32x8) aes_gcm_reflect ((u8x32) Yr);
for (; i < n_blocks; i++)
r[i] = Ke0.x2 ^ (u8x32) ctx->Y; /* Initial AES round */
ctx->Y += ctr_inv_22;
ctx->counter += n_blocks * 2;
const u32x4 ctr_inv_1 = { 0, 0, 0, 1 << 24 };
if (PREDICT_TRUE ((u8) ctx->counter < 0xfe) || n_blocks < 3)
for (; i < n_blocks; i++)
r[i] = Ke0.x1 ^ (u8x16) ctx->Y; /* Initial AES round */
ctx->counter += n_blocks;
r[i++] = Ke0.x1 ^ (u8x16) ctx->Y; /* Initial AES round */
for (; i < n_blocks; i++)
r[i] = Ke0.x1 ^ (u8x16) ctx->Y; /* Initial AES round */
ctx->Y[3] = clib_host_to_net_u32 (ctx->counter);
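
/* aes_gcm_enc_round applies one middle AES round to every block currently in
 * flight. */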
static_always_inline void
aes_gcm_enc_round (aes_data_t *r, const aes_gcm_expanded_key_t *Ke,
for (int i = 0; i < n_blocks; i++)
r[i] = aes_enc_round_x4 (r[i], Ke->x4);
r[i] = aes_enc_round_x2 (r[i], Ke->x2);
r[i] = aes_enc_round (r[i], Ke->x1);
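
/* aes_gcm_enc_last_round runs the extra rounds needed for AES-192/256 and the
 * final round, then XORs the resulting keystream into d[], turning the data
 * held there into ciphertext (encrypt) or plaintext (decrypt) in place. */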
static_always_inline void
aes_gcm_enc_last_round (aes_gcm_ctx_t *ctx, aes_data_t *r, aes_data_t *d,
const aes_gcm_expanded_key_t *Ke, uword n_blocks)
/* additional rounds for AES-192 and AES-256 */
for (int i = 10; i < ctx->rounds; i++)
aes_gcm_enc_round (r, Ke + i, n_blocks);
for (int i = 0; i < n_blocks; i++)
d[i] ^= aes_enc_last_round_x4 (r[i], Ke[ctx->rounds].x4);
d[i] ^= aes_enc_last_round_x2 (r[i], Ke[ctx->rounds].x2);
d[i] ^= aes_enc_last_round (r[i], Ke[ctx->rounds].x1);
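
/* aes_gcm_calc encrypts or decrypts up to four data vectors while
 * interleaving GHASH multiplies between pairs of AES rounds. For encryption
 * the GHASH input is the ciphertext produced by the previous call (still held
 * in d[]); for decryption it is the ciphertext loaded in this call, so the
 * hash needs no lag. */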
static_always_inline void
aes_gcm_calc (aes_gcm_ctx_t *ctx, aes_data_t *d, const u8 *src, u8 *dst, u32 n,
u32 n_bytes, int with_ghash)
const aes_gcm_expanded_key_t *k = ctx->Ke;
const aes_mem_t *sv = (aes_mem_t *) src;
aes_mem_t *dv = (aes_mem_t *) dst;
uword ghash_blocks, gc = 1;
if (ctx->operation == AES_GCM_OP_ENCRYPT)
n_lanes = N_LANES * 4;
n_lanes = n * N_LANES;
n_lanes = (n_bytes + 15) / 16;
n_bytes -= (n - 1) * N;
/* AES rounds 0 and 1 */
aes_gcm_enc_first_round (ctx, r, n);
aes_gcm_enc_round (r, k + 1, n);
/* load data - decrypt round */
if (ctx->operation == AES_GCM_OP_DECRYPT)
for (i = 0; i < n - ctx->last; i++)
d[n - 1] = aes_gcm_load_partial ((u8 *) (sv + n - 1), n_bytes);
/* GHASH multiply block 0 */
aes_gcm_ghash_mul_first (ctx, d[0], n_lanes);
/* AES rounds 2 and 3 */
aes_gcm_enc_round (r, k + 2, n);
aes_gcm_enc_round (r, k + 3, n);
/* GHASH multiply block 1 */
if (with_ghash && gc++ < ghash_blocks)
aes_gcm_ghash_mul_next (ctx, (d[1]));
/* AES rounds 4 and 5 */
aes_gcm_enc_round (r, k + 4, n);
aes_gcm_enc_round (r, k + 5, n);
/* GHASH multiply block 2 */
if (with_ghash && gc++ < ghash_blocks)
aes_gcm_ghash_mul_next (ctx, (d[2]));
/* AES rounds 6 and 7 */
aes_gcm_enc_round (r, k + 6, n);
aes_gcm_enc_round (r, k + 7, n);
/* GHASH multiply block 3 */
if (with_ghash && gc++ < ghash_blocks)
aes_gcm_ghash_mul_next (ctx, (d[3]));
/* load data - encrypt round */
if (ctx->operation == AES_GCM_OP_ENCRYPT)
for (i = 0; i < n - ctx->last; i++)
d[n - 1] = aes_gcm_load_partial (sv + n - 1, n_bytes);
/* AES rounds 8 and 9 */
aes_gcm_enc_round (r, k + 8, n);
aes_gcm_enc_round (r, k + 9, n);
/* AES last round(s) */
aes_gcm_enc_last_round (ctx, r, d, k, n);
for (i = 0; i < n - ctx->last; i++)
aes_gcm_store_partial (d[n - 1], dv + n - 1, n_bytes);
/* GHASH reduce 1st step */
aes_gcm_ghash_reduce (ctx);
/* GHASH reduce 2nd step */
aes_gcm_ghash_reduce2 (ctx);
/* GHASH final step */
aes_gcm_ghash_final (ctx);
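
/* aes_gcm_calc_double processes eight data vectors per call, overlapping the
 * GHASH multiplies of all eight blocks with the AES rounds of two batches of
 * four and sharing a single reduction at the end. */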
static_always_inline void
aes_gcm_calc_double (aes_gcm_ctx_t *ctx, aes_data_t *d, const u8 *src, u8 *dst)
const aes_gcm_expanded_key_t *k = ctx->Ke;
const aes_mem_t *sv = (aes_mem_t *) src;
aes_mem_t *dv = (aes_mem_t *) dst;
/* AES rounds 0 and 1 */
aes_gcm_enc_first_round (ctx, r, 4);
aes_gcm_enc_round (r, k + 1, 4);
/* load 4 blocks of data - decrypt round */
if (ctx->operation == AES_GCM_OP_DECRYPT)
for (int i = 0; i < 4; i++)
/* GHASH multiply block 0 */
aes_gcm_ghash_mul_first (ctx, d[0], N_LANES * 8);
/* AES rounds 2 and 3 */
aes_gcm_enc_round (r, k + 2, 4);
aes_gcm_enc_round (r, k + 3, 4);
/* GHASH multiply block 1 */
aes_gcm_ghash_mul_next (ctx, (d[1]));
/* AES rounds 4 and 5 */
aes_gcm_enc_round (r, k + 4, 4);
aes_gcm_enc_round (r, k + 5, 4);
/* GHASH multiply block 2 */
aes_gcm_ghash_mul_next (ctx, (d[2]));
/* AES rounds 6 and 7 */
aes_gcm_enc_round (r, k + 6, 4);
aes_gcm_enc_round (r, k + 7, 4);
/* GHASH multiply block 3 */
aes_gcm_ghash_mul_next (ctx, (d[3]));
/* AES rounds 8 and 9 */
aes_gcm_enc_round (r, k + 8, 4);
aes_gcm_enc_round (r, k + 9, 4);
/* load 4 blocks of data - encrypt round */
if (ctx->operation == AES_GCM_OP_ENCRYPT)
for (int i = 0; i < 4; i++)
/* AES last round(s) */
aes_gcm_enc_last_round (ctx, r, d, k, 4);
/* store 4 blocks of data */
for (int i = 0; i < 4; i++)
/* load next 4 blocks of data - decrypt round */
if (ctx->operation == AES_GCM_OP_DECRYPT)
for (int i = 0; i < 4; i++)
/* GHASH multiply block 4 */
aes_gcm_ghash_mul_next (ctx, (d[0]));
/* AES rounds 0 and 1 */
aes_gcm_enc_first_round (ctx, r, 4);
aes_gcm_enc_round (r, k + 1, 4);
/* GHASH multiply block 5 */
aes_gcm_ghash_mul_next (ctx, (d[1]));
/* AES rounds 2 and 3 */
aes_gcm_enc_round (r, k + 2, 4);
aes_gcm_enc_round (r, k + 3, 4);
/* GHASH multiply block 6 */
aes_gcm_ghash_mul_next (ctx, (d[2]));
/* AES rounds 4 and 5 */
aes_gcm_enc_round (r, k + 4, 4);
aes_gcm_enc_round (r, k + 5, 4);
/* GHASH multiply block 7 */
aes_gcm_ghash_mul_next (ctx, (d[3]));
/* AES rounds 6 and 7 */
aes_gcm_enc_round (r, k + 6, 4);
aes_gcm_enc_round (r, k + 7, 4);
/* GHASH reduce 1st step */
aes_gcm_ghash_reduce (ctx);
/* AES rounds 8 and 9 */
aes_gcm_enc_round (r, k + 8, 4);
aes_gcm_enc_round (r, k + 9, 4);
/* GHASH reduce 2nd step */
aes_gcm_ghash_reduce2 (ctx);
/* load 4 blocks of data - encrypt round */
if (ctx->operation == AES_GCM_OP_ENCRYPT)
for (int i = 0; i < 4; i++)
/* AES last round(s) */
aes_gcm_enc_last_round (ctx, r, d, k, 4);
for (int i = 0; i < 4; i++)
/* GHASH final step */
aes_gcm_ghash_final (ctx);
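
/* aes_gcm_mask_bytes clears the bytes past n_bytes in the last, partial data
 * vector by comparing a 0..63 byte-index pattern against the splatted length,
 * so padding bytes never enter the GHASH. */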
static_always_inline void
aes_gcm_mask_bytes (aes_data_t *d, uword n_bytes)
.b = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 },
d[0] &= (aes_gcm_splat (n_bytes) > scale.r);
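
/* aes_gcm_calc_last hashes the final one to four (possibly partial) data
 * vectors, interleaved with the rounds of E(K, Y0); n_lanes is one larger to
 * leave room for the trailing length block. */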
static_always_inline void
aes_gcm_calc_last (aes_gcm_ctx_t *ctx, aes_data_t *d, int n_blocks,
int n_lanes = (N_LANES == 1 ? n_blocks : (n_bytes + 15) / 16) + 1;
n_bytes -= (n_blocks - 1) * N;
aes_gcm_enc_ctr0_round (ctx, 0);
aes_gcm_enc_ctr0_round (ctx, 1);
aes_gcm_mask_bytes (d + n_blocks - 1, n_bytes);
aes_gcm_ghash_mul_first (ctx, d[0], n_lanes);
aes_gcm_enc_ctr0_round (ctx, 2);
aes_gcm_enc_ctr0_round (ctx, 3);
aes_gcm_ghash_mul_next (ctx, d[1]);
aes_gcm_enc_ctr0_round (ctx, 4);
aes_gcm_enc_ctr0_round (ctx, 5);
aes_gcm_ghash_mul_next (ctx, d[2]);
aes_gcm_enc_ctr0_round (ctx, 6);
aes_gcm_enc_ctr0_round (ctx, 7);
aes_gcm_ghash_mul_next (ctx, d[3]);
aes_gcm_enc_ctr0_round (ctx, 8);
aes_gcm_enc_ctr0_round (ctx, 9);
aes_gcm_ghash_mul_final_block (ctx);
aes_gcm_ghash_reduce (ctx);
for (i = 10; i < ctx->rounds; i++)
aes_gcm_enc_ctr0_round (ctx, i);
aes_gcm_ghash_reduce2 (ctx);
aes_gcm_ghash_final (ctx);
aes_gcm_enc_ctr0_round (ctx, i);
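
/* aes_gcm_enc: very short messages (up to four vectors) take a single partial
 * pass; longer ones are encrypted first, then the main loop keeps the GHASH
 * one chunk behind the encryption so both always have work to interleave. */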
static_always_inline void
aes_gcm_enc (aes_gcm_ctx_t *ctx, const u8 *src, u8 *dst, u32 n_left)
if (PREDICT_FALSE (n_left == 0))
for (i = 0; i < ctx->rounds + 1; i++)
aes_gcm_enc_ctr0_round (ctx, i);
aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 0);
aes_gcm_calc_last (ctx, d, 4, n_left);
else if (n_left > 2 * N)
aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 0);
aes_gcm_calc_last (ctx, d, 3, n_left);
aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 0);
aes_gcm_calc_last (ctx, d, 2, n_left);
aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 0);
aes_gcm_calc_last (ctx, d, 1, n_left);
aes_gcm_calc (ctx, d, src, dst, 4, 4 * N, /* with_ghash */ 0);
for (; n_left >= 8 * N; n_left -= 8 * N, src += 8 * N, dst += 8 * N)
aes_gcm_calc_double (ctx, d, src, dst);
aes_gcm_calc (ctx, d, src, dst, 4, 4 * N, /* with_ghash */ 1);
aes_gcm_calc_last (ctx, d, 4, 4 * N);
aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 1);
aes_gcm_calc_last (ctx, d, 4, n_left);
else if (n_left > 2 * N)
aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 1);
aes_gcm_calc_last (ctx, d, 3, n_left);
aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 1);
aes_gcm_calc_last (ctx, d, 2, n_left);
aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 1);
aes_gcm_calc_last (ctx, d, 1, n_left);
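
/* aes_gcm_dec hashes the ciphertext as it is loaded, so no chunk lag is
 * needed; the tail then interleaves the remaining E(K, Y0) rounds with the
 * GHASH of the final length block. */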
static_always_inline void
aes_gcm_dec (aes_gcm_ctx_t *ctx, const u8 *src, u8 *dst, uword n_left)
aes_data_t d[4] = {};
/* main decryption loop */
for (; n_left >= 8 * N; n_left -= 8 * N, dst += 8 * N, src += 8 * N)
aes_gcm_calc_double (ctx, d, src, dst);
aes_gcm_calc (ctx, d, src, dst, 4, 4 * N, /* with_ghash */ 1);
aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 1);
else if (n_left > 2 * N)
aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 1);
aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 1);
aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 1);
/* interleaved counter 0 encryption E(Y0, k) and ghash of final GCM
 * (bit length) block */
aes_gcm_enc_ctr0_round (ctx, 0);
aes_gcm_enc_ctr0_round (ctx, 1);
ghash_mul_first (&gd, aes_gcm_final_block (ctx) ^ ctx->T,
ctx->Hi[NUM_HI - 1]);
aes_gcm_enc_ctr0_round (ctx, 2);
aes_gcm_enc_ctr0_round (ctx, 3);
aes_gcm_enc_ctr0_round (ctx, 4);
aes_gcm_enc_ctr0_round (ctx, 5);
aes_gcm_enc_ctr0_round (ctx, 6);
aes_gcm_enc_ctr0_round (ctx, 7);
ctx->T = ghash_final (&gd);
aes_gcm_enc_ctr0_round (ctx, 8);
aes_gcm_enc_ctr0_round (ctx, 9);
for (int i = 10; i < ctx->rounds + 1; i += 1)
aes_gcm_enc_ctr0_round (ctx, i);
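
/* aes_gcm is the common driver for encrypt, decrypt and GMAC. With the
 * 96-bit IV, the initial counter block Y0 carries counter value 1 and is
 * reserved for the tag mask E(K, Y0), so the data counters kept in ctx->Y
 * start at 2. The final tag is the reflected GHASH value XORed with
 * E(K, Y0), truncated to tag_len. */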
static_always_inline int
aes_gcm (const u8 *src, u8 *dst, const u8 *aad, u8 *ivp, u8 *tag,
u32 data_bytes, u32 aad_bytes, u8 tag_len,
const aes_gcm_key_data_t *kd, int aes_rounds, aes_gcm_op_t op)
u8 *addt = (u8 *) aad;
aes_gcm_ctx_t _ctx = { .counter = 2,
.rounds = aes_rounds,
.data_bytes = data_bytes,
.aad_bytes = aad_bytes,
/* initialize counter */
Y0 = (u32x4) (u64x2){ *(u64u *) ivp, 0 };
Y0[2] = *(u32u *) (ivp + 8);
ctx->EY0 = (u8x16) Y0;
ctx->Y = u32x16_splat_u32x4 (Y0) + (u32x16){
0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24, 0, 0, 0, 3 << 24, 0, 0, 0, 4 << 24,
u32x8_splat_u32x4 (Y0) + (u32x8){ 0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24 };
ctx->Y = Y0 + (u32x4){ 0, 0, 0, 1 << 24 };
/* calculate ghash for AAD */
aes_gcm_ghash (ctx, addt, aad_bytes);
/* ghash and encrypt/decrypt */
if (op == AES_GCM_OP_ENCRYPT)
aes_gcm_enc (ctx, src, dst, data_bytes);
else if (op == AES_GCM_OP_DECRYPT)
aes_gcm_dec (ctx, src, dst, data_bytes);
ctx->T = u8x16_reflect (ctx->T) ^ ctx->EY0;
/* tag_len 16 -> 0 */
if (op == AES_GCM_OP_ENCRYPT || op == AES_GCM_OP_GMAC)
u8x16_store_partial (ctx->T, tag, tag_len);
((u8x16u *) tag)[0] = ctx->T;
u16 mask = pow2_mask (tag_len);
u8x16 expected = u8x16_load_partial (tag, tag_len);
if ((u8x16_msb_mask (expected == ctx->T) & mask) == mask)
if (u8x16_is_equal (ctx->T, *(u8x16u *) tag))
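
/* clib_aes_gcm_key_expand expands the AES key once, broadcasts every round
 * key across the vector lanes and precomputes the GHASH key powers from
 * H = E(K, 0). */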
static_always_inline void
clib_aes_gcm_key_expand (aes_gcm_key_data_t *kd, const u8 *key,
u8x16 ek[AES_KEY_ROUNDS (AES_KEY_256) + 1];
aes_gcm_expanded_key_t *Ke = (aes_gcm_expanded_key_t *) kd->Ke;
aes_key_expand (ek, key, ks);
for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
Ke[i].lanes[0] = Ke[i].lanes[1] = Ke[i].lanes[2] = Ke[i].lanes[3] = ek[i];
/* pre-calculate H */
H = aes_encrypt_block (u8x16_zero (), ek, ks);
H = u8x16_reflect (H);
ghash_precompute (H, (u8x16 *) kd->Hi, ARRAY_LEN (kd->Hi));
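
/* Typical usage sketch (illustrative only: the key/iv/aad/data buffers and
 * sizes below are hypothetical, and the key-size argument is assumed to be
 * the aes_key_size_t consumed as `ks` above):
 *
 *   aes_gcm_key_data_t kd;
 *   u8 key[16], iv[12], aad[20], pt[1024], ct[1024], tag[16];
 *
 *   clib_aes_gcm_key_expand (&kd, key, AES_KEY_128);
 *   clib_aes128_gcm_enc (&kd, pt, sizeof (pt), aad, sizeof (aad), iv,
 *                        sizeof (tag), ct, tag);
 *   int ok = clib_aes128_gcm_dec (&kd, ct, sizeof (ct), aad, sizeof (aad),
 *                                 iv, tag, sizeof (tag), pt);
 *
 * where, per the tag checks above, ok is assumed to be non-zero only when
 * the computed tag matches the supplied one. */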
static_always_inline void
clib_aes128_gcm_enc (const aes_gcm_key_data_t *kd, const u8 *plaintext,
u32 data_bytes, const u8 *aad, u32 aad_bytes,
const u8 *iv, u32 tag_bytes, u8 *ciphertext, u8 *tag)
aes_gcm (plaintext, ciphertext, aad, (u8 *) iv, tag, data_bytes, aad_bytes,
tag_bytes, kd, AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_ENCRYPT);

static_always_inline void
clib_aes256_gcm_enc (const aes_gcm_key_data_t *kd, const u8 *plaintext,
u32 data_bytes, const u8 *aad, u32 aad_bytes,
const u8 *iv, u32 tag_bytes, u8 *ciphertext, u8 *tag)
aes_gcm (plaintext, ciphertext, aad, (u8 *) iv, tag, data_bytes, aad_bytes,
tag_bytes, kd, AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_ENCRYPT);

static_always_inline int
clib_aes128_gcm_dec (const aes_gcm_key_data_t *kd, const u8 *ciphertext,
u32 data_bytes, const u8 *aad, u32 aad_bytes,
const u8 *iv, const u8 *tag, u32 tag_bytes, u8 *plaintext)
return aes_gcm (ciphertext, plaintext, aad, (u8 *) iv, (u8 *) tag,
data_bytes, aad_bytes, tag_bytes, kd,
AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_DECRYPT);

static_always_inline int
clib_aes256_gcm_dec (const aes_gcm_key_data_t *kd, const u8 *ciphertext,
u32 data_bytes, const u8 *aad, u32 aad_bytes,
const u8 *iv, const u8 *tag, u32 tag_bytes, u8 *plaintext)
return aes_gcm (ciphertext, plaintext, aad, (u8 *) iv, (u8 *) tag,
data_bytes, aad_bytes, tag_bytes, kd,
AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_DECRYPT);
static_always_inline void
clib_aes128_gmac (const aes_gcm_key_data_t *kd, const u8 *data, u32 data_bytes,
const u8 *iv, u32 tag_bytes, u8 *tag)
aes_gcm (0, 0, data, (u8 *) iv, tag, 0, data_bytes, tag_bytes, kd,
AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_GMAC);

static_always_inline void
clib_aes256_gmac (const aes_gcm_key_data_t *kd, const u8 *data, u32 data_bytes,
const u8 *iv, u32 tag_bytes, u8 *tag)
aes_gcm (0, 0, data, (u8 *) iv, tag, 0, data_bytes, tag_bytes, kd,
AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_GMAC);

#endif /* __crypto_aes_gcm_h__ */