 *------------------------------------------------------------------
 * Copyright (c) 2019 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *     http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
#include <vlib/vlib.h>
#include <vnet/plugin/plugin.h>
#include <vnet/crypto/crypto.h>
#include <crypto_native/crypto_native.h>
#include <crypto_native/aes.h>
#include <crypto_native/ghash.h>

#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
#pragma GCC optimize ("O3")
  /* pre-calculated hash key values */
  const u8x16 Hi[NUM_HI];
  /* extracted AES key */

  AES_GCM_F_WITH_GHASH = (1 << 0),
  AES_GCM_F_LAST_ROUND = (1 << 1),
  AES_GCM_F_ENCRYPT = (1 << 2),
  AES_GCM_F_DECRYPT = (1 << 3),

static const u32x4 ctr_inv_1 = { 0, 0, 0, 1 << 24 };
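/* Note: the GCM counter block Y is kept in network byte order, so the 32-bit
   block counter sits byte-swapped in the last word of Y.  Adding 1 << 24 to
   that word (which is what ctr_inv_1 above encodes) bumps the counter's least
   significant byte without a byte swap on every block; the slow path below
   handles the case where that byte is about to wrap. */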
static_always_inline void
aes_gcm_enc_first_round (u8x16 * r, aes_gcm_counter_t * ctr, u8x16 k,
  if (PREDICT_TRUE ((u8) ctr->counter < (256 - 2 * n_blocks)))
      for (int i = 0; i < n_blocks; i++)
          r[i] = k ^ (u8x16) ctr->Y;
      ctr->counter += n_blocks;
      for (int i = 0; i < n_blocks; i++)
          r[i] = k ^ (u8x16) ctr->Y;
          ctr->Y[3] = clib_host_to_net_u32 (ctr->counter + 1);
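  /* The second loop above is the slow path: the low counter byte is about to
     wrap within this batch, so Y[3] is recomputed from ctr->counter with a
     full host-to-network byte swap and the carry propagates correctly. */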
static_always_inline void
aes_gcm_enc_round (u8x16 * r, u8x16 k, int n_blocks)
  for (int i = 0; i < n_blocks; i++)
    r[i] = aes_enc_round (r[i], k);

static_always_inline void
aes_gcm_enc_last_round (u8x16 * r, u8x16 * d, u8x16 const *k,
                        int rounds, int n_blocks)
  /* additional rounds for AES-192 and AES-256 */
  for (int i = 10; i < rounds; i++)
    aes_gcm_enc_round (r, k[i], n_blocks);

  for (int i = 0; i < n_blocks; i++)
    d[i] ^= aes_enc_last_round (r[i], k[rounds]);
static_always_inline u8x16
aes_gcm_ghash_blocks (u8x16 T, aes_gcm_key_data_t * kd,
                      u8x16u * in, int n_blocks)
  ghash_data_t _gd, *gd = &_gd;
  u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks;
  ghash_mul_first (gd, u8x16_reflect (in[0]) ^ T, Hi[0]);
  for (int i = 1; i < n_blocks; i++)
    ghash_mul_next (gd, u8x16_reflect ((in[i])), Hi[i]);
  return ghash_final (gd);
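/* kd->Hi holds precomputed powers of the hash key, ordered so that the last
   entry corresponds to H^1 (the single-block case below uses Hi[NUM_HI - 1]).
   Offsetting the pointer by NUM_HI - n_blocks therefore makes Hi[0] the
   highest power needed for this batch, so each block is multiplied by a
   descending power of H and all products are folded in one reduction. */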
static_always_inline u8x16
aes_gcm_ghash (u8x16 T, aes_gcm_key_data_t * kd, u8x16u * in, u32 n_left)

  while (n_left >= 128)
      T = aes_gcm_ghash_blocks (T, kd, in, 8);

      T = aes_gcm_ghash_blocks (T, kd, in, 4);

      T = aes_gcm_ghash_blocks (T, kd, in, 2);

      T = aes_gcm_ghash_blocks (T, kd, in, 1);

      u8x16 r = aes_load_partial (in, n_left);
      T = ghash_mul (u8x16_reflect (r) ^ T, kd->Hi[NUM_HI - 1]);
static_always_inline u8x16
aes_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
              aes_gcm_counter_t * ctr, u8x16u * inv, u8x16u * outv,
              int rounds, int n, int last_block_bytes, aes_gcm_flags_t f)
  ghash_data_t _gd = { }, *gd = &_gd;
  const u8x16 *rk = (u8x16 *) kd->Ke;
  int ghash_blocks = (f & AES_GCM_F_ENCRYPT) ? 4 : n, gc = 1;
  u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - ghash_blocks;

  clib_prefetch_load (inv + 4);

  /* AES rounds 0 and 1 */
  aes_gcm_enc_first_round (r, ctr, rk[0], n);
  aes_gcm_enc_round (r, rk[1], n);

  /* load data - decrypt round */
  if (f & AES_GCM_F_DECRYPT)
      for (int i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
      if (f & AES_GCM_F_LAST_ROUND)
        d[n - 1] = aes_load_partial (inv + n - 1, last_block_bytes);

  /* GHASH multiply block 1 */
  if (f & AES_GCM_F_WITH_GHASH)
    ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]);

  /* AES rounds 2 and 3 */
  aes_gcm_enc_round (r, rk[2], n);
  aes_gcm_enc_round (r, rk[3], n);

  /* GHASH multiply block 2 */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]);

  /* AES rounds 4 and 5 */
  aes_gcm_enc_round (r, rk[4], n);
  aes_gcm_enc_round (r, rk[5], n);

  /* GHASH multiply block 3 */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]);

  /* AES rounds 6 and 7 */
  aes_gcm_enc_round (r, rk[6], n);
  aes_gcm_enc_round (r, rk[7], n);

  /* GHASH multiply block 4 */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]);

  /* AES rounds 8 and 9 */
  aes_gcm_enc_round (r, rk[8], n);
  aes_gcm_enc_round (r, rk[9], n);

  /* GHASH reduce 1st step */
  if (f & AES_GCM_F_WITH_GHASH)

  /* load data - encrypt round */
  if (f & AES_GCM_F_ENCRYPT)
      for (int i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
      if (f & AES_GCM_F_LAST_ROUND)
        d[n - 1] = aes_load_partial (inv + n - 1, last_block_bytes);

  /* GHASH reduce 2nd step */
  if (f & AES_GCM_F_WITH_GHASH)

  /* AES last round(s) */
  aes_gcm_enc_last_round (r, d, rk, rounds, n);

  for (int i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
  if (f & AES_GCM_F_LAST_ROUND)
    aes_store_partial (outv + n - 1, d[n - 1], last_block_bytes);

  /* GHASH final step */
  if (f & AES_GCM_F_WITH_GHASH)
    T = ghash_final (gd);
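  /* The fixed schedule above interleaves two AES rounds with one GHASH
     multiply so the AES and carry-less multiply units work in parallel:
     during encryption the blocks being hashed are the ciphertext produced by
     the previous call (ghash_blocks is forced to 4), while during decryption
     the 1..n ciphertext blocks just loaded are hashed directly. */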
static_always_inline u8x16
aes_gcm_calc_double (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
                     aes_gcm_counter_t * ctr, u8x16u * inv, u8x16u * outv,
                     int rounds, aes_gcm_flags_t f)
  ghash_data_t _gd, *gd = &_gd;
  const u8x16 *rk = (u8x16 *) kd->Ke;
  u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - 8;

  /* AES rounds 0 and 1 */
  aes_gcm_enc_first_round (r, ctr, rk[0], 4);
  aes_gcm_enc_round (r, rk[1], 4);

  /* load 4 blocks of data - decrypt round */
  if (f & AES_GCM_F_DECRYPT)

  /* GHASH multiply block 0 */
  ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]);

  /* AES rounds 2 and 3 */
  aes_gcm_enc_round (r, rk[2], 4);
  aes_gcm_enc_round (r, rk[3], 4);

  /* GHASH multiply block 1 */
  ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]);

  /* AES rounds 4 and 5 */
  aes_gcm_enc_round (r, rk[4], 4);
  aes_gcm_enc_round (r, rk[5], 4);

  /* GHASH multiply block 2 */
  ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]);

  /* AES rounds 6 and 7 */
  aes_gcm_enc_round (r, rk[6], 4);
  aes_gcm_enc_round (r, rk[7], 4);

  /* GHASH multiply block 3 */
  ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]);

  /* AES rounds 8 and 9 */
  aes_gcm_enc_round (r, rk[8], 4);
  aes_gcm_enc_round (r, rk[9], 4);

  /* load 4 blocks of data - encrypt round */
  if (f & AES_GCM_F_ENCRYPT)

  /* AES last round(s) */
  aes_gcm_enc_last_round (r, d, rk, rounds, 4);

  /* store 4 blocks of data */

  /* load next 4 blocks of data - decrypt round */
  if (f & AES_GCM_F_DECRYPT)

  /* GHASH multiply block 4 */
  ghash_mul_next (gd, u8x16_reflect (d[0]), Hi[4]);

  /* AES rounds 0, 1 and 2 */
  aes_gcm_enc_first_round (r, ctr, rk[0], 4);
  aes_gcm_enc_round (r, rk[1], 4);
  aes_gcm_enc_round (r, rk[2], 4);

  /* GHASH multiply block 5 */
  ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[5]);

  /* AES rounds 3 and 4 */
  aes_gcm_enc_round (r, rk[3], 4);
  aes_gcm_enc_round (r, rk[4], 4);

  /* GHASH multiply block 6 */
  ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[6]);

  /* AES rounds 5 and 6 */
  aes_gcm_enc_round (r, rk[5], 4);
  aes_gcm_enc_round (r, rk[6], 4);

  /* GHASH multiply block 7 */
  ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[7]);

  /* AES rounds 7 and 8 */
  aes_gcm_enc_round (r, rk[7], 4);
  aes_gcm_enc_round (r, rk[8], 4);

  /* GHASH reduce 1st step */

  aes_gcm_enc_round (r, rk[9], 4);

  /* load data - encrypt round */
  if (f & AES_GCM_F_ENCRYPT)

  /* GHASH reduce 2nd step */

  /* AES last round(s) */
  aes_gcm_enc_last_round (r, d, rk, rounds, 4);

  /* GHASH final step */
  return ghash_final (gd);
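/* aes_gcm_calc_double processes 8 blocks per call: the GHASH multiplies for
   the first four blocks overlap with the AES rounds of the second four, and
   the single GHASH reduction at the end covers all eight products. */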
static_always_inline u8x16
aes_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
                    int n_blocks, int n_bytes)
  ghash_data_t _gd, *gd = &_gd;
  u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks;

  d[n_blocks - 1] = aes_byte_mask (d[n_blocks - 1], n_bytes);

  ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]);
    ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]);
    ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]);
    ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]);
  return ghash_final (gd);
static const u32x16 ctr_inv_1234 = {
  0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24, 0, 0, 0, 3 << 24, 0, 0, 0, 4 << 24,

static const u32x16 ctr_inv_4444 = {
  0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24

static const u32x16 ctr_1234 = {
  1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0,
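/* 512-bit counter handling: Y4 holds four consecutive counter blocks, again
   in network byte order.  ctr_inv_1234 seeds them with offsets +1..+4,
   ctr_inv_4444 advances all four by 4 per step, and ctr_1234 is the
   host-order equivalent used by the wrap-around slow path. */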
static_always_inline void
aes4_gcm_enc_first_round (u8x64 * r, aes_gcm_counter_t * ctr, u8x64 k, int n)
  u8 last_byte = (u8) ctr->counter;

  /* The counter is stored in network byte order for performance reasons, so
     only its least significant byte is incremented, except when that byte
     overflows.  Because four 512-bit blocks are processed in parallel in all
     but the last round, overflow can only happen when n == 4. */

        r[i] = k ^ (u8x64) ctr->Y4;
        ctr->Y4 += ctr_inv_4444;

  if (n == 4 && PREDICT_TRUE (last_byte == 241))
      u32x16 Yc, Yr = (u32x16) u8x64_reflect_u8x16 ((u8x64) ctr->Y4);
          r[i] = k ^ (u8x64) ctr->Y4;
          Yc = u32x16_splat (ctr->counter + 4 * (i + 1)) + ctr_1234;
          Yr = (u32x16) u32x16_mask_blend (Yr, Yc, 0x1111);
          ctr->Y4 = (u32x16) u8x64_reflect_u8x16 ((u8x64) Yr);
          r[i] = k ^ (u8x64) ctr->Y4;
          ctr->Y4 += ctr_inv_4444;
  ctr->counter += n * 4;
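  /* In the wrap-around path above, Y4 is byte-reflected into host order so
     the counter word of each 128-bit lane lands in word positions 0, 4, 8
     and 12; the 0x1111 blend mask updates exactly those words with freshly
     computed counter values before reflecting back to network byte order. */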
static_always_inline void
aes4_gcm_enc_round (u8x64 * r, u8x64 k, int n_blocks)
  for (int i = 0; i < n_blocks; i++)
    r[i] = aes_enc_round_x4 (r[i], k);

static_always_inline void
aes4_gcm_enc_last_round (u8x64 * r, u8x64 * d, u8x64 const *k,
                         int rounds, int n_blocks)
  /* additional rounds for AES-192 and AES-256 */
  for (int i = 10; i < rounds; i++)
    aes4_gcm_enc_round (r, k[i], n_blocks);

  for (int i = 0; i < n_blocks; i++)
    d[i] ^= aes_enc_last_round_x4 (r[i], k[rounds]);
static_always_inline u8x16
aes4_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d,
               aes_gcm_counter_t * ctr, u8x16u * in, u8x16u * out,
               int rounds, int n, int last_4block_bytes, aes_gcm_flags_t f)
  ghash4_data_t _gd, *gd = &_gd;
  const u8x64 *rk = (u8x64 *) kd->Ke4;
  int i, ghash_blocks, gc = 1;
  u8x64u *Hi4, *inv = (u8x64u *) in, *outv = (u8x64u *) out;

  u64 byte_mask = _bextr_u64 (-1LL, 0, last_4block_bytes);

  if (f & AES_GCM_F_ENCRYPT)
      /* during encryption we either hash four 512-bit blocks from the
         previous round or we don't hash at all */
      Hi4 = (u8x64u *) (kd->Hi + NUM_HI - ghash_blocks * 4);

      /* during decryption we hash 1..4 512-bit blocks from the current
         round */
      int n_128bit_blocks = n * 4;
      /* if this is the last round of decryption, we may have fewer than 4
         128-bit blocks in the last 512-bit data block, so the Hi4 pointer
         needs to be adjusted accordingly */
      if (f & AES_GCM_F_LAST_ROUND)
        n_128bit_blocks += ((last_4block_bytes + 15) >> 4) - 4;
      Hi4 = (u8x64u *) (kd->Hi + NUM_HI - n_128bit_blocks);
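      /* Example: if the last 512-bit chunk carries 40 bytes of ciphertext,
         ((40 + 15) >> 4) = 3 of its four 128-bit lanes are occupied, so
         n_128bit_blocks drops by one and Hi4 starts one power of H later. */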
  /* AES rounds 0 and 1 */
  aes4_gcm_enc_first_round (r, ctr, rk[0], n);
  aes4_gcm_enc_round (r, rk[1], n);

  /* load 4 blocks of data - decrypt round */
  if (f & AES_GCM_F_DECRYPT)
      for (i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
      if (f & AES_GCM_F_LAST_ROUND)
        d[i] = u8x64_mask_load (u8x64_splat (0), inv + i, byte_mask);

  /* GHASH multiply block 0 */
  if (f & AES_GCM_F_WITH_GHASH)
    ghash4_mul_first (gd, u8x64_reflect_u8x16 (d[0]) ^
                      u8x64_insert_u8x16 (u8x64_splat (0), T, 0), Hi4[0]);
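  /* The running tag T is folded into the first 128-bit lane only; the four
     lane-wise products accumulated by the ghash4_* helpers are combined into
     a single tag when ghash4_final () is called at the end of the function. */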
  /* AES rounds 2 and 3 */
  aes4_gcm_enc_round (r, rk[2], n);
  aes4_gcm_enc_round (r, rk[3], n);

  /* GHASH multiply block 1 */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[1]);

  /* AES rounds 4 and 5 */
  aes4_gcm_enc_round (r, rk[4], n);
  aes4_gcm_enc_round (r, rk[5], n);

  /* GHASH multiply block 2 */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[2]);

  /* AES rounds 6 and 7 */
  aes4_gcm_enc_round (r, rk[6], n);
  aes4_gcm_enc_round (r, rk[7], n);

  /* GHASH multiply block 3 */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[3]);

  /* load 4 blocks of data - encrypt round */
  if (f & AES_GCM_F_ENCRYPT)
      for (i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
      if (f & AES_GCM_F_LAST_ROUND)
        d[i] = u8x64_mask_load (u8x64_splat (0), inv + i, byte_mask);

  /* AES rounds 8 and 9 */
  aes4_gcm_enc_round (r, rk[8], n);
  aes4_gcm_enc_round (r, rk[9], n);

  /* AES last round(s) */
  aes4_gcm_enc_last_round (r, d, rk, rounds, n);

  /* store 4 blocks of data */
  for (i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
  if (f & AES_GCM_F_LAST_ROUND)
    u8x64_mask_store (d[i], outv + i, byte_mask);

  /* GHASH reduce 1st step */

  /* GHASH reduce 2nd step */

  /* GHASH final step */
  return ghash4_final (gd);
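/* aes4_gcm_calc is the VAES/AVX-512 counterpart of aes_gcm_calc: each of the
   n registers holds four AES blocks, partial trailing data is handled with
   byte_mask'ed loads and stores, and ghash4_final () folds the four 128-bit
   lanes of the accumulated GHASH product into a single tag. */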
static_always_inline u8x16
aes4_gcm_calc_double (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d,
                      aes_gcm_counter_t * ctr, u8x16u * in, u8x16u * out,
                      int rounds, aes_gcm_flags_t f)
  ghash4_data_t _gd, *gd = &_gd;
  const u8x64 *rk = (u8x64 *) kd->Ke4;
  u8x64 *Hi4 = (u8x64 *) (kd->Hi + NUM_HI - 32);
  u8x64u *inv = (u8x64u *) in, *outv = (u8x64u *) out;

  /* AES rounds 0 and 1 */
  aes4_gcm_enc_first_round (r, ctr, rk[0], 4);
  aes4_gcm_enc_round (r, rk[1], 4);

  /* load 4 blocks of data - decrypt round */
  if (f & AES_GCM_F_DECRYPT)
    for (int i = 0; i < 4; i++)

  /* GHASH multiply block 0 */
  ghash4_mul_first (gd, u8x64_reflect_u8x16 (d[0]) ^
                    u8x64_insert_u8x16 (u8x64_splat (0), T, 0), Hi4[0]);

  /* AES rounds 2 and 3 */
  aes4_gcm_enc_round (r, rk[2], 4);
  aes4_gcm_enc_round (r, rk[3], 4);

  /* GHASH multiply block 1 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[1]);

  /* AES rounds 4 and 5 */
  aes4_gcm_enc_round (r, rk[4], 4);
  aes4_gcm_enc_round (r, rk[5], 4);

  /* GHASH multiply block 2 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[2]);

  /* AES rounds 6 and 7 */
  aes4_gcm_enc_round (r, rk[6], 4);
  aes4_gcm_enc_round (r, rk[7], 4);

  /* GHASH multiply block 3 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[3]);

  /* AES rounds 8 and 9 */
  aes4_gcm_enc_round (r, rk[8], 4);
  aes4_gcm_enc_round (r, rk[9], 4);

  /* load 4 blocks of data - encrypt round */
  if (f & AES_GCM_F_ENCRYPT)
    for (int i = 0; i < 4; i++)

  /* AES last round(s) */
  aes4_gcm_enc_last_round (r, d, rk, rounds, 4);

  /* store 4 blocks of data */
  for (int i = 0; i < 4; i++)

  /* load next 4 blocks of data - decrypt round */
  if (f & AES_GCM_F_DECRYPT)
    for (int i = 0; i < 4; i++)

  /* GHASH multiply block 4 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[0]), Hi4[4]);

  /* AES rounds 0 and 1 */
  aes4_gcm_enc_first_round (r, ctr, rk[0], 4);
  aes4_gcm_enc_round (r, rk[1], 4);

  /* GHASH multiply block 5 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[5]);

  /* AES rounds 2 and 3 */
  aes4_gcm_enc_round (r, rk[2], 4);
  aes4_gcm_enc_round (r, rk[3], 4);

  /* GHASH multiply block 6 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[6]);

  /* AES rounds 4 and 5 */
  aes4_gcm_enc_round (r, rk[4], 4);
  aes4_gcm_enc_round (r, rk[5], 4);

  /* GHASH multiply block 7 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[7]);

  /* AES rounds 6 and 7 */
  aes4_gcm_enc_round (r, rk[6], 4);
  aes4_gcm_enc_round (r, rk[7], 4);

  /* GHASH reduce 1st step */

  /* AES rounds 8 and 9 */
  aes4_gcm_enc_round (r, rk[8], 4);
  aes4_gcm_enc_round (r, rk[9], 4);

  /* GHASH reduce 2nd step */

  /* load 4 blocks of data - encrypt round */
  if (f & AES_GCM_F_ENCRYPT)
    for (int i = 0; i < 4; i++)

  /* AES last round(s) */
  aes4_gcm_enc_last_round (r, d, rk, rounds, 4);

  /* store 4 blocks of data */
  for (int i = 0; i < 4; i++)

  /* GHASH final step */
  return ghash4_final (gd);
static_always_inline u8x16
aes4_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d,
                     int n, int last_4block_bytes)
  ghash4_data_t _gd, *gd = &_gd;

  u64 byte_mask = _bextr_u64 (-1LL, 0, last_4block_bytes);
  n_128bit_blocks = (n - 1) * 4 + ((last_4block_bytes + 15) >> 4);
  Hi4 = (u8x64u *) (kd->Hi + NUM_HI - n_128bit_blocks);

  d[n - 1] = u8x64_mask_blend (u8x64_splat (0), d[n - 1], byte_mask);
  ghash4_mul_first (gd, u8x64_reflect_u8x16 (d[0]) ^
                    u8x64_insert_u8x16 (u8x64_splat (0), T, 0), Hi4[0]);
    ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[1]);
    ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[2]);
    ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[3]);
  return ghash4_final (gd);
static_always_inline u8x16
aes_gcm_enc (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr,
             u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)
  aes_gcm_flags_t f = AES_GCM_F_ENCRYPT;

      f |= AES_GCM_F_LAST_ROUND;
          aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left, f);
          return aes4_gcm_ghash_last (T, kd, d4, 4, n_left);
      else if (n_left > 128)
          aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left, f);
          return aes4_gcm_ghash_last (T, kd, d4, 3, n_left);
      else if (n_left > 64)
          aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left, f);
          return aes4_gcm_ghash_last (T, kd, d4, 2, n_left);
          aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
          return aes4_gcm_ghash_last (T, kd, d4, 1, n_left);

  aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);

  f |= AES_GCM_F_WITH_GHASH;

  while (n_left >= 512)
      T = aes4_gcm_calc_double (T, kd, d4, ctr, inv, outv, rounds, f);

  while (n_left >= 256)
      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);

    return aes4_gcm_ghash_last (T, kd, d4, 4, 64);

  f |= AES_GCM_F_LAST_ROUND;

      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left, f);
      return aes4_gcm_ghash_last (T, kd, d4, 4, n_left);

      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left, f);
      return aes4_gcm_ghash_last (T, kd, d4, 3, n_left);

      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left, f);
      return aes4_gcm_ghash_last (T, kd, d4, 2, n_left);

  T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
  return aes4_gcm_ghash_last (T, kd, d4, 1, n_left);

      f |= AES_GCM_F_LAST_ROUND;
          aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left, f);
          return aes_gcm_ghash_last (T, kd, d, 4, n_left);
      else if (n_left > 32)
          aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left, f);
          return aes_gcm_ghash_last (T, kd, d, 3, n_left);
      else if (n_left > 16)
          aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left, f);
          return aes_gcm_ghash_last (T, kd, d, 2, n_left);
          aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
          return aes_gcm_ghash_last (T, kd, d, 1, n_left);

  aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);

  f |= AES_GCM_F_WITH_GHASH;

  while (n_left >= 128)
      T = aes_gcm_calc_double (T, kd, d, ctr, inv, outv, rounds, f);

      T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);

    return aes_gcm_ghash_last (T, kd, d, 4, 0);

  f |= AES_GCM_F_LAST_ROUND;

      T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left, f);
      return aes_gcm_ghash_last (T, kd, d, 4, n_left);

      T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left, f);
      return aes_gcm_ghash_last (T, kd, d, 3, n_left);

      T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left, f);
      return aes_gcm_ghash_last (T, kd, d, 2, n_left);

  T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
  return aes_gcm_ghash_last (T, kd, d, 1, n_left);
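/* Encryption dispatch: the first batch runs without AES_GCM_F_WITH_GHASH
   because there is no ciphertext to hash yet; subsequent batches hash the
   previous batch's output while encrypting the next one, and the tail is
   handled with AES_GCM_F_LAST_ROUND plus a masked partial store followed by
   a final aes*_gcm_ghash_last pass.  The aes4_* calls belong to the 512-bit
   (VAES) code path and the aes_* calls to the 128-bit one; which path is
   compiled in is decided by preprocessor conditionals elided from this
   excerpt. */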
static_always_inline u8x16
aes_gcm_dec (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr,
             u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)
  aes_gcm_flags_t f = AES_GCM_F_WITH_GHASH | AES_GCM_F_DECRYPT;

  while (n_left >= 512)
      T = aes4_gcm_calc_double (T, kd, d4, ctr, inv, outv, rounds, f);

  while (n_left >= 256)
      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);

  f |= AES_GCM_F_LAST_ROUND;

    return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4,
    return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3,
    return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2,
  return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);

  while (n_left >= 128)
      T = aes_gcm_calc_double (T, kd, d, ctr, inv, outv, rounds, f);

      T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);

  f |= AES_GCM_F_LAST_ROUND;

    return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left - 48, f);
    return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left - 32, f);
    return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left - 16, f);
  return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
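/* Decryption can hash the ciphertext as it is loaded, so
   AES_GCM_F_WITH_GHASH is set from the very first batch and no trailing
   aes_gcm_ghash_last pass is needed: each aes_gcm_calc call already returns
   the updated tag. */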
static_always_inline int
aes_gcm (u8x16u * in, u8x16u * out, u8x16u * addt, u8x16u * iv, u8x16u * tag,
         u32 data_bytes, u32 aad_bytes, u8 tag_len, aes_gcm_key_data_t * kd,
         int aes_rounds, int is_encrypt)
  ghash_data_t _gd, *gd = &_gd;
  aes_gcm_counter_t _ctr, *ctr = &_ctr;

  clib_prefetch_load (iv);
  clib_prefetch_load (in);
  clib_prefetch_load (in + 4);

  /* calculate ghash for AAD - optimized for ipsec common cases */
    T = aes_gcm_ghash (T, kd, addt, 8);
  else if (aad_bytes == 12)
    T = aes_gcm_ghash (T, kd, addt, 12);
    T = aes_gcm_ghash (T, kd, addt, aad_bytes);

  /* initialize counter */
  Y0 = (u32x4) aes_load_partial (iv, 12) + ctr_inv_1;
  ctr->Y4 = u32x16_splat_u32x4 (Y0) + ctr_inv_1234;
  ctr->Y = Y0 + ctr_inv_1;

  /* ghash and encrypt/decrypt */
    T = aes_gcm_enc (T, kd, ctr, in, out, data_bytes, aes_rounds);
    T = aes_gcm_dec (T, kd, ctr, in, out, data_bytes, aes_rounds);

  clib_prefetch_load (tag);

  /* Finalize ghash - data bytes and aad bytes converted to bits */
  r = (u8x16) ((u64x2) {data_bytes, aad_bytes} << 3);

  /* interleaved computation of final ghash and E(Y0, k) */
  ghash_mul_first (gd, r ^ T, kd->Hi[NUM_HI - 1]);
  r = kd->Ke[0] ^ (u8x16) Y0;
  for (i = 1; i < 5; i += 1)
    r = aes_enc_round (r, kd->Ke[i]);
  for (; i < 9; i += 1)
    r = aes_enc_round (r, kd->Ke[i]);
  T = ghash_final (gd);
  for (; i < aes_rounds; i += 1)
    r = aes_enc_round (r, kd->Ke[i]);
  r = aes_enc_last_round (r, kd->Ke[aes_rounds]);
  T = u8x16_reflect (T) ^ r;
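  /* The length block (data and AAD lengths in bits) is hashed while E(K, Y0)
     is computed in the same sequence, hiding the AES round latency behind the
     GHASH reduction; the byte-reflected GHASH result XORed with E(K, Y0)
     yields the full 16-byte tag. */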
  /* tag_len 16 -> 0 */

        aes_store_partial (tag, T, tag_len);

      u16 tag_mask = tag_len ? (1 << tag_len) - 1 : 0xffff;
      if ((u8x16_msb_mask (tag[0] == T) & tag_mask) != tag_mask)
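      /* tag_len is taken modulo 16 (hence the "16 -> 0" note above), so a
         zero value means a full-block comparison with mask 0xffff; e.g. a
         12-byte tag gives tag_mask = 0x0fff, and only the corresponding
         byte-compare bits of u8x16_msb_mask () are checked. */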
static_always_inline u32
aes_ops_enc_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[],
                     u32 n_ops, aes_key_size_t ks)
  crypto_native_main_t *cm = &crypto_native_main;
  vnet_crypto_op_t *op = ops[0];
  aes_gcm_key_data_t *kd;

  kd = (aes_gcm_key_data_t *) cm->key_data[op->key_index];
  aes_gcm ((u8x16u *) op->src, (u8x16u *) op->dst, (u8x16u *) op->aad,
           (u8x16u *) op->iv, (u8x16u *) op->tag, op->len, op->aad_len,
           op->tag_len, kd, AES_KEY_ROUNDS (ks), /* is_encrypt */ 1);
  op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;

static_always_inline u32
aes_ops_dec_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops,
  crypto_native_main_t *cm = &crypto_native_main;
  vnet_crypto_op_t *op = ops[0];
  aes_gcm_key_data_t *kd;

  kd = (aes_gcm_key_data_t *) cm->key_data[op->key_index];
  rv = aes_gcm ((u8x16u *) op->src, (u8x16u *) op->dst, (u8x16u *) op->aad,
                (u8x16u *) op->iv, (u8x16u *) op->tag, op->len,
                op->aad_len, op->tag_len, kd, AES_KEY_ROUNDS (ks),
                /* is_encrypt */ 0);

      op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;

      op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;
static_always_inline void *
aes_gcm_key_exp (vnet_crypto_key_t * key, aes_key_size_t ks)
  aes_gcm_key_data_t *kd;

  kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES);

  /* expand AES key */
  aes_key_expand ((u8x16 *) kd->Ke, key->data, ks);

  /* pre-calculate H */
  H = aes_encrypt_block (u8x16_splat (0), kd->Ke, ks);
  H = u8x16_reflect (H);
  ghash_precompute (H, (u8x16 *) kd->Hi, NUM_HI);

  u8x64 *Ke4 = (u8x64 *) kd->Ke4;
  for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
    Ke4[i] = u8x64_splat_u8x16 (kd->Ke[i]);
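  /* Each expanded 128-bit round key is also broadcast into all four lanes of
     a 512-bit register (Ke4) so the VAES code path can apply the same round
     to four counter blocks with a single instruction. */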
#define foreach_aes_gcm_handler_type _(128) _(192) _(256)

static u32 aes_ops_dec_aes_gcm_##x \
(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
{ return aes_ops_dec_aes_gcm (vm, ops, n_ops, AES_KEY_##x); } \
static u32 aes_ops_enc_aes_gcm_##x \
(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
{ return aes_ops_enc_aes_gcm (vm, ops, n_ops, AES_KEY_##x); } \
static void * aes_gcm_key_exp_##x (vnet_crypto_key_t *key) \
{ return aes_gcm_key_exp (key, AES_KEY_##x); }

foreach_aes_gcm_handler_type;

crypto_native_aes_gcm_init_vaes (vlib_main_t * vm)
crypto_native_aes_gcm_init_avx512 (vlib_main_t * vm)
crypto_native_aes_gcm_init_avx2 (vlib_main_t * vm)
crypto_native_aes_gcm_init_neon (vlib_main_t * vm)
crypto_native_aes_gcm_init_sse42 (vlib_main_t * vm)
  crypto_native_main_t *cm = &crypto_native_main;

  vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
                                    VNET_CRYPTO_OP_AES_##x##_GCM_ENC, \
                                    aes_ops_enc_aes_gcm_##x); \
  vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
                                    VNET_CRYPTO_OP_AES_##x##_GCM_DEC, \
                                    aes_ops_dec_aes_gcm_##x); \
  cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_GCM] = aes_gcm_key_exp_##x;

foreach_aes_gcm_handler_type;
 * fd.io coding-style-patch-verification: ON
 * eval: (c-set-style "gnu")