 *------------------------------------------------------------------
 * Copyright (c) 2019 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *     http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
#include <vlib/vlib.h>
#include <vnet/plugin/plugin.h>
#include <vnet/crypto/crypto.h>
#include <crypto_native/crypto_native.h>
#include <crypto_native/aes.h>
#include <crypto_native/ghash.h>

#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
#pragma GCC optimize ("O3")
#endif
#ifdef __VAES__
#define NUM_HI 32
#else
#define NUM_HI 8
#endif

typedef struct
{
  /* pre-calculated hash key values */
  const u8x16 Hi[NUM_HI];
  /* extracted AES key */
  const u8x16 Ke[15];
#ifdef __VAES__
  const u8x64 Ke4[15];
#endif
} aes_gcm_key_data_t;

typedef struct
{
  u32 counter;
  union
  {
    u32x4 Y;
    u32x16 Y4;
  };
} aes_gcm_counter_t;

typedef enum
{
  AES_GCM_F_WITH_GHASH = (1 << 0),
  AES_GCM_F_LAST_ROUND = (1 << 1),
  AES_GCM_F_ENCRYPT = (1 << 2),
  AES_GCM_F_DECRYPT = (1 << 3),
} aes_gcm_flags_t;

static const u32x4 ctr_inv_1 = { 0, 0, 0, 1 << 24 };
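/* Illustrative note (not part of the original build): the counter block Y
   is kept in network byte order, so the 32-bit big-endian block counter
   occupies the last four bytes of the vector.  Adding ctr_inv_1 bumps the
   most significant byte of the little-endian u32 in lane 3 (1 << 24),
   which is the least significant byte of the big-endian counter.  Worked
   example: a counter stored as 00:00:00:05 becomes 00:00:00:06 after one
   addition, with no byte swap on the hot path. */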
static_always_inline void
aes_gcm_enc_first_round (u8x16 * r, aes_gcm_counter_t * ctr, u8x16 k,
			 int n_blocks)
{
  if (PREDICT_TRUE ((u8) ctr->counter < (256 - 2 * n_blocks)))
    {
      for (int i = 0; i < n_blocks; i++)
	{
	  r[i] = k ^ (u8x16) ctr->Y;
	  ctr->Y += ctr_inv_1;
	}
      ctr->counter += n_blocks;
    }
  else
    {
      for (int i = 0; i < n_blocks; i++)
	{
	  r[i] = k ^ (u8x16) ctr->Y;
	  ctr->counter++;
	  ctr->Y[3] = clib_host_to_net_u32 (ctr->counter + 1);
	}
    }
}
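/* Worked example (added for clarity): with n_blocks == 4 the fast-path test
   reads (u8) ctr->counter < 248, i.e. at least 2 * n_blocks increments of
   headroom before the low byte wraps at 256, so the ctr_inv_1 lane addition
   can never carry into the next byte; past that threshold the slow path
   rebuilds Y[3] with a full clib_host_to_net_u32 per block. */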
static_always_inline void
aes_gcm_enc_round (u8x16 * r, u8x16 k, int n_blocks)
{
  for (int i = 0; i < n_blocks; i++)
    r[i] = aes_enc_round (r[i], k);
}

static_always_inline void
aes_gcm_enc_last_round (u8x16 * r, u8x16 * d, u8x16 const *k,
			int rounds, int n_blocks)
{
  /* additional rounds for AES-192 and AES-256 */
  for (int i = 10; i < rounds; i++)
    aes_gcm_enc_round (r, k[i], n_blocks);

  for (int i = 0; i < n_blocks; i++)
    d[i] ^= aes_enc_last_round (r[i], k[rounds]);
}
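/* Note (added for clarity): `rounds` is the AES round count for the key
   size (10 for AES-128, 12 for AES-192, 14 for AES-256).  For AES-128 the
   loop above does not execute and k[10] feeds aes_enc_last_round directly;
   for AES-256, rounds 10..13 run first and k[14] finishes the block. */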
static_always_inline u8x16
aes_gcm_ghash_blocks (u8x16 T, aes_gcm_key_data_t * kd,
		      u8x16u * in, int n_blocks)
{
  ghash_data_t _gd, *gd = &_gd;
  u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks;
  ghash_mul_first (gd, u8x16_reflect (in[0]) ^ T, Hi[0]);
  for (int i = 1; i < n_blocks; i++)
    ghash_mul_next (gd, u8x16_reflect (in[i]), Hi[i]);
  ghash_reduce (gd);
  ghash_reduce2 (gd);
  return ghash_final (gd);
}

static_always_inline u8x16
aes_gcm_ghash (u8x16 T, aes_gcm_key_data_t * kd, u8x16u * in, u32 n_left)
{
  while (n_left >= 128)
    {
      T = aes_gcm_ghash_blocks (T, kd, in, 8);
      n_left -= 128;
      in += 8;
    }

  if (n_left >= 64)
    {
      T = aes_gcm_ghash_blocks (T, kd, in, 4);
      n_left -= 64;
      in += 4;
    }

  if (n_left >= 32)
    {
      T = aes_gcm_ghash_blocks (T, kd, in, 2);
      n_left -= 32;
      in += 2;
    }

  if (n_left >= 16)
    {
      T = aes_gcm_ghash_blocks (T, kd, in, 1);
      n_left -= 16;
      in += 1;
    }

  if (n_left)
    {
      u8x16 r = aes_load_partial (in, n_left);
      T = ghash_mul (u8x16_reflect (r) ^ T, kd->Hi[NUM_HI - 1]);
    }
  return T;
}
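/* Note (inferred from the indexing above, added for clarity): kd->Hi holds
   precomputed powers of the hash key H with the highest power first, so
   Hi[NUM_HI - n] .. Hi[NUM_HI - 1] are H^n .. H^1.  One n-block fold then
   computes T' = (X0 ^ T) * H^n ^ X1 * H^(n-1) ^ ... ^ X(n-1) * H^1 with a
   single reduction at the end. */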
static_always_inline u8x16
aes_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
	      aes_gcm_counter_t * ctr, u8x16u * inv, u8x16u * outv,
	      int rounds, int n, int last_block_bytes, aes_gcm_flags_t f)
{
  u8x16 r[n];
  ghash_data_t _gd = { }, *gd = &_gd;
  const u8x16 *rk = (u8x16 *) kd->Ke;
  int ghash_blocks = (f & AES_GCM_F_ENCRYPT) ? 4 : n, gc = 1;
  u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - ghash_blocks;

  clib_prefetch_load (inv + 4);

  /* AES rounds 0 and 1 */
  aes_gcm_enc_first_round (r, ctr, rk[0], n);
  aes_gcm_enc_round (r, rk[1], n);

  /* load data - decrypt round */
  if (f & AES_GCM_F_DECRYPT)
    {
      for (int i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
	d[i] = inv[i];

      if (f & AES_GCM_F_LAST_ROUND)
	d[n - 1] = aes_load_partial (inv + n - 1, last_block_bytes);
    }

  /* GHASH multiply block 1 */
  if (f & AES_GCM_F_WITH_GHASH)
    ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]);

  /* AES rounds 2 and 3 */
  aes_gcm_enc_round (r, rk[2], n);
  aes_gcm_enc_round (r, rk[3], n);

  /* GHASH multiply block 2 */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]);

  /* AES rounds 4 and 5 */
  aes_gcm_enc_round (r, rk[4], n);
  aes_gcm_enc_round (r, rk[5], n);

  /* GHASH multiply block 3 */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]);

  /* AES rounds 6 and 7 */
  aes_gcm_enc_round (r, rk[6], n);
  aes_gcm_enc_round (r, rk[7], n);

  /* GHASH multiply block 4 */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]);

  /* AES rounds 8 and 9 */
  aes_gcm_enc_round (r, rk[8], n);
  aes_gcm_enc_round (r, rk[9], n);

  /* GHASH reduce 1st step */
  if (f & AES_GCM_F_WITH_GHASH)
    ghash_reduce (gd);

  /* load data - encrypt round */
  if (f & AES_GCM_F_ENCRYPT)
    {
      for (int i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
	d[i] = inv[i];

      if (f & AES_GCM_F_LAST_ROUND)
	d[n - 1] = aes_load_partial (inv + n - 1, last_block_bytes);
    }

  /* GHASH reduce 2nd step */
  if (f & AES_GCM_F_WITH_GHASH)
    ghash_reduce2 (gd);

  /* AES last round(s) */
  aes_gcm_enc_last_round (r, d, rk, rounds, n);

  /* store data */
  for (int i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
    outv[i] = d[i];

  if (f & AES_GCM_F_LAST_ROUND)
    aes_store_partial (outv + n - 1, d[n - 1], last_block_bytes);

  /* GHASH final step */
  if (f & AES_GCM_F_WITH_GHASH)
    T = ghash_final (gd);

  return T;
}
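/* Note (added for clarity): aes_gcm_calc above interleaves two AES rounds
   between every GHASH multiply, and splits the GHASH reduction into two
   steps placed around the data load, so the AES-NI and PCLMULQDQ execution
   units can work in parallel instead of serializing. */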
static_always_inline u8x16
aes_gcm_calc_double (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
		     aes_gcm_counter_t * ctr, u8x16u * inv, u8x16u * outv,
		     int rounds, aes_gcm_flags_t f)
{
  u8x16 r[4];
  ghash_data_t _gd, *gd = &_gd;
  const u8x16 *rk = (u8x16 *) kd->Ke;
  u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - 8;

  /* AES rounds 0 and 1 */
  aes_gcm_enc_first_round (r, ctr, rk[0], 4);
  aes_gcm_enc_round (r, rk[1], 4);

  /* load 4 blocks of data - decrypt round */
  if (f & AES_GCM_F_DECRYPT)
    {
      d[0] = inv[0];
      d[1] = inv[1];
      d[2] = inv[2];
      d[3] = inv[3];
    }

  /* GHASH multiply block 0 */
  ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]);

  /* AES rounds 2 and 3 */
  aes_gcm_enc_round (r, rk[2], 4);
  aes_gcm_enc_round (r, rk[3], 4);

  /* GHASH multiply block 1 */
  ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]);

  /* AES rounds 4 and 5 */
  aes_gcm_enc_round (r, rk[4], 4);
  aes_gcm_enc_round (r, rk[5], 4);

  /* GHASH multiply block 2 */
  ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]);

  /* AES rounds 6 and 7 */
  aes_gcm_enc_round (r, rk[6], 4);
  aes_gcm_enc_round (r, rk[7], 4);

  /* GHASH multiply block 3 */
  ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]);

  /* AES rounds 8 and 9 */
  aes_gcm_enc_round (r, rk[8], 4);
  aes_gcm_enc_round (r, rk[9], 4);

  /* load 4 blocks of data - encrypt round */
  if (f & AES_GCM_F_ENCRYPT)
    {
      d[0] = inv[0];
      d[1] = inv[1];
      d[2] = inv[2];
      d[3] = inv[3];
    }

  /* AES last round(s) */
  aes_gcm_enc_last_round (r, d, rk, rounds, 4);

  /* store 4 blocks of data */
  outv[0] = d[0];
  outv[1] = d[1];
  outv[2] = d[2];
  outv[3] = d[3];

  /* load next 4 blocks of data - decrypt round */
  if (f & AES_GCM_F_DECRYPT)
    {
      d[0] = inv[4];
      d[1] = inv[5];
      d[2] = inv[6];
      d[3] = inv[7];
    }

  /* GHASH multiply block 4 */
  ghash_mul_next (gd, u8x16_reflect (d[0]), Hi[4]);

  /* AES rounds 0, 1 and 2 */
  aes_gcm_enc_first_round (r, ctr, rk[0], 4);
  aes_gcm_enc_round (r, rk[1], 4);
  aes_gcm_enc_round (r, rk[2], 4);

  /* GHASH multiply block 5 */
  ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[5]);

  /* AES rounds 3 and 4 */
  aes_gcm_enc_round (r, rk[3], 4);
  aes_gcm_enc_round (r, rk[4], 4);

  /* GHASH multiply block 6 */
  ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[6]);

  /* AES rounds 5 and 6 */
  aes_gcm_enc_round (r, rk[5], 4);
  aes_gcm_enc_round (r, rk[6], 4);

  /* GHASH multiply block 7 */
  ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[7]);

  /* AES rounds 7 and 8 */
  aes_gcm_enc_round (r, rk[7], 4);
  aes_gcm_enc_round (r, rk[8], 4);

  /* GHASH reduce 1st step */
  ghash_reduce (gd);

  /* AES round 9 */
  aes_gcm_enc_round (r, rk[9], 4);

  /* load data - encrypt round */
  if (f & AES_GCM_F_ENCRYPT)
    {
      d[0] = inv[4];
      d[1] = inv[5];
      d[2] = inv[6];
      d[3] = inv[7];
    }

  /* GHASH reduce 2nd step */
  ghash_reduce2 (gd);

  /* AES last round(s) */
  aes_gcm_enc_last_round (r, d, rk, rounds, 4);

  /* store 4 blocks of data */
  outv[4] = d[0];
  outv[5] = d[1];
  outv[6] = d[2];
  outv[7] = d[3];

  /* GHASH final step */
  return ghash_final (gd);
}
static_always_inline u8x16
aes_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
		    int n_blocks, int n_bytes)
{
  ghash_data_t _gd, *gd = &_gd;
  u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks;

  if (n_bytes)
    d[n_blocks - 1] = aes_byte_mask (d[n_blocks - 1], n_bytes);

  ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]);
  if (n_blocks > 1)
    ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]);
  if (n_blocks > 2)
    ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]);
  if (n_blocks > 3)
    ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]);
  ghash_reduce (gd);
  ghash_reduce2 (gd);
  return ghash_final (gd);
}
#ifdef __VAES__
static const u32x16 ctr_inv_1234 = {
  0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24, 0, 0, 0, 3 << 24, 0, 0, 0, 4 << 24,
};

static const u32x16 ctr_inv_4444 = {
  0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24
};

static const u32x16 ctr_1234 = {
  1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0,
};
static_always_inline void
aes4_gcm_enc_first_round (u8x64 * r, aes_gcm_counter_t * ctr, u8x64 k, int n)
{
  u8 last_byte = (u8) ctr->counter;
  int i = 0;

  /* As the counter is stored in network byte order for performance reasons,
     we increment only its least significant byte, except in the case where
     it would overflow. As we process four 512-bit blocks in parallel except
     in the last round, overflow can happen only when n == 4 */
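  /* Worked example (added for clarity): a full n == 4 call consumes 16
     counter values.  From a low byte of 241, 241 + 16 = 257 crosses the
     256 boundary, so the plain ctr_inv_4444 addition would wrap the low
     byte; since bulk rounds always advance the counter by 16, 241 appears
     to be the only low-byte value at which this can happen, hence the
     single equality test below. */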
  if (n == 4)
    for (; i < 2; i++)
      {
	r[i] = k ^ (u8x64) ctr->Y4;
	ctr->Y4 += ctr_inv_4444;
      }

  if (n == 4 && PREDICT_TRUE (last_byte == 241))
    {
      u32x16 Yc, Yr = (u32x16) u8x64_reflect_u8x16 ((u8x64) ctr->Y4);

      for (; i < n; i++)
	{
	  r[i] = k ^ (u8x64) ctr->Y4;
	  Yc = u32x16_splat (ctr->counter + 4 * (i + 1)) + ctr_1234;
	  Yr = (u32x16) u32x16_mask_blend (Yr, Yc, 0x1111);
	  ctr->Y4 = (u32x16) u8x64_reflect_u8x16 ((u8x64) Yr);
	}
    }
  else
    {
      for (; i < n; i++)
	{
	  r[i] = k ^ (u8x64) ctr->Y4;
	  ctr->Y4 += ctr_inv_4444;
	}
    }
  ctr->counter += n * 4;
}
static_always_inline void
aes4_gcm_enc_round (u8x64 * r, u8x64 k, int n_blocks)
{
  for (int i = 0; i < n_blocks; i++)
    r[i] = aes_enc_round_x4 (r[i], k);
}

static_always_inline void
aes4_gcm_enc_last_round (u8x64 * r, u8x64 * d, u8x64 const *k,
			 int rounds, int n_blocks)
{
  /* additional rounds for AES-192 and AES-256 */
  for (int i = 10; i < rounds; i++)
    aes4_gcm_enc_round (r, k[i], n_blocks);

  for (int i = 0; i < n_blocks; i++)
    d[i] ^= aes_enc_last_round_x4 (r[i], k[rounds]);
}
static_always_inline u8x16
aes4_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d,
	       aes_gcm_counter_t * ctr, u8x16u * in, u8x16u * out,
	       int rounds, int n, int last_4block_bytes, aes_gcm_flags_t f)
{
  ghash4_data_t _gd, *gd = &_gd;
  const u8x64 *rk = (u8x64 *) kd->Ke4;
  int i, ghash_blocks, gc = 1;
  u8x64u *Hi4, *inv = (u8x64u *) in, *outv = (u8x64u *) out;
  u8x64 r[4];
  u64 byte_mask = _bextr_u64 (-1LL, 0, last_4block_bytes);
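  /* Worked example (added for clarity): _bextr_u64 (-1LL, 0, n) extracts
     the low n bits of all-ones, i.e. builds an n-bit mask with one bit per
     byte of a 512-bit vector.  For last_4block_bytes == 5 the mask is
     0x1f, so the masked loads/stores below touch only the first 5 bytes. */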
  if (f & AES_GCM_F_ENCRYPT)
    {
      /* during encryption we either hash four 512-bit blocks from the
         previous round or we don't hash at all */
      ghash_blocks = 4;
      Hi4 = (u8x64u *) (kd->Hi + NUM_HI - ghash_blocks * 4);
    }
  else
    {
      /* during decryption we hash 1..4 512-bit blocks from the current
         round */
      ghash_blocks = n;
      int n_128bit_blocks = n * 4;
      /* if this is the last round of decryption, we may have less than 4
         128-bit blocks in the last 512-bit data block, so we need to
         adjust the Hi4 pointer accordingly */
      if (f & AES_GCM_F_LAST_ROUND)
	n_128bit_blocks += ((last_4block_bytes + 15) >> 4) - 4;
      Hi4 = (u8x64u *) (kd->Hi + NUM_HI - n_128bit_blocks);
    }
  /* AES rounds 0 and 1 */
  aes4_gcm_enc_first_round (r, ctr, rk[0], n);
  aes4_gcm_enc_round (r, rk[1], n);

  /* load 4 blocks of data - decrypt round */
  if (f & AES_GCM_F_DECRYPT)
    {
      for (i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
	d[i] = inv[i];

      if (f & AES_GCM_F_LAST_ROUND)
	d[i] = u8x64_mask_load (u8x64_splat (0), inv + i, byte_mask);
    }

  /* GHASH multiply block 0 */
  if (f & AES_GCM_F_WITH_GHASH)
    ghash4_mul_first (gd, u8x64_reflect_u8x16 (d[0]) ^
		      u8x64_insert_u8x16 (u8x64_splat (0), T, 0), Hi4[0]);

  /* AES rounds 2 and 3 */
  aes4_gcm_enc_round (r, rk[2], n);
  aes4_gcm_enc_round (r, rk[3], n);

  /* GHASH multiply block 1 */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[1]);

  /* AES rounds 4 and 5 */
  aes4_gcm_enc_round (r, rk[4], n);
  aes4_gcm_enc_round (r, rk[5], n);

  /* GHASH multiply block 2 */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[2]);

  /* AES rounds 6 and 7 */
  aes4_gcm_enc_round (r, rk[6], n);
  aes4_gcm_enc_round (r, rk[7], n);

  /* GHASH multiply block 3 */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[3]);

  /* load 4 blocks of data - encrypt round */
  if (f & AES_GCM_F_ENCRYPT)
    {
      for (i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
	d[i] = inv[i];

      if (f & AES_GCM_F_LAST_ROUND)
	d[i] = u8x64_mask_load (u8x64_splat (0), inv + i, byte_mask);
    }

  /* AES rounds 8 and 9 */
  aes4_gcm_enc_round (r, rk[8], n);
  aes4_gcm_enc_round (r, rk[9], n);

  /* AES last round(s) */
  aes4_gcm_enc_last_round (r, d, rk, rounds, n);

  /* store 4 blocks of data */
  for (i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
    outv[i] = d[i];

  if (f & AES_GCM_F_LAST_ROUND)
    u8x64_mask_store (d[i], outv + i, byte_mask);

  /* GHASH reduce 1st step */
  ghash4_reduce (gd);

  /* GHASH reduce 2nd step */
  ghash4_reduce2 (gd);

  /* GHASH final step */
  return ghash4_final (gd);
}
static_always_inline u8x16
aes4_gcm_calc_double (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d,
		      aes_gcm_counter_t * ctr, u8x16u * in, u8x16u * out,
		      int rounds, aes_gcm_flags_t f)
{
  ghash4_data_t _gd, *gd = &_gd;
  const u8x64 *rk = (u8x64 *) kd->Ke4;
  u8x64 *Hi4 = (u8x64 *) (kd->Hi + NUM_HI - 32);
  u8x64u *inv = (u8x64u *) in, *outv = (u8x64u *) out;
  u8x64 r[4];

  /* AES rounds 0 and 1 */
  aes4_gcm_enc_first_round (r, ctr, rk[0], 4);
  aes4_gcm_enc_round (r, rk[1], 4);

  /* load 4 blocks of data - decrypt round */
  if (f & AES_GCM_F_DECRYPT)
    for (int i = 0; i < 4; i++)
      d[i] = inv[i];

  /* GHASH multiply block 0 */
  ghash4_mul_first (gd, u8x64_reflect_u8x16 (d[0]) ^
		    u8x64_insert_u8x16 (u8x64_splat (0), T, 0), Hi4[0]);

  /* AES rounds 2 and 3 */
  aes4_gcm_enc_round (r, rk[2], 4);
  aes4_gcm_enc_round (r, rk[3], 4);

  /* GHASH multiply block 1 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[1]);

  /* AES rounds 4 and 5 */
  aes4_gcm_enc_round (r, rk[4], 4);
  aes4_gcm_enc_round (r, rk[5], 4);

  /* GHASH multiply block 2 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[2]);

  /* AES rounds 6 and 7 */
  aes4_gcm_enc_round (r, rk[6], 4);
  aes4_gcm_enc_round (r, rk[7], 4);

  /* GHASH multiply block 3 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[3]);

  /* AES rounds 8 and 9 */
  aes4_gcm_enc_round (r, rk[8], 4);
  aes4_gcm_enc_round (r, rk[9], 4);

  /* load 4 blocks of data - encrypt round */
  if (f & AES_GCM_F_ENCRYPT)
    for (int i = 0; i < 4; i++)
      d[i] = inv[i];

  /* AES last round(s) */
  aes4_gcm_enc_last_round (r, d, rk, rounds, 4);

  /* store 4 blocks of data */
  for (int i = 0; i < 4; i++)
    outv[i] = d[i];

  /* load next 4 blocks of data - decrypt round */
  if (f & AES_GCM_F_DECRYPT)
    for (int i = 0; i < 4; i++)
      d[i] = inv[i + 4];

  /* GHASH multiply block 4 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[0]), Hi4[4]);

  /* AES rounds 0 and 1 */
  aes4_gcm_enc_first_round (r, ctr, rk[0], 4);
  aes4_gcm_enc_round (r, rk[1], 4);

  /* GHASH multiply block 5 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[5]);

  /* AES rounds 2 and 3 */
  aes4_gcm_enc_round (r, rk[2], 4);
  aes4_gcm_enc_round (r, rk[3], 4);

  /* GHASH multiply block 6 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[6]);

  /* AES rounds 4 and 5 */
  aes4_gcm_enc_round (r, rk[4], 4);
  aes4_gcm_enc_round (r, rk[5], 4);

  /* GHASH multiply block 7 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[7]);

  /* AES rounds 6 and 7 */
  aes4_gcm_enc_round (r, rk[6], 4);
  aes4_gcm_enc_round (r, rk[7], 4);

  /* GHASH reduce 1st step */
  ghash4_reduce (gd);

  /* AES rounds 8 and 9 */
  aes4_gcm_enc_round (r, rk[8], 4);
  aes4_gcm_enc_round (r, rk[9], 4);

  /* GHASH reduce 2nd step */
  ghash4_reduce2 (gd);

  /* load 4 blocks of data - encrypt round */
  if (f & AES_GCM_F_ENCRYPT)
    for (int i = 0; i < 4; i++)
      d[i] = inv[i + 4];

  /* AES last round(s) */
  aes4_gcm_enc_last_round (r, d, rk, rounds, 4);

  /* store 4 blocks of data */
  for (int i = 0; i < 4; i++)
    outv[i + 4] = d[i];

  /* GHASH final step */
  return ghash4_final (gd);
}
static_always_inline u8x16
aes4_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d,
		     int n, int last_4block_bytes)
{
  ghash4_data_t _gd, *gd = &_gd;
  u8x64u *Hi4;
  int n_128bit_blocks;
  u64 byte_mask = _bextr_u64 (-1LL, 0, last_4block_bytes);
  n_128bit_blocks = (n - 1) * 4 + ((last_4block_bytes + 15) >> 4);
  Hi4 = (u8x64u *) (kd->Hi + NUM_HI - n_128bit_blocks);
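  /* Worked example (added for clarity): for n == 3 512-bit blocks whose
     last block carries 20 bytes, n_128bit_blocks = (3 - 1) * 4 +
     ((20 + 15) >> 4) = 8 + 2 = 10, so hashing starts at H^10 and ends at
     H^1. */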
  d[n - 1] = u8x64_mask_blend (u8x64_splat (0), d[n - 1], byte_mask);
  ghash4_mul_first (gd, u8x64_reflect_u8x16 (d[0]) ^
		    u8x64_insert_u8x16 (u8x64_splat (0), T, 0), Hi4[0]);
  if (n > 1)
    ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[1]);
  if (n > 2)
    ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[2]);
  if (n > 3)
    ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[3]);
  ghash4_reduce (gd);
  ghash4_reduce2 (gd);
  return ghash4_final (gd);
}
#endif

static_always_inline u8x16
aes_gcm_enc (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr,
	     u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)
{
  aes_gcm_flags_t f = AES_GCM_F_ENCRYPT;

  if (n_left == 0)
    return T;

#ifdef __VAES__
  u8x64 d4[4];
  if (n_left < 256)
    {
      f |= AES_GCM_F_LAST_ROUND;
      if (n_left > 192)
	{
	  n_left -= 192;
	  aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left, f);
	  return aes4_gcm_ghash_last (T, kd, d4, 4, n_left);
	}
      else if (n_left > 128)
	{
	  n_left -= 128;
	  aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left, f);
	  return aes4_gcm_ghash_last (T, kd, d4, 3, n_left);
	}
      else if (n_left > 64)
	{
	  n_left -= 64;
	  aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left, f);
	  return aes4_gcm_ghash_last (T, kd, d4, 2, n_left);
	}
      else
	{
	  aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
	  return aes4_gcm_ghash_last (T, kd, d4, 1, n_left);
	}
    }

  aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);

  /* next */
  n_left -= 256;
  inv += 16;
  outv += 16;

  f |= AES_GCM_F_WITH_GHASH;

  while (n_left >= 512)
    {
      T = aes4_gcm_calc_double (T, kd, d4, ctr, inv, outv, rounds, f);

      /* next */
      n_left -= 512;
      inv += 32;
      outv += 32;
    }

  while (n_left >= 256)
    {
      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);

      /* next */
      n_left -= 256;
      inv += 16;
      outv += 16;
    }

  if (n_left == 0)
    return aes4_gcm_ghash_last (T, kd, d4, 4, 64);

  f |= AES_GCM_F_LAST_ROUND;

  if (n_left > 192)
    {
      n_left -= 192;
      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left, f);
      return aes4_gcm_ghash_last (T, kd, d4, 4, n_left);
    }

  if (n_left > 128)
    {
      n_left -= 128;
      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left, f);
      return aes4_gcm_ghash_last (T, kd, d4, 3, n_left);
    }

  if (n_left > 64)
    {
      n_left -= 64;
      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left, f);
      return aes4_gcm_ghash_last (T, kd, d4, 2, n_left);
    }

  T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
  return aes4_gcm_ghash_last (T, kd, d4, 1, n_left);
#else
  u8x16 d[4];
  if (n_left < 64)
    {
      f |= AES_GCM_F_LAST_ROUND;
      if (n_left > 48)
	{
	  n_left -= 48;
	  aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left, f);
	  return aes_gcm_ghash_last (T, kd, d, 4, n_left);
	}
      else if (n_left > 32)
	{
	  n_left -= 32;
	  aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left, f);
	  return aes_gcm_ghash_last (T, kd, d, 3, n_left);
	}
      else if (n_left > 16)
	{
	  n_left -= 16;
	  aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left, f);
	  return aes_gcm_ghash_last (T, kd, d, 2, n_left);
	}
      else
	{
	  aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
	  return aes_gcm_ghash_last (T, kd, d, 1, n_left);
	}
    }

  aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);

  /* next */
  n_left -= 64;
  inv += 4;
  outv += 4;

  f |= AES_GCM_F_WITH_GHASH;

  while (n_left >= 128)
    {
      T = aes_gcm_calc_double (T, kd, d, ctr, inv, outv, rounds, f);

      /* next */
      n_left -= 128;
      inv += 8;
      outv += 8;
    }

  if (n_left >= 64)
    {
      T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);

      /* next */
      n_left -= 64;
      inv += 4;
      outv += 4;
    }

  if (n_left == 0)
    return aes_gcm_ghash_last (T, kd, d, 4, 0);

  f |= AES_GCM_F_LAST_ROUND;

  if (n_left > 48)
    {
      n_left -= 48;
      T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left, f);
      return aes_gcm_ghash_last (T, kd, d, 4, n_left);
    }

  if (n_left > 32)
    {
      n_left -= 32;
      T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left, f);
      return aes_gcm_ghash_last (T, kd, d, 3, n_left);
    }

  if (n_left > 16)
    {
      n_left -= 16;
      T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left, f);
      return aes_gcm_ghash_last (T, kd, d, 2, n_left);
    }

  T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
  return aes_gcm_ghash_last (T, kd, d, 1, n_left);
#endif
}
static_always_inline u8x16
aes_gcm_dec (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr,
	     u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)
{
  aes_gcm_flags_t f = AES_GCM_F_WITH_GHASH | AES_GCM_F_DECRYPT;
#ifdef __VAES__
  u8x64 d4[4];

  while (n_left >= 512)
    {
      T = aes4_gcm_calc_double (T, kd, d4, ctr, inv, outv, rounds, f);

      /* next */
      n_left -= 512;
      inv += 32;
      outv += 32;
    }

  while (n_left >= 256)
    {
      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);

      /* next */
      n_left -= 256;
      inv += 16;
      outv += 16;
    }

  if (n_left == 0)
    return T;

  f |= AES_GCM_F_LAST_ROUND;

  if (n_left > 192)
    return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4,
			  n_left - 192, f);
  if (n_left > 128)
    return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3,
			  n_left - 128, f);
  if (n_left > 64)
    return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2,
			  n_left - 64, f);
  return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
#else
  u8x16 d[4];

  while (n_left >= 128)
    {
      T = aes_gcm_calc_double (T, kd, d, ctr, inv, outv, rounds, f);

      /* next */
      n_left -= 128;
      inv += 8;
      outv += 8;
    }

  if (n_left >= 64)
    {
      T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);

      /* next */
      n_left -= 64;
      inv += 4;
      outv += 4;
    }

  if (n_left == 0)
    return T;

  f |= AES_GCM_F_LAST_ROUND;

  if (n_left > 48)
    return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left - 48, f);

  if (n_left > 32)
    return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left - 32, f);

  if (n_left > 16)
    return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left - 16, f);

  return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
#endif
}
static_always_inline int
aes_gcm (u8x16u *in, u8x16u *out, u8x16u *addt, u8 *ivp, u8x16u *tag,
	 u32 data_bytes, u32 aad_bytes, u8 tag_len, aes_gcm_key_data_t *kd,
	 int aes_rounds, int is_encrypt)
{
  int i;
  u8x16 r, T = { };

  ghash_data_t _gd, *gd = &_gd;
  aes_gcm_counter_t _ctr, *ctr = &_ctr;

  clib_prefetch_load (ivp);
  clib_prefetch_load (in);
  clib_prefetch_load (in + 4);

  /* calculate ghash for AAD - optimized for ipsec common cases */
  if (aad_bytes == 8)
    T = aes_gcm_ghash (T, kd, addt, 8);
  else if (aad_bytes == 12)
    T = aes_gcm_ghash (T, kd, addt, 12);
  else
    T = aes_gcm_ghash (T, kd, addt, aad_bytes);
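  /* Note (added for clarity): 8- and 12-byte AAD match the IPsec ESP
     layouts (SPI + 32-bit sequence number, or SPI + 64-bit extended
     sequence number), hence the dedicated constant-size paths above. */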
  /* initialize counter */
  ctr->counter = 1;
  Y0.as_u64x2[0] = *(u64u *) ivp;
  Y0.as_u32x4[2] = *(u32u *) (ivp + 8);
  Y0.as_u32x4 += ctr_inv_1;
#ifdef __VAES__
  ctr->Y4 = u32x16_splat_u32x4 (Y0.as_u32x4) + ctr_inv_1234;
#else
  ctr->Y = Y0.as_u32x4 + ctr_inv_1;
#endif

  /* ghash and encrypt/decrypt */
  if (is_encrypt)
    T = aes_gcm_enc (T, kd, ctr, in, out, data_bytes, aes_rounds);
  else
    T = aes_gcm_dec (T, kd, ctr, in, out, data_bytes, aes_rounds);

  clib_prefetch_load (tag);

  /* Finalize ghash - data bytes and aad bytes converted to bits */
  r = (u8x16) ((u64x2) { data_bytes, aad_bytes } << 3);
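  /* Worked example (added for clarity): GCM's final GHASH block encodes
     the AAD and ciphertext lengths in bits; the << 3 converts byte counts
     to bit counts, e.g. data_bytes == 64 and aad_bytes == 12 produce the
     u64 pair { 512, 96 }. */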
  /* interleaved computation of final ghash and E(Y0, k) */
  ghash_mul_first (gd, r ^ T, kd->Hi[NUM_HI - 1]);
  r = kd->Ke[0] ^ Y0.as_u8x16;
  for (i = 1; i < 5; i += 1)
    r = aes_enc_round (r, kd->Ke[i]);
  ghash_reduce (gd);
  ghash_reduce2 (gd);
  for (; i < 9; i += 1)
    r = aes_enc_round (r, kd->Ke[i]);
  T = ghash_final (gd);
  for (; i < aes_rounds; i += 1)
    r = aes_enc_round (r, kd->Ke[i]);
  r = aes_enc_last_round (r, kd->Ke[aes_rounds]);
  T = u8x16_reflect (T) ^ r;

  /* tag_len 16 -> 0 (a full 16-byte tag takes the full-block paths below) */
  tag_len &= 0xf;

  if (is_encrypt)
    {
      /* store tag */
      if (tag_len)
	aes_store_partial (tag, T, tag_len);
      else
	tag[0] = T;
    }
  else
    {
      /* check tag */
      u16 tag_mask = tag_len ? (1 << tag_len) - 1 : 0xffff;
      if ((u8x16_msb_mask (tag[0] == T) & tag_mask) != tag_mask)
	return 0;
    }

  return 1;
}
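/* Worked example (added for clarity): in the tag check above, tag_len == 12
   gives tag_mask == 0x0fff, so only the first 12 bytes of the computed tag
   are compared; a tag_len of 0 (i.e. a full 16-byte tag after the & 0xf
   mapping) compares all 16 bytes. */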
static_always_inline u32
aes_ops_enc_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[],
		     u32 n_ops, aes_key_size_t ks)
{
  crypto_native_main_t *cm = &crypto_native_main;
  vnet_crypto_op_t *op = ops[0];
  aes_gcm_key_data_t *kd;
  u32 n_left = n_ops;

next:
  kd = (aes_gcm_key_data_t *) cm->key_data[op->key_index];
  aes_gcm ((u8x16u *) op->src, (u8x16u *) op->dst, (u8x16u *) op->aad,
	   (u8 *) op->iv, (u8x16u *) op->tag, op->len, op->aad_len,
	   op->tag_len, kd, AES_KEY_ROUNDS (ks), /* is_encrypt */ 1);
  op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;

  if (--n_left)
    {
      op += 1;
      goto next;
    }

  return n_ops;
}

static_always_inline u32
aes_ops_dec_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops,
		     aes_key_size_t ks)
{
  crypto_native_main_t *cm = &crypto_native_main;
  vnet_crypto_op_t *op = ops[0];
  aes_gcm_key_data_t *kd;
  u32 n_left = n_ops;
  int rv;

next:
  kd = (aes_gcm_key_data_t *) cm->key_data[op->key_index];
  rv = aes_gcm ((u8x16u *) op->src, (u8x16u *) op->dst, (u8x16u *) op->aad,
		(u8 *) op->iv, (u8x16u *) op->tag, op->len, op->aad_len,
		op->tag_len, kd, AES_KEY_ROUNDS (ks),
		/* is_encrypt */ 0);

  if (rv)
    {
      op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
    }
  else
    {
      op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;
      n_ops--;
    }

  if (--n_left)
    {
      op += 1;
      goto next;
    }

  return n_ops;
}
static_always_inline void *
aes_gcm_key_exp (vnet_crypto_key_t * key, aes_key_size_t ks)
{
  aes_gcm_key_data_t *kd;
  u8x16 H;

  kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES);

  /* expand AES key */
  aes_key_expand ((u8x16 *) kd->Ke, key->data, ks);

  /* pre-calculate H */
  H = aes_encrypt_block (u8x16_splat (0), kd->Ke, ks);
  H = u8x16_reflect (H);
  ghash_precompute (H, (u8x16 *) kd->Hi, NUM_HI);
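  /* Note (inferred from usage, added for clarity): ghash_precompute fills
     kd->Hi with successive powers of H so the bulk GHASH loops can fold up
     to NUM_HI 16-byte blocks per reduction -- see the Hi[NUM_HI - n]
     indexing in the helpers above. */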
#ifdef __VAES__
  u8x64 *Ke4 = (u8x64 *) kd->Ke4;
  for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
    Ke4[i] = u8x64_splat_u8x16 (kd->Ke[i]);
#endif

  return kd;
}

#define foreach_aes_gcm_handler_type _(128) _(192) _(256)

#define _(x) \
static u32 aes_ops_dec_aes_gcm_##x \
(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
{ return aes_ops_dec_aes_gcm (vm, ops, n_ops, AES_KEY_##x); } \
static u32 aes_ops_enc_aes_gcm_##x \
(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
{ return aes_ops_enc_aes_gcm (vm, ops, n_ops, AES_KEY_##x); } \
static void * aes_gcm_key_exp_##x (vnet_crypto_key_t *key) \
{ return aes_gcm_key_exp (key, AES_KEY_##x); }

foreach_aes_gcm_handler_type;
#undef _
clib_error_t *
#ifdef __VAES__
crypto_native_aes_gcm_init_icl (vlib_main_t * vm)
#elif __AVX512F__
crypto_native_aes_gcm_init_skx (vlib_main_t * vm)
#elif __AVX2__
crypto_native_aes_gcm_init_hsw (vlib_main_t * vm)
#elif __aarch64__
crypto_native_aes_gcm_init_neon (vlib_main_t * vm)
#else
crypto_native_aes_gcm_init_slm (vlib_main_t * vm)
#endif
{
  crypto_native_main_t *cm = &crypto_native_main;

#define _(x) \
  vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
				    VNET_CRYPTO_OP_AES_##x##_GCM_ENC, \
				    aes_ops_enc_aes_gcm_##x); \
  vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
				    VNET_CRYPTO_OP_AES_##x##_GCM_DEC, \
				    aes_ops_dec_aes_gcm_##x); \
  cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_GCM] = aes_gcm_key_exp_##x;
  foreach_aes_gcm_handler_type;
#undef _
  return 0;
}
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */