/*
 *------------------------------------------------------------------
 * Copyright (c) 2019 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */
/* Round count / key length in bytes for an aesni_key_size_t value.
   Assumes AESNI_KEY_128/192/256 encode as 0/1/2, giving 10/12/14 rounds
   and 16/24/32-byte keys -- TODO confirm against the enum definition.
   Arguments are parenthesized so expression arguments expand safely. */
#define AESNI_KEY_ROUNDS(x) (10 + (x) * 2)
#define AESNI_KEY_BYTES(x)  (16 + (x) * 8)
/* AES-NI based AES key expansion based on code samples from
   Intel(r) Advanced Encryption Standard (AES) New Instructions White Paper
   (323641-001) */
37 static_always_inline __m128i
38 aes128_key_assist (__m128i r1, __m128i r2)
40 r1 ^= _mm_slli_si128 (r1, 4);
41 r1 ^= _mm_slli_si128 (r1, 4);
42 r1 ^= _mm_slli_si128 (r1, 4);
43 return r1 ^ _mm_shuffle_epi32 (r2, 0xff);
46 static_always_inline void
47 aes128_key_expand (__m128i * k, u8 * key)
49 k[0] = _mm_loadu_si128 ((const __m128i *) key);
50 k[1] = aes128_key_assist (k[0], _mm_aeskeygenassist_si128 (k[0], 0x01));
51 k[2] = aes128_key_assist (k[1], _mm_aeskeygenassist_si128 (k[1], 0x02));
52 k[3] = aes128_key_assist (k[2], _mm_aeskeygenassist_si128 (k[2], 0x04));
53 k[4] = aes128_key_assist (k[3], _mm_aeskeygenassist_si128 (k[3], 0x08));
54 k[5] = aes128_key_assist (k[4], _mm_aeskeygenassist_si128 (k[4], 0x10));
55 k[6] = aes128_key_assist (k[5], _mm_aeskeygenassist_si128 (k[5], 0x20));
56 k[7] = aes128_key_assist (k[6], _mm_aeskeygenassist_si128 (k[6], 0x40));
57 k[8] = aes128_key_assist (k[7], _mm_aeskeygenassist_si128 (k[7], 0x80));
58 k[9] = aes128_key_assist (k[8], _mm_aeskeygenassist_si128 (k[8], 0x1b));
59 k[10] = aes128_key_assist (k[9], _mm_aeskeygenassist_si128 (k[9], 0x36));
62 static_always_inline void
63 aes192_key_assist (__m128i * r1, __m128i * r2, __m128i * r3)
66 *r1 ^= r = _mm_slli_si128 (*r1, 0x4);
67 *r1 ^= r = _mm_slli_si128 (r, 0x4);
68 *r1 ^= _mm_slli_si128 (r, 0x4);
69 *r1 ^= _mm_shuffle_epi32 (*r2, 0x55);
70 *r3 ^= _mm_slli_si128 (*r3, 0x4);
71 *r3 ^= *r2 = _mm_shuffle_epi32 (*r1, 0xff);
74 static_always_inline void
75 aes192_key_expand (__m128i * k, u8 * key)
79 k[0] = r1 = _mm_loadu_si128 ((__m128i *) key);
80 r3 = _mm_loadu_si128 ((__m128i *) (key + 16));
83 r2 = _mm_aeskeygenassist_si128 (r3, 0x1);
84 aes192_key_assist (&r1, &r2, &r3);
85 k[1] = (__m128i) _mm_shuffle_pd ((__m128d) k[1], (__m128d) r1, 0);
86 k[2] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1);
87 r2 = _mm_aeskeygenassist_si128 (r3, 0x2);
88 aes192_key_assist (&r1, &r2, &r3);
92 r2 = _mm_aeskeygenassist_si128 (r3, 0x4);
93 aes192_key_assist (&r1, &r2, &r3);
94 k[4] = (__m128i) _mm_shuffle_pd ((__m128d) k[4], (__m128d) r1, 0);
95 k[5] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1);
96 r2 = _mm_aeskeygenassist_si128 (r3, 0x8);
97 aes192_key_assist (&r1, &r2, &r3);
101 r2 = _mm_aeskeygenassist_si128 (r3, 0x10);
102 aes192_key_assist (&r1, &r2, &r3);
103 k[7] = (__m128i) _mm_shuffle_pd ((__m128d) k[7], (__m128d) r1, 0);
104 k[8] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1);
105 r2 = _mm_aeskeygenassist_si128 (r3, 0x20);
106 aes192_key_assist (&r1, &r2, &r3);
110 r2 = _mm_aeskeygenassist_si128 (r3, 0x40);
111 aes192_key_assist (&r1, &r2, &r3);
112 k[10] = (__m128i) _mm_shuffle_pd ((__m128d) k[10], (__m128d) r1, 0);
113 k[11] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1);
114 r2 = _mm_aeskeygenassist_si128 (r3, 0x80);
115 aes192_key_assist (&r1, &r2, &r3);
119 static_always_inline void
120 aes256_key_assist1 (__m128i * r1, __m128i * r2)
123 *r1 ^= r = _mm_slli_si128 (*r1, 0x4);
124 *r1 ^= r = _mm_slli_si128 (r, 0x4);
125 *r1 ^= _mm_slli_si128 (r, 0x4);
126 *r1 ^= *r2 = _mm_shuffle_epi32 (*r2, 0xff);
129 static_always_inline void
130 aes256_key_assist2 (__m128i r1, __m128i * r3)
133 *r3 ^= r = _mm_slli_si128 (*r3, 0x4);
134 *r3 ^= r = _mm_slli_si128 (r, 0x4);
135 *r3 ^= _mm_slli_si128 (r, 0x4);
136 *r3 ^= _mm_shuffle_epi32 (_mm_aeskeygenassist_si128 (r1, 0x0), 0xaa);
139 static_always_inline void
140 aes256_key_expand (__m128i * k, u8 * key)
143 k[0] = r1 = _mm_loadu_si128 ((__m128i *) key);
144 k[1] = r3 = _mm_loadu_si128 ((__m128i *) (key + 16));
145 r2 = _mm_aeskeygenassist_si128 (k[1], 0x01);
146 aes256_key_assist1 (&r1, &r2);
148 aes256_key_assist2 (r1, &r3);
150 r2 = _mm_aeskeygenassist_si128 (r3, 0x02);
151 aes256_key_assist1 (&r1, &r2);
153 aes256_key_assist2 (r1, &r3);
155 r2 = _mm_aeskeygenassist_si128 (r3, 0x04);
156 aes256_key_assist1 (&r1, &r2);
158 aes256_key_assist2 (r1, &r3);
160 r2 = _mm_aeskeygenassist_si128 (r3, 0x08);
161 aes256_key_assist1 (&r1, &r2);
163 aes256_key_assist2 (r1, &r3);
165 r2 = _mm_aeskeygenassist_si128 (r3, 0x10);
166 aes256_key_assist1 (&r1, &r2);
168 aes256_key_assist2 (r1, &r3);
170 r2 = _mm_aeskeygenassist_si128 (r3, 0x20);
171 aes256_key_assist1 (&r1, &r2);
173 aes256_key_assist2 (r1, &r3);
175 r2 = _mm_aeskeygenassist_si128 (r3, 0x40);
176 aes256_key_assist1 (&r1, &r2);
180 static_always_inline void
181 aes_key_expand (__m128i * k, u8 * key, aesni_key_size_t ks)
186 aes128_key_expand (k, key);
189 aes192_key_expand (k, key);
192 aes256_key_expand (k, key);
198 static_always_inline void
199 aes_key_enc_to_dec (__m128i * k, aesni_key_size_t ks)
201 int rounds = AESNI_KEY_ROUNDS (ks);
208 for (int i = 1; i < (rounds / 2); i++)
211 k[rounds - i] = _mm_aesimc_si128 (k[i]);
212 k[i] = _mm_aesimc_si128 (r);
215 k[rounds / 2] = _mm_aesimc_si128 (k[rounds / 2]);
#endif /* __aesni_h__ */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */