/*
 *------------------------------------------------------------------
 * Copyright (c) 2019 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */
24 __m128i encrypt_key[15];
25 __m128i decrypt_key[15];
/* Number of AES rounds for key-size index x (0 = AES-128, 1 = AES-192,
   2 = AES-256): 10, 12 or 14.  Argument is parenthesized so expression
   arguments such as (a + b) expand correctly.  */
#define AESNI_KEY_ROUNDS(x) (10 + (x) * 2)
/* Key length in bytes for key-size index x: 16, 24 or 32.  */
#define AESNI_KEY_BYTES(x) (16 + (x) * 8)
/* AES-NI based AES key expansion based on code samples from
 * Intel(r) Advanced Encryption Standard (AES) New Instructions White Paper
 */
43 static_always_inline __m128i
44 aes128_key_assist (__m128i r1, __m128i r2)
46 r1 ^= _mm_slli_si128 (r1, 4);
47 r1 ^= _mm_slli_si128 (r1, 4);
48 r1 ^= _mm_slli_si128 (r1, 4);
49 return r1 ^ _mm_shuffle_epi32 (r2, 0xff);
52 static_always_inline void
53 aes128_key_expand (__m128i * k, u8 * key)
55 k[0] = _mm_loadu_si128 ((const __m128i *) key);
56 k[1] = aes128_key_assist (k[0], _mm_aeskeygenassist_si128 (k[0], 0x01));
57 k[2] = aes128_key_assist (k[1], _mm_aeskeygenassist_si128 (k[1], 0x02));
58 k[3] = aes128_key_assist (k[2], _mm_aeskeygenassist_si128 (k[2], 0x04));
59 k[4] = aes128_key_assist (k[3], _mm_aeskeygenassist_si128 (k[3], 0x08));
60 k[5] = aes128_key_assist (k[4], _mm_aeskeygenassist_si128 (k[4], 0x10));
61 k[6] = aes128_key_assist (k[5], _mm_aeskeygenassist_si128 (k[5], 0x20));
62 k[7] = aes128_key_assist (k[6], _mm_aeskeygenassist_si128 (k[6], 0x40));
63 k[8] = aes128_key_assist (k[7], _mm_aeskeygenassist_si128 (k[7], 0x80));
64 k[9] = aes128_key_assist (k[8], _mm_aeskeygenassist_si128 (k[8], 0x1b));
65 k[10] = aes128_key_assist (k[9], _mm_aeskeygenassist_si128 (k[9], 0x36));
68 static_always_inline void
69 aes192_key_assist (__m128i * r1, __m128i * r2, __m128i * r3)
72 *r1 ^= r = _mm_slli_si128 (*r1, 0x4);
73 *r1 ^= r = _mm_slli_si128 (r, 0x4);
74 *r1 ^= _mm_slli_si128 (r, 0x4);
75 *r1 ^= _mm_shuffle_epi32 (*r2, 0x55);
76 *r3 ^= _mm_slli_si128 (*r3, 0x4);
77 *r3 ^= *r2 = _mm_shuffle_epi32 (*r1, 0xff);
80 static_always_inline void
81 aes192_key_expand (__m128i * k, u8 * key)
85 k[0] = r1 = _mm_loadu_si128 ((__m128i *) key);
86 r3 = _mm_loadu_si128 ((__m128i *) (key + 16));
89 r2 = _mm_aeskeygenassist_si128 (r3, 0x1);
90 aes192_key_assist (&r1, &r2, &r3);
91 k[1] = (__m128i) _mm_shuffle_pd ((__m128d) k[1], (__m128d) r1, 0);
92 k[2] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1);
93 r2 = _mm_aeskeygenassist_si128 (r3, 0x2);
94 aes192_key_assist (&r1, &r2, &r3);
98 r2 = _mm_aeskeygenassist_si128 (r3, 0x4);
99 aes192_key_assist (&r1, &r2, &r3);
100 k[4] = (__m128i) _mm_shuffle_pd ((__m128d) k[4], (__m128d) r1, 0);
101 k[5] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1);
102 r2 = _mm_aeskeygenassist_si128 (r3, 0x8);
103 aes192_key_assist (&r1, &r2, &r3);
107 r2 = _mm_aeskeygenassist_si128 (r3, 0x10);
108 aes192_key_assist (&r1, &r2, &r3);
109 k[7] = (__m128i) _mm_shuffle_pd ((__m128d) k[7], (__m128d) r1, 0);
110 k[8] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1);
111 r2 = _mm_aeskeygenassist_si128 (r3, 0x20);
112 aes192_key_assist (&r1, &r2, &r3);
116 r2 = _mm_aeskeygenassist_si128 (r3, 0x40);
117 aes192_key_assist (&r1, &r2, &r3);
118 k[10] = (__m128i) _mm_shuffle_pd ((__m128d) k[10], (__m128d) r1, 0);
119 k[11] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1);
120 r2 = _mm_aeskeygenassist_si128 (r3, 0x80);
121 aes192_key_assist (&r1, &r2, &r3);
125 static_always_inline void
126 aes256_key_assist1 (__m128i * r1, __m128i * r2)
129 *r1 ^= r = _mm_slli_si128 (*r1, 0x4);
130 *r1 ^= r = _mm_slli_si128 (r, 0x4);
131 *r1 ^= _mm_slli_si128 (r, 0x4);
132 *r1 ^= *r2 = _mm_shuffle_epi32 (*r2, 0xff);
135 static_always_inline void
136 aes256_key_assist2 (__m128i r1, __m128i * r3)
139 *r3 ^= r = _mm_slli_si128 (*r3, 0x4);
140 *r3 ^= r = _mm_slli_si128 (r, 0x4);
141 *r3 ^= _mm_slli_si128 (r, 0x4);
142 *r3 ^= _mm_shuffle_epi32 (_mm_aeskeygenassist_si128 (r1, 0x0), 0xaa);
145 static_always_inline void
146 aes256_key_expand (__m128i * k, u8 * key)
149 k[0] = r1 = _mm_loadu_si128 ((__m128i *) key);
150 k[1] = r3 = _mm_loadu_si128 ((__m128i *) (key + 16));
151 r2 = _mm_aeskeygenassist_si128 (k[1], 0x01);
152 aes256_key_assist1 (&r1, &r2);
154 aes256_key_assist2 (r1, &r3);
156 r2 = _mm_aeskeygenassist_si128 (r3, 0x02);
157 aes256_key_assist1 (&r1, &r2);
159 aes256_key_assist2 (r1, &r3);
161 r2 = _mm_aeskeygenassist_si128 (r3, 0x04);
162 aes256_key_assist1 (&r1, &r2);
164 aes256_key_assist2 (r1, &r3);
166 r2 = _mm_aeskeygenassist_si128 (r3, 0x08);
167 aes256_key_assist1 (&r1, &r2);
169 aes256_key_assist2 (r1, &r3);
171 r2 = _mm_aeskeygenassist_si128 (r3, 0x10);
172 aes256_key_assist1 (&r1, &r2);
174 aes256_key_assist2 (r1, &r3);
176 r2 = _mm_aeskeygenassist_si128 (r3, 0x20);
177 aes256_key_assist1 (&r1, &r2);
179 aes256_key_assist2 (r1, &r3);
181 r2 = _mm_aeskeygenassist_si128 (r3, 0x40);
182 aes256_key_assist1 (&r1, &r2);
186 static_always_inline void
187 aes_key_expand (__m128i * k, u8 * key, aesni_key_size_t ks)
192 aes128_key_expand (k, key);
195 aes192_key_expand (k, key);
198 aes256_key_expand (k, key);
204 static_always_inline void
205 aes_key_enc_to_dec (__m128i * k, aesni_key_size_t ks)
207 int rounds = AESNI_KEY_ROUNDS (ks);
214 for (int i = 1; i < (rounds / 2); i++)
217 k[rounds - i] = _mm_aesimc_si128 (k[i]);
218 k[i] = _mm_aesimc_si128 (r);
221 k[rounds / 2] = _mm_aesimc_si128 (k[rounds / 2]);
224 #endif /* __aesni_h__ */
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */