2 *------------------------------------------------------------------
3 * Copyright (c) 2019 Cisco and/or its affiliates.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *------------------------------------------------------------------
/* Number of AES rounds / expanded-key bytes for a key-size selector x
   (0 = 128-bit -> 10 rounds / 16 bytes, 1 = 192 -> 12 / 24,
   2 = 256 -> 14 / 32).  Arguments are parenthesized so the macros are
   correct when called with an expression, e.g. AESNI_KEY_ROUNDS (a + b). */
#define AESNI_KEY_ROUNDS(x) (10 + (x) * 2)
#define AESNI_KEY_BYTES(x)  (16 + (x) * 8)
32 /* AES-NI based AES key expansion based on code samples from
33 Intel(r) Advanced Encryption Standard (AES) New Instructions White Paper
36 static_always_inline __m128i
37 aes128_key_assist (__m128i r1, __m128i r2)
39 r1 ^= _mm_slli_si128 (r1, 4);
40 r1 ^= _mm_slli_si128 (r1, 4);
41 r1 ^= _mm_slli_si128 (r1, 4);
42 return r1 ^ _mm_shuffle_epi32 (r2, 0xff);
45 static_always_inline void
46 aes128_key_expand (__m128i * k, u8 * key)
48 k[0] = _mm_loadu_si128 ((const __m128i *) key);
49 k[1] = aes128_key_assist (k[0], _mm_aeskeygenassist_si128 (k[0], 0x01));
50 k[2] = aes128_key_assist (k[1], _mm_aeskeygenassist_si128 (k[1], 0x02));
51 k[3] = aes128_key_assist (k[2], _mm_aeskeygenassist_si128 (k[2], 0x04));
52 k[4] = aes128_key_assist (k[3], _mm_aeskeygenassist_si128 (k[3], 0x08));
53 k[5] = aes128_key_assist (k[4], _mm_aeskeygenassist_si128 (k[4], 0x10));
54 k[6] = aes128_key_assist (k[5], _mm_aeskeygenassist_si128 (k[5], 0x20));
55 k[7] = aes128_key_assist (k[6], _mm_aeskeygenassist_si128 (k[6], 0x40));
56 k[8] = aes128_key_assist (k[7], _mm_aeskeygenassist_si128 (k[7], 0x80));
57 k[9] = aes128_key_assist (k[8], _mm_aeskeygenassist_si128 (k[8], 0x1b));
58 k[10] = aes128_key_assist (k[9], _mm_aeskeygenassist_si128 (k[9], 0x36));
61 static_always_inline void
62 aes192_key_assist (__m128i * r1, __m128i * r2, __m128i * r3)
65 *r1 ^= r = _mm_slli_si128 (*r1, 0x4);
66 *r1 ^= r = _mm_slli_si128 (r, 0x4);
67 *r1 ^= _mm_slli_si128 (r, 0x4);
68 *r1 ^= _mm_shuffle_epi32 (*r2, 0x55);
69 *r3 ^= _mm_slli_si128 (*r3, 0x4);
70 *r3 ^= *r2 = _mm_shuffle_epi32 (*r1, 0xff);
/* Expand a 192-bit key into the round keys k[0..12].
   AES-192 produces 1.5 * 128 bits of schedule per assist step, so
   consecutive results are packed into the 128-bit k[] slots with
   _mm_shuffle_pd: selector 0 keeps the low half of the first operand
   and takes the low half of r1; selector 1 splices the high half of r1
   with the low half of r3.
   NOTE(review): some interior k[] stores (the whole-register steps
   between the packed ones) are not visible in this view of the file. */
static_always_inline void
aes192_key_expand (__m128i * k, u8 * key)
  k[0] = r1 = _mm_loadu_si128 ((__m128i *) key);
  /* load the 24-bytes key as 2 * 16-bytes (and ignore last 8-bytes) */
  r3 = CLIB_MEM_OVERFLOW_LOAD (_mm_loadu_si128, (__m128i *) (key + 16));
  /* Rcon 0x1 step; pack 1.5 round keys into k[1]/k[2] */
  r2 = _mm_aeskeygenassist_si128 (r3, 0x1);
  aes192_key_assist (&r1, &r2, &r3);
  k[1] = (__m128i) _mm_shuffle_pd ((__m128d) k[1], (__m128d) r1, 0);
  k[2] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1);
  /* Rcon 0x2 step */
  r2 = _mm_aeskeygenassist_si128 (r3, 0x2);
  aes192_key_assist (&r1, &r2, &r3);
  /* Rcon 0x4 step; pack into k[4]/k[5] */
  r2 = _mm_aeskeygenassist_si128 (r3, 0x4);
  aes192_key_assist (&r1, &r2, &r3);
  k[4] = (__m128i) _mm_shuffle_pd ((__m128d) k[4], (__m128d) r1, 0);
  k[5] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1);
  /* Rcon 0x8 step */
  r2 = _mm_aeskeygenassist_si128 (r3, 0x8);
  aes192_key_assist (&r1, &r2, &r3);
  /* Rcon 0x10 step; pack into k[7]/k[8] */
  r2 = _mm_aeskeygenassist_si128 (r3, 0x10);
  aes192_key_assist (&r1, &r2, &r3);
  k[7] = (__m128i) _mm_shuffle_pd ((__m128d) k[7], (__m128d) r1, 0);
  k[8] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1);
  /* Rcon 0x20 step */
  r2 = _mm_aeskeygenassist_si128 (r3, 0x20);
  aes192_key_assist (&r1, &r2, &r3);
  /* Rcon 0x40 step; pack into k[10]/k[11] */
  r2 = _mm_aeskeygenassist_si128 (r3, 0x40);
  aes192_key_assist (&r1, &r2, &r3);
  k[10] = (__m128i) _mm_shuffle_pd ((__m128d) k[10], (__m128d) r1, 0);
  k[11] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1);
  /* Rcon 0x80 step (final) */
  r2 = _mm_aeskeygenassist_si128 (r3, 0x80);
  aes192_key_assist (&r1, &r2, &r3);
119 static_always_inline void
120 aes256_key_assist1 (__m128i * r1, __m128i * r2)
123 *r1 ^= r = _mm_slli_si128 (*r1, 0x4);
124 *r1 ^= r = _mm_slli_si128 (r, 0x4);
125 *r1 ^= _mm_slli_si128 (r, 0x4);
126 *r1 ^= *r2 = _mm_shuffle_epi32 (*r2, 0xff);
129 static_always_inline void
130 aes256_key_assist2 (__m128i r1, __m128i * r3)
133 *r3 ^= r = _mm_slli_si128 (*r3, 0x4);
134 *r3 ^= r = _mm_slli_si128 (r, 0x4);
135 *r3 ^= _mm_slli_si128 (r, 0x4);
136 *r3 ^= _mm_shuffle_epi32 (_mm_aeskeygenassist_si128 (r1, 0x0), 0xaa);
/* Expand a 256-bit key into the round keys k[0..14].  The first two
   round keys are the key itself; each subsequent pair comes from an
   assist1 (even key, with Rcon) followed by an assist2 (odd key).
   NOTE(review): the k[] stores between the assist calls are not visible
   in this view of the file. */
static_always_inline void
aes256_key_expand (__m128i * k, u8 * key)
  k[0] = r1 = _mm_loadu_si128 ((__m128i *) key);
  k[1] = r3 = _mm_loadu_si128 ((__m128i *) (key + 16));
  /* Rcon 0x01 */
  r2 = _mm_aeskeygenassist_si128 (k[1], 0x01);
  aes256_key_assist1 (&r1, &r2);
  aes256_key_assist2 (r1, &r3);
  /* Rcon 0x02 */
  r2 = _mm_aeskeygenassist_si128 (r3, 0x02);
  aes256_key_assist1 (&r1, &r2);
  aes256_key_assist2 (r1, &r3);
  /* Rcon 0x04 */
  r2 = _mm_aeskeygenassist_si128 (r3, 0x04);
  aes256_key_assist1 (&r1, &r2);
  aes256_key_assist2 (r1, &r3);
  /* Rcon 0x08 */
  r2 = _mm_aeskeygenassist_si128 (r3, 0x08);
  aes256_key_assist1 (&r1, &r2);
  aes256_key_assist2 (r1, &r3);
  /* Rcon 0x10 */
  r2 = _mm_aeskeygenassist_si128 (r3, 0x10);
  aes256_key_assist1 (&r1, &r2);
  aes256_key_assist2 (r1, &r3);
  /* Rcon 0x20 */
  r2 = _mm_aeskeygenassist_si128 (r3, 0x20);
  aes256_key_assist1 (&r1, &r2);
  aes256_key_assist2 (r1, &r3);
  /* Rcon 0x40 — final (even) round key; no trailing odd key needed */
  r2 = _mm_aeskeygenassist_si128 (r3, 0x40);
  aes256_key_assist1 (&r1, &r2);
/* Expand `key' into the full round-key schedule k[] for the given key
   size (AESNI_KEY_ROUNDS (ks) + 1 entries).
   NOTE(review): the dispatch-on-ks statements (presumably a switch) are
   not visible in this view of the file; only the three per-size calls
   are shown. */
static_always_inline void
aes_key_expand (__m128i * k, u8 * key, aesni_key_size_t ks)
  aes128_key_expand (k, key);	/* 128-bit key */
  aes192_key_expand (k, key);	/* 192-bit key */
  aes256_key_expand (k, key);	/* 256-bit key */
/* Convert an encryption key schedule in-place into the schedule used by
   aesdec (AES "Equivalent Inverse Cipher"): round keys are reversed and
   the interior ones are run through InvMixColumns (_mm_aesimc_si128).
   The first and last round keys are swapped untransformed.
   NOTE(review): the swap of k[0]/k[rounds] and the assignment of the
   temporary `r' are not visible in this view of the file. */
static_always_inline void
aes_key_enc_to_dec (__m128i * k, aesni_key_size_t ks)
  int rounds = AESNI_KEY_ROUNDS (ks);
  /* swap-and-transform the interior round-key pairs */
  for (int i = 1; i < (rounds / 2); i++)
      k[rounds - i] = _mm_aesimc_si128 (k[i]);
      /* `r' presumably holds the old k[rounds - i] — assignment not shown */
      k[i] = _mm_aesimc_si128 (r);
  /* the middle round key is its own mirror; just apply InvMixColumns */
  k[rounds / 2] = _mm_aesimc_si128 (k[rounds / 2]);
218 #endif /* __aesni_h__ */
221 * fd.io coding-style-patch-verification: ON
224 * eval: (c-set-style "gnu")