- __m128i r1, r2, r3;
- k[0] = r1 = _mm_loadu_si128 ((__m128i *) key);
- k[1] = r3 = _mm_loadu_si128 ((__m128i *) (key + 16));
- r2 = _mm_aeskeygenassist_si128 (k[1], 0x01);
- aes256_key_assist1 (&r1, &r2);
- k[2] = r1;
- aes256_key_assist2 (r1, &r3);
- k[3] = r3;
- r2 = _mm_aeskeygenassist_si128 (r3, 0x02);
- aes256_key_assist1 (&r1, &r2);
- k[4] = r1;
- aes256_key_assist2 (r1, &r3);
- k[5] = r3;
- r2 = _mm_aeskeygenassist_si128 (r3, 0x04);
- aes256_key_assist1 (&r1, &r2);
- k[6] = r1;
- aes256_key_assist2 (r1, &r3);
- k[7] = r3;
- r2 = _mm_aeskeygenassist_si128 (r3, 0x08);
- aes256_key_assist1 (&r1, &r2);
- k[8] = r1;
- aes256_key_assist2 (r1, &r3);
- k[9] = r3;
- r2 = _mm_aeskeygenassist_si128 (r3, 0x10);
- aes256_key_assist1 (&r1, &r2);
- k[10] = r1;
- aes256_key_assist2 (r1, &r3);
- k[11] = r3;
- r2 = _mm_aeskeygenassist_si128 (r3, 0x20);
- aes256_key_assist1 (&r1, &r2);
- k[12] = r1;
- aes256_key_assist2 (r1, &r3);
- k[13] = r3;
- r2 = _mm_aeskeygenassist_si128 (r3, 0x40);
- aes256_key_assist1 (&r1, &r2);
- k[14] = r1;
+ __m128i *k = (__m128i *) key_schedule;
+ k[0] = _mm_loadu_si128 ((__m128i *) key);
+ k[1] = _mm_loadu_si128 ((__m128i *) (key + 16));
+ aes256_key_assist (k, 2, _mm_aeskeygenassist_si128 (k[1], 0x01));
+ aes256_key_assist (k, 4, _mm_aeskeygenassist_si128 (k[3], 0x02));
+ aes256_key_assist (k, 6, _mm_aeskeygenassist_si128 (k[5], 0x04));
+ aes256_key_assist (k, 8, _mm_aeskeygenassist_si128 (k[7], 0x08));
+ aes256_key_assist (k, 10, _mm_aeskeygenassist_si128 (k[9], 0x10));
+ aes256_key_assist (k, 12, _mm_aeskeygenassist_si128 (k[11], 0x20));
+ aes256_key_assist (k, 14, _mm_aeskeygenassist_si128 (k[13], 0x40));