/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper */

/* Headers assumed from the surrounding NaCl aes128ctr sources: the int128
   type, copy2, shufb, add_uint32_big, load32_bigendian, store32_bigendian
   and the bitslice/aesround/lastround macros are declared there. */
#include "api.h"
#include "int128.h"
#include "common.h"
#include "consts.h"
#include "crypto_stream.h"
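
/* crypto_stream_afternm writes len bytes of AES-128-CTR keystream to out.
   Following NaCl's beforenm/afternm split, c is assumed to hold the
   precomputed bitsliced round keys from crypto_stream_beforenm, and nonce
   the 16-byte initial counter block. Keystream is produced in 128-byte
   batches: eight consecutive counter blocks are bitsliced and encrypted
   in parallel. */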
int crypto_stream_afternm(unsigned char *out, unsigned long long len, const unsigned char *nonce, const unsigned char *c)
{
  int128 xmm0,  xmm1,  xmm2,  xmm3,  xmm4,  xmm5,  xmm6,  xmm7;
  int128 xmm8,  xmm9,  xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
  int128 nonce_stack;
  unsigned long long lensav;
  unsigned char bl[128];   /* staging buffer for a final partial batch */
  unsigned char *blp;
  unsigned char *np;
  unsigned char b;
  uint32 tmp;
  /* Copy the nonce onto the stack */
  copy2(&nonce_stack, (const int128 *) (nonce + 0));
  np = (unsigned char *)&nonce_stack;
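
  /* The last four bytes of the counter block (np + 12) act as a 32-bit
     big-endian block counter; see the load32_bigendian/store32_bigendian
     updates below. */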
enc_block:

  /* Load the current counter block; xmm1 gets a byte-swapped copy (so the
     counter words sit where add_uint32_big expects them), which is then
     replicated into xmm2..xmm7. */
  xmm0 = *(int128 *) (np + 0);
  copy2(&xmm1, &xmm0);
  shufb(&xmm1, SWAP32);
  copy2(&xmm2, &xmm1);
  copy2(&xmm3, &xmm1);
  copy2(&xmm4, &xmm1);
  copy2(&xmm5, &xmm1);
  copy2(&xmm6, &xmm1);
  copy2(&xmm7, &xmm1);

  add_uint32_big(&xmm1, 1);
  add_uint32_big(&xmm2, 2);
  add_uint32_big(&xmm3, 3);
  add_uint32_big(&xmm4, 4);
  add_uint32_big(&xmm5, 5);
  add_uint32_big(&xmm6, 6);
  add_uint32_big(&xmm7, 7);
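
  /* xmm0..xmm7 now hold counter+0 .. counter+7: one batch encrypts eight
     consecutive CTR blocks, i.e. 128 bytes of keystream. */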
  /* Permute the bytes of each block into the order the bitsliced rounds
     expect (M0/M0SWAP are shuffle constants from consts.h). */
  shufb(&xmm0, M0);
  shufb(&xmm1, M0SWAP);
  shufb(&xmm2, M0SWAP);
  shufb(&xmm3, M0SWAP);
  shufb(&xmm4, M0SWAP);
  shufb(&xmm5, M0SWAP);
  shufb(&xmm6, M0SWAP);
  shufb(&xmm7, M0SWAP);

  /* Transpose the eight blocks into bitsliced form (xmm8 is scratch). */
  bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, xmm8)
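
  /* Ten rounds of bitsliced AES-128 over all eight blocks at once; c
     supplies the round keys, and the two register banks alternate as
     source and destination from round to round. */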
  aesround( 1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  aesround( 2, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
  aesround( 3, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  aesround( 4, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
  aesround( 5, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  aesround( 6, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
  aesround( 7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  aesround( 8, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
  aesround( 9, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  lastround(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
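
  /* Transpose back from bitsliced form into eight ordinary 16-byte blocks
     (xmm0 is scratch here). */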
  bitslice(xmm13, xmm10, xmm15, xmm11, xmm14, xmm12, xmm9, xmm8, xmm0)
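
  /* Three exits per batch: a short final batch (< 128 bytes), an exact
     final batch (== 128 bytes), or a full batch with more to follow. */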
  if(len < 128) goto partial;
  if(len == 128) goto full;
  /* A full batch was produced and more remains: advance the big-endian
     block counter by the eight blocks just generated. */
  tmp = load32_bigendian(np + 12);
  tmp += 8;
  store32_bigendian(np + 12, tmp);
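
  /* Write out the batch; the blocks come back from the transpose in this
     fixed, permuted register order. */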
  *(int128 *) (out + 0) = xmm8;
  *(int128 *) (out + 16) = xmm9;
  *(int128 *) (out + 32) = xmm12;
  *(int128 *) (out + 48) = xmm14;
  *(int128 *) (out + 64) = xmm11;
  *(int128 *) (out + 80) = xmm15;
  *(int128 *) (out + 96) = xmm10;
  *(int128 *) (out + 112) = xmm13;
  len -= 128;
  out += 128;

  goto enc_block;

partial:

  /* Fewer than 128 bytes remain; credit the counter with the number of
     full 16-byte blocks consumed by this last, partial batch. */
  lensav = len;
  len >>= 4;

  tmp = load32_bigendian(np + 12);
  tmp += len;
  store32_bigendian(np + 12, tmp);
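
  /* Stage the final batch in the on-stack buffer bl so that only the
     lensav bytes actually requested are copied to out. */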
  blp = bl;
  *(int128 *)(blp + 0) = xmm8;
  *(int128 *)(blp + 16) = xmm9;
  *(int128 *)(blp + 32) = xmm12;
  *(int128 *)(blp + 48) = xmm14;
  *(int128 *)(blp + 64) = xmm11;
  *(int128 *)(blp + 80) = xmm15;
  *(int128 *)(blp + 96) = xmm10;
  *(int128 *)(blp + 112) = xmm13;
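
  /* Copy the trailing lensav (< 128) bytes out one at a time. */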
bytes:

  if(lensav == 0) goto end;
  b = blp[0]; /* clang false positive */
  *(unsigned char *)(out + 0) = b;

  blp += 1;
  out += 1;
  lensav -= 1;

  goto bytes;
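
  /* Exactly 128 bytes were requested in the final batch: advance the
     counter and write all eight blocks directly to out. */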
full:

  tmp = load32_bigendian(np + 12);
  tmp += 8;
  store32_bigendian(np + 12, tmp);
  *(int128 *) (out + 0) = xmm8;
  *(int128 *) (out + 16) = xmm9;
  *(int128 *) (out + 32) = xmm12;
  *(int128 *) (out + 48) = xmm14;
  *(int128 *) (out + 64) = xmm11;
  *(int128 *) (out + 80) = xmm15;
  *(int128 *) (out + 96) = xmm10;
  *(int128 *) (out + 112) = xmm13;

end:
  return 0;
}
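
/* Usage sketch, assuming the NaCl aes128ctr naming (the constant and
   function names below come from that API, not from this file):

     unsigned char kexp[crypto_stream_aes128ctr_BEFORENMBYTES];
     unsigned char stream[64];
     crypto_stream_aes128ctr_beforenm(kexp, key);
     crypto_stream_aes128ctr_afternm(stream, 64, nonce, kexp);

   beforenm performs the bitsliced key expansion once, so repeated afternm
   calls with the same key avoid redoing it. */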