/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper */

/* Package-local headers (assumed set): the int128 type, the load/store,
   copy and xor helpers, and the bitslice/aesround macros used below. */
#include "int128.h"
#include "common.h"
#include "consts.h"
#include "crypto_stream.h"

int crypto_stream_xor_afternm(unsigned char *out, const unsigned char *in, unsigned long long len, const unsigned char *nonce, const unsigned char *c)
{
  int128 xmm0, xmm1, xmm2,  xmm3,  xmm4,  xmm5,  xmm6,  xmm7;
  int128 xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
  int128 nonce_stack;
  unsigned long long lensav;
  unsigned char bl[128];
  unsigned char *blp;
  unsigned char *np;
  unsigned char b;
  uint32 tmp; /* uint32 is assumed to be provided by the package headers */

  /* Copy the nonce onto the stack */
  copy2(&nonce_stack, (const int128 *) (nonce + 0));
  np = (unsigned char *)&nonce_stack;
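
  /*
   * The nonce block doubles as the AES-CTR counter block: bytes 0..11
   * carry the per-message nonce and bytes 12..15 hold a 32-bit
   * big-endian block counter (see the accesses to np + 12 below).
   * Copying it to the stack lets the counter be incremented in place
   * without touching the caller's buffer.
   */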
  enc_block:

  /* Load the current counter block and replicate it into xmm1..xmm7 */
  xmm0 = *(int128 *) (np + 0);
  copy2(&xmm1, &xmm0);
  copy2(&xmm2, &xmm0);
  copy2(&xmm3, &xmm0);
  copy2(&xmm4, &xmm0);
  copy2(&xmm5, &xmm0);
  copy2(&xmm6, &xmm0);
  copy2(&xmm7, &xmm0);

  add_uint32_big(&xmm1, 1);
  add_uint32_big(&xmm2, 2);
  add_uint32_big(&xmm3, 3);
  add_uint32_big(&xmm4, 4);
  add_uint32_big(&xmm5, 5);
  add_uint32_big(&xmm6, 6);
  add_uint32_big(&xmm7, 7);
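
  /*
   * xmm0..xmm7 now hold eight consecutive counter blocks: the nonce with
   * 0..7 added into the big-endian word at bytes 12..15.  A minimal
   * sketch of the semantics assumed for add_uint32_big (its real
   * definition lives elsewhere in the package):
   *
   *   void add_uint32_big(int128 *r, uint32 x) {
   *     unsigned char *p = (unsigned char *)r;
   *     store32_bigendian(p + 12, load32_bigendian(p + 12) + x);
   *   }
   */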
  bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, xmm8)
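
  /*
   * bitslice (a macro, presumably from a package header, hence no
   * trailing semicolon) transposes the eight 16-byte blocks into
   * bit-planes: afterwards register i holds bit i of every byte of all
   * eight blocks, so the AES S-box can be evaluated as a boolean circuit
   * on all blocks in parallel.  The last argument (xmm8) is scratch
   * space for the transposition.
   */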
  aesround( 1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  aesround( 2, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
  aesround( 3, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  aesround( 4, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
  aesround( 5, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  aesround( 6, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
  aesround( 7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  aesround( 8, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
  aesround( 9, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  lastround(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
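
  /*
   * Ten rounds of AES-128 on the bitsliced state: aesround applies
   * SubBytes (as a boolean circuit), ShiftRows, MixColumns and the
   * round-key addition from the expanded key c; lastround omits
   * MixColumns, as the final AES round does.  The rounds ping-pong
   * between the register sets xmm0..7 and xmm8..15 so no copies are
   * needed between rounds.
   */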
  bitslice(xmm13, xmm10, xmm15, xmm11, xmm14, xmm12, xmm9, xmm8, xmm0)
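
  /*
   * Inverse transposition, back from bit-planes to bytes.  The bitsliced
   * circuit leaves the state registers permuted: keystream blocks 0..7
   * end up in xmm8, xmm9, xmm12, xmm14, xmm11, xmm15, xmm10, xmm13, in
   * that order, which is why every xor/store sequence below walks the
   * registers in exactly this order.
   */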
  if(len < 128) goto partial;
  if(len == 128) goto full;
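
  /*
   * Three-way dispatch on the remaining length: more than 128 bytes
   * falls through to the streaming case (consume 128 bytes, advance,
   * loop back to enc_block), exactly 128 bytes is handled at full:, and
   * fewer than 128 bytes goes through a stack buffer at partial:.
   */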
  tmp = load32_bigendian(np + 12);
  tmp += 8;
  store32_bigendian(np + 12, tmp);
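
  /*
   * The block counter advances by the eight blocks just generated.  Only
   * the 32-bit word at bytes 12..15 participates, so it wraps modulo
   * 2^32: a counter of 0xfffffffe, for example, yields the batch
   * 0xfffffffe, 0xffffffff, 0x00000000, ... on the current iteration.
   */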
  xor2(&xmm8, (const int128 *)(in + 0));
  xor2(&xmm9, (const int128 *)(in + 16));
  xor2(&xmm12, (const int128 *)(in + 32));
  xor2(&xmm14, (const int128 *)(in + 48));
  xor2(&xmm11, (const int128 *)(in + 64));
  xor2(&xmm15, (const int128 *)(in + 80));
  xor2(&xmm10, (const int128 *)(in + 96));
  xor2(&xmm13, (const int128 *)(in + 112));
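
  /*
   * CTR encryption proper: xor2 is assumed to XOR its 16-byte second
   * operand into the first, so each line above computes 16 bytes of
   * out = in XOR AES_k(counter block), which the stores below write out.
   */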
  *(int128 *) (out + 0) = xmm8;
  *(int128 *) (out + 16) = xmm9;
  *(int128 *) (out + 32) = xmm12;
  *(int128 *) (out + 48) = xmm14;
  *(int128 *) (out + 64) = xmm11;
  *(int128 *) (out + 80) = xmm15;
  *(int128 *) (out + 96) = xmm10;
  *(int128 *) (out + 112) = xmm13;

  len -= 128;
  in += 128;
  out += 128;

  goto enc_block;

  partial:

  /* Fewer than 128 bytes remain: lensav keeps the byte count, and the
     counter only advances by the number of full 16-byte blocks consumed */
  lensav = len;
  len >>= 4;

  tmp = load32_bigendian(np + 12);
  tmp += len;
  store32_bigendian(np + 12, tmp);

  blp = bl;
  *(int128 *)(blp + 0) = xmm8;
  *(int128 *)(blp + 16) = xmm9;
  *(int128 *)(blp + 32) = xmm12;
  *(int128 *)(blp + 48) = xmm14;
  *(int128 *)(blp + 64) = xmm11;
  *(int128 *)(blp + 80) = xmm15;
  *(int128 *)(blp + 96) = xmm10;
  *(int128 *)(blp + 112) = xmm13;
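
  /*
   * The whole 128-byte keystream batch is parked in the stack buffer bl;
   * the byte loop below then XORs only the lensav bytes that were
   * actually requested into the output.
   */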
  bytes:

  if(lensav == 0) goto end;

  b = blp[0]; /* clang static-analyzer false positive: bl is fully initialized above */
  b ^= *(const unsigned char *)(in + 0);
  *(unsigned char *)(out + 0) = b;

  blp += 1;
  in += 1;
  out += 1;
  lensav -= 1;

  goto bytes;

  full:

  /* Exactly 128 bytes remain: same as the streaming case, minus the loop */
  tmp = load32_bigendian(np + 12);
  tmp += 8;
  store32_bigendian(np + 12, tmp);
  xor2(&xmm8, (const int128 *)(in + 0));
  xor2(&xmm9, (const int128 *)(in + 16));
  xor2(&xmm12, (const int128 *)(in + 32));
  xor2(&xmm14, (const int128 *)(in + 48));
  xor2(&xmm11, (const int128 *)(in + 64));
  xor2(&xmm15, (const int128 *)(in + 80));
  xor2(&xmm10, (const int128 *)(in + 96));
  xor2(&xmm13, (const int128 *)(in + 112));
  *(int128 *) (out + 0) = xmm8;
  *(int128 *) (out + 16) = xmm9;
  *(int128 *) (out + 32) = xmm12;
  *(int128 *) (out + 48) = xmm14;
  *(int128 *) (out + 64) = xmm11;
  *(int128 *) (out + 80) = xmm15;
  *(int128 *) (out + 96) = xmm10;
  *(int128 *) (out + 112) = xmm13;

  end:
  return 0;
}
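
/*
 * Typical use, following the NaCl beforenm/afternm convention.  The names
 * and the BEFORENMBYTES constant below are assumptions about the
 * surrounding package, not definitions from this file: the beforenm step
 * bitslices the key schedule once, and its output is the expanded key c
 * that this function consumes on every call.
 *
 *   unsigned char kexp[crypto_stream_BEFORENMBYTES];
 *   crypto_stream_beforenm(kexp, key);                   // expand key once
 *   crypto_stream_xor_afternm(out, msg, msglen, nonce, kexp);
 */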