#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4100 4731)
#endif

#ifndef CRYPTOPP_IMPORTS
#ifndef CRYPTOPP_GENERATE_X64_MASM

#include "secblock.h"
#include "sha.h"
#include "misc.h"
#include "cpu.h"

NAMESPACE_BEGIN(CryptoPP)

#if defined(CRYPTOPP_DISABLE_SHA_ASM)
# undef CRYPTOPP_X86_ASM_AVAILABLE
# undef CRYPTOPP_X32_ASM_AVAILABLE
# undef CRYPTOPP_X64_ASM_AVAILABLE
# undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
#endif

// start of Steve Reid's code
#define blk0(i) (W[i] = data[i])
#define blk1(i) (W[i&15] = rotlFixed(W[(i+13)&15]^W[(i+8)&15]^W[(i+2)&15]^W[i&15],1))

#define f1(x,y,z) (z^(x&(y^z)))
#define f2(x,y,z) (x^y^z)
#define f3(x,y,z) ((x&y)|(z&(x|y)))
#define f4(x,y,z) (x^y^z)

/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
#define R0(v,w,x,y,z,i) z+=f1(w,x,y)+blk0(i)+0x5A827999+rotlFixed(v,5);w=rotlFixed(w,30);
#define R1(v,w,x,y,z,i) z+=f1(w,x,y)+blk1(i)+0x5A827999+rotlFixed(v,5);w=rotlFixed(w,30);
#define R2(v,w,x,y,z,i) z+=f2(w,x,y)+blk1(i)+0x6ED9EBA1+rotlFixed(v,5);w=rotlFixed(w,30);
#define R3(v,w,x,y,z,i) z+=f3(w,x,y)+blk1(i)+0x8F1BBCDC+rotlFixed(v,5);w=rotlFixed(w,30);
#define R4(v,w,x,y,z,i) z+=f4(w,x,y)+blk1(i)+0xCA62C1D6+rotlFixed(v,5);w=rotlFixed(w,30);

static void SHA1_CXX_Transform(word32 *state, const word32 *data)
{
    word32 W[16];
    /* Copy context->state[] to working vars */
    word32 a = state[0];
    word32 b = state[1];
    word32 c = state[2];
    word32 d = state[3];
    word32 e = state[4];
    /* 4 rounds of 20 operations each. Loop unrolled. */
    R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
    R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
    R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
    R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
    R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
    R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
    R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
    R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
    R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
    R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
    R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
    R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
    R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
    R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
    R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
    R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
    R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
    R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
    R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
    R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
    /* Add the working vars back into context.state[] */
    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
    state[4] += e;
}

#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE

// Process one 64-byte block with the Intel SHA instruction extensions
static void SHA1_SSE_SHA_Transform(word32 *state, const word32 *data)
{
    __m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1;
    __m128i MASK, MSG0, MSG1, MSG2, MSG3;
    // Load initial values
    ABCD = _mm_loadu_si128((__m128i*) state);
    E0 = _mm_set_epi32(state[4], 0, 0, 0);
    ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
    MASK = _mm_set_epi64x(W64LIT(0x0001020304050607), W64LIT(0x08090a0b0c0d0e0f));

    // Save current hash
    ABCD_SAVE = ABCD;
    E0_SAVE = E0;
    // Rounds 0-3
    MSG0 = _mm_loadu_si128((__m128i*) data+0);
    MSG0 = _mm_shuffle_epi8(MSG0, MASK);
    E0 = _mm_add_epi32(E0, MSG0);
    E1 = ABCD;
    ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);

    // Rounds 4-7
    MSG1 = _mm_loadu_si128((__m128i*) (data+4));
    MSG1 = _mm_shuffle_epi8(MSG1, MASK);
    E1 = _mm_sha1nexte_epu32(E1, MSG1);
    E0 = ABCD;
    ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0);
    MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);

    // Rounds 8-11
    MSG2 = _mm_loadu_si128((__m128i*) (data+8));
    MSG2 = _mm_shuffle_epi8(MSG2, MASK);
    E0 = _mm_sha1nexte_epu32(E0, MSG2);
    E1 = ABCD;
    ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
    MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
    MSG0 = _mm_xor_si128(MSG0, MSG2);

    // Rounds 12-15
    MSG3 = _mm_loadu_si128((__m128i*) (data+12));
    MSG3 = _mm_shuffle_epi8(MSG3, MASK);
    E1 = _mm_sha1nexte_epu32(E1, MSG3);
    E0 = ABCD;
    MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
    ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0);
    MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
    MSG1 = _mm_xor_si128(MSG1, MSG3);

    // Rounds 16-19
    E0 = _mm_sha1nexte_epu32(E0, MSG0);
    E1 = ABCD;
    MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
    ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
    MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
    MSG2 = _mm_xor_si128(MSG2, MSG0);
    // Rounds 20-23
    E1 = _mm_sha1nexte_epu32(E1, MSG1);
    E0 = ABCD;
    MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
    ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
    MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
    MSG3 = _mm_xor_si128(MSG3, MSG1);

    // Rounds 24-27
    E0 = _mm_sha1nexte_epu32(E0, MSG2);
    E1 = ABCD;
    MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
    ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1);
    MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
    MSG0 = _mm_xor_si128(MSG0, MSG2);

    // Rounds 28-31
    E1 = _mm_sha1nexte_epu32(E1, MSG3);
    E0 = ABCD;
    MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
    ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
    MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
    MSG1 = _mm_xor_si128(MSG1, MSG3);

    // Rounds 32-35
    E0 = _mm_sha1nexte_epu32(E0, MSG0);
    E1 = ABCD;
    MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
    ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1);
    MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
    MSG2 = _mm_xor_si128(MSG2, MSG0);

    // Rounds 36-39
    E1 = _mm_sha1nexte_epu32(E1, MSG1);
    E0 = ABCD;
    MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
    ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
    MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
    MSG3 = _mm_xor_si128(MSG3, MSG1);
    // Rounds 40-43
    E0 = _mm_sha1nexte_epu32(E0, MSG2);
    E1 = ABCD;
    MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
    ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
    MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
    MSG0 = _mm_xor_si128(MSG0, MSG2);

    // Rounds 44-47
    E1 = _mm_sha1nexte_epu32(E1, MSG3);
    E0 = ABCD;
    MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
    ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2);
    MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
    MSG1 = _mm_xor_si128(MSG1, MSG3);

    // Rounds 48-51
    E0 = _mm_sha1nexte_epu32(E0, MSG0);
    E1 = ABCD;
    MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
    ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
    MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
    MSG2 = _mm_xor_si128(MSG2, MSG0);

    // Rounds 52-55
    E1 = _mm_sha1nexte_epu32(E1, MSG1);
    E0 = ABCD;
    MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
    ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2);
    MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
    MSG3 = _mm_xor_si128(MSG3, MSG1);

    // Rounds 56-59
    E0 = _mm_sha1nexte_epu32(E0, MSG2);
    E1 = ABCD;
    MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
    ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
    MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
    MSG0 = _mm_xor_si128(MSG0, MSG2);
    // Rounds 60-63
    E1 = _mm_sha1nexte_epu32(E1, MSG3);
    E0 = ABCD;
    MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
    ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);
    MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
    MSG1 = _mm_xor_si128(MSG1, MSG3);

    // Rounds 64-67
    E0 = _mm_sha1nexte_epu32(E0, MSG0);
    E1 = ABCD;
    MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
    ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3);
    MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
    MSG2 = _mm_xor_si128(MSG2, MSG0);

    // Rounds 68-71
    E1 = _mm_sha1nexte_epu32(E1, MSG1);
    E0 = ABCD;
    MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
    ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);
    MSG3 = _mm_xor_si128(MSG3, MSG1);

    // Rounds 72-75
    E0 = _mm_sha1nexte_epu32(E0, MSG2);
    E1 = ABCD;
    MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
    ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3);

    // Rounds 76-79
    E1 = _mm_sha1nexte_epu32(E1, MSG3);
    E0 = ABCD;
    ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);
    // Combine state
    E0 = _mm_sha1nexte_epu32(E0, E0_SAVE);
    ABCD = _mm_add_epi32(ABCD, ABCD_SAVE);

    // Save state
    ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
    _mm_storeu_si128((__m128i*) state, ABCD);
    state[4] = _mm_extract_epi32(E0, 3);
}
#endif // CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE

pfnSHATransform InitializeSHA1Transform()
{
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
    if (HasSHA())
        return &SHA1_SSE_SHA_Transform;
    else
#endif
    return &SHA1_CXX_Transform;
}
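
// A minimal usage sketch (an assumption, not preserved in the damaged source):
// callers cache the selected transform in a function-local static so that CPU
// feature detection runs only once, e.g.
//
//   static const pfnSHATransform s_pfn = InitializeSHA1Transform();
//   s_pfn(digest, data);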
void SHA1::InitState(HashWordType *state)
{
    state[0] = 0x67452301L;
    state[1] = 0xEFCDAB89L;
    state[2] = 0x98BADCFEL;
    state[3] = 0x10325476L;
    state[4] = 0xC3D2E1F0L;
}
void SHA224::InitState(HashWordType *state)
{
    static const word32 s[8] = {
        0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
        0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4};
    memcpy(state, s, sizeof(s));
}
void SHA256::InitState(HashWordType *state)
{
    static const word32 s[8] = {
        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
    memcpy(state, s, sizeof(s));
}
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
CRYPTOPP_ALIGN_DATA(16) extern const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = {
#else
extern const word32 SHA256_K[64] = {
#endif
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM

#if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM))

#define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ
#define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4]
#define G(i) H(i+1)
#define F(i) H(i+2)
#define E(i) H(i+3)
#define D(i) H(i+4)
#define C(i) H(i+5)
#define B(i) H(i+6)
#define A(i) H(i+7)
#define Wt(i) BASE+8*4+ASM_MOD(1024+15-(i),16)*4
#define Wt_2(i) Wt((i)-2)
#define Wt_15(i) Wt((i)-15)
#define Wt_7(i) Wt((i)-7)
#define K_END [BASE+8*4+16*4+0*WORD_SZ]
#define STATE_SAVE [BASE+8*4+16*4+1*WORD_SZ]
#define DATA_SAVE [BASE+8*4+16*4+2*WORD_SZ]
#define DATA_END [BASE+8*4+16*4+3*WORD_SZ]
#define Kt(i) WORD_REG(si)+(i)*4
#if CRYPTOPP_BOOL_X32
    #define BASE esp+8
#elif CRYPTOPP_BOOL_X86
    #define BASE esp+4
#elif defined(__GNUC__)
    #define BASE r8
#else
    #define BASE rsp+8
#endif

#define RA0(i, edx, edi) \
    AS2( add edx, [Kt(i)] )\
    AS2( add edx, [Wt(i)] )\
    AS2( add edx, H(i) )\

#define RA1(i, edx, edi)

#define RB0(i, edx, edi)

#define RB1(i, edx, edi) \
    AS2( mov AS_REG_7d, [Wt_2(i)] )\
    AS2( mov edi, [Wt_15(i)])\
    AS2( mov ebx, AS_REG_7d )\
    AS2( shr AS_REG_7d, 10 )\
    AS2( ror ebx, 17 )\
    AS2( xor AS_REG_7d, ebx )\
    AS2( ror ebx, 2 )\
    AS2( xor ebx, AS_REG_7d ) /* s1(W_t-2) */\
    AS2( add ebx, [Wt_7(i)])\
    AS2( mov AS_REG_7d, edi )\
    AS2( shr AS_REG_7d, 3 )\
    AS2( ror edi, 7 )\
    AS2( add ebx, [Wt(i)])\
    AS2( xor AS_REG_7d, edi )\
    AS2( add edx, [Kt(i)])\
    AS2( ror edi, 11 )\
    AS2( add edx, H(i) )\
    AS2( xor AS_REG_7d, edi ) /* s0(W_t-15) */\
    AS2( add AS_REG_7d, ebx ) /* W_t */\
    AS2( mov [Wt(i)], AS_REG_7d)\
    AS2( add edx, AS_REG_7d )\

#define ROUND(i, r, eax, ecx, edi, edx)\
    /* in: edi = E, ecx = A, eax = B^C; out: edx = E, eax = A, ecx = B^C */\
    AS2( mov edx, F(i) )\
    AS2( xor edx, G(i) )\
    AS2( and edx, edi )\
    AS2( xor edx, G(i) ) /* Ch(e,f,g) = ((f^g)&e)^g */\
    AS2( mov AS_REG_7d, edi )\
    AS2( ror edi, 6 )\
    AS2( ror AS_REG_7d, 25 )\
    AS2( xor AS_REG_7d, edi )\
    AS2( ror edi, 5 )\
    AS2( xor AS_REG_7d, edi ) /* S1(e) */\
    AS2( add edx, AS_REG_7d ) /* T1 = S1(e) + Ch(e,f,g) */\
    RA##r(i, edx, edi ) /* + h + W_t + K_t */\
    RB##r(i, edx, edi )\
    AS2( mov ebx, ecx )\
    AS2( xor ecx, B(i) ) /* A^B */\
    AS2( and eax, ecx )\
    AS2( xor eax, B(i) ) /* Maj(a,b,c) = B ^ ((A^B)&(B^C)) */\
    AS2( mov AS_REG_7d, ebx )\
    AS2( ror ebx, 2 )\
    AS2( add edx, D(i) ) /* E = T1 + D */\
    AS2( mov D(i), edx )\
    AS2( ror AS_REG_7d, 22 )\
    AS2( xor AS_REG_7d, ebx )\
    AS2( ror ebx, 11 )\
    AS2( xor AS_REG_7d, ebx ) /* S0(a) */\
    AS2( add eax, AS_REG_7d ) /* A = T1 + S0(a) + Maj(a,b,c) */\
    AS2( mov H(i), eax )\

#if CRYPTOPP_BOOL_X64
#define SWAP_COPY(i) \
    AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
    AS1( bswap WORD_REG(bx))\
    AS2( mov [Wt(i*2+1)], WORD_REG(bx))
#else
#define SWAP_COPY(i) \
    AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
    AS1( bswap WORD_REG(bx))\
    AS2( mov [Wt(i)], WORD_REG(bx))
#endif

static void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len)
{
#if defined(__GNUC__)
    #if CRYPTOPP_BOOL_X64
        FixedSizeAlignedSecBlock<byte, LOCALS_SIZE> workspace;
    #endif
    __asm__ __volatile__
    (
    #if CRYPTOPP_BOOL_X64
        "lea %4, %%r8;"
    #endif
    ".intel_syntax noprefix;"
#elif defined(CRYPTOPP_GENERATE_X64_MASM)
X86_SHA256_HashBlocks PROC FRAME
    rex_push_reg rsi
    push_reg rdi
    push_reg rbx
    push_reg rbp
    alloc_stack(LOCALS_SIZE+8)
    .endprolog
    mov rdi, r8
    lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4]
#endif
#ifndef CRYPTOPP_GENERATE_X64_MASM
    #ifndef __GNUC__
        AS2( mov edi, [len])
        AS2( lea WORD_REG(si), [SHA256_K+48*4])
    #endif
#endif

#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
    #if !defined(_MSC_VER) || (_MSC_VER < 1400)
        AS_PUSH_IF86(bx)
    #endif
    AS_PUSH_IF86(bp)
    AS2( mov ebx, esp)
    AS2( and esp, -16)
    AS2( sub WORD_REG(sp), LOCALS_SIZE)
    AS_PUSH_IF86(bx)
#endif
    AS2( mov STATE_SAVE, WORD_REG(cx))
    AS2( mov DATA_SAVE, WORD_REG(dx))
    AS2( lea WORD_REG(ax), [WORD_REG(di) + WORD_REG(dx)])
    AS2( mov DATA_END, WORD_REG(ax))
    AS2( mov K_END, WORD_REG(si))
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
    AS2( test edi, 1)
    ASJ( jnz, 2, f)
    AS1( dec DWORD PTR K_END)
#endif
    AS2( movdqa xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16])
    AS2( movdqa xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16])
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
    #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
        ASJ( jmp, 0, f)
    #endif
    ASL(2)  // non-SSE2
    AS2( mov esi, ecx)
    AS2( lea edi, A(0))
    AS2( mov ecx, 8)
    AS1( rep movsd)
    AS2( mov esi, K_END)
    ASJ( jmp, 3, f)
#endif

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
    ASL(0)
    AS2( movdqa E(0), xmm1)
    AS2( movdqa A(0), xmm0)
#endif
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
    ASL(3)
#endif

    AS2( sub WORD_REG(si), 48*4)
    SWAP_COPY(0) SWAP_COPY(1) SWAP_COPY(2) SWAP_COPY(3)
    SWAP_COPY(4) SWAP_COPY(5) SWAP_COPY(6) SWAP_COPY(7)
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
    SWAP_COPY(8) SWAP_COPY(9) SWAP_COPY(10) SWAP_COPY(11)
    SWAP_COPY(12) SWAP_COPY(13) SWAP_COPY(14) SWAP_COPY(15)
#endif

    AS2( mov edi, E(0))  // E
    AS2( mov eax, B(0))
    AS2( xor eax, C(0))  // B^C
    AS2( mov ecx, A(0))  // A
    ROUND(0, 0, eax, ecx, edi, edx)
    ROUND(1, 0, ecx, eax, edx, edi)
    ROUND(2, 0, eax, ecx, edi, edx)
    ROUND(3, 0, ecx, eax, edx, edi)
    ROUND(4, 0, eax, ecx, edi, edx)
    ROUND(5, 0, ecx, eax, edx, edi)
    ROUND(6, 0, eax, ecx, edi, edx)
    ROUND(7, 0, ecx, eax, edx, edi)
    ROUND(8, 0, eax, ecx, edi, edx)
    ROUND(9, 0, ecx, eax, edx, edi)
    ROUND(10, 0, eax, ecx, edi, edx)
    ROUND(11, 0, ecx, eax, edx, edi)
    ROUND(12, 0, eax, ecx, edi, edx)
    ROUND(13, 0, ecx, eax, edx, edi)
    ROUND(14, 0, eax, ecx, edi, edx)
    ROUND(15, 0, ecx, eax, edx, edi)
    ASL(1)
    AS2( add WORD_REG(si), 4*16)
    ROUND(0, 1, eax, ecx, edi, edx)
    ROUND(1, 1, ecx, eax, edx, edi)
    ROUND(2, 1, eax, ecx, edi, edx)
    ROUND(3, 1, ecx, eax, edx, edi)
    ROUND(4, 1, eax, ecx, edi, edx)
    ROUND(5, 1, ecx, eax, edx, edi)
    ROUND(6, 1, eax, ecx, edi, edx)
    ROUND(7, 1, ecx, eax, edx, edi)
    ROUND(8, 1, eax, ecx, edi, edx)
    ROUND(9, 1, ecx, eax, edx, edi)
    ROUND(10, 1, eax, ecx, edi, edx)
    ROUND(11, 1, ecx, eax, edx, edi)
    ROUND(12, 1, eax, ecx, edi, edx)
    ROUND(13, 1, ecx, eax, edx, edi)
    ROUND(14, 1, eax, ecx, edi, edx)
    ROUND(15, 1, ecx, eax, edx, edi)
    AS2( cmp WORD_REG(si), K_END)
    ASJ( jb, 1, b)

    AS2( mov WORD_REG(dx), DATA_SAVE)
    AS2( add WORD_REG(dx), 64)
    AS2( mov AS_REG_7, STATE_SAVE)
    AS2( mov DATA_SAVE, WORD_REG(dx))
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
    AS2( test DWORD PTR K_END, 1)
    ASJ( jz, 4, f)
    AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_7+1*16])
    AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_7+0*16])
    AS2( paddd xmm1, E(0))
    AS2( paddd xmm0, A(0))
    AS2( movdqa [AS_REG_7+1*16], xmm1)
    AS2( movdqa [AS_REG_7+0*16], xmm0)
    AS2( cmp WORD_REG(dx), DATA_END)
    ASJ( jb, 0, b)
    ASJ( jmp, 5, f)
#endif
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
    ASL(4)  // non-SSE2
#endif
    AS2( add [AS_REG_7+0*4], ecx)  // A
    AS2( add [AS_REG_7+4*4], edi)  // E
    AS2( mov eax, B(0))
    AS2( mov ebx, C(0))
    AS2( mov ecx, D(0))
    AS2( add [AS_REG_7+1*4], eax)
    AS2( add [AS_REG_7+2*4], ebx)
    AS2( add [AS_REG_7+3*4], ecx)
    AS2( mov eax, F(0))
    AS2( mov ebx, G(0))
    AS2( mov ecx, H(0))
    AS2( add [AS_REG_7+5*4], eax)
    AS2( add [AS_REG_7+6*4], ebx)
    AS2( add [AS_REG_7+7*4], ecx)
    AS2( mov ecx, AS_REG_7d)
    AS2( cmp WORD_REG(dx), DATA_END)
    ASJ( jb, 2, b)
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
    ASL(5)
#endif
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
    AS_POP_IF86(sp)
    AS_POP_IF86(bp)
    #if !defined(_MSC_VER) || (_MSC_VER < 1400)
        AS_POP_IF86(bx)
    #endif
#endif
#ifdef CRYPTOPP_GENERATE_X64_MASM
    add rsp, LOCALS_SIZE+8
    pop rbp
    pop rbx
    pop rdi
    pop rsi
    ret
X86_SHA256_HashBlocks ENDP
#endif
#ifdef __GNUC__
    ".att_syntax prefix;"
    :
    : "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len)
    #if CRYPTOPP_BOOL_X64
        , "m" (workspace[0])
    #endif
    : "memory", "cc", "%eax"
    #if CRYPTOPP_BOOL_X64
        , "%rbx", "%r8", "%r10"
    #endif
    );
#endif
}

#endif // (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM))

#ifndef CRYPTOPP_GENERATE_X64_MASM

#ifdef CRYPTOPP_X64_MASM_AVAILABLE
extern "C" {
void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len);
}
#endif

#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const word32 *data, size_t length);
#endif

#if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_SHA_ASM)

static pfnSHAHashBlocks InitializeSHA256HashBlocks()
{
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
    if (HasSHA())
        return &SHA256_SSE_SHA_HashBlocks;
    else
#endif
    return &X86_SHA256_HashBlocks;
}
size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
{
    static const pfnSHAHashBlocks s_pfn = InitializeSHA256HashBlocks();
    s_pfn(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2());
    return length % BLOCKSIZE;
}

size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
{
    static const pfnSHAHashBlocks s_pfn = InitializeSHA256HashBlocks();
    s_pfn(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2());
    return length % BLOCKSIZE;
}

#endif
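
// Note on the length argument passed above: (length & (size_t(0)-BLOCKSIZE))
// rounds the byte count down to a whole number of 64-byte blocks. Subtracting
// !HasSSE2() makes the count odd when SSE2 is unavailable; the assembly routine
// inspects that low bit to choose the non-SSE2 state-combining path.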
#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))

#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) (y^((x^y)&(y^z)))

#define a(i) T[(0-i)&7]
#define b(i) T[(1-i)&7]
#define c(i) T[(2-i)&7]
#define d(i) T[(3-i)&7]
#define e(i) T[(4-i)&7]
#define f(i) T[(5-i)&7]
#define g(i) T[(6-i)&7]
#define h(i) T[(7-i)&7]

#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\
    d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))

// for SHA256
#define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22))
#define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25))
#define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3))
#define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10))

#if defined(__OPTIMIZE_SIZE__)
// Smaller but slower
void SHA256_CXX_Transform(word32 *state, const word32 *data)
{
    word32 W[32], T[20];
    unsigned int i = 0, j = 0;
    word32 *t = T+8;
    memcpy(t, state, 8*4);
    word32 e = t[4], a = t[0];

    do
    {
        word32 w = data[j];
        W[j] = w;
        w += SHA256_K[j];
        w += t[7];
        w += S1(e);
        w += Ch(e, t[5], t[6]);
        e = t[3] + w;
        t[3] = t[3+8] = e;
        w += S0(a);
        a = w + Maj(a, t[1], t[2]);
        t[-1] = t[7] = a;
        --t;
        ++j;
        if (j%8 == 0)
            t += 8;
    } while (j<16);

    do
    {
        i = j&0xf;
        word32 w = s1(W[i+16-2]) + s0(W[i+16-15]) + W[i] + W[i+16-7];
        W[i+16] = W[i] = w;
        w += SHA256_K[j];
        w += t[7];
        w += S1(e);
        w += Ch(e, t[5], t[6]);
        e = t[3] + w;
        t[3] = t[3+8] = e;
        w += S0(a);
        a = w + Maj(a, t[1], t[2]);
        t[-1] = t[7] = a;

        w = s1(W[(i+1)+16-2]) + s0(W[(i+1)+16-15]) + W[(i+1)] + W[(i+1)+16-7];
        W[(i+1)+16] = W[(i+1)] = w;
        w += SHA256_K[j+1];
        w += (t-1)[7];
        w += S1(e);
        w += Ch(e, (t-1)[5], (t-1)[6]);
        e = (t-1)[3] + w;
        (t-1)[3] = (t-1)[3+8] = e;
        w += S0(a);
        a = w + Maj(a, (t-1)[1], (t-1)[2]);
        (t-1)[-1] = (t-1)[7] = a;

        t-=2;
        j+=2;
        if (j%8 == 0)
            t += 8;
    } while (j<64);

    state[0] += a;
    state[1] += t[1];
    state[2] += t[2];
    state[3] += t[3];
    state[4] += e;
    state[5] += t[5];
    state[6] += t[6];
    state[7] += t[7];
}
#else

void SHA256_CXX_Transform(word32 *state, const word32 *data)
{
    word32 W[16], T[8];
    /* Copy context->state[] to working vars */
    memcpy(T, state, sizeof(T));
    /* 64 operations, partially loop unrolled */
    for (unsigned int j=0; j<64; j+=16)
    {
        R( 0); R( 1); R( 2); R( 3);
        R( 4); R( 5); R( 6); R( 7);
        R( 8); R( 9); R(10); R(11);
        R(12); R(13); R(14); R(15);
    }
    /* Add the working vars back into context.state[] */
    state[0] += a(0);
    state[1] += b(0);
    state[2] += c(0);
    state[3] += d(0);
    state[4] += e(0);
    state[5] += f(0);
    state[6] += g(0);
    state[7] += h(0);
}
#endif // __OPTIMIZE_SIZE__

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE

static void SHA256_SSE2_Transform(word32 *state, const word32 *data)
{
    // this byte reverse is a waste of time, but this function is only called by MDC
    word32 W[16];
    ByteReverse(W, data, SHA256::BLOCKSIZE);
    X86_SHA256_HashBlocks(state, W, SHA256::BLOCKSIZE - !HasSSE2());
}
#endif // CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE

#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
static void SHA256_SSE_SHA_Transform(word32 *state, const word32 *data)
{
    return SHA256_SSE_SHA_HashBlocks(state, data, SHA256::BLOCKSIZE);
}
#endif // CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE

#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
// Process a whole number of 64-byte blocks using the Intel SHA instruction extensions
static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const word32 *data, size_t length)
{
    __m128i STATE0, STATE1;
    __m128i MSG, TMP, MASK;
    __m128i TMSG0, TMSG1, TMSG2, TMSG3;
    __m128i ABEF_SAVE, CDGH_SAVE;
    // Load initial values and shuffle them into the ABEF/CDGH order
    // that the SHA instructions operate on
    TMP = _mm_loadu_si128((__m128i*) &state[0]);
    STATE1 = _mm_loadu_si128((__m128i*) &state[4]);
    MASK = _mm_set_epi64x(W64LIT(0x0c0d0e0f08090a0b), W64LIT(0x0405060700010203));

    TMP = _mm_shuffle_epi32(TMP, 0xB1);           // CDAB
    STATE1 = _mm_shuffle_epi32(STATE1, 0x1B);     // EFGH
    STATE0 = _mm_alignr_epi8(TMP, STATE1, 8);     // ABEF
    STATE1 = _mm_blend_epi16(STATE1, TMP, 0xF0);  // CDGH

    while (length >= SHA256::BLOCKSIZE)
    {
        // Save current hash
        ABEF_SAVE = STATE0;
        CDGH_SAVE = STATE1;
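
        // Rounds 0-3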
        MSG = _mm_loadu_si128((__m128i*) data+0);
        TMSG0 = _mm_shuffle_epi8(MSG, MASK);
        MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0xE9B5DBA5B5C0FBCF), W64LIT(0x71374491428A2F98)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
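
        // Rounds 4-7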
        TMSG1 = _mm_loadu_si128((__m128i*) (data+4));
        TMSG1 = _mm_shuffle_epi8(TMSG1, MASK);
        MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0xAB1C5ED5923F82A4), W64LIT(0x59F111F13956C25B)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);
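
        // Rounds 8-11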
        TMSG2 = _mm_loadu_si128((__m128i*) (data+8));
        TMSG2 = _mm_shuffle_epi8(TMSG2, MASK);
        MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0x550C7DC3243185BE), W64LIT(0x12835B01D807AA98)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);
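
        // Rounds 12-15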
        TMSG3 = _mm_loadu_si128((__m128i*) (data+12));
        TMSG3 = _mm_shuffle_epi8(TMSG3, MASK);
        MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0xC19BF1749BDC06A7), W64LIT(0x80DEB1FE72BE5D74)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG3, TMSG2, 4);
        TMSG0 = _mm_add_epi32(TMSG0, TMP);
        TMSG0 = _mm_sha256msg2_epu32(TMSG0, TMSG3);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG2 = _mm_sha256msg1_epu32(TMSG2, TMSG3);
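
        // Rounds 16-19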
        MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0x240CA1CC0FC19DC6), W64LIT(0xEFBE4786E49B69C1)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG0, TMSG3, 4);
        TMSG1 = _mm_add_epi32(TMSG1, TMP);
        TMSG1 = _mm_sha256msg2_epu32(TMSG1, TMSG0);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG3 = _mm_sha256msg1_epu32(TMSG3, TMSG0);
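
        // Rounds 20-23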
        MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0x76F988DA5CB0A9DC), W64LIT(0x4A7484AA2DE92C6F)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG1, TMSG0, 4);
        TMSG2 = _mm_add_epi32(TMSG2, TMP);
        TMSG2 = _mm_sha256msg2_epu32(TMSG2, TMSG1);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);
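
        // Rounds 24-27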
        MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0xBF597FC7B00327C8), W64LIT(0xA831C66D983E5152)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG2, TMSG1, 4);
        TMSG3 = _mm_add_epi32(TMSG3, TMP);
        TMSG3 = _mm_sha256msg2_epu32(TMSG3, TMSG2);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);
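
        // Rounds 28-31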
        MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0x1429296706CA6351), W64LIT(0xD5A79147C6E00BF3)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG3, TMSG2, 4);
        TMSG0 = _mm_add_epi32(TMSG0, TMP);
        TMSG0 = _mm_sha256msg2_epu32(TMSG0, TMSG3);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG2 = _mm_sha256msg1_epu32(TMSG2, TMSG3);
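
        // Rounds 32-35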
        MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0x53380D134D2C6DFC), W64LIT(0x2E1B213827B70A85)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG0, TMSG3, 4);
        TMSG1 = _mm_add_epi32(TMSG1, TMP);
        TMSG1 = _mm_sha256msg2_epu32(TMSG1, TMSG0);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG3 = _mm_sha256msg1_epu32(TMSG3, TMSG0);
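
        // Rounds 36-39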
        MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0x92722C8581C2C92E), W64LIT(0x766A0ABB650A7354)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG1, TMSG0, 4);
        TMSG2 = _mm_add_epi32(TMSG2, TMP);
        TMSG2 = _mm_sha256msg2_epu32(TMSG2, TMSG1);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);
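
        // Rounds 40-43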
        MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0xC76C51A3C24B8B70), W64LIT(0xA81A664BA2BFE8A1)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG2, TMSG1, 4);
        TMSG3 = _mm_add_epi32(TMSG3, TMP);
        TMSG3 = _mm_sha256msg2_epu32(TMSG3, TMSG2);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);
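
        // Rounds 44-47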
        MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0x106AA070F40E3585), W64LIT(0xD6990624D192E819)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG3, TMSG2, 4);
        TMSG0 = _mm_add_epi32(TMSG0, TMP);
        TMSG0 = _mm_sha256msg2_epu32(TMSG0, TMSG3);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG2 = _mm_sha256msg1_epu32(TMSG2, TMSG3);
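
        // Rounds 48-51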
        MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0x34B0BCB52748774C), W64LIT(0x1E376C0819A4C116)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG0, TMSG3, 4);
        TMSG1 = _mm_add_epi32(TMSG1, TMP);
        TMSG1 = _mm_sha256msg2_epu32(TMSG1, TMSG0);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG3 = _mm_sha256msg1_epu32(TMSG3, TMSG0);
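
        // Rounds 52-55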
        MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0x682E6FF35B9CCA4F), W64LIT(0x4ED8AA4A391C0CB3)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG1, TMSG0, 4);
        TMSG2 = _mm_add_epi32(TMSG2, TMP);
        TMSG2 = _mm_sha256msg2_epu32(TMSG2, TMSG1);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
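
        // Rounds 56-59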
        MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0x8CC7020884C87814), W64LIT(0x78A5636F748F82EE)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG2, TMSG1, 4);
        TMSG3 = _mm_add_epi32(TMSG3, TMP);
        TMSG3 = _mm_sha256msg2_epu32(TMSG3, TMSG2);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
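
        // Rounds 60-63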
        MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0xC67178F2BEF9A3F7), W64LIT(0xA4506CEB90BEFFFA)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);

        // Combine with the saved state
        STATE0 = _mm_add_epi32(STATE0, ABEF_SAVE);
        STATE1 = _mm_add_epi32(STATE1, CDGH_SAVE);

        // Advance to the next 64-byte block (the pointer advance is inferred;
        // the extracted source dropped this line, but the loop requires it)
        data += SHA256::BLOCKSIZE/sizeof(word32);
        length -= SHA256::BLOCKSIZE;
    }

    // Shuffle the state back into ABCD/EFGH order
    TMP = _mm_shuffle_epi32(STATE0, 0x1B);        // FEBA
    STATE1 = _mm_shuffle_epi32(STATE1, 0xB1);     // DCHG
    STATE0 = _mm_blend_epi16(TMP, STATE1, 0xF0);  // DCBA
    STATE1 = _mm_alignr_epi8(STATE1, TMP, 8);

    // Save state
    _mm_storeu_si128((__m128i*) &state[0], STATE0);
    _mm_storeu_si128((__m128i*) &state[4], STATE1);
}
#endif // CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE

pfnSHATransform InitializeSHA256Transform()
{
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
    if (HasSHA())
        return &SHA256_SSE_SHA_Transform;
    else
#endif
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
    if (HasSSE2())
        return &SHA256_SSE2_Transform;
    else
#endif
    return &SHA256_CXX_Transform;
}
void SHA384::InitState(HashWordType *state)
{
    static const word64 s[8] = {
        W64LIT(0xcbbb9d5dc1059ed8), W64LIT(0x629a292a367cd507),
        W64LIT(0x9159015a3070dd17), W64LIT(0x152fecd8f70e5939),
        W64LIT(0x67332667ffc00b31), W64LIT(0x8eb44a8768581511),
        W64LIT(0xdb0c2e0d64f98fa7), W64LIT(0x47b5481dbefa4fa4)};
    memcpy(state, s, sizeof(s));
}
void SHA512::InitState(HashWordType *state)
{
    static const word64 s[8] = {
        W64LIT(0x6a09e667f3bcc908), W64LIT(0xbb67ae8584caa73b),
        W64LIT(0x3c6ef372fe94f82b), W64LIT(0xa54ff53a5f1d36f1),
        W64LIT(0x510e527fade682d1), W64LIT(0x9b05688c2b3e6c1f),
        W64LIT(0x1f83d9abfb41bd6b), W64LIT(0x5be0cd19137e2179)};
    memcpy(state, s, sizeof(s));
}
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)
CRYPTOPP_ALIGN_DATA(16) static const word64 SHA512_K[80] CRYPTOPP_SECTION_ALIGN16 = {
#else
static const word64 SHA512_K[80] = {
#endif
    W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
    W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
    W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
    W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
    W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
    W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
    W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
    W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
    W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
    W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
    W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
    W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
    W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
    W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
    W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
    W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
    W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
    W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
    W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
    W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
    W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
    W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
    W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
    W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
    W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
    W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
    W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
    W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
    W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
    W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
    W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
    W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
    W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
    W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
    W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
    W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
    W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
    W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
    W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
    W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
};
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)

// put the assembly version in a separate function, otherwise MSVC 2005 SP1
// doesn't generate correct code for the non-assembly version
CRYPTOPP_NOINLINE static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state, const word64 *data)
{
#ifdef __GNUC__
    __asm__ __volatile__
    (
    ".intel_syntax noprefix;"
    AS_PUSH_IF86( bx)
    AS2( mov ebx, eax)
#else
    AS1( push ebx)
    AS1( push esi)
    AS1( push edi)
    AS2( lea ebx, SHA512_K)
#endif

    AS2( mov eax, esp)
    AS2( and esp, 0xfffffff0)
    AS2( sub esp, 27*16)  // 17*16 + 160
    AS2( push eax)
    AS2( xor eax, eax)

#if CRYPTOPP_BOOL_X32
    AS2( lea edi, [esp+8+8*8])      // start at 16-byte boundary
    AS2( lea esi, [esp+8+20*8+8])   // start at 16-byte boundary
#else
    AS2( lea edi, [esp+4+8*8])      // start at 16-byte boundary
    AS2( lea esi, [esp+4+20*8+8])   // start at 16-byte boundary
#endif
    AS2( movdqa xmm0, [ecx+0*16])
    AS2( movdq2q mm4, xmm0)
    AS2( movdqa [edi+0*16], xmm0)
    AS2( movdqa xmm0, [ecx+1*16])
    AS2( movdqa [edi+1*16], xmm0)
    AS2( movdqa xmm0, [ecx+2*16])
    AS2( movdq2q mm5, xmm0)
    AS2( movdqa [edi+2*16], xmm0)
    AS2( movdqa xmm0, [ecx+3*16])
    AS2( movdqa [edi+3*16], xmm0)
// rotr(x,a) ^ rotr(x,b) ^ rotr(x,c), built from shifts (MMX has no 64-bit rotate)
#define SSE2_S0_S1(r, a, b, c) \
    AS2( movq mm6, r)\
    AS2( psrlq r, a)\
    AS2( movq mm7, r)\
    AS2( psllq mm6, 64-c)\
    AS2( pxor mm7, mm6)\
    AS2( psrlq r, b-a)\
    AS2( pxor mm7, r)\
    AS2( psllq mm6, c-b)\
    AS2( pxor mm7, mm6)\
    AS2( psrlq r, c-b)\
    AS2( pxor r, mm7)\
    AS2( psllq mm6, b-a)\
    AS2( pxor r, mm6)

// rotr(x,a) ^ rotr(x,b) ^ (x >> c), two lanes at a time
#define SSE2_s0(r, a, b, c) \
    AS2( movdqa xmm6, r)\
    AS2( psrlq r, a)\
    AS2( movdqa xmm7, r)\
    AS2( psllq xmm6, 64-c)\
    AS2( pxor xmm7, xmm6)\
    AS2( psrlq r, b-a)\
    AS2( pxor xmm7, r)\
    AS2( psrlq r, c-b)\
    AS2( pxor r, xmm7)\
    AS2( psllq xmm6, c-a)\
    AS2( pxor r, xmm6)

#define SSE2_s1(r, a, b, c) \
    AS2( movdqa xmm6, r)\
    AS2( psrlq r, a)\
    AS2( movdqa xmm7, r)\
    AS2( psllq xmm6, 64-c)\
    AS2( pxor xmm7, xmm6)\
    AS2( psrlq r, b-a)\
    AS2( pxor xmm7, r)\
    AS2( psllq xmm6, c-b)\
    AS2( pxor xmm7, xmm6)\
    AS2( psrlq r, c-b)\
    AS2( pxor r, xmm7)
    ASJ( jmp, 0, f)

    ASL(SHA512_Round)
    // k + w is in mm0, a is in mm4, e is in mm5
    AS2( paddq mm0, [edi+7*8])    // h += k + w
    AS2( movq mm2, [edi+5*8])     // f
    AS2( movq mm3, [edi+6*8])     // g
    AS2( pxor mm2, mm3)           // f ^ g
    AS2( pand mm2, mm5)           // e & (f^g)
    SSE2_S0_S1(mm5,14,18,41)      // S1(e)
    AS2( pxor mm2, mm3)           // Ch(e,f,g)
    AS2( paddq mm0, mm2)          // h + k + w + Ch(e,f,g)
    AS2( paddq mm5, mm0)          // T1
    AS2( movq mm2, [edi+1*8])     // b
    AS2( movq mm1, mm2)
    AS2( por mm2, mm4)            // a | b
    AS2( pand mm2, [edi+2*8])     // c & (a|b)
    AS2( pand mm1, mm4)           // a & b
    AS2( por mm1, mm2)            // Maj(a,b,c)
    AS2( paddq mm1, mm5)          // T1 + Maj(a,b,c)
    AS2( paddq mm5, [edi+3*8])    // e = T1 + d
    AS2( movq [edi+3*8], mm5)
    AS2( movq [edi+11*8], mm5)
    SSE2_S0_S1(mm4,28,34,39)      // S0(a)
    AS2( paddq mm4, mm1)          // a = T1 + S0(a) + Maj(a,b,c)
    AS2( movq [edi-8], mm4)
    AS2( movq [edi+7*8], mm4)
    AS1( ret)

    // first 16 rounds: load and byte-order the message words
    ASL(0)
    AS2( movq mm0, [edx+eax*8])
    AS2( movq [esi+eax*8], mm0)
    AS2( movq [esi+eax*8+16*8], mm0)
    AS2( paddq mm0, [ebx+eax*8])
    ASC( call, SHA512_Round)
    AS1( inc eax)
    AS2( cmp eax, 16)
    ASJ( jne, 0, b)
    // remaining rounds: compute the message schedule on the fly, two rounds at a time
    ASL(1)
    AS2( movdqu xmm0, [esi+(16-2)*8])
    AS2( movdqu xmm3, [esi])
    AS2( paddq xmm3, [esi+(16-7)*8])
    AS2( movdqa xmm2, [esi+(16-15)*8])
    SSE2_s1(xmm0, 6, 19, 61)
    AS2( paddq xmm0, xmm3)            // s1(W[t-2]) + W[t-7] + W[t-16]
    SSE2_s0(xmm2, 1, 7, 8)
    AS2( paddq xmm0, xmm2)            // + s0(W[t-15]) = W[t], W[t+1]
    AS2( movdq2q mm0, xmm0)
    AS2( movhlps xmm1, xmm0)
    AS2( paddq mm0, [ebx+eax*8])      // W[t] + K[t]
    AS2( movlps [esi], xmm0)
    AS2( movlps [esi+8], xmm1)
    AS2( movlps [esi+8*16], xmm0)
    AS2( movlps [esi+8*17], xmm1)
    ASC( call, SHA512_Round)
    AS2( movdq2q mm0, xmm1)
    AS2( paddq mm0, [ebx+eax*8+8])    // W[t+1] + K[t+1]
    ASC( call, SHA512_Round)
#if CRYPTOPP_BOOL_X32
    AS2( lea esi, [esp+8+20*8+8+esi*8])
#else
    AS2( lea esi, [esp+4+20*8+8+esi*8])
#endif

#define SSE2_CombineState(i) \
    AS2( movdqa xmm0, [edi+i*16])\
    AS2( paddq xmm0, [ecx+i*16])\
    AS2( movdqa [ecx+i*16], xmm0)

    SSE2_CombineState(0)
    SSE2_CombineState(1)
    SSE2_CombineState(2)
    SSE2_CombineState(3)
    AS1( pop esp)
    AS1( emms)

#if defined(__GNUC__)
    AS_POP_IF86( bx)
    ".att_syntax prefix;"
    :
    : "a" (SHA512_K), "c" (state), "d" (data)
    : "%esi", "%edi", "memory", "cc"
    );
#else
    AS1( pop edi)
    AS1( pop esi)
    AS1( pop ebx)
#endif
}

#endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE

void SHA512::Transform(word64 *state, const word64 *data)
{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)
    if (HasSSE2())
    {
        SHA512_SSE2_Transform(state, data);
        return;
    }
#endif
#define S0(x) (rotrFixed(x,28)^rotrFixed(x,34)^rotrFixed(x,39))
#define S1(x) (rotrFixed(x,14)^rotrFixed(x,18)^rotrFixed(x,41))
#define s0(x) (rotrFixed(x,1)^rotrFixed(x,8)^(x>>7))
#define s1(x) (rotrFixed(x,19)^rotrFixed(x,61)^(x>>6))

#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA512_K[i+j]+(j?blk2(i):blk0(i));\
    d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))

    word64 W[16], T[8];
    /* Copy context->state[] to working vars */
    memcpy(T, state, sizeof(T));
    /* 80 operations, partially loop unrolled */
    for (unsigned int j=0; j<80; j+=16)
    {
        R( 0); R( 1); R( 2); R( 3);
        R( 4); R( 5); R( 6); R( 7);
        R( 8); R( 9); R(10); R(11);
        R(12); R(13); R(14); R(15);
    }
    /* Add the working vars back into context.state[] */
    state[0] += a(0);
    state[1] += b(0);
    state[2] += c(0);
    state[3] += d(0);
    state[4] += e(0);
    state[5] += f(0);
    state[6] += g(0);
    state[7] += h(0);
}

NAMESPACE_END

#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
#endif // #ifndef CRYPTOPP_IMPORTS