#ifndef CRYPTOPP_GENERATE_X64_MASM

#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4702 4740)

#if defined(CRYPTOPP_DISABLE_SALSA_ASM)
# undef CRYPTOPP_X86_ASM_AVAILABLE
# undef CRYPTOPP_X32_ASM_AVAILABLE
# undef CRYPTOPP_X64_ASM_AVAILABLE
# undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
# undef CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
# define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
# define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0

#if defined(CRYPTOPP_DEBUG) && !defined(CRYPTOPP_DOXYGEN_PROCESSING)
void Salsa20_TestInstantiations()
    m_state[1] = (length == 16) ? 0x3120646e : 0x3320646e;
    m_state[2] = (length == 16) ? 0x79622d36 : 0x79622d32;
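    // Note: these are words 1 and 2 of the Salsa20 constants "expand 16-byte k"
    // (tau, 128-bit keys) and "expand 32-byte k" (sigma, 256-bit keys), stored as
    // little-endian 32-bit words; 0x3320646e is the bytes 6e 64 20 33, i.e. "nd 3".
    // A small illustrative sketch of how such a constant could be derived (LE32 is
    // not part of this file):
    //
    //   word32 LE32(const char *s) {
    //       return word32(byte(s[0])) | (word32(byte(s[1])) << 8) |
    //              (word32(byte(s[2])) << 16) | (word32(byte(s[3])) << 24);
    //   }
    //   // LE32("nd 3") == 0x3320646e, LE32("2-by") == 0x79622d32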
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) && !defined(CRYPTOPP_DISABLE_SALSA_ASM)

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE

    return GetAlignmentOf<word32>();

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
        return 4*BYTES_PER_ITERATION;

    return BYTES_PER_ITERATION;
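    // GetOptimalBlockSize: the SSE2 path below generates four 64-byte keystream
    // blocks per pass, so it advertises 4*BYTES_PER_ITERATION; the portable path
    // works one BYTES_PER_ITERATION (a single Salsa20 block) at a time.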
#ifdef CRYPTOPP_X64_MASM_AVAILABLE

void Salsa20_OperateKeystream(byte *output, const byte *input, size_t iterationCount, int rounds, void *state);

#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code

#endif // #ifdef CRYPTOPP_GENERATE_X64_MASM

#ifdef CRYPTOPP_X64_MASM_AVAILABLE

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
#ifdef CRYPTOPP_GENERATE_X64_MASM
Salsa20_OperateKeystream PROC FRAME
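    ; Win64 entry: output, input, iterationCount and rounds arrive in rcx, rdx, r8 and r9
    ; (see the REG_* defines below); the fifth argument, state, is spilled by the caller
    ; and is loaded from [rsp + 5*8] below. xmm6-xmm15 are callee-saved under the Win64
    ; ABI, hence the save_xmm128 prologue and the matching restores in the epilogue.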
    mov r10, [rsp + 5*8] ; state
    alloc_stack(10*16 + 32*16 + 8)
    save_xmm128 xmm6, 0200h
    save_xmm128 xmm7, 0210h
    save_xmm128 xmm8, 0220h
    save_xmm128 xmm9, 0230h
    save_xmm128 xmm10, 0240h
    save_xmm128 xmm11, 0250h
    save_xmm128 xmm12, 0260h
    save_xmm128 xmm13, 0270h
    save_xmm128 xmm14, 0280h
    save_xmm128 xmm15, 0290h

#define REG_output          rcx
#define REG_input           rdx
#define REG_iterationCount  r8
#define REG_state           r10
#define REG_rounds          e9d
#define REG_roundsLeft      eax
#define REG_temp32          r11d
#define SSE2_WORKSPACE      rsp

#if CRYPTOPP_BOOL_X64
#define REG_output          %1
#define REG_iterationCount  %2
#define REG_rounds          %3
#define REG_roundsLeft      eax
#define REG_temp32          edx
#define SSE2_WORKSPACE      %5

#define REG_output          edi
#define REG_input           eax
#define REG_iterationCount  ecx
#define REG_state           esi
#define REG_rounds          edx
#define REG_roundsLeft      ebx
#define REG_temp32          ebp
#define SSE2_WORKSPACE      esp + WORD_SZ

    AS2( mov REG_iterationCount, iterationCount)
    AS2( mov REG_input, input)
    AS2( mov REG_output, output)
    AS2( mov REG_state, s)
    AS2( mov REG_rounds, r)

#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM

    AS2( cmp REG_iterationCount, 4)
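    // The dispatch above appears to select the 4-block SSE2 path below whenever at
    // least four 64-byte iterations remain; each of the 16 state words is then
    // expanded into its own xmm workspace slot so four keystream blocks are computed
    // in parallel. Shorter tails fall through to the single-block loop further down.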
#if CRYPTOPP_BOOL_X86

#define SSE2_EXPAND_S(i, j) \
    ASS( pshufd xmm4, xmm##i, j, j, j, j) \
    AS2( movdqa [SSE2_WORKSPACE + (i*4+j)*16 + 256], xmm4)

    AS2( movdqa xmm0, [REG_state + 0*16])
    AS2( movdqa xmm1, [REG_state + 1*16])
    AS2( movdqa xmm2, [REG_state + 2*16])
    AS2( movdqa xmm3, [REG_state + 3*16])
#define SSE2_EXPAND_S85(i) \
    AS2( mov dword ptr [SSE2_WORKSPACE + 8*16 + i*4 + 256], REG_roundsLeft) \
    AS2( mov dword ptr [SSE2_WORKSPACE + 5*16 + i*4 + 256], REG_temp32) \
    AS2( add REG_roundsLeft, 1) \
    AS2( adc REG_temp32, 0)

    AS2( mov REG_roundsLeft, dword ptr [REG_state + 8*4])
    AS2( mov REG_temp32, dword ptr [REG_state + 5*4])

    AS2( mov dword ptr [REG_state + 8*4], REG_roundsLeft)
    AS2( mov dword ptr [REG_state + 5*4], REG_temp32)
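    // In this implementation's permuted state layout, state words 8 and 5 appear to
    // hold the low and high halves of the 64-bit block counter: SSE2_EXPAND_S85 seeds
    // each of the four parallel blocks with the current counter and advances it with
    // an add/adc carry chain, and the two stores above write the final counter value
    // back into the state.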
#define SSE2_QUARTER_ROUND(a, b, d, i) \
    AS2( movdqa xmm4, xmm##d) \
    AS2( paddd xmm4, xmm##a) \
    AS2( movdqa xmm5, xmm4) \
    AS2( pslld xmm4, i) \
    AS2( psrld xmm5, 32-i) \
    AS2( pxor xmm##b, xmm4) \
    AS2( pxor xmm##b, xmm5)

#define L01(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##A, [SSE2_WORKSPACE + d*16 + i*256])
#define L02(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##C, [SSE2_WORKSPACE + a*16 + i*256])
#define L03(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C)
#define L04(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A)
#define L05(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 7)
#define L06(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-7)
#define L07(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + b*16 + i*256])
#define L08(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B)
#define L09(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + b*16], xmm##A)
#define L10(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A)
#define L11(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C)
#define L12(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A)
#define L13(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 9)
#define L14(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-9)
#define L15(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + c*16 + i*256])
#define L16(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D)
#define L17(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + c*16], xmm##A)
#define L18(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A)
#define L19(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##B)
#define L20(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A)
#define L21(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 13)
#define L22(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-13)
#define L23(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + d*16 + i*256])
#define L24(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B)
#define L25(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + d*16], xmm##A)
#define L26(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##D)
#define L27(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A)
#define L28(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 18)
#define L29(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-18)
#define L30(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##C)
#define L31(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D)
#define L32(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + a*16], xmm##A)

#define SSE2_QUARTER_ROUND_X8(i, a, b, c, d, e, f, g, h) \
    L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) \
    L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) \
    L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) \
    L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) \
    L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) \
    L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) \
    L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) \
    L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) \
    L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) \
    L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) \
    L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) \
    L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) \
    L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) \
    L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) \
    L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) \
    L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) \
    L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) \
    L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) \
    L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) \
    L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) \
    L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) \
    L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) \
    L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) \
    L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) \
    L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) \
    L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) \
    L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) \
    L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) \
    L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) \
    L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) \
    L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) \
    L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i)

#define SSE2_QUARTER_ROUND_X16(i, a, b, c, d, e, f, g, h, A, B, C, D, E, F, G, H) \
    L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) L01(8,9,10,11, A,B,C,D, i) L01(12,13,14,15, E,F,G,H, i) \
    L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) L02(8,9,10,11, A,B,C,D, i) L02(12,13,14,15, E,F,G,H, i) \
    L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) L03(8,9,10,11, A,B,C,D, i) L03(12,13,14,15, E,F,G,H, i) \
    L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) L04(8,9,10,11, A,B,C,D, i) L04(12,13,14,15, E,F,G,H, i) \
    L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) L05(8,9,10,11, A,B,C,D, i) L05(12,13,14,15, E,F,G,H, i) \
    L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) L06(8,9,10,11, A,B,C,D, i) L06(12,13,14,15, E,F,G,H, i) \
    L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) L07(8,9,10,11, A,B,C,D, i) L07(12,13,14,15, E,F,G,H, i) \
    L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) L08(8,9,10,11, A,B,C,D, i) L08(12,13,14,15, E,F,G,H, i) \
    L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) L09(8,9,10,11, A,B,C,D, i) L09(12,13,14,15, E,F,G,H, i) \
    L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) L10(8,9,10,11, A,B,C,D, i) L10(12,13,14,15, E,F,G,H, i) \
    L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) L11(8,9,10,11, A,B,C,D, i) L11(12,13,14,15, E,F,G,H, i) \
    L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) L12(8,9,10,11, A,B,C,D, i) L12(12,13,14,15, E,F,G,H, i) \
    L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) L13(8,9,10,11, A,B,C,D, i) L13(12,13,14,15, E,F,G,H, i) \
    L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) L14(8,9,10,11, A,B,C,D, i) L14(12,13,14,15, E,F,G,H, i) \
    L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) L15(8,9,10,11, A,B,C,D, i) L15(12,13,14,15, E,F,G,H, i) \
    L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) L16(8,9,10,11, A,B,C,D, i) L16(12,13,14,15, E,F,G,H, i) \
    L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) L17(8,9,10,11, A,B,C,D, i) L17(12,13,14,15, E,F,G,H, i) \
    L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) L18(8,9,10,11, A,B,C,D, i) L18(12,13,14,15, E,F,G,H, i) \
    L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) L19(8,9,10,11, A,B,C,D, i) L19(12,13,14,15, E,F,G,H, i) \
    L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) L20(8,9,10,11, A,B,C,D, i) L20(12,13,14,15, E,F,G,H, i) \
    L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) L21(8,9,10,11, A,B,C,D, i) L21(12,13,14,15, E,F,G,H, i) \
    L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) L22(8,9,10,11, A,B,C,D, i) L22(12,13,14,15, E,F,G,H, i) \
    L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) L23(8,9,10,11, A,B,C,D, i) L23(12,13,14,15, E,F,G,H, i) \
    L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) L24(8,9,10,11, A,B,C,D, i) L24(12,13,14,15, E,F,G,H, i) \
    L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) L25(8,9,10,11, A,B,C,D, i) L25(12,13,14,15, E,F,G,H, i) \
    L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) L26(8,9,10,11, A,B,C,D, i) L26(12,13,14,15, E,F,G,H, i) \
    L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) L27(8,9,10,11, A,B,C,D, i) L27(12,13,14,15, E,F,G,H, i) \
    L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) L28(8,9,10,11, A,B,C,D, i) L28(12,13,14,15, E,F,G,H, i) \
    L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) L29(8,9,10,11, A,B,C,D, i) L29(12,13,14,15, E,F,G,H, i) \
    L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) L30(8,9,10,11, A,B,C,D, i) L30(12,13,14,15, E,F,G,H, i) \
    L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) L31(8,9,10,11, A,B,C,D, i) L31(12,13,14,15, E,F,G,H, i) \
    L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i) L32(8,9,10,11, A,B,C,D, i) L32(12,13,14,15, E,F,G,H, i)

#if CRYPTOPP_BOOL_X64
    SSE2_QUARTER_ROUND_X16(1, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15)

    SSE2_QUARTER_ROUND_X8(1, 2, 6, 10, 14, 3, 7, 11, 15)
    SSE2_QUARTER_ROUND_X8(1, 0, 4, 8, 12, 1, 5, 9, 13)
    AS2( mov REG_roundsLeft, REG_rounds)
    ASL(SSE2_Salsa_Output)
    AS2( movdqa xmm0, xmm4)
    AS2( punpckldq xmm4, xmm5)
    AS2( movdqa xmm1, xmm6)
    AS2( punpckldq xmm6, xmm7)
    AS2( movdqa xmm2, xmm4)
    AS2( punpcklqdq xmm4, xmm6)
    AS2( punpckhqdq xmm2, xmm6)
    AS2( punpckhdq xmm0, xmm5)
    AS2( punpckhdq xmm1, xmm7)
    AS2( movdqa xmm6, xmm0)
    AS2( punpcklqdq xmm0, xmm1)
    AS2( punpckhqdq xmm6, xmm1)
    AS_XMM_OUTPUT4(SSE2_Salsa_Output_A, REG_input, REG_output, 4, 2, 0, 6, 1, 0, 4, 8, 12, 1)
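    // The punpckldq/punpckhdq/punpcklqdq sequence above is effectively a 4x4 word
    // transpose: xmm4..xmm7 each hold one output word position across the four
    // parallel blocks, and the shuffles regroup them so each register holds four
    // consecutive words of a single keystream block before AS_XMM_OUTPUT4 XORs them
    // with the input (when there is one) and writes the output.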
#if CRYPTOPP_BOOL_X64
    SSE2_QUARTER_ROUND_X16(0, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15)

    SSE2_QUARTER_ROUND_X16(0, 0, 13, 10, 7, 1, 14, 11, 4, 2, 15, 8, 5, 3, 12, 9, 6)

    SSE2_QUARTER_ROUND_X8(0, 2, 6, 10, 14, 3, 7, 11, 15)
    SSE2_QUARTER_ROUND_X8(0, 0, 4, 8, 12, 1, 5, 9, 13)

    SSE2_QUARTER_ROUND_X8(0, 2, 15, 8, 5, 3, 12, 9, 6)
    SSE2_QUARTER_ROUND_X8(0, 0, 13, 10, 7, 1, 14, 11, 4)
    AS2( sub REG_roundsLeft, 2)

#define SSE2_OUTPUT_4(a, b, c, d) \
    AS2( movdqa xmm4, [SSE2_WORKSPACE + a*16 + 256])\
    AS2( paddd xmm4, [SSE2_WORKSPACE + a*16])\
    AS2( movdqa xmm5, [SSE2_WORKSPACE + b*16 + 256])\
    AS2( paddd xmm5, [SSE2_WORKSPACE + b*16])\
    AS2( movdqa xmm6, [SSE2_WORKSPACE + c*16 + 256])\
    AS2( paddd xmm6, [SSE2_WORKSPACE + c*16])\
    AS2( movdqa xmm7, [SSE2_WORKSPACE + d*16 + 256])\
    AS2( paddd xmm7, [SSE2_WORKSPACE + d*16])\
    ASC( call, SSE2_Salsa_Output)

    SSE2_OUTPUT_4(0, 13, 10, 7)
    SSE2_OUTPUT_4(4, 1, 14, 11)
    SSE2_OUTPUT_4(8, 5, 2, 15)
    SSE2_OUTPUT_4(12, 9, 6, 3)
    AS2( test REG_input, REG_input)

    AS2( add REG_input, 12*16)

    AS2( add REG_output, 12*16)
    AS2( sub REG_iterationCount, 4)
    AS2( cmp REG_iterationCount, 4)

    AS2( sub REG_iterationCount, 1)
    AS2( movdqa xmm0, [REG_state + 0*16])
    AS2( movdqa xmm1, [REG_state + 1*16])
    AS2( movdqa xmm2, [REG_state + 2*16])
    AS2( movdqa xmm3, [REG_state + 3*16])
    AS2( mov REG_roundsLeft, REG_rounds)
    SSE2_QUARTER_ROUND(0, 1, 3, 7)
    SSE2_QUARTER_ROUND(1, 2, 0, 9)
    SSE2_QUARTER_ROUND(2, 3, 1, 13)
    SSE2_QUARTER_ROUND(3, 0, 2, 18)
    ASS( pshufd xmm1, xmm1, 2, 1, 0, 3)
    ASS( pshufd xmm2, xmm2, 1, 0, 3, 2)
    ASS( pshufd xmm3, xmm3, 0, 3, 2, 1)
    SSE2_QUARTER_ROUND(0, 3, 1, 7)
    SSE2_QUARTER_ROUND(3, 2, 0, 9)
    SSE2_QUARTER_ROUND(2, 1, 3, 13)
    SSE2_QUARTER_ROUND(1, 0, 2, 18)
    ASS( pshufd xmm1, xmm1, 0, 3, 2, 1)
    ASS( pshufd xmm2, xmm2, 1, 0, 3, 2)
    ASS( pshufd xmm3, xmm3, 2, 1, 0, 3)
    AS2( sub REG_roundsLeft, 2)
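    // Single-block path: the four xmm rows appear to hold the state in a diagonal
    // arrangement, so the column round and the row round are both expressed as the
    // same four SSE2_QUARTER_ROUND calls (the Salsa20 rotation counts 7, 9, 13, 18)
    // with pshufd rotations of rows 1-3 in between to realign the diagonals. One pass
    // of this block is a double round, hence REG_roundsLeft is decremented by 2.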
    AS2( paddd xmm0, [REG_state + 0*16])
    AS2( paddd xmm1, [REG_state + 1*16])
    AS2( paddd xmm2, [REG_state + 2*16])
    AS2( paddd xmm3, [REG_state + 3*16])

    AS2( add dword ptr [REG_state + 8*4], 1)
    AS2( adc dword ptr [REG_state + 5*4], 0)
    AS2( pcmpeqb xmm6, xmm6)

    ASS( pshufd xmm7, xmm6, 0, 1, 2, 3)
    AS2( movdqa xmm4, xmm0)
    AS2( movdqa xmm5, xmm3)
    AS2( pand xmm0, xmm7)
    AS2( pand xmm4, xmm6)
    AS2( pand xmm3, xmm6)
    AS2( pand xmm5, xmm7)

    AS2( movdqa xmm5, xmm1)
    AS2( pand xmm1, xmm7)
    AS2( pand xmm5, xmm6)

    AS2( pand xmm6, xmm2)
    AS2( pand xmm2, xmm7)

    AS2( movdqa xmm5, xmm4)
    AS2( movdqa xmm6, xmm0)
    AS3( shufpd xmm4, xmm1, 2)
    AS3( shufpd xmm0, xmm2, 2)
    AS3( shufpd xmm1, xmm5, 2)
    AS3( shufpd xmm2, xmm6, 2)

    AS_XMM_OUTPUT4(SSE2_Salsa_Output_B, REG_input, REG_output, 4, 0, 1, 2, 3, 0, 1, 2, 3, 4)
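    // The pcmpeqb/pshufd pair above builds a pair of complementary dword select masks;
    // the pand/shufpd sequence then appears to swap word lanes so that the diagonally
    // arranged state rows come back out in standard Salsa20 output order before the
    // final XOR/store through AS_XMM_OUTPUT4.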
#if CRYPTOPP_BOOL_X64
        : "+r" (input), "+r" (output), "+r" (iterationCount)

        : "%eax", "%rdx", "memory", "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15"

        : "+a" (input), "+D" (output), "+c" (iterationCount)
#ifdef CRYPTOPP_GENERATE_X64_MASM
    movdqa xmm6, [rsp + 0200h]
    movdqa xmm7, [rsp + 0210h]
    movdqa xmm8, [rsp + 0220h]
    movdqa xmm9, [rsp + 0230h]
    movdqa xmm10, [rsp + 0240h]
    movdqa xmm11, [rsp + 0250h]
    movdqa xmm12, [rsp + 0260h]
    movdqa xmm13, [rsp + 0270h]
    movdqa xmm14, [rsp + 0280h]
    movdqa xmm15, [rsp + 0290h]
    add rsp, 10*16 + 32*16 + 8

Salsa20_OperateKeystream ENDP
#ifndef CRYPTOPP_GENERATE_X64_MASM

    word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;

    while (iterationCount--)

#define QUARTER_ROUND(a, b, c, d) \
    b = b ^ rotlFixed(a + d, 7); \
    c = c ^ rotlFixed(b + a, 9); \
    d = d ^ rotlFixed(c + b, 13); \
    a = a ^ rotlFixed(d + c, 18);

#define SALSA_OUTPUT(x) {\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + m_state[0]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x13 + m_state[13]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, x10 + m_state[10]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, x7 + m_state[7]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 4, x4 + m_state[4]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 5, x1 + m_state[1]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 6, x14 + m_state[14]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 7, x11 + m_state[11]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 8, x8 + m_state[8]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 9, x5 + m_state[5]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 10, x2 + m_state[2]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 11, x15 + m_state[15]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 12, x12 + m_state[12]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 13, x9 + m_state[9]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x6 + m_state[6]);\
    CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x3 + m_state[3]);}

#ifndef CRYPTOPP_DOXYGEN_PROCESSING

    memcpy(m_key.begin()+4, m_key.begin(), 16);
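    // XSalsa20 key setup: for a 128-bit key the 16 key bytes appear to be duplicated
    // into the upper half of the 32-byte key block (the memcpy above), and the
    // constants below are always the "expand 32-byte k" (sigma) words, as XSalsa20 is
    // defined for 256-bit keys.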
    m_state[1] = 0x3320646e;
    m_state[2] = 0x79622d32;
    m_state[3] = 0x6b206574;

    word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;

    x13 = m_key[0]; x10 = m_key[1]; x7 = m_key[2]; x4 = m_key[3];
    x15 = m_key[4]; x12 = m_key[5]; x9 = m_key[6]; x6 = m_key[7];

#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
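// A minimal usage sketch for the cipher implemented above (illustrative only, not part
// of this translation unit). It assumes the public Crypto++ headers salsa.h, osrng.h and
// secblock.h, and Salsa20's usual 8-byte IV; the function name is hypothetical:
//
//   #include "cryptlib.h"
//   #include "salsa.h"
//   #include "osrng.h"
//   #include "secblock.h"
//
//   void GenerateKeystreamExample()
//   {
//       using namespace CryptoPP;
//       AutoSeededRandomPool prng;
//       SecByteBlock key(32), iv(8);          // 256-bit key, 64-bit nonce
//       prng.GenerateBlock(key, key.size());
//       prng.GenerateBlock(iv, iv.size());
//
//       Salsa20::Encryption enc;
//       enc.SetKeyWithIV(key, key.size(), iv, iv.size());
//
//       byte buffer[64] = {0};                // encrypting zeros yields raw keystream
//       enc.ProcessData(buffer, buffer, sizeof(buffer));
//   }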