Fabcoin Core
0.16.2
P2P Digital Currency
Main Page
Related Pages
Modules
Namespaces
Classes
Files
Examples
File List
File Members
src
cuda
blake2
blake2b-load-sse41.h
Go to the documentation of this file.
/*
   BLAKE2 reference source code package - optimized C implementations

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
13
/*
 * Include guard. `#pragma once` is kept alongside the classic guard macro.
 * The guard name avoids leading/double underscores, which C reserves for the
 * implementation.
 */
#pragma once
#ifndef BLAKE2B_LOAD_SSE41_H
#define BLAKE2B_LOAD_SSE41_H
16
17
/*
 * LOAD_MSG_0_1(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m1, m2, m3 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m0), lo(m1) }
 *   b1 = { lo(m2), lo(m3) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 0, 2, 4, 6 -- confirm against the including source.
 */
#define LOAD_MSG_0_1(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m0, m1); \
        b1 = _mm_unpacklo_epi64(m2, m3); \
    } while(0)
23
24
25
/*
 * LOAD_MSG_0_2(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m1, m2, m3 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m0), hi(m1) }
 *   b1 = { hi(m2), hi(m3) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 1, 3, 5, 7 -- confirm against the including source.
 */
#define LOAD_MSG_0_2(b0, b1) \
    do \
    { \
        b0 = _mm_unpackhi_epi64(m0, m1); \
        b1 = _mm_unpackhi_epi64(m2, m3); \
    } while(0)
31
32
33
/*
 * LOAD_MSG_0_3(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m4, m5, m6, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m4), lo(m5) }
 *   b1 = { lo(m6), lo(m7) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 8, 10, 12, 14 -- confirm against the including source.
 */
#define LOAD_MSG_0_3(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m4, m5); \
        b1 = _mm_unpacklo_epi64(m6, m7); \
    } while(0)
39
40
41
/*
 * LOAD_MSG_0_4(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m4, m5, m6, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m4), hi(m5) }
 *   b1 = { hi(m6), hi(m7) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 9, 11, 13, 15 -- confirm against the including source.
 */
#define LOAD_MSG_0_4(b0, b1) \
    do \
    { \
        b0 = _mm_unpackhi_epi64(m4, m5); \
        b1 = _mm_unpackhi_epi64(m6, m7); \
    } while(0)
47
48
49
/*
 * LOAD_MSG_1_1(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m2, m4, m6, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m7), lo(m2) }
 *   b1 = { hi(m4), hi(m6) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 14, 4, 9, 13 -- confirm against the including source.
 */
#define LOAD_MSG_1_1(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m7, m2); \
        b1 = _mm_unpackhi_epi64(m4, m6); \
    } while(0)
55
56
57
/*
 * LOAD_MSG_1_2(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m3, m4, m5, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m5), lo(m4) }
 *   b1 = { hi(m7), lo(m3) }   (m3:m7 concatenated, shifted right 8 bytes)
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 10, 8, 15, 6 -- confirm against the including source.
 */
#define LOAD_MSG_1_2(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m5, m4); \
        b1 = _mm_alignr_epi8(m3, m7, 8); \
    } while(0)
63
64
65
/*
 * LOAD_MSG_1_3(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m2, m5 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m0), lo(m0) }   (32-bit shuffle 1,0,3,2 swaps the two halves)
 *   b1 = { hi(m5), hi(m2) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 1, 0, 11, 5 -- confirm against the including source.
 */
#define LOAD_MSG_1_3(b0, b1) \
    do \
    { \
        b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
        b1 = _mm_unpackhi_epi64(m5, m2); \
    } while(0)
71
72
73
/*
 * LOAD_MSG_1_4(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m1, m3, m6 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m6), lo(m1) }
 *   b1 = { hi(m3), hi(m1) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 12, 2, 7, 3 -- confirm against the including source.
 */
#define LOAD_MSG_1_4(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m6, m1); \
        b1 = _mm_unpackhi_epi64(m3, m1); \
    } while(0)
79
80
81
/*
 * LOAD_MSG_2_1(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m2, m5, m6, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m5), lo(m6) }   (m6:m5 concatenated, shifted right 8 bytes)
 *   b1 = { hi(m2), hi(m7) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 11, 12, 5, 15 -- confirm against the including source.
 */
#define LOAD_MSG_2_1(b0, b1) \
    do \
    { \
        b0 = _mm_alignr_epi8(m6, m5, 8); \
        b1 = _mm_unpackhi_epi64(m2, m7); \
    } while(0)
87
88
89
/*
 * LOAD_MSG_2_2(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m1, m4, m6 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m4), lo(m0) }
 *   b1 = { lo(m1), hi(m6) }   (blend mask 0xF0 takes the upper half from m6)
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 8, 0, 2, 13 -- confirm against the including source.
 */
#define LOAD_MSG_2_2(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m4, m0); \
        b1 = _mm_blend_epi16(m1, m6, 0xF0); \
    } while(0)
95
96
97
/*
 * LOAD_MSG_2_3(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m1, m3, m4, m5 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m5), hi(m1) }   (blend mask 0xF0 takes the upper half from m1)
 *   b1 = { hi(m3), hi(m4) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 10, 3, 7, 9 -- confirm against the including source.
 */
#define LOAD_MSG_2_3(b0, b1) \
    do \
    { \
        b0 = _mm_blend_epi16(m5, m1, 0xF0); \
        b1 = _mm_unpackhi_epi64(m3, m4); \
    } while(0)
103
104
105
/*
 * LOAD_MSG_2_4(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m2, m3, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m7), lo(m3) }
 *   b1 = { hi(m0), lo(m2) }   (m2:m0 concatenated, shifted right 8 bytes)
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 14, 6, 1, 4 -- confirm against the including source.
 */
#define LOAD_MSG_2_4(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m7, m3); \
        b1 = _mm_alignr_epi8(m2, m0, 8); \
    } while(0)
111
112
113
/*
 * LOAD_MSG_3_1(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m1, m3, m5, m6 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m3), hi(m1) }
 *   b1 = { hi(m6), hi(m5) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 7, 3, 13, 11 -- confirm against the including source.
 */
#define LOAD_MSG_3_1(b0, b1) \
    do \
    { \
        b0 = _mm_unpackhi_epi64(m3, m1); \
        b1 = _mm_unpackhi_epi64(m6, m5); \
    } while(0)
119
120
121
/*
 * LOAD_MSG_3_2(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m4, m6, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m4), hi(m0) }
 *   b1 = { lo(m6), lo(m7) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 9, 1, 12, 14 -- confirm against the including source.
 */
#define LOAD_MSG_3_2(b0, b1) \
    do \
    { \
        b0 = _mm_unpackhi_epi64(m4, m0); \
        b1 = _mm_unpacklo_epi64(m6, m7); \
    } while(0)
127
128
129
/*
 * LOAD_MSG_3_3(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m1, m2, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }; blend mask 0xF0 takes the
 * upper half from the second operand:
 *   b0 = { lo(m1), hi(m2) }
 *   b1 = { lo(m2), hi(m7) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 2, 5, 4, 15 -- confirm against the including source.
 */
#define LOAD_MSG_3_3(b0, b1) \
    do \
    { \
        b0 = _mm_blend_epi16(m1, m2, 0xF0); \
        b1 = _mm_blend_epi16(m2, m7, 0xF0); \
    } while(0)
135
136
137
/*
 * LOAD_MSG_3_4(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m3, m4, m5 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m3), lo(m5) }
 *   b1 = { lo(m0), lo(m4) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 6, 10, 0, 8 -- confirm against the including source.
 */
#define LOAD_MSG_3_4(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m3, m5); \
        b1 = _mm_unpacklo_epi64(m0, m4); \
    } while(0)
143
144
145
/*
 * LOAD_MSG_4_1(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m1, m2, m4, m5 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m4), hi(m2) }
 *   b1 = { lo(m1), lo(m5) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 9, 5, 2, 10 -- confirm against the including source.
 */
#define LOAD_MSG_4_1(b0, b1) \
    do \
    { \
        b0 = _mm_unpackhi_epi64(m4, m2); \
        b1 = _mm_unpacklo_epi64(m1, m5); \
    } while(0)
151
152
153
/*
 * LOAD_MSG_4_2(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m2, m3, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }; blend mask 0xF0 takes the
 * upper half from the second operand:
 *   b0 = { lo(m0), hi(m3) }
 *   b1 = { lo(m2), hi(m7) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 0, 7, 4, 15 -- confirm against the including source.
 */
#define LOAD_MSG_4_2(b0, b1) \
    do \
    { \
        b0 = _mm_blend_epi16(m0, m3, 0xF0); \
        b1 = _mm_blend_epi16(m2, m7, 0xF0); \
    } while(0)
159
160
161
/*
 * LOAD_MSG_4_3(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m1, m3, m5, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }; blend mask 0xF0 takes the
 * upper half from the second operand:
 *   b0 = { lo(m7), hi(m5) }
 *   b1 = { lo(m3), hi(m1) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 14, 11, 6, 3 -- confirm against the including source.
 */
#define LOAD_MSG_4_3(b0, b1) \
    do \
    { \
        b0 = _mm_blend_epi16(m7, m5, 0xF0); \
        b1 = _mm_blend_epi16(m3, m1, 0xF0); \
    } while(0)
167
168
169
/*
 * LOAD_MSG_4_4(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m4, m6 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m0), lo(m6) }   (m6:m0 concatenated, shifted right 8 bytes)
 *   b1 = { lo(m4), hi(m6) }   (blend mask 0xF0 takes the upper half from m6)
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 1, 12, 8, 13 -- confirm against the including source.
 */
#define LOAD_MSG_4_4(b0, b1) \
    do \
    { \
        b0 = _mm_alignr_epi8(m6, m0, 8); \
        b1 = _mm_blend_epi16(m4, m6, 0xF0); \
    } while(0)
175
176
177
/*
 * LOAD_MSG_5_1(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m1, m3, m4 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m1), lo(m3) }
 *   b1 = { lo(m0), lo(m4) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 2, 6, 0, 8 -- confirm against the including source.
 */
#define LOAD_MSG_5_1(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m1, m3); \
        b1 = _mm_unpacklo_epi64(m0, m4); \
    } while(0)
183
184
185
/*
 * LOAD_MSG_5_2(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m1, m5, m6 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m6), lo(m5) }
 *   b1 = { hi(m5), hi(m1) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 12, 10, 11, 3 -- confirm against the including source.
 */
#define LOAD_MSG_5_2(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m6, m5); \
        b1 = _mm_unpackhi_epi64(m5, m1); \
    } while(0)
191
192
193
/*
 * LOAD_MSG_5_3(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m2, m3, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m2), hi(m3) }   (blend mask 0xF0 takes the upper half from m3)
 *   b1 = { hi(m7), hi(m0) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 4, 7, 15, 1 -- confirm against the including source.
 */
#define LOAD_MSG_5_3(b0, b1) \
    do \
    { \
        b0 = _mm_blend_epi16(m2, m3, 0xF0); \
        b1 = _mm_unpackhi_epi64(m7, m0); \
    } while(0)
199
200
201
/*
 * LOAD_MSG_5_4(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m2, m4, m6, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m6), hi(m2) }
 *   b1 = { lo(m7), hi(m4) }   (blend mask 0xF0 takes the upper half from m4)
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 13, 5, 14, 9 -- confirm against the including source.
 */
#define LOAD_MSG_5_4(b0, b1) \
    do \
    { \
        b0 = _mm_unpackhi_epi64(m6, m2); \
        b1 = _mm_blend_epi16(m7, m4, 0xF0); \
    } while(0)
207
208
209
/*
 * LOAD_MSG_6_1(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m2, m6, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m6), hi(m0) }   (blend mask 0xF0 takes the upper half from m0)
 *   b1 = { lo(m7), lo(m2) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 12, 1, 14, 4 -- confirm against the including source.
 */
#define LOAD_MSG_6_1(b0, b1) \
    do \
    { \
        b0 = _mm_blend_epi16(m6, m0, 0xF0); \
        b1 = _mm_unpacklo_epi64(m7, m2); \
    } while(0)
215
216
217
/*
 * LOAD_MSG_6_2(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m2, m5, m6, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m2), hi(m7) }
 *   b1 = { hi(m6), lo(m5) }   (m5:m6 concatenated, shifted right 8 bytes)
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 5, 15, 13, 10 -- confirm against the including source.
 */
#define LOAD_MSG_6_2(b0, b1) \
    do \
    { \
        b0 = _mm_unpackhi_epi64(m2, m7); \
        b1 = _mm_alignr_epi8(m5, m6, 8); \
    } while(0)
223
224
225
/*
 * LOAD_MSG_6_3(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m3, m4 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m0), lo(m3) }
 *   b1 = { hi(m4), lo(m4) }   (32-bit shuffle 1,0,3,2 swaps the two halves)
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 0, 6, 9, 8 -- confirm against the including source.
 */
#define LOAD_MSG_6_3(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m0, m3); \
        b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
    } while(0)
231
232
233
/*
 * LOAD_MSG_6_4(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m1, m3, m5 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m3), hi(m1) }
 *   b1 = { lo(m1), hi(m5) }   (blend mask 0xF0 takes the upper half from m5)
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 7, 3, 2, 11 -- confirm against the including source.
 */
#define LOAD_MSG_6_4(b0, b1) \
    do \
    { \
        b0 = _mm_unpackhi_epi64(m3, m1); \
        b1 = _mm_blend_epi16(m1, m5, 0xF0); \
    } while(0)
239
240
241
/*
 * LOAD_MSG_7_1(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m1, m3, m6 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m6), hi(m3) }
 *   b1 = { lo(m6), hi(m1) }   (blend mask 0xF0 takes the upper half from m1)
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 13, 7, 12, 3 -- confirm against the including source.
 */
#define LOAD_MSG_7_1(b0, b1) \
    do \
    { \
        b0 = _mm_unpackhi_epi64(m6, m3); \
        b1 = _mm_blend_epi16(m6, m1, 0xF0); \
    } while(0)
247
248
249
/*
 * LOAD_MSG_7_2(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m4, m5, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m5), lo(m7) }   (m7:m5 concatenated, shifted right 8 bytes)
 *   b1 = { hi(m0), hi(m4) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 11, 14, 1, 9 -- confirm against the including source.
 */
#define LOAD_MSG_7_2(b0, b1) \
    do \
    { \
        b0 = _mm_alignr_epi8(m7, m5, 8); \
        b1 = _mm_unpackhi_epi64(m0, m4); \
    } while(0)
255
256
257
/*
 * LOAD_MSG_7_3(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m1, m2, m4, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m2), hi(m7) }
 *   b1 = { lo(m4), lo(m1) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 5, 15, 8, 2 -- confirm against the including source.
 */
#define LOAD_MSG_7_3(b0, b1) \
    do \
    { \
        b0 = _mm_unpackhi_epi64(m2, m7); \
        b1 = _mm_unpacklo_epi64(m4, m1); \
    } while(0)
263
264
265
/*
 * LOAD_MSG_7_4(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m2, m3, m5 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m0), lo(m2) }
 *   b1 = { lo(m3), lo(m5) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 0, 4, 6, 10 -- confirm against the including source.
 */
#define LOAD_MSG_7_4(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m0, m2); \
        b1 = _mm_unpacklo_epi64(m3, m5); \
    } while(0)
271
272
273
/*
 * LOAD_MSG_8_1(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m3, m5, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m3), lo(m7) }
 *   b1 = { hi(m5), lo(m0) }   (m0:m5 concatenated, shifted right 8 bytes)
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 6, 14, 11, 0 -- confirm against the including source.
 */
#define LOAD_MSG_8_1(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m3, m7); \
        b1 = _mm_alignr_epi8(m0, m5, 8); \
    } while(0)
279
280
281
/*
 * LOAD_MSG_8_2(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m1, m4, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m7), hi(m4) }
 *   b1 = { hi(m1), lo(m4) }   (m4:m1 concatenated, shifted right 8 bytes)
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 15, 9, 3, 8 -- confirm against the including source.
 */
#define LOAD_MSG_8_2(b0, b1) \
    do \
    { \
        b0 = _mm_unpackhi_epi64(m7, m4); \
        b1 = _mm_alignr_epi8(m4, m1, 8); \
    } while(0)
287
288
289
/*
 * LOAD_MSG_8_3(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m5, m6 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = m6 unchanged, i.e. { lo(m6), hi(m6) }
 *   b1 = { hi(m0), lo(m5) }   (m5:m0 concatenated, shifted right 8 bytes)
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 12, 13, 1, 10 -- confirm against the including source.
 */
#define LOAD_MSG_8_3(b0, b1) \
    do \
    { \
        b0 = m6; \
        b1 = _mm_alignr_epi8(m5, m0, 8); \
    } while(0)
295
296
297
/*
 * LOAD_MSG_8_4(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m1, m2, m3 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m1), hi(m3) }   (blend mask 0xF0 takes the upper half from m3)
 *   b1 = m2 unchanged, i.e. { lo(m2), hi(m2) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 2, 7, 4, 5 -- confirm against the including source.
 */
#define LOAD_MSG_8_4(b0, b1) \
    do \
    { \
        b0 = _mm_blend_epi16(m1, m3, 0xF0); \
        b1 = m2; \
    } while(0)
303
304
305
/*
 * LOAD_MSG_9_1(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m3, m4, m5 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m5), lo(m4) }
 *   b1 = { hi(m3), hi(m0) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 10, 8, 7, 1 -- confirm against the including source.
 */
#define LOAD_MSG_9_1(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m5, m4); \
        b1 = _mm_unpackhi_epi64(m3, m0); \
    } while(0)
311
312
313
/*
 * LOAD_MSG_9_2(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m1, m2, m3 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m1), lo(m2) }
 *   b1 = { lo(m3), hi(m2) }   (blend mask 0xF0 takes the upper half from m2)
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 2, 4, 6, 5 -- confirm against the including source.
 */
#define LOAD_MSG_9_2(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m1, m2); \
        b1 = _mm_blend_epi16(m3, m2, 0xF0); \
    } while(0)
319
320
321
/*
 * LOAD_MSG_9_3(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m1, m4, m6, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m7), hi(m4) }
 *   b1 = { hi(m1), hi(m6) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 15, 9, 3, 13 -- confirm against the including source.
 */
#define LOAD_MSG_9_3(b0, b1) \
    do \
    { \
        b0 = _mm_unpackhi_epi64(m7, m4); \
        b1 = _mm_unpackhi_epi64(m1, m6); \
    } while(0)
327
328
329
/*
 * LOAD_MSG_9_4(b0, b1) -- expands to one do { } while (0) statement.
 * Reads m0, m5, m6, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m5), lo(m7) }   (m7:m5 concatenated, shifted right 8 bytes)
 *   b1 = { lo(m6), lo(m0) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 11, 14, 12, 0 -- confirm against the including source.
 */
#define LOAD_MSG_9_4(b0, b1) \
    do \
    { \
        b0 = _mm_alignr_epi8(m7, m5, 8); \
        b1 = _mm_unpacklo_epi64(m6, m0); \
    } while(0)
335
336
337
/*
 * LOAD_MSG_10_1(b0, b1) -- expands to one do { } while (0) statement.
 * Same expansion as LOAD_MSG_0_1.
 * Reads m0, m1, m2, m3 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m0), lo(m1) }
 *   b1 = { lo(m2), lo(m3) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 0, 2, 4, 6 -- confirm against the including source.
 */
#define LOAD_MSG_10_1(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m0, m1); \
        b1 = _mm_unpacklo_epi64(m2, m3); \
    } while(0)
343
344
345
/*
 * LOAD_MSG_10_2(b0, b1) -- expands to one do { } while (0) statement.
 * Same expansion as LOAD_MSG_0_2.
 * Reads m0, m1, m2, m3 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m0), hi(m1) }
 *   b1 = { hi(m2), hi(m3) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 1, 3, 5, 7 -- confirm against the including source.
 */
#define LOAD_MSG_10_2(b0, b1) \
    do \
    { \
        b0 = _mm_unpackhi_epi64(m0, m1); \
        b1 = _mm_unpackhi_epi64(m2, m3); \
    } while(0)
351
352
353
/*
 * LOAD_MSG_10_3(b0, b1) -- expands to one do { } while (0) statement.
 * Same expansion as LOAD_MSG_0_3.
 * Reads m4, m5, m6, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m4), lo(m5) }
 *   b1 = { lo(m6), lo(m7) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 8, 10, 12, 14 -- confirm against the including source.
 */
#define LOAD_MSG_10_3(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m4, m5); \
        b1 = _mm_unpacklo_epi64(m6, m7); \
    } while(0)
359
360
361
/*
 * LOAD_MSG_10_4(b0, b1) -- expands to one do { } while (0) statement.
 * Same expansion as LOAD_MSG_0_4.
 * Reads m4, m5, m6, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m4), hi(m5) }
 *   b1 = { hi(m6), hi(m7) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 9, 11, 13, 15 -- confirm against the including source.
 */
#define LOAD_MSG_10_4(b0, b1) \
    do \
    { \
        b0 = _mm_unpackhi_epi64(m4, m5); \
        b1 = _mm_unpackhi_epi64(m6, m7); \
    } while(0)
367
368
369
/*
 * LOAD_MSG_11_1(b0, b1) -- expands to one do { } while (0) statement.
 * Same expansion as LOAD_MSG_1_1.
 * Reads m2, m4, m6, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m7), lo(m2) }
 *   b1 = { hi(m4), hi(m6) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 14, 4, 9, 13 -- confirm against the including source.
 */
#define LOAD_MSG_11_1(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m7, m2); \
        b1 = _mm_unpackhi_epi64(m4, m6); \
    } while(0)
375
376
377
/*
 * LOAD_MSG_11_2(b0, b1) -- expands to one do { } while (0) statement.
 * Same expansion as LOAD_MSG_1_2.
 * Reads m3, m4, m5, m7 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m5), lo(m4) }
 *   b1 = { hi(m7), lo(m3) }   (m3:m7 concatenated, shifted right 8 bytes)
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 10, 8, 15, 6 -- confirm against the including source.
 */
#define LOAD_MSG_11_2(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m5, m4); \
        b1 = _mm_alignr_epi8(m3, m7, 8); \
    } while(0)
383
384
385
/*
 * LOAD_MSG_11_3(b0, b1) -- expands to one do { } while (0) statement.
 * Same expansion as LOAD_MSG_1_3.
 * Reads m0, m2, m5 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { hi(m0), lo(m0) }   (32-bit shuffle 1,0,3,2 swaps the two halves)
 *   b1 = { hi(m5), hi(m2) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 1, 0, 11, 5 -- confirm against the including source.
 */
#define LOAD_MSG_11_3(b0, b1) \
    do \
    { \
        b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
        b1 = _mm_unpackhi_epi64(m5, m2); \
    } while(0)
391
392
393
/*
 * LOAD_MSG_11_4(b0, b1) -- expands to one do { } while (0) statement.
 * Same expansion as LOAD_MSG_1_4.
 * Reads m1, m3, m6 from the expansion scope (they are not parameters).
 * Lanes are written { low 64 bits, high 64 bits }:
 *   b0 = { lo(m6), lo(m1) }
 *   b1 = { hi(m3), hi(m1) }
 * NOTE(review): if m<k> holds message words 2k (lo) / 2k+1 (hi), this selects
 * words 12, 2, 7, 3 -- confirm against the including source.
 */
#define LOAD_MSG_11_4(b0, b1) \
    do \
    { \
        b0 = _mm_unpacklo_epi64(m6, m1); \
        b1 = _mm_unpackhi_epi64(m3, m1); \
    } while(0)
399
400
401
#endif
402
Generated on Mon Oct 22 2018 15:15:33 for Fabcoin Core by Doxygen 1.8.11