5 # 1 "/usr/include/stdc-predef.h" 1 3 4 15 uint values[10][(1 << 9)]; 19 #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable 21 __constant ulong blake_iv[] = 23 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 24 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, 25 0x510e527fade682d1, 0x9b05688c2b3e6c1f, 26 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179, 33 void kernel_init_ht(__global char *ht, __global uint *rowCounters) 35 rowCounters[get_global_id(0)] = 0; 38 uint ht_store(uint round, __global char *ht, uint i, 39 ulong xi0, ulong xi1, ulong xi2, ulong xi3, __global uint *rowCounters) 46 row = (xi0 & 0xffff) | ((xi0 & 0xf00000) >> 4); 48 row = ((xi0 & 0xf0000) >> 0) | 49 ((xi0 & 0xf00) << 4) | ((xi0 & 0xf00000) >> 12) | 50 ((xi0 & 0xf) << 4) | ((xi0 & 0xf000) >> 12); 54 xi0 = (xi0 >> 16) | (xi1 << (64 - 16)); 55 xi1 = (xi1 >> 16) | (xi2 << (64 - 16)); 56 xi2 = (xi2 >> 16) | (xi3 << (64 - 16)); 57 p = ht + row * ((1 << (((200 / (9 + 1)) + 1) - 20)) * 6) * 32; 59 uint rowOffset = 4*(row%8); 60 uint xcnt = atomic_add(rowCounters + rowIdx, 1 << rowOffset); 61 xcnt = (xcnt >> rowOffset) & 0x0F; 63 if (cnt >= ((1 << (((200 / (9 + 1)) + 1) - 20)) * 6)) 66 atomic_sub(rowCounters + rowIdx, 1 << rowOffset); 69 p += cnt * 32 + (8 + ((round) / 2) * 4); 71 *(__global uint *)(p - 4) = i; 72 if (round == 0 || round == 1) 75 *(__global ulong *)(p + 0) = xi0; 76 *(__global ulong *)(p + 8) = xi1; 77 *(__global ulong *)(p + 16) = xi2; 82 *(__global uint *)(p + 0) = xi0; 83 *(__global ulong *)(p + 4) = (xi0 >> 32) | (xi1 << 32); 84 *(__global ulong *)(p + 12) = (xi1 >> 32) | (xi2 << 32); 89 *(__global uint *)(p + 0) = xi0; 90 *(__global ulong *)(p + 4) = (xi0 >> 32) | (xi1 << 32); 91 *(__global uint *)(p + 12) = (xi1 >> 32); 96 *(__global ulong *)(p + 0) = xi0; 97 *(__global ulong *)(p + 8) = xi1; 102 *(__global ulong *)(p + 0) = xi0; 103 *(__global uint *)(p + 8) = xi1; 105 else if (round == 6 || round == 7) 108 *(__global uint *)(p + 0) = xi0; 109 *(__global uint *)(p + 4) = (xi0 >> 32); 114 *(__global uint *)(p + 0) = xi0; 119 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) 120 void kernel_round0(__global ulong *blake_state, __global char *ht, __global ulong *buf, 121 __global uint *rowCounters, __global uint *debug) 123 uint tid = get_global_id(0); 125 uint inputs_per_thread = (1 << (200 / (9 + 1))) / get_global_size(0); 126 uint input = tid * inputs_per_thread; 127 uint input_end = (tid + 1) * inputs_per_thread; 130 ulong val1 = 0, val9 = 0; 144 while (input < input_end) 146 v[0] = blake_state[0]; 147 v[1] = blake_state[1]; 148 v[2] = blake_state[2]; 149 v[3] = blake_state[3]; 150 v[4] = blake_state[4]; 151 v[5] = blake_state[5]; 152 v[6] = blake_state[6]; 153 v[7] = blake_state[7]; 163 v[12] ^= 128 + buflen + 4 ; 169 val1 = buf[2] | (ulong)input<<(buflen%8*8); 173 val9 = buf[10] | (ulong)input<<(buflen%8*8); 176 v[0] = (v[0] + v[4] + buf[ 1+0 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + val1 ); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 177 v[1] = (v[1] + v[5] + buf[ 1+2 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+3 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 178 v[2] = (v[2] + v[6] + buf[ 1+4 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+5 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 179 v[3] = (v[3] + v[7] + buf[ 1+6 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+7 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 180 v[0] = (v[0] + v[5] + buf[ 1+8 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + val9); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 181 v[1] = (v[1] + v[6] + buf[ 1+10 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+11 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 182 v[2] = (v[2] + v[7] + buf[ 1+12 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+13 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 183 v[3] = (v[3] + v[4] + buf[ 1+14 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+15 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 186 v[0] = (v[0] + v[4] + buf[ 1+14 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+10 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 187 v[1] = (v[1] + v[5] + buf[ 1+4 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+8 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 188 v[2] = (v[2] + v[6] + val9); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+15 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 189 v[3] = (v[3] + v[7] + buf[ 1+13 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+6 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 190 v[0] = (v[0] + v[5] + val1); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+12 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 191 v[1] = (v[1] + v[6] + buf[ 1+0 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+2 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 192 v[2] = (v[2] + v[7] + buf[ 1+11 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+7 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 193 v[3] = (v[3] + v[4] + buf[ 1+5 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+3 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 195 v[0] = (v[0] + v[4] + buf[ 1+11 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+8 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 196 v[1] = (v[1] + v[5] + buf[ 1+12 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+0 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 197 v[2] = (v[2] + v[6] + buf[ 1+5 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+2 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 198 v[3] = (v[3] + v[7] + buf[ 1+15 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+13 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 199 v[0] = (v[0] + v[5] + buf[ 1+10 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+14 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 200 v[1] = (v[1] + v[6] + buf[ 1+3 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+6 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 201 v[2] = (v[2] + v[7] + buf[ 1+7 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + val1); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 202 v[3] = (v[3] + v[4] + val9); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+4 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 204 v[0] = (v[0] + v[4] + buf[ 1+7 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + val9); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 205 v[1] = (v[1] + v[5] + buf[ 1+3 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + val1); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 206 v[2] = (v[2] + v[6] + buf[ 1+13 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+12 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 207 v[3] = (v[3] + v[7] + buf[ 1+11 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+14 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 208 v[0] = (v[0] + v[5] + buf[ 1+2 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+6 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 209 v[1] = (v[1] + v[6] + buf[ 1+5 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+10 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 210 v[2] = (v[2] + v[7] + buf[ 1+4 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+0 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 211 v[3] = (v[3] + v[4] + buf[ 1+15 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+8 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 213 v[0] = (v[0] + v[4] + val9); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+0 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 214 v[1] = (v[1] + v[5] + buf[ 1+5 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+7 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 215 v[2] = (v[2] + v[6] + buf[ 1+2 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+4 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 216 v[3] = (v[3] + v[7] + buf[ 1+10 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+15 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 217 v[0] = (v[0] + v[5] + buf[ 1+14 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + val1); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 218 v[1] = (v[1] + v[6] + buf[ 1+11 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+12 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 219 v[2] = (v[2] + v[7] + buf[ 1+6 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+8 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 220 v[3] = (v[3] + v[4] + buf[ 1+3 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+13 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 222 v[0] = (v[0] + v[4] + buf[ 1+2 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+12 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 223 v[1] = (v[1] + v[5] + buf[ 1+6 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+10 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 224 v[2] = (v[2] + v[6] + buf[ 1+0 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+11 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 225 v[3] = (v[3] + v[7] + buf[ 1+8 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+3 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 226 v[0] = (v[0] + v[5] + buf[ 1+4 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+13 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 227 v[1] = (v[1] + v[6] + buf[ 1+7 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+5 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 228 v[2] = (v[2] + v[7] + buf[ 1+15 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+14 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 229 v[3] = (v[3] + v[4] + val1); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + val9); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 231 v[0] = (v[0] + v[4] + buf[ 1+12 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+5 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 232 v[1] = (v[1] + v[5] + val1); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+15 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 233 v[2] = (v[2] + v[6] + buf[ 1+14 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+13 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 234 v[3] = (v[3] + v[7] + buf[ 1+4 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+10 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 235 v[0] = (v[0] + v[5] + buf[ 1+0 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+7 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 236 v[1] = (v[1] + v[6] + buf[ 1+6 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+3 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 237 v[2] = (v[2] + v[7] + val9); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+2 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 238 v[3] = (v[3] + v[4] + buf[ 1+8 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+11 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 240 v[0] = (v[0] + v[4] + buf[ 1+13 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+11 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 241 v[1] = (v[1] + v[5] + buf[ 1+7 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+14 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 242 v[2] = (v[2] + v[6] + buf[ 1+12 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + val1); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 243 v[3] = (v[3] + v[7] + buf[ 1+3 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + val9); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 244 v[0] = (v[0] + v[5] + buf[ 1+5 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+0 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 245 v[1] = (v[1] + v[6] + buf[ 1+15 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+4 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 246 v[2] = (v[2] + v[7] + buf[ 1+8 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+6 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 247 v[3] = (v[3] + v[4] + buf[ 1+2 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+10 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 249 v[0] = (v[0] + v[4] + buf[ 1+6 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+15 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 250 v[1] = (v[1] + v[5] + buf[ 1+14 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + val9); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 251 v[2] = (v[2] + v[6] + buf[ 1+11 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+3 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 252 v[3] = (v[3] + v[7] + buf[ 1+0 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+8 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 253 v[0] = (v[0] + v[5] + buf[ 1+12 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+2 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 254 v[1] = (v[1] + v[6] + buf[ 1+13 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+7 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 255 v[2] = (v[2] + v[7] + val1); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+4 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 256 v[3] = (v[3] + v[4] + buf[ 1+10 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+5 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 258 v[0] = (v[0] + v[4] + buf[ 1+10 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+2 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 259 v[1] = (v[1] + v[5] + buf[ 1+8 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+4 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 260 v[2] = (v[2] + v[6] + buf[ 1+7 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+6 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 261 v[3] = (v[3] + v[7] + val1); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+5 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 262 v[0] = (v[0] + v[5] + buf[ 1+15 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+11 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 263 v[1] = (v[1] + v[6] + val9); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+14 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 264 v[2] = (v[2] + v[7] + buf[ 1+3 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+12 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 265 v[3] = (v[3] + v[4] + buf[ 1+13 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+0 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 267 v[0] = (v[0] + v[4] + buf[ 1+0 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + val1); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 268 v[1] = (v[1] + v[5] + buf[ 1+2 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+3 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 269 v[2] = (v[2] + v[6] + buf[ 1+4 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+5 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 270 v[3] = (v[3] + v[7] + buf[ 1+6 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+7 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 271 v[0] = (v[0] + v[5] + buf[ 1+8 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + val9); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 272 v[1] = (v[1] + v[6] + buf[ 1+10 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+11 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 273 v[2] = (v[2] + v[7] + buf[ 1+12 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+13 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 274 v[3] = (v[3] + v[4] + buf[ 1+14 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+15 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 276 v[0] = (v[0] + v[4] + buf[ 1+14 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+10 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 277 v[1] = (v[1] + v[5] + buf[ 1+4 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+8 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 278 v[2] = (v[2] + v[6] + val9); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+15 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 279 v[3] = (v[3] + v[7] + buf[ 1+13 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+6 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 280 v[0] = (v[0] + v[5] + val1); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+12 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 281 v[1] = (v[1] + v[6] + buf[ 1+0 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+2 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 282 v[2] = (v[2] + v[7] + buf[ 1+11 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+7 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 283 v[3] = (v[3] + v[4] + buf[ 1+5 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+3 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 288 h[0] = blake_state[0] ^ v[0] ^ v[8]; 289 h[1] = blake_state[1] ^ v[1] ^ v[9]; 290 h[2] = blake_state[2] ^ v[2] ^ v[10]; 291 h[3] = blake_state[3] ^ v[3] ^ v[11]; 292 h[4] = blake_state[4] ^ v[4] ^ v[12]; 293 h[5] = blake_state[5] ^ v[5] ^ v[13]; 294 h[6] = (blake_state[6] ^ v[6] ^ v[14]) & 0xffff; 298 dropped += ht_store(0, ht, input * 2, 303 dropped += ht_store(0, ht, input * 2 + 1, 304 (h[3] >> 8) | (h[4] << (64 - 8)), 305 (h[4] >> 8) | (h[5] << (64 - 8)), 306 (h[5] >> 8) | (h[6] << (64 - 8)), 307 (h[6] >> 8), rowCounters); 317 ulong half_aligned_long(__global ulong *p, uint offset) 320 (((ulong)*(__global uint *)((__global char *)p + offset + 0)) << 0) | 321 (((ulong)*(__global uint *)((__global char *)p + offset + 4)) << 32); 327 uint well_aligned_int(__global ulong *_p, uint offset) 329 __global char *p = (__global char *)_p; 330 return *(__global uint *)(p + offset); 333 uint xor_and_store(uint round, __global char *ht_dst, uint row, 334 uint slot_a, uint slot_b, __global ulong *a, __global ulong *b, 335 __global uint *rowCounters) 341 if (round == 1 || round == 2) 344 xi0 = *(a++) ^ *(b++); 345 xi1 = *(a++) ^ *(b++); 350 xi0 = (xi0 >> 8) | (xi1 << (64 - 8)); 351 xi1 = (xi1 >> 8) | (xi2 << (64 - 8)); 358 xi0 = half_aligned_long(a, 0) ^ half_aligned_long(b, 0); 359 xi1 = half_aligned_long(a, 8) ^ half_aligned_long(b, 8); 360 xi2 = well_aligned_int(a, 16) ^ well_aligned_int(b, 16); 362 else if (round == 4 || round == 5) 365 xi0 = half_aligned_long(a, 0) ^ half_aligned_long(b, 0); 366 xi1 = half_aligned_long(a, 8) ^ half_aligned_long(b, 8); 371 xi0 = (xi0 >> 8) | (xi1 << (64 - 8)); 379 xi1 = *(__global uint *)a ^ *(__global uint *)b; 384 xi0 = (xi0 >> 8) | (xi1 << (64 - 8)); 388 else if (round == 7 || round == 8) 391 xi0 = half_aligned_long(a, 0) ^ half_aligned_long(b, 0); 407 return ht_store(round, ht_dst, ((row << 12) | ((slot_b & 0x3f) << 6) | (slot_a & 0x3f)), 408 xi0, xi1, xi2, 0, rowCounters); 415 void equihash_round(uint round, 416 __global char *ht_src, 417 __global char *ht_dst, 418 __global uint *debug, 419 __local uchar *first_words_data, 420 __local uint *collisionsData, 421 __local uint *collisionsNum, 422 __global uint *rowCountersSrc, 423 __global uint *rowCountersDst) 425 uint tid = get_global_id(0); 426 uint tlid = get_local_id(0); 429 __local uchar *first_words = &first_words_data[(((1 << (((200 / (9 + 1)) + 1) - 20)) * 6)+2)*tlid]; 435 uint dropped_coll = 0; 436 uint dropped_stor = 0; 437 __global ulong *a, *b; 440 xi_offset = (8 + ((round - 1) / 2) * 4); 448 barrier(CLK_LOCAL_MEM_FENCE); 449 p = (ht_src + tid * ((1 << (((200 / (9 + 1)) + 1) - 20)) * 6) * 32); 451 uint rowOffset = 4*(tid%8); 452 cnt = (rowCountersSrc[rowIdx] >> rowOffset) & 0x0F; 453 cnt = min(cnt, (uint)((1 << (((200 / (9 + 1)) + 1) - 20)) * 6)); 458 for (i = 0; i < cnt; i++, p += 32) 459 first_words[i] = (*(__global uchar *)p) & mask; 461 for (i = 0; i < cnt-1 && thCollNum < (((1 << (((200 / (9 + 1)) + 1) - 20)) * 6) * 5); i++) 463 uchar data_i = first_words[i]; 464 uint collision = (tid << 10) | (i << 5) | (i + 1); 465 for (j = i+1; (j+4) < cnt;) 468 uint isColl = ((data_i == first_words[j]) ? 1 : 0); 472 uint index = atomic_inc(collisionsNum); 473 collisionsData[index] = collision; 479 uint isColl = ((data_i == first_words[j]) ? 1 : 0); 483 uint index = atomic_inc(collisionsNum); 484 collisionsData[index] = collision; 490 uint isColl = ((data_i == first_words[j]) ? 1 : 0); 494 uint index = atomic_inc(collisionsNum); 495 collisionsData[index] = collision; 501 uint isColl = ((data_i == first_words[j]) ? 1 : 0); 505 uint index = atomic_inc(collisionsNum); 506 collisionsData[index] = collision; 514 uint isColl = ((data_i == first_words[j]) ? 1 : 0); 518 uint index = atomic_inc(collisionsNum); 519 collisionsData[index] = collision; 526 barrier(CLK_LOCAL_MEM_FENCE); 527 uint totalCollisions = *collisionsNum; 528 for (uint index = tlid; index < totalCollisions; index += get_local_size(0)) 530 uint collision = collisionsData[index]; 531 uint collisionThreadId = collision >> 10; 532 uint i = (collision >> 5) & 0x1F; 533 uint j = collision & 0x1F; 534 __global uchar *ptr = (__global uchar *)ht_src + collisionThreadId * ((1 << (((200 / (9 + 1)) + 1) - 20)) * 6) * 32 + 536 a = (__global ulong *)(ptr + i * 32); 537 b = (__global ulong *)(ptr + j * 32); 538 dropped_stor += xor_and_store(round, ht_dst, collisionThreadId, i, j, 539 a, b, rowCountersDst); 547 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round1(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[(((1 << (((200 / (9 + 1)) + 1) - 20)) * 6)+2)*64]; __local uint collisionsData[(((1 << (((200 / (9 + 1)) + 1) - 20)) * 6) * 5) * 64]; __local uint collisionsNum; equihash_round(1, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 548 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round2(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[(((1 << (((200 / (9 + 1)) + 1) - 20)) * 6)+2)*64]; __local uint collisionsData[(((1 << (((200 / (9 + 1)) + 1) - 20)) * 6) * 5) * 64]; __local uint collisionsNum; equihash_round(2, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 549 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round3(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[(((1 << (((200 / (9 + 1)) + 1) - 20)) * 6)+2)*64]; __local uint collisionsData[(((1 << (((200 / (9 + 1)) + 1) - 20)) * 6) * 5) * 64]; __local uint collisionsNum; equihash_round(3, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 550 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round4(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[(((1 << (((200 / (9 + 1)) + 1) - 20)) * 6)+2)*64]; __local uint collisionsData[(((1 << (((200 / (9 + 1)) + 1) - 20)) * 6) * 5) * 64]; __local uint collisionsNum; equihash_round(4, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 551 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round5(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[(((1 << (((200 / (9 + 1)) + 1) - 20)) * 6)+2)*64]; __local uint collisionsData[(((1 << (((200 / (9 + 1)) + 1) - 20)) * 6) * 5) * 64]; __local uint collisionsNum; equihash_round(5, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 552 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round6(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[(((1 << (((200 / (9 + 1)) + 1) - 20)) * 6)+2)*64]; __local uint collisionsData[(((1 << (((200 / (9 + 1)) + 1) - 20)) * 6) * 5) * 64]; __local uint collisionsNum; equihash_round(6, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 553 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round7(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[(((1 << (((200 / (9 + 1)) + 1) - 20)) * 6)+2)*64]; __local uint collisionsData[(((1 << (((200 / (9 + 1)) + 1) - 20)) * 6) * 5) * 64]; __local uint collisionsNum; equihash_round(7, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 556 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) 557 void kernel_round8(__global char *ht_src, __global char *ht_dst, 558 __global uint *rowCountersSrc, __global uint *rowCountersDst, 559 __global uint *debug, __global sols_t *sols) 561 uint tid = get_global_id(0); 562 __local uchar first_words_data[(((1 << (((200 / (9 + 1)) + 1) - 20)) * 6)+2)*64]; 563 __local uint collisionsData[(((1 << (((200 / (9 + 1)) + 1) - 20)) * 6) * 5) * 64]; 564 __local uint collisionsNum; 565 equihash_round(8, ht_src, ht_dst, debug, first_words_data, collisionsData, 566 &collisionsNum, rowCountersSrc, rowCountersDst); 568 sols->nr = sols->likely_invalids = 0; 571 uint expand_ref(__global char *ht, uint xi_offset, uint row, uint slot) 573 return *(__global uint *)(ht + row * ((1 << (((200 / (9 + 1)) + 1) - 20)) * 6) * 32 + 574 slot * 32 + xi_offset - 4); 582 uint expand_refs(uint *ins, uint nr_inputs, __global char **htabs, 585 __global char *ht = htabs[round % 2]; 586 uint i = nr_inputs - 1; 587 uint j = nr_inputs * 2 - 1; 588 uint xi_offset = (8 + ((round) / 2) * 4); 589 int dup_to_watch = -1; 592 ins[j] = expand_ref(ht, xi_offset, 593 (ins[i] >> 12), ((ins[i] >> 6) & 0x3f)); 594 ins[j - 1] = expand_ref(ht, xi_offset, 595 (ins[i] >> 12), (ins[i] & 0x3f)); 598 if (dup_to_watch == -1) 599 dup_to_watch = ins[j]; 600 else if (ins[j] == dup_to_watch || ins[j - 1] == dup_to_watch) 615 void potential_sol(__global char **htabs, __global sols_t *sols, 616 uint ref0, uint ref1) 619 uint values_tmp[(1 << 9)]; 623 values_tmp[nr_values++] = ref0; 624 values_tmp[nr_values++] = ref1; 629 if (!expand_refs(values_tmp, nr_values, htabs, round)) 635 sol_i = atomic_inc(&sols->nr); 638 for (i = 0; i < (1 << 9); i++) 639 sols->values[sol_i][i] = values_tmp[i]; 640 sols->valid[sol_i] = 1; 646 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) 647 void kernel_sols(__global char *ht0, __global char *ht1, __global sols_t *sols, 648 __global uint *rowCountersSrc, __global uint *rowCountersDst) 650 uint tid = get_global_id(0); 651 __global char *htabs[2] = { ht0, ht1 }; 652 // __global char *hcounters[2] = { rowCountersSrc, rowCountersDst }; 653 uint ht_i = (9 - 1) % 2; 655 uint xi_offset = (8 + ((9 - 1) / 2) * 4); 657 __global char *a, *b; 666 uint mask = 0xffffff; 670 a = htabs[ht_i] + tid * ((1 << (((200 / (9 + 1)) + 1) - 20)) * 6) * 32; 672 uint rowOffset = 4*(tid%8); 673 cnt = (rowCountersSrc[rowIdx] >> rowOffset) & 0x0F; 674 cnt = min(cnt, (uint)((1 << (((200 / (9 + 1)) + 1) - 20)) * 6)); 677 for (i = 0; i < cnt; i++, a += 32) 679 uint a_data = ((*(__global uint *)a) & mask); 680 ref_i = *(__global uint *)(a - 4); 681 for (j = i + 1, b = a + 32; j < cnt; j++, b += 32) 683 if (a_data == ((*(__global uint *)b) & mask)) 685 ref_j = *(__global uint *)(b - 4); 686 collisions = ((ulong)ref_i << 32) | ref_j; 694 potential_sol(htabs, sols, collisions >> 32, collisions & 0xffffffff); const unsigned char CL_MINER_KERNEL[]
const size_t CL_MINER_KERNEL_SIZE