5 # 1 "/usr/include/stdc-predef.h" 1 3 4 14 uint values[10][(1 << 9)]; 18 #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable 20 __constant ulong blake_iv[] = 22 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 23 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, 24 0x510e527fade682d1, 0x9b05688c2b3e6c1f, 25 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179, 32 void kernel_init_ht(__global char *ht, __global uint *rowCounters) 34 rowCounters[get_global_id(0)] = 0; 37 uint ht_store(uint round, __global char *ht, uint i, 38 ulong xi0, ulong xi1, ulong xi2, ulong xi3, __global uint *rowCounters) 45 row = (xi0 & 0xffff) | ((xi0 & 0xf00000) >> 4); 47 row = ((xi0 & 0xf0000) >> 0) | 48 ((xi0 & 0xf00) << 4) | ((xi0 & 0xf00000) >> 12) | 49 ((xi0 & 0xf) << 4) | ((xi0 & 0xf000) >> 12); 53 xi0 = (xi0 >> 16) | (xi1 << (64 - 16)); 54 xi1 = (xi1 >> 16) | (xi2 << (64 - 16)); 55 xi2 = (xi2 >> 16) | (xi3 << (64 - 16)); 56 p = ht + row * (( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 32; 58 uint rowOffset = 8*(row%4); 59 uint xcnt = atomic_add(rowCounters + rowIdx, 1 << rowOffset); 60 xcnt = (xcnt >> rowOffset) & 0xFF; 62 if (cnt >= (( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9)) 65 atomic_sub(rowCounters + rowIdx, 1 << rowOffset); 68 p += cnt * 32 + (8 + ((round) / 2) * 4); 70 *(__global uint *)(p - 4) = i; 71 if (round == 0 || round == 1) 74 *(__global ulong *)(p + 0) = xi0; 75 *(__global ulong *)(p + 8) = xi1; 76 *(__global ulong *)(p + 16) = xi2; 81 *(__global uint *)(p + 0) = xi0; 82 *(__global ulong *)(p + 4) = (xi0 >> 32) | (xi1 << 32); 83 *(__global ulong *)(p + 12) = (xi1 >> 32) | (xi2 << 32); 88 *(__global uint *)(p + 0) = xi0; 89 *(__global ulong *)(p + 4) = (xi0 >> 32) | (xi1 << 32); 90 *(__global uint *)(p + 12) = (xi1 >> 32); 95 *(__global ulong *)(p + 0) = xi0; 96 *(__global ulong *)(p + 8) = xi1; 101 *(__global ulong *)(p + 0) = xi0; 102 *(__global uint *)(p + 8) = xi1; 104 else if (round == 6 || round == 7) 107 *(__global uint *)(p + 0) = xi0; 108 *(__global uint *)(p + 4) = (xi0 >> 32); 113 *(__global uint *)(p + 0) = xi0; 118 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) 119 void kernel_round0(__global ulong *blake_state, __global char *ht, __global ulong *buf, 120 __global uint *rowCounters, __global uint *debug) 122 uint tid = get_global_id(0); 124 uint inputs_per_thread = (1 << (200 / (9 + 1))) / get_global_size(0); 125 uint input = tid * inputs_per_thread; 126 uint input_end = (tid + 1) * inputs_per_thread; 128 ulong val1 = 0, val9 = 0; 141 while (input < input_end) 144 v[0] = blake_state[0]; 145 v[1] = blake_state[1]; 146 v[2] = blake_state[2]; 147 v[3] = blake_state[3]; 148 v[4] = blake_state[4]; 149 v[5] = blake_state[5]; 150 v[6] = blake_state[6]; 151 v[7] = blake_state[7]; 161 v[12] ^= 128 + buflen + 4 ; 167 val1 = buf[2] | (ulong)input<<(buflen%8*8); 171 val9 = buf[10] | (ulong)input<<(buflen%8*8); 175 v[0] = (v[0] + v[4] + buf[ 1+0 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + val1); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 176 v[1] = (v[1] + v[5] + buf[ 1+2 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+3 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 177 v[2] = (v[2] + v[6] + buf[ 1+4 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+5 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 178 v[3] = (v[3] + v[7] + buf[ 1+6 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+7 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 179 v[0] = (v[0] + v[5] + buf[ 1+8 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + val9); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 180 v[1] = (v[1] + v[6] + buf[ 1+10 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+11 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 181 v[2] = (v[2] + v[7] + buf[ 1+12 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+13 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 182 v[3] = (v[3] + v[4] + buf[ 1+14 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+15 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 184 v[0] = (v[0] + v[4] + buf[ 1+14 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+10 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 185 v[1] = (v[1] + v[5] + buf[ 1+4 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+8 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 186 v[2] = (v[2] + v[6] + val9); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+15 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 187 v[3] = (v[3] + v[7] + buf[ 1+13 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+6 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 188 v[0] = (v[0] + v[5] + val1); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+12 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 189 v[1] = (v[1] + v[6] + buf[ 1+0 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+2 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 190 v[2] = (v[2] + v[7] + buf[ 1+11 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+7 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 191 v[3] = (v[3] + v[4] + buf[ 1+5 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+3 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 193 v[0] = (v[0] + v[4] + buf[ 1+11 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+8 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 194 v[1] = (v[1] + v[5] + buf[ 1+12 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+0 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 195 v[2] = (v[2] + v[6] + buf[ 1+5 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+2 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 196 v[3] = (v[3] + v[7] + buf[ 1+15 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+13 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 197 v[0] = (v[0] + v[5] + buf[ 1+10 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+14 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 198 v[1] = (v[1] + v[6] + buf[ 1+3 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+6 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 199 v[2] = (v[2] + v[7] + buf[ 1+7 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + val1); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 200 v[3] = (v[3] + v[4] + val9); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+4 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 202 v[0] = (v[0] + v[4] + buf[ 1+7 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + val9); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 203 v[1] = (v[1] + v[5] + buf[ 1+3 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + val1); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 204 v[2] = (v[2] + v[6] + buf[ 1+13 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+12 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 205 v[3] = (v[3] + v[7] + buf[ 1+11 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+14 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 206 v[0] = (v[0] + v[5] + buf[ 1+2 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+6 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 207 v[1] = (v[1] + v[6] + buf[ 1+5 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+10 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 208 v[2] = (v[2] + v[7] + buf[ 1+4 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+0 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 209 v[3] = (v[3] + v[4] + buf[ 1+15 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+8 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 211 v[0] = (v[0] + v[4] + val9); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+0 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 212 v[1] = (v[1] + v[5] + buf[ 1+5 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+7 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 213 v[2] = (v[2] + v[6] + buf[ 1+2 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+4 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 214 v[3] = (v[3] + v[7] + buf[ 1+10 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+15 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 215 v[0] = (v[0] + v[5] + buf[ 1+14 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + val1); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 216 v[1] = (v[1] + v[6] + buf[ 1+11 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+12 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 217 v[2] = (v[2] + v[7] + buf[ 1+6 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+8 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 218 v[3] = (v[3] + v[4] + buf[ 1+3 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+13 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 220 v[0] = (v[0] + v[4] + buf[ 1+2 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+12 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 221 v[1] = (v[1] + v[5] + buf[ 1+6 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+10 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 222 v[2] = (v[2] + v[6] + buf[ 1+0 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+11 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 223 v[3] = (v[3] + v[7] + buf[ 1+8 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+3 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 224 v[0] = (v[0] + v[5] + buf[ 1+4 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+13 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 225 v[1] = (v[1] + v[6] + buf[ 1+7 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+5 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 226 v[2] = (v[2] + v[7] + buf[ 1+15 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+14 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 227 v[3] = (v[3] + v[4] + val1); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + val9); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 229 v[0] = (v[0] + v[4] + buf[ 1+12 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+5 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 230 v[1] = (v[1] + v[5] + val1); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+15 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 231 v[2] = (v[2] + v[6] + buf[ 1+14 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+13 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 232 v[3] = (v[3] + v[7] + buf[ 1+4 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+10 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 233 v[0] = (v[0] + v[5] + buf[ 1+0 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+7 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 234 v[1] = (v[1] + v[6] + buf[ 1+6 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+3 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 235 v[2] = (v[2] + v[7] + val9); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+2 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 236 v[3] = (v[3] + v[4] + buf[ 1+8 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+11 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 238 v[0] = (v[0] + v[4] + buf[ 1+13 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+11 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 239 v[1] = (v[1] + v[5] + buf[ 1+7 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+14 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 240 v[2] = (v[2] + v[6] + buf[ 1+12 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + val1); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 241 v[3] = (v[3] + v[7] + buf[ 1+3 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + val9); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 242 v[0] = (v[0] + v[5] + buf[ 1+5 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+0 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 243 v[1] = (v[1] + v[6] + buf[ 1+15 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+4 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 244 v[2] = (v[2] + v[7] + buf[ 1+8 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+6 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 245 v[3] = (v[3] + v[4] + buf[ 1+2 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+10 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 247 v[0] = (v[0] + v[4] + buf[ 1+6 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+15 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 248 v[1] = (v[1] + v[5] + buf[ 1+14 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + val9); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 249 v[2] = (v[2] + v[6] + buf[ 1+11 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+3 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 250 v[3] = (v[3] + v[7] + buf[ 1+0 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+8 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 251 v[0] = (v[0] + v[5] + buf[ 1+12 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+2 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 252 v[1] = (v[1] + v[6] + buf[ 1+13 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+7 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 253 v[2] = (v[2] + v[7] + val1); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+4 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 254 v[3] = (v[3] + v[4] + buf[ 1+10 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+5 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 256 v[0] = (v[0] + v[4] + buf[ 1+10 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+2 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 257 v[1] = (v[1] + v[5] + buf[ 1+8 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+4 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 258 v[2] = (v[2] + v[6] + buf[ 1+7 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+6 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 259 v[3] = (v[3] + v[7] + val1); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+5 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 260 v[0] = (v[0] + v[5] + buf[ 1+15 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+11 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 261 v[1] = (v[1] + v[6] + val9); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+14 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 262 v[2] = (v[2] + v[7] + buf[ 1+3 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+12 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 263 v[3] = (v[3] + v[4] + buf[ 1+13 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+0 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 265 v[0] = (v[0] + v[4] + buf[ 1+0 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + val1); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 266 v[1] = (v[1] + v[5] + buf[ 1+2 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+3 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 267 v[2] = (v[2] + v[6] + buf[ 1+4 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+5 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 268 v[3] = (v[3] + v[7] + buf[ 1+6 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+7 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 269 v[0] = (v[0] + v[5] + buf[ 1+8 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + val9); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 270 v[1] = (v[1] + v[6] + buf[ 1+10 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+11 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 271 v[2] = (v[2] + v[7] + buf[ 1+12 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+13 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 272 v[3] = (v[3] + v[4] + buf[ 1+14 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+15 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 274 v[0] = (v[0] + v[4] + buf[ 1+14 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+10 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 275 v[1] = (v[1] + v[5] + buf[ 1+4 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+8 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 276 v[2] = (v[2] + v[6] + val9); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+15 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 277 v[3] = (v[3] + v[7] + buf[ 1+13 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+6 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 278 v[0] = (v[0] + v[5] + val1); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+12 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 279 v[1] = (v[1] + v[6] + buf[ 1+0 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+2 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 280 v[2] = (v[2] + v[7] + buf[ 1+11 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+7 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 281 v[3] = (v[3] + v[4] + buf[ 1+5 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+3 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 286 h[0] = blake_state[0] ^ v[0] ^ v[8]; 287 h[1] = blake_state[1] ^ v[1] ^ v[9]; 288 h[2] = blake_state[2] ^ v[2] ^ v[10]; 289 h[3] = blake_state[3] ^ v[3] ^ v[11]; 290 h[4] = blake_state[4] ^ v[4] ^ v[12]; 291 h[5] = blake_state[5] ^ v[5] ^ v[13]; 294 h[6] = (blake_state[6] ^ v[6] ^ v[14]) & 0xffff; 299 dropped += ht_store(0, ht, input * 2, 304 dropped += ht_store(0, ht, input * 2 + 1, 305 (h[3] >> 8) | (h[4] << (64 - 8)), 306 (h[4] >> 8) | (h[5] << (64 - 8)), 307 (h[5] >> 8) | (h[6] << (64 - 8)), 308 (h[6] >> 8), rowCounters); 320 ulong half_aligned_long(__global ulong *p, uint offset) 323 (((ulong)*(__global uint *)((__global char *)p + offset + 0)) << 0) | 324 (((ulong)*(__global uint *)((__global char *)p + offset + 4)) << 32); 330 uint well_aligned_int(__global ulong *_p, uint offset) 332 __global char *p = (__global char *)_p; 333 return *(__global uint *)(p + offset); 336 uint xor_and_store(uint round, __global char *ht_dst, uint row, 337 uint slot_a, uint slot_b, __global ulong *a, __global ulong *b, 338 __global uint *rowCounters) 344 if (round == 1 || round == 2) 347 xi0 = *(a++) ^ *(b++); 348 xi1 = *(a++) ^ *(b++); 353 xi0 = (xi0 >> 8) | (xi1 << (64 - 8)); 354 xi1 = (xi1 >> 8) | (xi2 << (64 - 8)); 361 xi0 = half_aligned_long(a, 0) ^ half_aligned_long(b, 0); 362 xi1 = half_aligned_long(a, 8) ^ half_aligned_long(b, 8); 363 xi2 = well_aligned_int(a, 16) ^ well_aligned_int(b, 16); 365 else if (round == 4 || round == 5) 368 xi0 = half_aligned_long(a, 0) ^ half_aligned_long(b, 0); 369 xi1 = half_aligned_long(a, 8) ^ half_aligned_long(b, 8); 374 xi0 = (xi0 >> 8) | (xi1 << (64 - 8)); 382 xi1 = *(__global uint *)a ^ *(__global uint *)b; 387 xi0 = (xi0 >> 8) | (xi1 << (64 - 8)); 391 else if (round == 7 || round == 8) 394 xi0 = half_aligned_long(a, 0) ^ half_aligned_long(b, 0); 410 return ht_store(round, ht_dst, ((row << 12) | ((slot_b & 0x3f) << 6) | (slot_a & 0x3f)), 411 xi0, xi1, xi2, 0, rowCounters); 418 void equihash_round(uint round, 419 __global char *ht_src, 420 __global char *ht_dst, 421 __global uint *debug, 422 __local uchar *first_words_data, 423 __local uint *collisionsData, 424 __local uint *collisionsNum, 425 __global uint *rowCountersSrc, 426 __global uint *rowCountersDst) 428 uint tid = get_global_id(0); 429 uint tlid = get_local_id(0); 432 __local uchar *first_words = &first_words_data[((( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9)+2)*tlid]; 438 uint dropped_coll = 0; 439 uint dropped_stor = 0; 440 __global ulong *a, *b; 443 xi_offset = (8 + ((round - 1) / 2) * 4); 451 barrier(CLK_LOCAL_MEM_FENCE); 452 p = (ht_src + tid * (( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 32); 454 uint rowOffset = 8*(tid%4); 455 cnt = (rowCountersSrc[rowIdx] >> rowOffset) & 0xFF; 456 cnt = min(cnt, (uint)(( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9)); 462 for (i = 0; i < cnt; i++, p += 32) 463 first_words[i] = (*(__global uchar *)p) & mask; 466 for (i = 0; i < cnt-1 && thCollNum < ((( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 5); i++) 468 uchar data_i = first_words[i]; 469 uint collision = (tid << 10) | (i << 5) | (i + 1); 470 for (j = i+1; (j+4) < cnt;) 473 uint isColl = ((data_i == first_words[j]) ? 1 : 0); 477 uint index = atomic_inc(collisionsNum); 478 collisionsData[index] = collision; 484 uint isColl = ((data_i == first_words[j]) ? 1 : 0); 488 uint index = atomic_inc(collisionsNum); 489 collisionsData[index] = collision; 495 uint isColl = ((data_i == first_words[j]) ? 1 : 0); 499 uint index = atomic_inc(collisionsNum); 500 collisionsData[index] = collision; 506 uint isColl = ((data_i == first_words[j]) ? 1 : 0); 510 uint index = atomic_inc(collisionsNum); 511 collisionsData[index] = collision; 519 uint isColl = ((data_i == first_words[j]) ? 1 : 0); 523 uint index = atomic_inc(collisionsNum); 524 collisionsData[index] = collision; 531 barrier(CLK_LOCAL_MEM_FENCE); 532 uint totalCollisions = *collisionsNum; 533 for (uint index = tlid; index < totalCollisions; index += get_local_size(0)) 535 uint collision = collisionsData[index]; 536 uint collisionThreadId = collision >> 10; 537 uint i = (collision >> 5) & 0x1F; 538 uint j = collision & 0x1F; 539 __global uchar *ptr = (__global uchar *)ht_src + collisionThreadId * (( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 32 + 541 a = (__global ulong *)(ptr + i * 32); 542 b = (__global ulong *)(ptr + j * 32); 543 dropped_stor += xor_and_store(round, ht_dst, collisionThreadId, i, j, 544 a, b, rowCountersDst); 552 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round1(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[((( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9)+2)*64]; __local uint collisionsData[((( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 5) * 64]; __local uint collisionsNum; equihash_round(1, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 553 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round2(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[((( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9)+2)*64]; __local uint collisionsData[((( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 5) * 64]; __local uint collisionsNum; equihash_round(2, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 554 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round3(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[((( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9)+2)*64]; __local uint collisionsData[((( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 5) * 64]; __local uint collisionsNum; equihash_round(3, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 555 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round4(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[((( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9)+2)*64]; __local uint collisionsData[((( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 5) * 64]; __local uint collisionsNum; equihash_round(4, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 556 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round5(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[((( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9)+2)*64]; __local uint collisionsData[((( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 5) * 64]; __local uint collisionsNum; equihash_round(5, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 557 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round6(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[((( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9)+2)*64]; __local uint collisionsData[((( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 5) * 64]; __local uint collisionsNum; equihash_round(6, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 558 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round7(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[((( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9)+2)*64]; __local uint collisionsData[((( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 5) * 64]; __local uint collisionsNum; equihash_round(7, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 561 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) 562 void kernel_round8(__global char *ht_src, __global char *ht_dst, 563 __global uint *rowCountersSrc, __global uint *rowCountersDst, 564 __global uint *debug, __global sols_t *sols) 566 uint tid = get_global_id(0); 567 __local uchar first_words_data[((( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9)+2)*64]; 568 __local uint collisionsData[((( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 5) * 64]; 569 __local uint collisionsNum; 570 equihash_round(8, ht_src, ht_dst, debug, first_words_data, collisionsData, 571 &collisionsNum, rowCountersSrc, rowCountersDst); 573 sols->nr = sols->likely_invalids = 0; 576 uint expand_ref(__global char *ht, uint xi_offset, uint row, uint slot) 578 return *(__global uint *)(ht + row * (( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 32 + 579 slot * 32 + xi_offset - 4); 587 uint expand_refs(uint *ins, uint nr_inputs, __global char **htabs, 590 __global char *ht = htabs[round % 2]; 591 uint i = nr_inputs - 1; 592 uint j = nr_inputs * 2 - 1; 593 uint xi_offset = (8 + ((round) / 2) * 4); 594 int dup_to_watch = -1; 598 ins[j] = expand_ref(ht, xi_offset, 599 (ins[i] >> 12), ((ins[i] >> 6) & 0x3f)); 600 ins[j - 1] = expand_ref(ht, xi_offset, 601 (ins[i] >> 12), (ins[i] & 0x3f)); 605 if (dup_to_watch == -1) 606 dup_to_watch = ins[j]; 607 else if (ins[j] == dup_to_watch || ins[j - 1] == dup_to_watch) 623 void potential_sol(__global char **htabs, __global sols_t *sols, 624 uint ref0, uint ref1) 627 uint values_tmp[(1 << 9)]; 631 values_tmp[nr_values++] = ref0; 632 values_tmp[nr_values++] = ref1; 637 if (!expand_refs(values_tmp, nr_values, htabs, round)) 643 sol_i = atomic_inc(&sols->nr); 647 for (i = 0; i < (1 << 9); i++) 649 sols->values[sol_i][i] = values_tmp[i]; 651 sols->valid[sol_i] = 1; 657 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) 658 void kernel_sols(__global char *ht0, __global char *ht1, __global sols_t *sols, 659 __global uint *rowCountersSrc, __global uint *rowCountersDst) 661 uint tid = get_global_id(0); 662 __global char *htabs[2] = { ht0, ht1 }; 664 uint ht_i = (9 - 1) % 2; 666 uint xi_offset = (8 + ((9 - 1) / 2) * 4); 668 __global char *a, *b; 677 uint mask = 0xffffff; 681 a = htabs[ht_i] + tid * (( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9) * 32; 683 uint rowOffset = 8*(tid%4); 684 cnt = (rowCountersSrc[rowIdx] >> rowOffset) & 0xFF; 685 cnt = min(cnt, (uint)(( 1 << (((200 / (9 + 1)) + 1) - 20)) * 9)); 688 for (i = 0; i < cnt; i++, a += 32) 690 uint a_data = ((*(__global uint *)a) & mask); 691 ref_i = *(__global uint *)(a - 4); 692 for (j = i + 1, b = a + 32; j < cnt; j++, b += 32) 694 if (a_data == ((*(__global uint *)b) & mask)) 696 ref_j = *(__global uint *)(b - 4); 697 collisions = ((ulong)ref_i << 32) | ref_j; 705 potential_sol(htabs, sols, collisions >> 32, collisions & 0xffffffff); const unsigned char CL_MINER_KERNEL[]
const size_t CL_MINER_KERNEL_SIZE