5 # 1 "/usr/include/stdc-predef.h" 1 3 4 14 uint values[10][(1 << 9)]; 18 #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable 20 __constant ulong blake_iv[] = 22 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 23 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, 24 0x510e527fade682d1, 0x9b05688c2b3e6c1f, 25 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179, 32 void kernel_init_ht(__global char *ht, __global uint *rowCounters) 34 rowCounters[get_global_id(0)] = 0; 37 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) 38 void kernel_round0(__global ulong *blake_state, __global char *ht, __global ulong *buf, 39 __global uint *rowCounters, __global uint *debug) 41 uint tid = get_global_id(0); 43 uint inputs_per_thread = (1 << (184 / (7 + 1))) / get_global_size(0); 44 uint input = tid * inputs_per_thread; 45 uint input_end = (tid + 1) * inputs_per_thread; 47 ulong val1 = 0, val9 = 0; 60 while (input < input_end) 63 v[0] = blake_state[0]; 64 v[1] = blake_state[1]; 65 v[2] = blake_state[2]; 66 v[3] = blake_state[3]; 67 v[4] = blake_state[4]; 68 v[5] = blake_state[5]; 69 v[6] = blake_state[6]; 70 v[7] = blake_state[7]; 80 v[12] ^= 128 + buflen + 4 ; 86 val1 = buf[2] | (ulong)input<<(buflen%8*8); 90 val9 = buf[10] | (ulong)input<<(buflen%8*8); 94 v[0] = (v[0] + v[4] + buf[ 1+0 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + val1); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 95 v[1] = (v[1] + v[5] + buf[ 1+2 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+3 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 96 v[2] = (v[2] + v[6] + buf[ 1+4 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+5 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 97 v[3] = (v[3] + v[7] + buf[ 1+6 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+7 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 98 v[0] = (v[0] + v[5] + buf[ 1+8 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + val9); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 99 v[1] = (v[1] + v[6] + buf[ 1+10 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+11 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 100 v[2] = (v[2] + v[7] + buf[ 1+12 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+13 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 101 v[3] = (v[3] + v[4] + buf[ 1+14 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+15 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 103 v[0] = (v[0] + v[4] + buf[ 1+14 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+10 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 104 v[1] = (v[1] + v[5] + buf[ 1+4 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+8 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 105 v[2] = (v[2] + v[6] + val9); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+15 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 106 v[3] = (v[3] + v[7] + buf[ 1+13 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+6 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 107 v[0] = (v[0] + v[5] + val1); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+12 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 108 v[1] = (v[1] + v[6] + buf[ 1+0 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+2 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 109 v[2] = (v[2] + v[7] + buf[ 1+11 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+7 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 110 v[3] = (v[3] + v[4] + buf[ 1+5 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+3 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 112 v[0] = (v[0] + v[4] + buf[ 1+11 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+8 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 113 v[1] = (v[1] + v[5] + buf[ 1+12 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+0 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 114 v[2] = (v[2] + v[6] + buf[ 1+5 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+2 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 115 v[3] = (v[3] + v[7] + buf[ 1+15 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+13 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 116 v[0] = (v[0] + v[5] + buf[ 1+10 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+14 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 117 v[1] = (v[1] + v[6] + buf[ 1+3 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+6 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 118 v[2] = (v[2] + v[7] + buf[ 1+7 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + val1); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 119 v[3] = (v[3] + v[4] + val9); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+4 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 121 v[0] = (v[0] + v[4] + buf[ 1+7 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + val9); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 122 v[1] = (v[1] + v[5] + buf[ 1+3 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + val1); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 123 v[2] = (v[2] + v[6] + buf[ 1+13 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+12 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 124 v[3] = (v[3] + v[7] + buf[ 1+11 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+14 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 125 v[0] = (v[0] + v[5] + buf[ 1+2 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+6 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 126 v[1] = (v[1] + v[6] + buf[ 1+5 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+10 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 127 v[2] = (v[2] + v[7] + buf[ 1+4 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+0 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 128 v[3] = (v[3] + v[4] + buf[ 1+15 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+8 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 130 v[0] = (v[0] + v[4] + val9); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+0 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 131 v[1] = (v[1] + v[5] + buf[ 1+5 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+7 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 132 v[2] = (v[2] + v[6] + buf[ 1+2 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+4 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 133 v[3] = (v[3] + v[7] + buf[ 1+10 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+15 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 134 v[0] = (v[0] + v[5] + buf[ 1+14 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + val1); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 135 v[1] = (v[1] + v[6] + buf[ 1+11 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+12 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 136 v[2] = (v[2] + v[7] + buf[ 1+6 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+8 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 137 v[3] = (v[3] + v[4] + buf[ 1+3 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+13 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 139 v[0] = (v[0] + v[4] + buf[ 1+2 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+12 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 140 v[1] = (v[1] + v[5] + buf[ 1+6 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+10 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 141 v[2] = (v[2] + v[6] + buf[ 1+0 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+11 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 142 v[3] = (v[3] + v[7] + buf[ 1+8 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+3 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 143 v[0] = (v[0] + v[5] + buf[ 1+4 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+13 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 144 v[1] = (v[1] + v[6] + buf[ 1+7 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+5 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 145 v[2] = (v[2] + v[7] + buf[ 1+15 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+14 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 146 v[3] = (v[3] + v[4] + val1); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + val9); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 148 v[0] = (v[0] + v[4] + buf[ 1+12 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+5 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 149 v[1] = (v[1] + v[5] + val1); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+15 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 150 v[2] = (v[2] + v[6] + buf[ 1+14 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+13 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 151 v[3] = (v[3] + v[7] + buf[ 1+4 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+10 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 152 v[0] = (v[0] + v[5] + buf[ 1+0 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+7 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 153 v[1] = (v[1] + v[6] + buf[ 1+6 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+3 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 154 v[2] = (v[2] + v[7] + val9); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+2 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 155 v[3] = (v[3] + v[4] + buf[ 1+8 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+11 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 157 v[0] = (v[0] + v[4] + buf[ 1+13 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+11 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 158 v[1] = (v[1] + v[5] + buf[ 1+7 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+14 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 159 v[2] = (v[2] + v[6] + buf[ 1+12 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + val1); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 160 v[3] = (v[3] + v[7] + buf[ 1+3 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + val9); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 161 v[0] = (v[0] + v[5] + buf[ 1+5 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+0 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 162 v[1] = (v[1] + v[6] + buf[ 1+15 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+4 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 163 v[2] = (v[2] + v[7] + buf[ 1+8 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+6 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 164 v[3] = (v[3] + v[4] + buf[ 1+2 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+10 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 166 v[0] = (v[0] + v[4] + buf[ 1+6 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+15 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 167 v[1] = (v[1] + v[5] + buf[ 1+14 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + val9); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 168 v[2] = (v[2] + v[6] + buf[ 1+11 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+3 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 169 v[3] = (v[3] + v[7] + buf[ 1+0 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+8 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 170 v[0] = (v[0] + v[5] + buf[ 1+12 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+2 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 171 v[1] = (v[1] + v[6] + buf[ 1+13 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+7 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 172 v[2] = (v[2] + v[7] + val1); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+4 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 173 v[3] = (v[3] + v[4] + buf[ 1+10 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+5 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 175 v[0] = (v[0] + v[4] + buf[ 1+10 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+2 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 176 v[1] = (v[1] + v[5] + buf[ 1+8 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+4 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 177 v[2] = (v[2] + v[6] + buf[ 1+7 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+6 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 178 v[3] = (v[3] + v[7] + val1); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+5 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 179 v[0] = (v[0] + v[5] + buf[ 1+15 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+11 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 180 v[1] = (v[1] + v[6] + val9); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+14 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 181 v[2] = (v[2] + v[7] + buf[ 1+3 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+12 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 182 v[3] = (v[3] + v[4] + buf[ 1+13 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+0 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 184 v[0] = (v[0] + v[4] + buf[ 1+0 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + val1); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 185 v[1] = (v[1] + v[5] + buf[ 1+2 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+3 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 186 v[2] = (v[2] + v[6] + buf[ 1+4 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+5 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 187 v[3] = (v[3] + v[7] + buf[ 1+6 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+7 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 188 v[0] = (v[0] + v[5] + buf[ 1+8 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + val9); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 189 v[1] = (v[1] + v[6] + buf[ 1+10 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+11 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 190 v[2] = (v[2] + v[7] + buf[ 1+12 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+13 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 191 v[3] = (v[3] + v[4] + buf[ 1+14 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+15 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 193 v[0] = (v[0] + v[4] + buf[ 1+14 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 32); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 24); v[0] = (v[0] + v[4] + buf[ 1+10 ]); v[12] = rotate((v[12] ^ v[0]), (ulong)64 - 16); v[8] = (v[8] + v[12]); v[4] = rotate((v[4] ^ v[8]), (ulong)64 - 63);; 194 v[1] = (v[1] + v[5] + buf[ 1+4 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 32); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 24); v[1] = (v[1] + v[5] + buf[ 1+8 ]); v[13] = rotate((v[13] ^ v[1]), (ulong)64 - 16); v[9] = (v[9] + v[13]); v[5] = rotate((v[5] ^ v[9]), (ulong)64 - 63);; 195 v[2] = (v[2] + v[6] + val9); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 32); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 24); v[2] = (v[2] + v[6] + buf[ 1+15 ]); v[14] = rotate((v[14] ^ v[2]), (ulong)64 - 16); v[10] = (v[10] + v[14]); v[6] = rotate((v[6] ^ v[10]), (ulong)64 - 63);; 196 v[3] = (v[3] + v[7] + buf[ 1+13 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 32); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 24); v[3] = (v[3] + v[7] + buf[ 1+6 ]); v[15] = rotate((v[15] ^ v[3]), (ulong)64 - 16); v[11] = (v[11] + v[15]); v[7] = rotate((v[7] ^ v[11]), (ulong)64 - 63);; 197 v[0] = (v[0] + v[5] + val1); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 32); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 24); v[0] = (v[0] + v[5] + buf[ 1+12 ]); v[15] = rotate((v[15] ^ v[0]), (ulong)64 - 16); v[10] = (v[10] + v[15]); v[5] = rotate((v[5] ^ v[10]), (ulong)64 - 63);; 198 v[1] = (v[1] + v[6] + buf[ 1+0 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 32); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 24); v[1] = (v[1] + v[6] + buf[ 1+2 ]); v[12] = rotate((v[12] ^ v[1]), (ulong)64 - 16); v[11] = (v[11] + v[12]); v[6] = rotate((v[6] ^ v[11]), (ulong)64 - 63);; 199 v[2] = (v[2] + v[7] + buf[ 1+11 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 32); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 24); v[2] = (v[2] + v[7] + buf[ 1+7 ]); v[13] = rotate((v[13] ^ v[2]), (ulong)64 - 16); v[8] = (v[8] + v[13]); v[7] = rotate((v[7] ^ v[8]), (ulong)64 - 63);; 200 v[3] = (v[3] + v[4] + buf[ 1+5 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 32); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 24); v[3] = (v[3] + v[4] + buf[ 1+3 ]); v[14] = rotate((v[14] ^ v[3]), (ulong)64 - 16); v[9] = (v[9] + v[14]); v[4] = rotate((v[4] ^ v[9]), (ulong)64 - 63);; 206 h[0] = blake_state[0] ^ v[0] ^ v[8]; 207 h[1] = blake_state[1] ^ v[1] ^ v[9]; 208 h[2] = blake_state[2] ^ v[2] ^ v[10]; 209 h[3] = blake_state[3] ^ v[3] ^ v[11]; 210 h[4] = blake_state[4] ^ v[4] ^ v[12]; 212 h[5] = (blake_state[5] ^ v[5] ^ v[13]) & 0xffffffffffff ; 214 uchar *hash = (uchar *)h; 215 for( uint i= 0; i < 2; i++ ) 217 hash = hash + 23 * i; 223 xi0 = *(ulong *)(hash+0); 224 xi1 = *(ulong *)(hash+8); 225 xi2 = *(ulong *)(hash+16); 228 row = ((((uint)hash[0] << 8) | hash[1]) << 5) | hash[2] >> 3; 233 xi0 = (xi0 >> 16) | (xi1 << (64 - 16)); 234 xi1 = (xi1 >> 16) | (xi2 << (64 - 16)); 237 p = ht + row * (( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2) * 32; 239 uint rowOffset = 8*(row%4); 240 uint xcnt = atomic_add(rowCounters + rowIdx, 1 << rowOffset); 241 xcnt = (xcnt >> rowOffset) & 0xFF; 243 if (cnt < (( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2)) 245 p += cnt * 32 + (8 + ((0) / 2) * 4); 248 *(__global uint *)(p - 4) = (input<<1) + i; 251 *(__global ulong *)(p + 0) = xi0; 252 *(__global ulong *)(p + 8) = xi1; 253 *(__global ulong *)(p + 16) = xi2 & 0xffffffffff; 269 uint getrow(uint round, __global ulong *a, __global ulong *b) 273 __global uchar *p1 = (__global uchar *)a; 274 __global uchar *p2 = (__global uchar *)b; 279 row = ((uint)(p1[0] ^ p2[0]) & 0x1) << 20 280 | ((uint)(p1[0+1] ^ p2[0+1])) << 12 281 | ((uint)(p1[0+2] ^ p2[0+2])) << 4 282 | (p1[0+3] ^ p2[0+3]) >> 4; 285 row = ((uint)(p1[0] ^ p2[0]) & 0x3) << 19 286 | ((uint)(p1[0+1] ^ p2[0+1])) << 11 287 | ((uint)(p1[0+2] ^ p2[0+2])) << 3 288 | (p1[0+3] ^ p2[0+3]) >> 5; 291 row = ((uint)(p1[0] ^ p2[0]) & 0x7) << 18 292 | ((uint)(p1[0+1] ^ p2[0+1])) << 10 293 | ((uint)(p1[0+2] ^ p2[0+2])) << 2 294 | (p1[0+3] ^ p2[0+3]) >> 6; 297 row = ((uint)(p1[0] ^ p2[0]) & 0xf) << 17 298 | ((uint)(p1[0+1] ^ p2[0+1])) << 9 299 | ((uint)(p1[0+2] ^ p2[0+2])) << 1 300 | (p1[0+3] ^ p2[0+3]) >> 7; 303 row = ((uint)(p1[0] ^ p2[0]) & 0x1f ) << 16 304 | ((uint)(p1[1] ^ p2[1])) << 8 308 row = (((uint)(p1[0] ^ p2[0]) & 0x3f ) << 15 ) 309 | ((uint)(p1[1] ^ p2[1])) << 7 310 | (p1[2] ^ p2[2]) >> 1; 320 uint ht_store(uint round, __global char *ht_src, __global char *ht, uint tree, __global uint *rowCounters) 322 ulong xi0, xi1, xi2, xi3 = 0; 326 __global ulong *a8, *b8; 327 __global uint *a4, *b4; 329 uint collisionThreadId = (tree >> 8); 330 uint slot_a = (tree & 0xf); 331 uint slot_b = ((tree >> 4) & 0xf); 333 __global uchar *ptr = (__global uchar *)ht_src + collisionThreadId * (( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2) * 32 + (8 + ((round - 1) / 2) * 4); 335 a8 = (__global ulong *)(ptr + slot_a * 32); 336 b8 = (__global ulong *)(ptr + slot_b * 32); 337 a4 = (__global uint *)(ptr + slot_a * 32); 338 b4 = (__global uint *)(ptr + slot_b * 32); 359 xi0 = (a4[0] ^ b4[0]) | ((ulong)(a4[1] ^ b4[1]) << 32); 360 xi1 = (a4[2] ^ b4[2]) | ((ulong)((a4[3] ^ b4[3]) & 0xffffff ) << 32); 363 else if (round == 4 ) 366 xi0 = (a4[0] ^ b4[0]) | ((ulong)(a4[1] ^ b4[1]) << 32); 367 xi1 = (a4[2] ^ b4[2]) ; 370 else if (round == 5 ) 392 xi0 = (xi0 >> 16) | (xi1 << (64 - 16)); 393 xi1 = (xi1 >> 16) | (xi2 << (64 - 16)); 398 xi0 = (xi0 >> 24) | (xi1 << (64 - 24)); 399 xi1 = (xi1 >> 24) | (xi2 << (64 - 24)); 404 row = getrow(round, a8, b8); 406 p = ht + row * (( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2) * 32; 408 uint rowOffset = 8*(row%4); 409 uint xcnt = atomic_add(rowCounters + rowIdx, 1 << rowOffset); 410 xcnt = (xcnt >> rowOffset) & 0xFF; 412 if (cnt >= (( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2)) 415 atomic_sub(rowCounters + rowIdx, 1 << rowOffset); 419 p += cnt * 32 + (8 + ((round) / 2) * 4); 422 *(__global uint *)(p - 4) = tree; 426 *(__global ulong *)(p + 0) = xi0; 427 *(__global ulong *)(p + 8) = xi1; 428 *(__global ulong *)(p + 16) = xi2 & 0xffff; 433 *(__global uint *)(p + 0) = xi0; 434 *(__global ulong *)(p + 4) = (xi0 >> 32) | (xi1 << 32); 435 *(__global uint *)(p + 12) = ((xi1 >> 32) & 0xffffff ); 440 *(__global uint *)(p + 0) = xi0 & 0xffffffff; 441 *(__global ulong *)(p + 4) = (xi0 >> 32) | (xi1 << 32); 446 *(__global ulong *)(p + 0) = xi0; 447 *(__global ulong *)(p + 8) = xi1 & 0xff; 452 *(__global ulong *)(p + 0) = xi0 & 0xffffffffffff; 454 else if (round == 6 ) 457 *(__global uint *)(p + 0) = xi0 & 0xffffffff; 467 void equihash_round(uint round, 468 __global char *ht_src, 469 __global char *ht_dst, 470 __global uint *debug, 471 __local uchar *first_words_data, 472 __local uint *collisionsData, 473 __local uint *collisionsNum, 474 __global uint *rowCountersSrc, 475 __global uint *rowCountersDst) 477 uint tid = get_global_id(0); 478 uint tlid = get_local_id(0); 482 __local uchar *first_words = &first_words_data[((( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2)+2)*tlid]; 489 uint dropped_coll = 0; 490 uint dropped_stor = 0; 491 __global ulong *a, *b; 495 xi_offset = (8 + ((round - 1) / 2) * 4); 499 barrier(CLK_LOCAL_MEM_FENCE); 500 p = (ht_src + tid * (( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2) * 32); 502 uint rowOffset = 8*(tid%4); 503 cnt = (rowCountersSrc[rowIdx] >> rowOffset) & 0xFF; 504 cnt = min(cnt, (uint)(( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2)); 515 for (i = 0; i < cnt; i++, p += 32) 520 first_words[i] = (p[0] & 0x07 ) >> 1; 523 first_words[i] = (p[0] & 0x0f ) >> 2; 526 first_words[i] = (p[0] & 0x1f ) >> 3; 529 first_words[i] = (p[0] & 0x3f ) >> 4; 532 first_words[i] = (p[0] & 0x7f ) >> 5; 535 first_words[i] = p[0] >> 6; 538 first_words[i] = (( p[0] & 0x1 ) << 1 )| ( p[1] >> 7 ); 545 for (i = 0; i < cnt-1 && thCollNum < ((( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2) * 5); i++) 547 uint collision = (tid << 8) | (i << 4) | (i + 1); 549 for (j = i+1; (j+4) < cnt;) 552 if( first_words[i] == first_words[j] ) 556 uint index = atomic_inc(collisionsNum); 557 collisionsData[index] = collision; 565 if (first_words[i] == first_words[j]) 568 uint index = atomic_inc(collisionsNum); 569 collisionsData[index] = collision; 576 if (first_words[i] == first_words[j]) 579 uint index = atomic_inc(collisionsNum); 580 collisionsData[index] = collision; 587 if (first_words[i] == first_words[j]) 590 uint index = atomic_inc(collisionsNum); 591 collisionsData[index] = collision; 600 if (first_words[i] == first_words[j]) 603 uint index = atomic_inc(collisionsNum); 604 collisionsData[index] = collision; 611 barrier(CLK_LOCAL_MEM_FENCE); 613 uint totalCollisions = *collisionsNum; 614 for (uint index = tlid; index < totalCollisions; index += get_local_size(0)) 616 dropped_stor += ht_store(round, ht_src, ht_dst, collisionsData[index], rowCountersDst); 624 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round1(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[((( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2)+2)*64]; __local uint collisionsData[((( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2) * 5) * 64]; __local uint collisionsNum; equihash_round(1, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 625 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round2(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[((( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2)+2)*64]; __local uint collisionsData[((( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2) * 5) * 64]; __local uint collisionsNum; equihash_round(2, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 626 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round3(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[((( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2)+2)*64]; __local uint collisionsData[((( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2) * 5) * 64]; __local uint collisionsNum; equihash_round(3, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 627 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round4(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[((( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2)+2)*64]; __local uint collisionsData[((( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2) * 5) * 64]; __local uint collisionsNum; equihash_round(4, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 628 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) void kernel_round5(__global char *ht_src, __global char *ht_dst, __global uint *rowCountersSrc, __global uint *rowCountersDst, __global uint *debug) { __local uchar first_words_data[((( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2)+2)*64]; __local uint collisionsData[((( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2) * 5) * 64]; __local uint collisionsNum; equihash_round(5, ht_src, ht_dst, debug, first_words_data, collisionsData, &collisionsNum, rowCountersSrc, rowCountersDst); } 631 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) 632 void kernel_round6(__global char *ht_src, __global char *ht_dst, 633 __global uint *rowCountersSrc, __global uint *rowCountersDst, 634 __global uint *debug, __global sols_t *sols) 636 uint tid = get_global_id(0); 638 __local uchar first_words_data[((( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2)+2)*64]; 639 __local uint collisionsData[((( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2) * 5) * 64]; 640 __local uint collisionsNum; 642 equihash_round(6, ht_src, ht_dst, debug, first_words_data, collisionsData, 643 &collisionsNum, rowCountersSrc, rowCountersDst); 646 sols->nr = sols->likely_invalids = 0; 649 uint expand_ref(__global char *ht, uint xi_offset, uint row, uint slot) 651 return *(__global uint *)(ht + row * (( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2) * 32 + 652 slot * 32 + xi_offset - 4); 660 uint expand_refs(uint *ins, uint nr_inputs, __global char **htabs, uint round) 662 __global char *ht = htabs[round % 2]; 663 uint i = nr_inputs - 1; 664 uint j = nr_inputs * 2 - 1; 665 uint xi_offset = (8 + ((round) / 2) * 4); 666 int dup_to_watch = -1; 670 ins[j] = expand_ref(ht, xi_offset, 671 (ins[i] >> 8), ((ins[i] >> 4) & 0xf)); 672 ins[j - 1] = expand_ref(ht, xi_offset, 673 (ins[i] >> 8), (ins[i] & 0xf)); 677 if (dup_to_watch == -1) 678 dup_to_watch = ins[j]; 679 else if (ins[j] == dup_to_watch || ins[j - 1] == dup_to_watch) 695 void potential_sol(__global char **htabs, __global sols_t *sols, 696 uint ref0, uint ref1) 699 uint values_tmp[(1 << 7)]; 703 values_tmp[nr_values++] = ref0; 704 values_tmp[nr_values++] = ref1; 710 if (!expand_refs(values_tmp, nr_values, htabs, round)) 716 sol_i = atomic_inc(&sols->nr); 720 for (i = 0; i < (1 << 7); i++) 721 sols->values[sol_i][i] = values_tmp[i]; 722 sols->valid[sol_i] = 1; 728 __kernel __attribute__((reqd_work_group_size(64, 1, 1))) 729 void kernel_sols(__global char *ht0, __global char *ht1, __global sols_t *sols, 730 __global uint *rowCountersSrc, __global uint *rowCountersDst) 732 uint tid = get_global_id(0); 734 __global char *htabs[2] = { ht0, ht1 }; 735 __global char *hcounters[2] = { rowCountersSrc, rowCountersDst }; 736 uint ht_i = (7 - 1) % 2; 738 uint xi_offset = (8 + ((7 - 1) / 2) * 4); 740 __global char *a, *b; 751 uint mask = 0xffffffff; 756 a = htabs[ht_i] + tid * (( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2) * 32; 758 uint rowOffset = 8*(tid%4); 759 cnt = (rowCountersSrc[rowIdx] >> rowOffset) & 0xFF; 760 cnt = min(cnt, (uint)(( 1 << (((184 / (7 + 1)) + 1) - 21)) * 2)); 764 for (i = 0; i < cnt; i++, a += 32) 766 uint a_data = ((*(__global uint *)a) & mask); 767 ref_i = *(__global uint *)(a - 4); 769 for (j = i + 1, b = a + 32; j < cnt; j++, b += 32) 771 if (a_data == ((*(__global uint *)b) & mask)) 773 ref_j = *(__global uint *)(b - 4); 774 collisions = ((ulong)ref_i << 32) | ref_j; 782 potential_sol(htabs, sols, collisions >> 32, collisions & 0xffffffff); const size_t CL_MINER_KERNEL_SIZE184
const unsigned char CL_MINER_KERNEL184[]