3 #define __CL_ENABLE_EXCEPTIONS 4 #define CL_USE_DEPRECATED_OPENCL_2_0_APIS 10 #pragma clang diagnostic push 11 #pragma clang diagnostic ignored "-Wunused-parameter" 13 #pragma clang diagnostic pop 14 #elif defined(__GNUC__) 15 #pragma GCC diagnostic push 16 #pragma GCC diagnostic ignored "-Wstack-protector" 18 #pragma GCC diagnostic pop 36 #define EQUIHASH_N 200 39 #define NUM_COLLISION_BITS (EQUIHASH_N / (EQUIHASH_K + 1)) 40 #define NUM_INDICES (1 << EQUIHASH_K) 42 #define NUM_VALUES (1 << (NUM_COLLISION_BITS+1)) 43 #define NUM_BUCKETS (1 << NUM_COLLISION_BITS) 44 #define DIGEST_SIZE 25 62 uint32_t dropped_coll;
63 uint32_t dropped_stor;
80 static unsigned getNumPlatforms();
81 static unsigned getNumDevices(
unsigned _platformId = 0);
82 static std::string platform_info(
unsigned _platformId = 0,
unsigned _deviceId = 0);
83 static std::vector<cl::Device> getDevices(std::vector<cl::Platform>
const& _platforms,
unsigned _platformId);
84 static std::vector<cl::Platform> getPlatforms();
85 static void listDevices();
88 static bool configureGPU(
90 unsigned _localWorkSize,
91 unsigned _globalWorkSize
97 std::vector<std::string> _kernels
100 void run(uint8_t *header,
size_t header_len,
uint256 nonce,
sols_t * indices, uint32_t * n_sol,
uint256 * ptr);
106 static unsigned const c_defaultLocalWorkSize;
108 static unsigned const c_defaultGlobalWorkSizeMultiplier;
110 static unsigned const c_defaultMSPerBatch;
113 static const unsigned int z_n = 200;
114 static const unsigned int z_k = 9;
115 static const size_t z_collision_bit_length = z_n / (z_k + 1);
116 static const eh_index z_N = 1 << (z_collision_bit_length + 1);
119 for(
size_t i = 0; i < n_current_indices; ++i, ++
a, ++
b) {
131 for(
size_t step_index = 0; step_index <
EQUIHASH_K; ++step_index) {
132 for(
size_t i = 0; i <
NUM_INDICES; i += (1 << (step_index+1))) {
133 if(compare_indices32(indices+i, indices+i+(1 << step_index), (1 << step_index)) > 0) {
134 uint32_t tmp_indices[(1 << step_index)];
135 memcpy(tmp_indices, indices+i, (1 << step_index)*
sizeof(uint32_t));
136 memcpy(indices+i, indices+i+(1 << step_index), (1 << step_index)*
sizeof(uint32_t));
137 memcpy(indices+i+(1 << step_index), tmp_indices, (1 << step_index)*
sizeof(uint32_t));
144 const uint8_t *
a = (
const uint8_t *) _a;
145 static char buf[1024];
147 for (i = 0; i < a_len && i + 2 <
sizeof (buf); i++)
148 sprintf(buf + i * 2,
"%02x", a[i]);
169 uint32_t *
b = a + len;
170 uint32_t tmp, need_sorting = 0;
171 for (uint32_t i = 0; i < len; i++)
172 if (need_sorting || a[i] > b[i])
179 else if (a[i] < b[i])
184 uint32_t *inputs = sols->
values[sol_i];
185 uint32_t seen_len = (1 << (
PREFIX + 1)) / 8;
186 uint8_t seen[seen_len];
190 memset(seen, 0, seen_len);
191 for (i = 0; i < (1 <<
PARAM_K); i++)
193 tmp = seen[inputs[i] / 8];
194 seen[inputs[i] / 8] |= 1 << (inputs[i] & 7);
195 if (tmp == seen[inputs[i] / 8])
198 sols->
valid[sol_i] = 0;
204 sols->
valid[sol_i] = 1;
206 for (uint32_t level = 0; level <
PARAM_K; level++)
207 for (i = 0; i < (1 <<
PARAM_K); i += (2 << level))
208 sort_pair(&inputs[i], 1 << level);
213 std::vector<cl::Kernel> m_gpuKernels;
221 size_t dbg_size = 1 *
sizeof (
debug_t);
223 const cl_int zero = 0;
225 uint32_t * dst_solutions;
227 unsigned m_globalWorkSize;
228 bool m_openclOnePointOne;
229 unsigned m_deviceBits;
232 unsigned int m_stepWorkSizeAdjust;
234 int m_wayWorkSizeAdjust = 0;
237 static unsigned s_workgroupSize;
239 static unsigned s_initialGlobalWorkSize;
241 static unsigned s_msPerBatch;
243 static bool s_allowCPU;
246 static unsigned s_extraRequiredGPUMem;
252 case 0:
return "CL_SUCCESS";
253 case -1:
return "CL_DEVICE_NOT_FOUND";
254 case -2:
return "CL_DEVICE_NOT_AVAILABLE";
255 case -3:
return "CL_COMPILER_NOT_AVAILABLE";
256 case -4:
return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
257 case -5:
return "CL_OUT_OF_RESOURCES";
258 case -6:
return "CL_OUT_OF_HOST_MEMORY";
259 case -7:
return "CL_PROFILING_INFO_NOT_AVAILABLE";
260 case -8:
return "CL_MEM_COPY_OVERLAP";
261 case -9:
return "CL_IMAGE_FORMAT_MISMATCH";
262 case -10:
return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
263 case -11:
return "CL_BUILD_PROGRAM_FAILURE";
264 case -12:
return "CL_MAP_FAILURE";
265 case -13:
return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
266 case -14:
return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
267 case -15:
return "CL_COMPILE_PROGRAM_FAILURE";
268 case -16:
return "CL_LINKER_NOT_AVAILABLE";
269 case -17:
return "CL_LINK_PROGRAM_FAILURE";
270 case -18:
return "CL_DEVICE_PARTITION_FAILED";
271 case -19:
return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
274 case -30:
return "CL_INVALID_VALUE";
275 case -31:
return "CL_INVALID_DEVICE_TYPE";
276 case -32:
return "CL_INVALID_PLATFORM";
277 case -33:
return "CL_INVALID_DEVICE";
278 case -34:
return "CL_INVALID_CONTEXT";
279 case -35:
return "CL_INVALID_QUEUE_PROPERTIES";
280 case -36:
return "CL_INVALID_COMMAND_QUEUE";
281 case -37:
return "CL_INVALID_HOST_PTR";
282 case -38:
return "CL_INVALID_MEM_OBJECT";
283 case -39:
return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
284 case -40:
return "CL_INVALID_IMAGE_SIZE";
285 case -41:
return "CL_INVALID_SAMPLER";
286 case -42:
return "CL_INVALID_BINARY";
287 case -43:
return "CL_INVALID_BUILD_OPTIONS";
288 case -44:
return "CL_INVALID_PROGRAM";
289 case -45:
return "CL_INVALID_PROGRAM_EXECUTABLE";
290 case -46:
return "CL_INVALID_KERNEL_NAME";
291 case -47:
return "CL_INVALID_KERNEL_DEFINITION";
292 case -48:
return "CL_INVALID_KERNEL";
293 case -49:
return "CL_INVALID_ARG_INDEX";
294 case -50:
return "CL_INVALID_ARG_VALUE";
295 case -51:
return "CL_INVALID_ARG_SIZE";
296 case -52:
return "CL_INVALID_KERNEL_ARGS";
297 case -53:
return "CL_INVALID_WORK_DIMENSION";
298 case -54:
return "CL_INVALID_WORK_GROUP_SIZE";
299 case -55:
return "CL_INVALID_WORK_ITEM_SIZE";
300 case -56:
return "CL_INVALID_GLOBAL_OFFSET";
301 case -57:
return "CL_INVALID_EVENT_WAIT_LIST";
302 case -58:
return "CL_INVALID_EVENT";
303 case -59:
return "CL_INVALID_OPERATION";
304 case -60:
return "CL_INVALID_GL_OBJECT";
305 case -61:
return "CL_INVALID_BUFFER_SIZE";
306 case -62:
return "CL_INVALID_MIP_LEVEL";
307 case -63:
return "CL_INVALID_GLOBAL_WORK_SIZE";
308 case -64:
return "CL_INVALID_PROPERTY";
309 case -65:
return "CL_INVALID_IMAGE_DESCRIPTOR";
310 case -66:
return "CL_INVALID_COMPILER_OPTIONS";
311 case -67:
return "CL_INVALID_LINKER_OPTIONS";
312 case -68:
return "CL_INVALID_DEVICE_PARTITION_COUNT";
315 case -1000:
return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR";
316 case -1001:
return "CL_PLATFORM_NOT_FOUND_KHR";
317 case -1002:
return "CL_INVALID_D3D10_DEVICE_KHR";
318 case -1003:
return "CL_INVALID_D3D10_RESOURCE_KHR";
319 case -1004:
return "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR";
320 case -1005:
return "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR";
321 case -9999:
return "NVIDIA: ILLEGAL READ OR WRITE TO A BUFFER";
323 fprintf(stderr,
"'%d'\n", error);
324 return "Unknown OpenCL error";
bool error(const char *fmt, const Args &...args)
#define function(a, b, c, d, k, s)
uint32_t parent_bucket_index
char * s_hexdump(const void *_a, uint32_t a_len)
concurrent_queue< JitTask > m_queue
int compare_indices32(uint32_t *a, uint32_t *b, size_t n_current_indices)
void sort_pair(uint32_t *a, uint32_t len)
void normalize_indices(uint32_t *indices)
const char * get_error_string(cl_int error)
CommandQueue interface for cl_command_queue.
uint8_t const size_t const size
void * memcpy(void *a, const void *b, size_t c)
uint values[MAX_SOLS][512]
size_t select_work_size_blake(void)
uint32_t verify_sol(sols_t *sols, unsigned sol_i)
Device interface for cl_device_id.
C++ bindings for OpenCL 1.0 (rev 48) and OpenCL 1.1 (rev 33)
uint64_t digest_t[(DIGEST_SIZE+sizeof(uint64_t)-1)/sizeof(uint64_t)]