3 #define __CL_ENABLE_EXCEPTIONS 4 #define CL_USE_DEPRECATED_OPENCL_2_0_APIS 8 #define FABCOIN_NONCE_LEN 32 21 #if defined(__clang__) 22 #pragma clang diagnostic push 23 #pragma clang diagnostic ignored "-Wunused-parameter" 25 #pragma clang diagnostic pop 26 #elif defined(__GNUC__) 27 #pragma GCC diagnostic push 28 #pragma GCC diagnostic ignored "-Wstack-protector" 30 #pragma GCC diagnostic pop 80 static unsigned getNumPlatforms();
81 static unsigned getNumDevices(
unsigned _platformId = 0);
82 static std::string platform_info(
unsigned _platformId = 0,
unsigned _deviceId = 0);
83 static std::vector<cl::Device> getDevices(std::vector<cl::Platform>
const& _platforms,
unsigned _platformId);
84 static std::vector<cl::Platform> getPlatforms();
85 static void listDevices();
88 static bool configureGPU(
90 unsigned _localWorkSize,
91 unsigned _globalWorkSize
97 std::vector<std::string> _kernels
100 void run(uint8_t *header,
size_t header_len,
uint256 nonce,
sols_t *indices, uint32_t * n_sol,
uint256 * ptr);
113 static const unsigned int z_n = 200;
114 static const unsigned int z_k = 9;
115 static const size_t z_collision_bit_length = z_n / (z_k + 1);
116 static const eh_index z_N = 1 << (z_collision_bit_length + 1);
119 for(
size_t i = 0; i < n_current_indices; ++i, ++
a, ++
b) {
131 for(
size_t step_index = 0; step_index <
PARAM_K; ++step_index) {
132 for(
size_t i = 0; i < (
unsigned int)1<<PARAM_K; i += (1 << (step_index+1))) {
133 if(compare_indices32(indices+i, indices+i+(1 << step_index), (1 << step_index)) > 0) {
134 uint32_t tmp_indices[(1 << step_index)];
135 memcpy(tmp_indices, indices+i, (1 << step_index)*
sizeof(uint32_t));
136 memcpy(indices+i, indices+i+(1 << step_index), (1 << step_index)*
sizeof(uint32_t));
137 memcpy(indices+i+(1 << step_index), tmp_indices, (1 << step_index)*
sizeof(uint32_t));
144 const uint8_t *
a = (
const uint8_t *) _a;
145 static char buf[1024];
147 for (i = 0; i < a_len && i + 2 <
sizeof (buf); i++)
148 sprintf(buf + i * 2,
"%02x", a[i]);
169 uint32_t *
b = a + len;
170 uint32_t tmp, need_sorting = 0;
171 for (uint32_t i = 0; i < len; i++)
172 if (need_sorting || a[i] > b[i])
179 else if (a[i] < b[i])
184 uint32_t *inputs = sols->
values[sol_i];
185 uint32_t seen_len = (1 << (
PREFIX() + 1)) / 8;
186 uint8_t seen[seen_len];
190 memset(seen, 0, seen_len);
191 for (i = 0; i < ((
unsigned int)1 <<
PARAM_K); i++)
193 tmp = seen[inputs[i] / 8];
194 seen[inputs[i] / 8] |= 1 << (inputs[i] & 7);
195 if (tmp == seen[inputs[i] / 8])
198 sols->
valid[sol_i] = 0;
204 sols->
valid[sol_i] = 1;
206 for (uint32_t level = 0; level <
PARAM_K; level++)
207 for (i = 0; i < ((
unsigned int)1 << PARAM_K); i += (2 << level))
208 sort_pair(&inputs[i], 1 << level);
223 const cl_int zero = 0;
234 int m_wayWorkSizeAdjust = 0;
255 return (PARAM_N / (PARAM_K + 1));
275 if( NR_ROWS_LOG == 16 )
278 if( NR_ROWS_LOG == 18 )
280 if( NR_ROWS_LOG == 19 )
282 if( NR_ROWS_LOG == 20 )
284 if( NR_ROWS_LOG == 21 )
307 return 512/PARAM_N*((PARAM_N+7)/8);
322 case 0:
return "CL_SUCCESS";
323 case -1:
return "CL_DEVICE_NOT_FOUND";
324 case -2:
return "CL_DEVICE_NOT_AVAILABLE";
325 case -3:
return "CL_COMPILER_NOT_AVAILABLE";
326 case -4:
return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
327 case -5:
return "CL_OUT_OF_RESOURCES";
328 case -6:
return "CL_OUT_OF_HOST_MEMORY";
329 case -7:
return "CL_PROFILING_INFO_NOT_AVAILABLE";
330 case -8:
return "CL_MEM_COPY_OVERLAP";
331 case -9:
return "CL_IMAGE_FORMAT_MISMATCH";
332 case -10:
return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
333 case -11:
return "CL_BUILD_PROGRAM_FAILURE";
334 case -12:
return "CL_MAP_FAILURE";
335 case -13:
return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
336 case -14:
return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
337 case -15:
return "CL_COMPILE_PROGRAM_FAILURE";
338 case -16:
return "CL_LINKER_NOT_AVAILABLE";
339 case -17:
return "CL_LINK_PROGRAM_FAILURE";
340 case -18:
return "CL_DEVICE_PARTITION_FAILED";
341 case -19:
return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
344 case -30:
return "CL_INVALID_VALUE";
345 case -31:
return "CL_INVALID_DEVICE_TYPE";
346 case -32:
return "CL_INVALID_PLATFORM";
347 case -33:
return "CL_INVALID_DEVICE";
348 case -34:
return "CL_INVALID_CONTEXT";
349 case -35:
return "CL_INVALID_QUEUE_PROPERTIES";
350 case -36:
return "CL_INVALID_COMMAND_QUEUE";
351 case -37:
return "CL_INVALID_HOST_PTR";
352 case -38:
return "CL_INVALID_MEM_OBJECT";
353 case -39:
return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
354 case -40:
return "CL_INVALID_IMAGE_SIZE";
355 case -41:
return "CL_INVALID_SAMPLER";
356 case -42:
return "CL_INVALID_BINARY";
357 case -43:
return "CL_INVALID_BUILD_OPTIONS";
358 case -44:
return "CL_INVALID_PROGRAM";
359 case -45:
return "CL_INVALID_PROGRAM_EXECUTABLE";
360 case -46:
return "CL_INVALID_KERNEL_NAME";
361 case -47:
return "CL_INVALID_KERNEL_DEFINITION";
362 case -48:
return "CL_INVALID_KERNEL";
363 case -49:
return "CL_INVALID_ARG_INDEX";
364 case -50:
return "CL_INVALID_ARG_VALUE";
365 case -51:
return "CL_INVALID_ARG_SIZE";
366 case -52:
return "CL_INVALID_KERNEL_ARGS";
367 case -53:
return "CL_INVALID_WORK_DIMENSION";
368 case -54:
return "CL_INVALID_WORK_GROUP_SIZE";
369 case -55:
return "CL_INVALID_WORK_ITEM_SIZE";
370 case -56:
return "CL_INVALID_GLOBAL_OFFSET";
371 case -57:
return "CL_INVALID_EVENT_WAIT_LIST";
372 case -58:
return "CL_INVALID_EVENT";
373 case -59:
return "CL_INVALID_OPERATION";
374 case -60:
return "CL_INVALID_GL_OBJECT";
375 case -61:
return "CL_INVALID_BUFFER_SIZE";
376 case -62:
return "CL_INVALID_MIP_LEVEL";
377 case -63:
return "CL_INVALID_GLOBAL_WORK_SIZE";
378 case -64:
return "CL_INVALID_PROPERTY";
379 case -65:
return "CL_INVALID_IMAGE_DESCRIPTOR";
380 case -66:
return "CL_INVALID_COMPILER_OPTIONS";
381 case -67:
return "CL_INVALID_LINKER_OPTIONS";
382 case -68:
return "CL_INVALID_DEVICE_PARTITION_COUNT";
385 case -1000:
return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR";
386 case -1001:
return "CL_PLATFORM_NOT_FOUND_KHR";
387 case -1002:
return "CL_INVALID_D3D10_DEVICE_KHR";
388 case -1003:
return "CL_INVALID_D3D10_RESOURCE_KHR";
389 case -1004:
return "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR";
390 case -1005:
return "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR";
391 case -9999:
return "NVIDIA: ILLEGAL READ OR WRITE TO A BUFFER";
393 fprintf(stderr,
"'%d'\n", error);
394 return "Unknown OpenCL error";
bool error(const char *fmt, const Args &...args)
#define function(a, b, c, d, k, s)
unsigned int m_stepWorkSizeAdjust
The step used in the work size adjustment.
static unsigned const c_defaultMSPerBatch
Default value of the milliseconds per global work size (per batch)
unsigned m_globalWorkSize
static unsigned s_msPerBatch
The target milliseconds per batch for the search. If 0, then no adjustment will happen.
unsigned int FABCOIN_HASH_LEN()
uint32_t parent_bucket_index
static unsigned s_extraRequiredGPUMem
GPU memory required for other things, like window rendering, etc.
static unsigned const c_defaultLocalWorkSize
Default value of the local work size. Also known as workgroup size.
char * s_hexdump(const void *_a, uint32_t a_len)
int compare_indices32(uint32_t *a, uint32_t *b, size_t n_current_indices)
unsigned int APX_NR_ELMS_LOG()
unsigned int COLL_DATA_SIZE_PER_TH()
void sort_pair(uint32_t *a, uint32_t len)
static bool s_allowCPU
Allow CPU to appear as an OpenCL device or not. Default is false.
C++ bindings for OpenCL 1.0 (rev 48) and OpenCL 1.1 (rev 33)
std::vector< cl::Kernel > m_gpuKernels
void normalize_indices(uint32_t *indices)
const char * get_error_string(cl_int error)
unsigned int ROWS_PER_UINT()
CommandQueue interface for cl_command_queue.
void * memcpy(void *a, const void *b, size_t c)
uint values[MAX_SOLS][512]
size_t select_work_size_blake(void)
uint32_t verify_sol(sols_t *sols, unsigned sol_i)
static unsigned s_workgroupSize
The local work size for the search.
Device interface for cl_device_id.
static unsigned const c_defaultGlobalWorkSizeMultiplier
Default value of the global work size as a multiplier of the local work size.
static unsigned s_initialGlobalWorkSize
The initial global work size for the searches.