Fabcoin Core  0.16.2
P2P Digital Currency
libclwrapper.h
Go to the documentation of this file.
1 #pragma once
2 
3 #define __CL_ENABLE_EXCEPTIONS
4 #define CL_USE_DEPRECATED_OPENCL_2_0_APIS
5 
6 // Length of 1 element (slot) in bytes
7 #define SLOT_LEN 32
8 #define FABCOIN_NONCE_LEN 32
9 // Maximum number of solutions reported by kernel to host
10 #define MAX_SOLS 10
11 #define BLAKE_WPS 10
12 
13 typedef struct sols_s
14 {
17  uint8_t valid[MAX_SOLS];
19 }sols_t;
20 
21 #if defined(__clang__)
22 #pragma clang diagnostic push
23 #pragma clang diagnostic ignored "-Wunused-parameter"
24 #include "cl.hpp"
25 #pragma clang diagnostic pop
26 #elif defined(__GNUC__)
27 #pragma GCC diagnostic push
28 #pragma GCC diagnostic ignored "-Wstack-protector"
29 #include "cl.hpp"
30 #pragma GCC diagnostic pop
31 #else
32 #include "cl.hpp"
33 #endif
34 #include <time.h>
35 #include <functional>
36 
37 #include "sodium.h"
38 
39 typedef uint8_t uchar;
40 typedef uint32_t uint;
41 typedef uint64_t ulong;
42 
43 #include "blake.h"
44 #include <cassert>
45 #include "uint256.h"
46 
47 typedef struct element element_t;
48 
49 struct element {
50  uint32_t digest_index;
52 };
53 
54 
55 typedef struct bucket {
56  unsigned size;
58 } bucket_t;
59 
/*
 * Pair of 32-bit counters; dbg_size in cl_gpuminer is sized as one of these.
 * NOTE(review): the names suggest "dropped collisions" and "dropped stores"
 * reported by the kernels -- confirm against the OpenCL source.
 */
struct debug_s
{
    uint32_t dropped_coll;
    uint32_t dropped_stor;
};
typedef struct debug_s debug_t;
65 
66 typedef uint32_t eh_index;
67 
69 {
70 
71 public:
72 
73  cl_gpuminer(unsigned int n, unsigned int k);
74  ~cl_gpuminer();
75 
76  static bool searchForAllDevices(unsigned _platformId, std::function<bool(cl::Device const&)> _callback);
77  static bool searchForAllDevices(std::function<bool(cl::Device const&)> _callback);
78  static void doForAllDevices(unsigned _platformId, std::function<void(cl::Device const&)> _callback);
79  static void doForAllDevices(std::function<void(cl::Device const&)> _callback);
80  static unsigned getNumPlatforms();
81  static unsigned getNumDevices(unsigned _platformId = 0);
82  static std::string platform_info(unsigned _platformId = 0, unsigned _deviceId = 0);
83  static std::vector<cl::Device> getDevices(std::vector<cl::Platform> const& _platforms, unsigned _platformId);
84  static std::vector<cl::Platform> getPlatforms();
85  static void listDevices();
86 
87  // Currently just prints memory of the GPU
88  static bool configureGPU(
89  unsigned _platformId,
90  unsigned _localWorkSize,
91  unsigned _globalWorkSize
92  );
93 
94  bool init(
95  unsigned _platformId,
96  unsigned _deviceId,
97  std::vector<std::string> _kernels
98  );
99 
100  void run(uint8_t *header, size_t header_len, uint256 nonce, sols_t *indices, uint32_t * n_sol, uint256 * ptr);
101 
102  void finish();
103 
104  /* -- default values -- */
106  static unsigned const c_defaultLocalWorkSize;
108  static unsigned const c_defaultGlobalWorkSizeMultiplier;
110  static unsigned const c_defaultMSPerBatch;
111 
112 private:
113  static const unsigned int z_n = 200;
114  static const unsigned int z_k = 9;
115  static const size_t z_collision_bit_length = z_n / (z_k + 1);
116  static const eh_index z_N = 1 << (z_collision_bit_length + 1);
117 
/**
 * Lexicographically compare two runs of 32-bit indices.
 *
 * @param a                 first run; must hold at least n_current_indices elements
 * @param b                 second run; must hold at least n_current_indices elements
 * @param n_current_indices number of elements compared in each run
 * @return -1 if a orders before b, 1 if a orders after b, and 0 when every
 *         compared element is equal (which includes n_current_indices == 0)
 */
int compare_indices32(uint32_t* a, uint32_t* b, size_t n_current_indices) {
    for (size_t i = 0; i < n_current_indices; ++i) {
        if (a[i] < b[i]) {
            return -1;
        }
        if (a[i] > b[i]) {
            return 1;
        }
        // Equal at this position: the decision falls to the next element.
    }
    return 0;
}
130  void normalize_indices(uint32_t* indices) {
131  for(size_t step_index = 0; step_index < PARAM_K; ++step_index) {
132  for(size_t i = 0; i < (unsigned int)1<<PARAM_K; i += (1 << (step_index+1))) {
133  if(compare_indices32(indices+i, indices+i+(1 << step_index), (1 << step_index)) > 0) {
134  uint32_t tmp_indices[(1 << step_index)];
135  memcpy(tmp_indices, indices+i, (1 << step_index)*sizeof(uint32_t));
136  memcpy(indices+i, indices+i+(1 << step_index), (1 << step_index)*sizeof(uint32_t));
137  memcpy(indices+i+(1 << step_index), tmp_indices, (1 << step_index)*sizeof(uint32_t));
138  }
139  }
140  }
141  }
/**
 * Format a byte range as lowercase hexadecimal text.
 *
 * The result lives in a function-local static buffer of 1024 bytes: it is
 * overwritten by the next call and must not be freed. Because two characters
 * are emitted per input byte, at most 511 bytes are rendered; longer inputs
 * are silently truncated.
 *
 * @param _a    start of the bytes to dump
 * @param a_len number of bytes available at _a
 * @return NUL-terminated hex string (empty when a_len is 0)
 */
char *s_hexdump(const void *_a, uint32_t a_len)
{
    static const char hex_digits[] = "0123456789abcdef";
    static char buf[1024];
    const uint8_t *a = (const uint8_t *) _a;
    size_t out = 0;

    // Bound the *output* position (two chars per byte plus the terminator),
    // not the input index, so the writes can never run past buf.
    for (uint32_t i = 0; i < a_len && out + 2 < sizeof (buf); i++) {
        buf[out++] = hex_digits[a[i] >> 4];
        buf[out++] = hex_digits[a[i] & 0x0f];
    }
    buf[out] = 0;
    return buf;
}
153  {
154  size_t work_size =
155  64 * /* thread per wavefront */
156  BLAKE_WPS * /* wavefront per simd */
157  4 * /* simd per compute unit */
158  36;
159  // Make the work group size a multiple of the nr of wavefronts, while
160  // dividing the number of inputs. This results in the worksize being a
161  // power of 2.
162  while (NR_INPUTS() % work_size)
163  work_size += 64;
164  //debug("Blake: work size %zd\n", work_size);
165  return work_size;
166  }
/**
 * Order two adjacent runs of `len` values so the lexicographically smaller
 * run comes first.
 *
 * The runs are a[0..len) and a[len..2*len). Equal leading elements are left
 * alone; at the first difference the function either returns (already in
 * order) or swaps that element and every element after it.
 *
 * @param a   start of the first run; the second run follows it directly
 * @param len number of elements in each run
 */
void sort_pair(uint32_t *a, uint32_t len)
{
    uint32_t *upper = a + len;
    uint32_t pos = 0;

    // Skip the common prefix; swapping equal values would change nothing.
    while (pos < len && a[pos] == upper[pos])
        pos++;

    // Identical runs, or first run already smaller: nothing to do.
    if (pos == len || a[pos] < upper[pos])
        return;

    for (; pos < len; pos++) {
        const uint32_t held = a[pos];
        a[pos] = upper[pos];
        upper[pos] = held;
    }
}
182 
183  uint32_t verify_sol(sols_t *sols, unsigned sol_i) {
184  uint32_t *inputs = sols->values[sol_i];
185  uint32_t seen_len = (1 << (PREFIX() + 1)) / 8;
186  uint8_t seen[seen_len];
187  uint32_t i;
188  uint8_t tmp;
189  // look for duplicate inputs
190  memset(seen, 0, seen_len);
191  for (i = 0; i < ((unsigned int)1 << PARAM_K); i++)
192  {
193  tmp = seen[inputs[i] / 8];
194  seen[inputs[i] / 8] |= 1 << (inputs[i] & 7);
195  if (tmp == seen[inputs[i] / 8])
196  {
197  // at least one input value is a duplicate
198  sols->valid[sol_i] = 0;
199  return 0;
200  }
201  }
202  // the valid flag is already set by the GPU, but set it again because
203  // I plan to change the GPU code to not set it
204  sols->valid[sol_i] = 1;
205  // sort the pairs in place
206  for (uint32_t level = 0; level < PARAM_K; level++)
207  for (i = 0; i < ((unsigned int)1 << PARAM_K); i += (2 << level))
208  sort_pair(&inputs[i], 1 << level);
209  return 1;
210  }
213  std::vector<cl::Kernel> m_gpuKernels;
214  cl::Buffer buf_ht[2];
217  cl::Buffer rowCounters[2];
218 
219  uint64_t nonce;
220  uint64_t total;
221  size_t dbg_size = 1 * sizeof (debug_t);
222 
223  const cl_int zero = 0;
224  uint32_t solutions;
225  uint32_t * dst_solutions;
226 
229  unsigned m_deviceBits;
230 
232  unsigned int m_stepWorkSizeAdjust;
234  int m_wayWorkSizeAdjust = 0;
235 
237  static unsigned s_workgroupSize;
239  static unsigned s_initialGlobalWorkSize;
241  static unsigned s_msPerBatch;
243  static bool s_allowCPU;
246  static unsigned s_extraRequiredGPUMem;
247 
248 public:
249  unsigned int PARAM_N;
250  unsigned int PARAM_K;
251  unsigned int NR_ROWS_LOG;
252 
253  unsigned int PREFIX()
254  {
255  return (PARAM_N / (PARAM_K + 1));
256  }
257 
258  unsigned int NR_INPUTS()
259  {
260  return 1 << PREFIX();
261  }
262 
263  unsigned int APX_NR_ELMS_LOG()
264  {
265  return PREFIX() + 1;
266  }
267 
268  unsigned int COLL_DATA_SIZE_PER_TH()
269  {
270  return NR_SLOTS() * 5;
271  }
272 
273  unsigned int OVERHEAD()
274  {
275  if( NR_ROWS_LOG == 16 )
276  //error "NR_ROWS_LOG = 16 is currently broken - do not use"
277  return 3;
278  if( NR_ROWS_LOG == 18 )
279  return 3;
280  if( NR_ROWS_LOG == 19 )
281  return 5;
282  if( NR_ROWS_LOG == 20 )
283  return 9;
284  if( NR_ROWS_LOG == 21 )
285  return 3;
286  }
287 
288  unsigned long NR_ROWS()
289  {
290  return 1 << NR_ROWS_LOG;
291  }
292 
293  unsigned long NR_SLOTS()
294  {
295  return ((1 << (APX_NR_ELMS_LOG() - NR_ROWS_LOG)) * OVERHEAD());
296  }
297 
298  unsigned long HT_SIZE()
299  {
300  uint64_t htsize = NR_ROWS() * NR_SLOTS();
301  htsize *= SLOT_LEN;
302  return htsize;
303  }
304 
305  unsigned int FABCOIN_HASH_LEN()
306  {
307  return 512/PARAM_N*((PARAM_N+7)/8);
308  }
309 
310  unsigned int ROWS_PER_UINT()
311  {
312  if (NR_SLOTS() < 16)
313  return 8;
314  else
315  return 4;
316  }
317 
318  const char *get_error_string(cl_int error)
319  {
320  switch(error){
321  // run-time and JIT compiler errors
322  case 0: return "CL_SUCCESS";
323  case -1: return "CL_DEVICE_NOT_FOUND";
324  case -2: return "CL_DEVICE_NOT_AVAILABLE";
325  case -3: return "CL_COMPILER_NOT_AVAILABLE";
326  case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
327  case -5: return "CL_OUT_OF_RESOURCES";
328  case -6: return "CL_OUT_OF_HOST_MEMORY";
329  case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE";
330  case -8: return "CL_MEM_COPY_OVERLAP";
331  case -9: return "CL_IMAGE_FORMAT_MISMATCH";
332  case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
333  case -11: return "CL_BUILD_PROGRAM_FAILURE";
334  case -12: return "CL_MAP_FAILURE";
335  case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
336  case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
337  case -15: return "CL_COMPILE_PROGRAM_FAILURE";
338  case -16: return "CL_LINKER_NOT_AVAILABLE";
339  case -17: return "CL_LINK_PROGRAM_FAILURE";
340  case -18: return "CL_DEVICE_PARTITION_FAILED";
341  case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
342 
343  // compile-time errors
344  case -30: return "CL_INVALID_VALUE";
345  case -31: return "CL_INVALID_DEVICE_TYPE";
346  case -32: return "CL_INVALID_PLATFORM";
347  case -33: return "CL_INVALID_DEVICE";
348  case -34: return "CL_INVALID_CONTEXT";
349  case -35: return "CL_INVALID_QUEUE_PROPERTIES";
350  case -36: return "CL_INVALID_COMMAND_QUEUE";
351  case -37: return "CL_INVALID_HOST_PTR";
352  case -38: return "CL_INVALID_MEM_OBJECT";
353  case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
354  case -40: return "CL_INVALID_IMAGE_SIZE";
355  case -41: return "CL_INVALID_SAMPLER";
356  case -42: return "CL_INVALID_BINARY";
357  case -43: return "CL_INVALID_BUILD_OPTIONS";
358  case -44: return "CL_INVALID_PROGRAM";
359  case -45: return "CL_INVALID_PROGRAM_EXECUTABLE";
360  case -46: return "CL_INVALID_KERNEL_NAME";
361  case -47: return "CL_INVALID_KERNEL_DEFINITION";
362  case -48: return "CL_INVALID_KERNEL";
363  case -49: return "CL_INVALID_ARG_INDEX";
364  case -50: return "CL_INVALID_ARG_VALUE";
365  case -51: return "CL_INVALID_ARG_SIZE";
366  case -52: return "CL_INVALID_KERNEL_ARGS";
367  case -53: return "CL_INVALID_WORK_DIMENSION";
368  case -54: return "CL_INVALID_WORK_GROUP_SIZE";
369  case -55: return "CL_INVALID_WORK_ITEM_SIZE";
370  case -56: return "CL_INVALID_GLOBAL_OFFSET";
371  case -57: return "CL_INVALID_EVENT_WAIT_LIST";
372  case -58: return "CL_INVALID_EVENT";
373  case -59: return "CL_INVALID_OPERATION";
374  case -60: return "CL_INVALID_GL_OBJECT";
375  case -61: return "CL_INVALID_BUFFER_SIZE";
376  case -62: return "CL_INVALID_MIP_LEVEL";
377  case -63: return "CL_INVALID_GLOBAL_WORK_SIZE";
378  case -64: return "CL_INVALID_PROPERTY";
379  case -65: return "CL_INVALID_IMAGE_DESCRIPTOR";
380  case -66: return "CL_INVALID_COMPILER_OPTIONS";
381  case -67: return "CL_INVALID_LINKER_OPTIONS";
382  case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT";
383 
384  // extension errors
385  case -1000: return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR";
386  case -1001: return "CL_PLATFORM_NOT_FOUND_KHR";
387  case -1002: return "CL_INVALID_D3D10_DEVICE_KHR";
388  case -1003: return "CL_INVALID_D3D10_RESOURCE_KHR";
389  case -1004: return "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR";
390  case -1005: return "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR";
391  case -9999: return "NVIDIA: ILLEGAL READ OR WRITE TO A BUFFER";
392  default:
393  fprintf(stderr, "'%d'\n", error);
394  return "Unknown OpenCL error";
395  }
396  }
397 };
unsigned int OVERHEAD()
Definition: libclwrapper.h:273
bool error(const char *fmt, const Args &...args)
Definition: util.h:178
#define function(a, b, c, d, k, s)
unsigned int m_stepWorkSizeAdjust
The step used in the work size adjustment.
Definition: libclwrapper.h:232
cl::CommandQueue m_queue
Definition: libclwrapper.h:212
static unsigned const c_defaultMSPerBatch
Default value of the milliseconds per global work size (per batch)
Definition: libclwrapper.h:110
unsigned m_globalWorkSize
Definition: libclwrapper.h:227
#define BLAKE_WPS
Definition: libclwrapper.h:11
static unsigned s_msPerBatch
The target milliseconds per batch for the search. If 0, then no adjustment will happen.
Definition: libclwrapper.h:241
#define APX_NR_ELMS_LOG
Definition: param.h:8
unsigned long NR_SLOTS()
Definition: libclwrapper.h:293
unsigned long HT_SIZE()
Definition: libclwrapper.h:298
uint32_t uint
Definition: libclwrapper.h:40
unsigned int FABCOIN_HASH_LEN()
Definition: libclwrapper.h:305
#define NR_ROWS
Definition: param.h:48
#define NR_SLOTS
Definition: param.h:49
uint32_t parent_bucket_index
Definition: libclwrapper.h:51
static unsigned s_extraRequiredGPUMem
GPU memory required for other things, like window rendering e.t.c.
Definition: libclwrapper.h:246
static unsigned const c_defaultLocalWorkSize
Default value of the local work size. Also known as workgroup size.
Definition: libclwrapper.h:106
char * s_hexdump(const void *_a, uint32_t a_len)
Definition: libclwrapper.h:142
#define NR_ROWS_LOG
Definition: param.h:10
uint32_t solutions
Definition: libclwrapper.h:224
unsigned long NR_ROWS()
Definition: libclwrapper.h:288
uint8_t valid[MAX_SOLS]
Definition: libclwrapper.h:17
uint64_t ulong
Definition: libclwrapper.h:41
int compare_indices32(uint32_t *a, uint32_t *b, size_t n_current_indices)
Definition: libclwrapper.h:118
unsigned int APX_NR_ELMS_LOG()
Definition: libclwrapper.h:263
unsigned int COLL_DATA_SIZE_PER_TH()
Definition: libclwrapper.h:268
void sort_pair(uint32_t *a, uint32_t len)
Definition: libclwrapper.h:167
static bool s_allowCPU
Allow CPU to appear as an OpenCL device or not. Default is false.
Definition: libclwrapper.h:243
#define a(i)
C++ bindings for OpenCL 1.0 (rev 48) and OpenCL 1.1 (rev 33)
uint32_t digest_index
Definition: libclwrapper.h:50
std::vector< cl::Kernel > m_gpuKernels
Definition: libclwrapper.h:213
uint8_t uchar
Definition: libclwrapper.h:39
uint32_t * dst_solutions
Definition: libclwrapper.h:225
void normalize_indices(uint32_t *indices)
Definition: libclwrapper.h:130
const char * get_error_string(cl_int error)
Definition: libclwrapper.h:318
uint32_t eh_index
Definition: libclwrapper.h:66
cl::Buffer buf_sols
Definition: libclwrapper.h:215
uint64_t nonce
Definition: libclwrapper.h:219
#define b(i, j)
cl::Context m_context
Definition: libclwrapper.h:211
unsigned int ROWS_PER_UINT()
Definition: libclwrapper.h:310
uint likely_invalids
Definition: libclwrapper.h:16
unsigned m_deviceBits
Definition: libclwrapper.h:229
uint64_t total
Definition: libclwrapper.h:220
unsigned size
Definition: libclwrapper.h:56
#define MAX_SOLS
Definition: libclwrapper.h:10
cl::Buffer buf_dbg
Definition: libclwrapper.h:216
256-bit opaque blob.
Definition: uint256.h:132
CommandQueue interface for cl_command_queue.
Definition: cl.hpp:2566
unsigned int NR_INPUTS()
Definition: libclwrapper.h:258
uint nr
Definition: libclwrapper.h:15
#define NR_INPUTS
Definition: param.h:6
void * memcpy(void *a, const void *b, size_t c)
bool m_openclOnePointOne
Definition: libclwrapper.h:228
struct bucket bucket_t
struct debug_s debug_t
#define OVERHEAD
Definition: param.h:43
uint32_t dropped_stor
Definition: libclwrapper.h:63
uint values[MAX_SOLS][512]
Definition: libclwrapper.h:18
unsigned int PARAM_K
Definition: libclwrapper.h:250
Memory buffer interface.
Definition: cl.hpp:1748
size_t select_work_size_blake(void)
Definition: libclwrapper.h:152
uint32_t verify_sol(sols_t *sols, unsigned sol_i)
Definition: libclwrapper.h:183
unsigned int NR_ROWS_LOG
Definition: libclwrapper.h:251
#define SLOT_LEN
Definition: libclwrapper.h:7
#define PREFIX
Definition: param.h:5
static unsigned s_workgroupSize
The local work size for the search.
Definition: libclwrapper.h:237
Device interface for cl_device_id.
Definition: cl.hpp:1190
uint32_t eh_index
Definition: equihash.h:25
uint32_t dropped_coll
Definition: libclwrapper.h:62
#define PARAM_K
Definition: param.h:4
unsigned int PREFIX()
Definition: libclwrapper.h:253
uint8_t const * data
Definition: sha3.h:19
static unsigned const c_defaultGlobalWorkSizeMultiplier
Default value of the global work size as a multiplier of the local work size.
Definition: libclwrapper.h:108
struct sols_s sols_t
static unsigned s_initialGlobalWorkSize
The initial global work size for the searches.
Definition: libclwrapper.h:239
unsigned int PARAM_N
Definition: libclwrapper.h:249